diff --git a/DESCRIPTION b/DESCRIPTION index 3b8532d..4d3decd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.2.23 +Version: 0.2.32 Authors@R: c( person(given = "Martin", family = "Grasshoff", @@ -20,21 +20,38 @@ Encoding: UTF-8 LazyData: true Imports: archive, - bigmemory, - circlize, + BiocGenerics, ComplexHeatmap, + circlize, + data.table, dplyr, fastmatch, + GenomeInfoDb, ggplot2, + ggsci, + glue, grid, - magrittr, Matrix, + MatrixGenerics, + magrittr, + methods, parallel, rcompanion, S4Vectors, Seurat, SummarizedExperiment, + scales, + tibble, tidyr, tidyverse, VariantAnnotation RoxygenNote: 7.2.3 +Suggests: + GenomicRanges, + IRanges, + knitr, + rmarkdown, + SeuratObject, + testthat (>= 3.0.0) +VignetteBuilder: knitr +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index d71eb89..48515c2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(GetCellInfoPerVariant) export(GetVariantInfo) export(HeatmapVoi) export(LoadingMAEGATK_typewise) +export(LoadingVCF_typewise) export(LoadingVarTrix_typewise) export(Merging_SE_list) export(RowWiseSplit) @@ -27,10 +28,12 @@ export(VariantFisherTestHeatmap) export(VariantQuantileThresholding) export(VariantWiseCorrelation) export(VariantWiseFisherTest) +export(char_to_numeric) export(combine_NAMES) export(combine_SparseMatrix) export(computeAFMutMatrix) export(getAltMatrix) +export(getMutMatrix) export(getReadMatrix) export(getRefMatrix) export(get_consensus) @@ -39,26 +42,54 @@ export(load_object) export(save_object) export(sdiv) import(BiocGenerics) -import(ComplexHeatmap) -import(Matrix) -import(MatrixGenerics) -import(Seurat) -import(SummarizedExperiment) -import(VariantAnnotation) import(archive) -import(assertthat) -import(circlize) -import(data.table) -import(dplyr) -import(fastmatch) -import(ggplot2) import(ggsci) -import(glue) -import(grid) -import(parallel) -import(rcompanion) -import(scales) 
-import(stats) -import(tibble) -import(tidyr) -import(tidyverse) +importFrom(BiocGenerics,start) +importFrom(ComplexHeatmap,Heatmap) +importFrom(ComplexHeatmap,columnAnnotation) +importFrom(ComplexHeatmap,draw) +importFrom(ComplexHeatmap,rowAnnotation) +importFrom(GenomeInfoDb,seqnames) +importFrom(Matrix,colMeans) +importFrom(Matrix,colSums) +importFrom(Matrix,readMM) +importFrom(Matrix,rowMeans) +importFrom(Matrix,rowSums) +importFrom(Matrix,sparseMatrix) +importFrom(Matrix,summary) +importFrom(Matrix,t) +importFrom(S4Vectors,merge) +importFrom(Seurat,AddMetaData) +importFrom(SummarizedExperiment,SummarizedExperiment) +importFrom(SummarizedExperiment,assays) +importFrom(SummarizedExperiment,colData) +importFrom(SummarizedExperiment,rowData) +importFrom(SummarizedExperiment,rowRanges) +importFrom(VariantAnnotation,alt) +importFrom(VariantAnnotation,info) +importFrom(VariantAnnotation,readGeno) +importFrom(VariantAnnotation,readVcf) +importFrom(VariantAnnotation,ref) +importFrom(circlize,colorRamp2) +importFrom(data.table,data.table) +importFrom(dplyr,"%>%") +importFrom(dplyr,left_join) +importFrom(dplyr,sym) +importFrom(glue,glue) +importFrom(grDevices,dev.off) +importFrom(grDevices,png) +importFrom(grid,gpar) +importFrom(grid,unit) +importFrom(methods,as) +importFrom(parallel,mclapply) +importFrom(scales,hue_pal) +importFrom(stats,cor.test) +importFrom(stats,fisher.test) +importFrom(stats,na.omit) +importFrom(stats,p.adjust) +importFrom(stats,quantile) +importFrom(tibble,as_tibble) +importFrom(tibble,tibble) +importFrom(utils,read.csv) +importFrom(utils,read.table) +importFrom(utils,tail) diff --git a/R/AmpliconSupplementing.R b/R/AmpliconSupplementing.R index 28eb7d2..bcf7507 100644 --- a/R/AmpliconSupplementing.R +++ b/R/AmpliconSupplementing.R @@ -2,83 +2,89 @@ #' #'@description #'We replace the values from an scRNAseq experiment with values we have from an amplicon experiment. 
-#' -#'@import archive Matrix SummarizedExperiment VariantAnnotation +#'@importFrom S4Vectors merge +#'@importFrom SummarizedExperiment colData rowData assays SummarizedExperiment +#'@importFrom stats na.omit #'@param scRNAseq The SummarizedExperiment object containing the scRNAseq data. #'@param amplicon The SummarizedExperiment object containing the amplicon data. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -AmpliconSupplementing <- function(scRNAseq, amplicon){ +AmpliconSupplementing <- function(scRNAseq, amplicon, verbose = TRUE){ # We supplement the scRNAseq data with the amplicon data. - print("We get the new meta data.") - new_meta_data <- merge(colData(scRNAseq), colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE, - suffixes = c("scRNAseq", "Amplicon")) + if(verbose) print("We get the new meta data.") + new_meta_data <- S4Vectors::merge(SummarizedExperiment::colData(scRNAseq), SummarizedExperiment::colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE, + suffixes = c("scRNAseq", "Amplicon")) rownames(new_meta_data) <- new_meta_data$Cell # We add an AverageCoverage column to the new meta data. new_meta_data$AverageCoverage <- new_meta_data$AverageCoveragescRNAseq amplicon_value <- new_meta_data$AverageCoverageAmplicon names(amplicon_value) <- colnames(amplicon) - amplicon_value <- na.omit(amplicon_value) + amplicon_value <- stats::na.omit(amplicon_value) new_meta_data[names(amplicon_value), "AverageCoverage"] <- amplicon_value - new_row_data <- merge(rowData(scRNAseq), rowData(amplicon), by = "VariantName", all.x = TRUE, all.y = TRUE, + new_row_data <- merge(SummarizedExperiment::rowData(scRNAseq), SummarizedExperiment::rowData(amplicon), by = "VariantName", all.x = TRUE, all.y = TRUE, suffixes = c("scRNAseq", "Amplicon")) rownames(new_row_data) <- new_row_data$VariantName - # We add a VariantQuality column to the row data, showing the scRNAseq quality with the supplemented amplicon quality. 
- # We do the same for the concordance and the depth. - new_row_data$VariantQuality <- new_row_data$VariantQualityscRNAseq - amplicon_value <- rowData(amplicon)$VariantQuality - names(amplicon_value) <- rownames(amplicon) - amplicon_value <- na.omit(amplicon_value) - new_row_data[names(amplicon_value), "VariantQuality"] <- amplicon_value - - new_row_data$Concordance <- new_row_data$ConcordancescRNAseq - amplicon_value <- rowData(amplicon)$Concordance - names(amplicon_value) <- rownames(amplicon) - amplicon_value <- na.omit(amplicon_value) - new_row_data[names(amplicon_value), "Concordance"] <- amplicon_value - + if("VariantQualityscRNAseq" %in% colnames(new_row_data)){ + # We add a VariantQuality column to the row data, showing the scRNAseq quality with the supplemented amplicon quality. + # We do the same for the concordance and the depth. + new_row_data$VariantQuality <- new_row_data$VariantQualityscRNAseq + amplicon_value <- SummarizedExperiment::rowData(amplicon)$VariantQuality + names(amplicon_value) <- rownames(amplicon) + amplicon_value <- stats::na.omit(amplicon_value) + new_row_data[names(amplicon_value), "VariantQuality"] <- amplicon_value + } + + if("ConcordancescRNAseq" %in% colnames(new_row_data)){ + new_row_data$Concordance <- new_row_data$ConcordancescRNAseq + amplicon_value <- SummarizedExperiment::rowData(amplicon)$Concordance + names(amplicon_value) <- rownames(amplicon) + amplicon_value <- stats::na.omit(amplicon_value) + new_row_data[names(amplicon_value), "Concordance"] <- amplicon_value + } + new_row_data$Depth <- new_row_data$DepthscRNAseq - amplicon_value <- rowData(amplicon)$Depth + amplicon_value <- SummarizedExperiment::rowData(amplicon)$Depth names(amplicon_value) <- rownames(amplicon) - amplicon_value <- na.omit(amplicon_value) + amplicon_value <- stats::na.omit(amplicon_value) new_row_data[names(amplicon_value), "Depth"] <- amplicon_value - print("We get all cells and variants.") + if(verbose) print("We get all cells and variants.") 
all_cells <- unique(c(colnames(scRNAseq), colnames(amplicon))) all_variants <- unique(c(rownames(scRNAseq), rownames(amplicon))) new_meta_data <- new_meta_data[all_cells,] new_row_data <- new_row_data[all_variants,] - print("We generate our output matrices.") + if(verbose) print("We generate our output matrices.") consensus <- matrix(0, ncol = length(all_cells), nrow = length(all_variants)) rownames(consensus) <- all_variants colnames(consensus) <- all_cells - #consensus <- sparseMatrix(i = 1, j = 1, dims = c(length(all_variants), length(all_cells)), repr = "C") + #consensus <- Matrix::sparseMatrix(i = 1, j = 1, dims = c(length(all_variants), length(all_cells)), repr = "C") fraction <- consensus reads <- consensus alts <- consensus refs <- consensus - print("We fill the output matrices.") - consensus[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$consensus) - fraction[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$fraction) - reads[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$coverage) - alts[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$alts) - refs[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$refs) + if(verbose) print("We fill the output matrices.") + consensus[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$consensus) + fraction[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$fraction) + reads[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$coverage) + alts[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$alts) + refs[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$refs) - print("We add the the amplicon information.") - consensus[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$consensus) - 
fraction[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$fraction) - reads[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$coverage) - alts[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$alts) - refs[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$refs) + if(verbose) print("We add the the amplicon information.") + consensus[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$consensus) + fraction[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$fraction) + reads[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$coverage) + alts[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$alts) + refs[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$refs) - #print("We add the the amplicon information.") - #assays(scRNAseq)[["consensus"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$consensus) - #assays(scRNAseq)[["fraction"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$fraction) - #assays(scRNAseq)[["coverage"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$coverage) + #if(verbose) print("We add the the amplicon information.") + #SummarizedExperiment::assays(scRNAseq)[["consensus"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$consensus) + #SummarizedExperiment::assays(scRNAseq)[["fraction"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$fraction) + #SummarizedExperiment::assays(scRNAseq)[["coverage"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$coverage) - se <- SummarizedExperiment(assays = list(consensus = as(consensus, "dgCMatrix"), fraction = as(fraction, 
"dgCMatrix"), coverage = as(reads, "dgCMatrix"), alts = as(alts, "dgCMatrix"), refs = as(refs, "dgCMatrix")), - colData = new_meta_data, rowData = new_row_data) + se <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = methods::as(consensus, "dgCMatrix"), fraction = methods::as(fraction, "dgCMatrix"), coverage = methods::as(reads, "dgCMatrix"), alts = methods::as(alts, "dgCMatrix"), refs = methods::as(refs, "dgCMatrix")), + colData = new_meta_data, rowData = new_row_data) return(se) } diff --git a/R/CalculateAlleleFrequency.R b/R/CalculateAlleleFrequency.R index 1e79c2d..6a6f36c 100644 --- a/R/CalculateAlleleFrequency.R +++ b/R/CalculateAlleleFrequency.R @@ -1,11 +1,13 @@ #'Calculating the Minor Allele Frequency. #'@description -#'We calculate the MAF for the MAEGATK results. -#'@import MatrixGenerics SummarizedExperiment +#'We calculate the MAF from a reference reads matrix and an alternative reads matrix. +#'This function is intended to be used with the mitochondrial genome and not with other somatic mutations. +# #'@import MatrixGenerics SummarizedExperiment #'@param reference_reads Reference reads matrix. #'@param alternative_reads List of matrices for the alternative reads. +#'@param pseudo_count = What is the pseudo count you want to add to the reference_reads matrix. Default = 0 #'@export -CalculateAlleleFrequency <- function(reference_reads, alternative_reads){ +CalculateAlleleFrequency <- function(reference_reads, alternative_reads, pseudo_count = 0){ # We remove the potential N at position 3107 of the human genome. alternative_reads <- alternative_reads[!grepl("_N", rownames(alternative_reads)),] # We get the first part of the ref row name. This includes the position and the ref allele. @@ -19,7 +21,7 @@ CalculateAlleleFrequency <- function(reference_reads, alternative_reads){ # We match the new ref reads matrices to be the same order as the alt read matrices. 
reference_reads <- reference_reads[match(rows_alt_reads, rows_ref_reads),] # We divide the alt matrix by alt + ref matrix. - allelefrequency <- as.matrix(alternative_reads / (alternative_reads + reference_reads)) + allelefrequency <- as.matrix(alternative_reads / (alternative_reads + reference_reads + pseudo_count)) allelefrequency[is.na(allelefrequency)] <- 0 rownames(allelefrequency) <- gsub(">", "_", rownames(allelefrequency)) return(allelefrequency) diff --git a/R/CalculateAltReads.R b/R/CalculateAltReads.R index 0b0c165..c8042ec 100644 --- a/R/CalculateAltReads.R +++ b/R/CalculateAltReads.R @@ -1,25 +1,26 @@ #'CalculateAltReads #'@description #'We calculate the number of reads covering a variant using forward and reverse reads. -#'@import MatrixGenerics SummarizedExperiment +# #'@import MatrixGenerics +#'@importFrom SummarizedExperiment SummarizedExperiment assays rowRanges #'@param SE SummarizedExperiment object. #'@param chromosome_prefix List of matrices for the alternative reads. 
#'@export CalculateAltReads <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - reads_A <- assays(SE)[["A_counts_fw"]] + assays(SE)[["A_counts_rev"]] + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + reads_A <- SummarizedExperiment::assays(SE)[["A_counts_fw"]] + SummarizedExperiment::assays(SE)[["A_counts_rev"]] rownames(reads_A) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A), "_", ref_allele, "_A") reads_A <- reads_A[ref_allele != "A",] - reads_C <- assays(SE)[["C_counts_fw"]] + assays(SE)[["C_counts_rev"]] + reads_C <- SummarizedExperiment::assays(SE)[["C_counts_fw"]] + SummarizedExperiment::assays(SE)[["C_counts_rev"]] rownames(reads_C) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C), "_", ref_allele, "_C") reads_C <- reads_C[ref_allele != "C",] - reads_G <- assays(SE)[["G_counts_fw"]] + assays(SE)[["G_counts_rev"]] + reads_G <- SummarizedExperiment::assays(SE)[["G_counts_fw"]] + SummarizedExperiment::assays(SE)[["G_counts_rev"]] rownames(reads_G) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G), "_", ref_allele, "_G") reads_G <- reads_G[ref_allele != "G",] - reads_T <- assays(SE)[["T_counts_fw"]] + assays(SE)[["T_counts_rev"]] + reads_T <- SummarizedExperiment::assays(SE)[["T_counts_fw"]] + SummarizedExperiment::assays(SE)[["T_counts_rev"]] rownames(reads_T) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T), "_", ref_allele, "_T") reads_T <- reads_T[ref_allele != "T",] diff --git a/R/CalculateConsensus.R b/R/CalculateConsensus.R index 84172fb..7bedf90 100644 --- a/R/CalculateConsensus.R +++ b/R/CalculateConsensus.R @@ -1,17 +1,32 @@ +#'CalculateConsensus +#'@description #'We calculate the consensus information from the MAEGATK results. -#'@import dplyr MatrixGenerics SummarizedExperiment +#'We set cells that have only alternative reads to 2 (Alternative). +#'We set cells that have only reference reads to 1 (Reference). 
+#'We set cells that have a mixture of alternative and reference reads to 3 (Both). +#'We set cells that have no reads to 0 (NoCall). +#' +#'Please note. Cells can have reads for the reference of a specific variant and no reads for the alternative. +#'The cell can still have reads for the other alternative alleles. Then the cell is still considered as 0 (NoCall) for this variant. +#'For example: +#'A cell has at position 3: 0 A reads, 53 T reads, 63 C reads, 148 G reads. +#'For the variant chrM_3_T_A, the cell would have 53 reference reads, but also reads for other variants at this position. +#'To make sure that there is no confusion, the cell is set to NoCall. +# #'@import MatrixGenerics +#'@importFrom SummarizedExperiment rowRanges #'@param SE SummarizedExperiment object. #'@param chromosome_prefix The chromosome name used as a prefix. +#'@param verbose Should the function be verbose? Default = FALSE #'@export -CalculateConsensus <- function(SE, chromosome_prefix = "chrM"){ +CalculateConsensus <- function(SE, chromosome_prefix = "chrM", verbose = FALSE){ # 0 NoCall = coverage is 0. # 1 Reference = only reference reads. # 2 Alternative = only alternative reads of one variant. # 3 Both = reads for reference and one or more variants. - - print("We get the read information per position.") + + if(verbose) print("We get the read information per position.") letter <- c("A", "C", "G", "T") - ref_allele <- as.character(rowRanges(SE)$refAllele) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) reads <- lapply(letter, getReadMatrix, SE = SE, chromosome_prefix = chromosome_prefix) # Since we have always the same 4 bases, we get all possible combinations by assigning numeric values. # A = 8, C = 4, G = 2, T = 1.
@@ -21,44 +36,54 @@ CalculateConsensus <- function(SE, chromosome_prefix = "chrM"){ reads[[2]][reads[[2]] > 0] <- 4 reads[[3]][reads[[3]] > 0] <- 2 reads[[4]][reads[[4]] > 0] <- 1 - print("We add the values together.") + if(verbose) print("We add the values together.") # The row names are the names from the first matrix and not accurate any more. # The only relevant parts are the position and the reference base. variants_matrix <- reads[[1]] + reads[[2]] + reads[[3]] + reads[[4]] rm(reads) gc() - - print("We get the position according to their reference base.") + + if(verbose) print("We get the position according to their reference base.") # Now, we have a list for each set of position with the same base reference. - variants_matrix_ls <- list(A = variants_matrix[grep("_A_", rownames(variants_matrix), value = TRUE),], - C = variants_matrix[grep("_C_", rownames(variants_matrix), value = TRUE),], - G = variants_matrix[grep("_G_", rownames(variants_matrix), value = TRUE),], - T = variants_matrix[grep("_T_", rownames(variants_matrix), value = TRUE),], - N = variants_matrix[grep("_N_", rownames(variants_matrix), value = TRUE),]) - variants_matrix_ls[["N"]] <- matrix(variants_matrix_ls[["N"]], nrow = 1, ncol = length(variants_matrix_ls[["N"]])) - colnames(variants_matrix_ls[["N"]]) <- colnames(variants_matrix) - rownames(variants_matrix_ls[["N"]]) <- paste0(chromosome_prefix, "_3107_N_A") + variants_matrix_ls <- list(A = variants_matrix[grep("_A_", rownames(variants_matrix), value = TRUE), , drop = FALSE], + C = variants_matrix[grep("_C_", rownames(variants_matrix), value = TRUE), , drop = FALSE], + G = variants_matrix[grep("_G_", rownames(variants_matrix), value = TRUE), , drop = FALSE], + T = variants_matrix[grep("_T_", rownames(variants_matrix), value = TRUE), , drop = FALSE], + N = variants_matrix[grep("_N_", rownames(variants_matrix), value = TRUE), , drop = FALSE]) + # We check if the N reference is even used. 
If the variants_matrix_ls[["N"]] is empty (zero rows), we do not perform the consensus determination. + n_binding <- FALSE + if(nrow(variants_matrix_ls[["N"]]) > 0){ + n_binding <- TRUE + variants_matrix_ls[["N"]] <- matrix(variants_matrix_ls[["N"]], nrow = 1, ncol = ncol(variants_matrix)) + colnames(variants_matrix_ls[["N"]]) <- colnames(variants_matrix) + rownames(variants_matrix_ls[["N"]]) <- paste0(chromosome_prefix, "_3107_N_A") + } rm(variants_matrix) gc() - - print("Now, we check the consensus value for all positions with the same reference base.") + + if(verbose) print("Now, we check the consensus value for all positions with the same reference base.") # Then we can rbind these matrices again and return one large consensus matrix in the end. - print("A") + if(verbose) print("A") consensus_a <- lapply(c("C", "G", "T"), get_consensus, ref_base = "A", input_matrix = as.matrix(variants_matrix_ls[[1]]), chromosome_prefix = chromosome_prefix) consensus_a <- do.call("rbind", consensus_a) - print("C") + if(verbose) print("C") consensus_c <- lapply(c("A", "G", "T"), get_consensus, ref_base = "C", input_matrix = as.matrix(variants_matrix_ls[[2]]), chromosome_prefix = chromosome_prefix) consensus_c <- do.call("rbind", consensus_c) - print("G") + if(verbose) print("G") consensus_g <- lapply(c("A", "C", "T"), get_consensus, ref_base = "G", input_matrix = as.matrix(variants_matrix_ls[[3]]), chromosome_prefix = chromosome_prefix) consensus_g <- do.call("rbind", consensus_g) - print("T") + if(verbose) print("T") consensus_t <- lapply(c("A", "C", "G"), get_consensus, ref_base = "T", input_matrix = as.matrix(variants_matrix_ls[[4]]), chromosome_prefix = chromosome_prefix) consensus_t <- do.call("rbind", consensus_t) - print("N") - consensus_n <- lapply(c("A", "C", "G", "T"), get_consensus, ref_base = "N", input_matrix = variants_matrix_ls[[5]], chromosome_prefix = chromosome_prefix) - consensus_n <- do.call("rbind", consensus_n) - print("Binding the matrices.") - consensus 
<- rbind(consensus_a, consensus_c, consensus_g, consensus_t, consensus_n) + if(n_binding){ + if(verbose) print("N") + consensus_n <- lapply(c("A", "C", "G", "T"), get_consensus, ref_base = "N", input_matrix = variants_matrix_ls[[5]], chromosome_prefix = chromosome_prefix) + consensus_n <- do.call("rbind", consensus_n) + } else{ + if(verbose) print("N reference not present.") + } + if(verbose) print("Binding the matrices.") + consensus <- rbind(consensus_a, consensus_c, consensus_g, consensus_t) + if(n_binding) consensus <- rbind(consensus, consensus_n) return(consensus) } diff --git a/R/CalculateCorrelationPValue.R b/R/CalculateCorrelationPValue.R index 4f63056..0a0e947 100644 --- a/R/CalculateCorrelationPValue.R +++ b/R/CalculateCorrelationPValue.R @@ -2,7 +2,7 @@ #' #'@description #'We perform the correlation of SNVs and calculate the P values. -#'@import stats +#'@importFrom stats cor.test #'@param variant_values The fraction values you are analysing. A vector. #'@param other_mutation All other variants you have. A vector of variant names. #'@param all_variants_list List of fraction values for all the variants you want to compare your variant with. @@ -27,7 +27,7 @@ CalculateCorrelationPValue <- function(variant_values, other_mutation, all_varia result <- c(NA,NA,NA,NA,NA,NA) return(result) } else if(length(variant_values) > 2){ - result <- cor.test(variant_values, other_variant_values) + result <- stats::cor.test(variant_values, other_variant_values) cells_som_alt <- sum(variant_values == 1) cells_som_ref <- sum(variant_values == 0) cells_MT_alt <- sum(other_variant_values == 1) diff --git a/R/CalculateCoverage.R b/R/CalculateCoverage.R index 9ef7f5a..a82c116 100644 --- a/R/CalculateCoverage.R +++ b/R/CalculateCoverage.R @@ -1,13 +1,14 @@ #'CalculateCoverage #'@description #'We calculate the coverage information per variant from the MAEGATK results. 
-#'@import MatrixGenerics SummarizedExperiment +# #'@import MatrixGenerics +#'@importFrom SummarizedExperiment rowRanges assays #'@param SE SummarizedExperiment object. #'@param chromosome_prefix List of matrices for the alternative reads. #'@export CalculateCoverage <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - coverage <- assays(SE)[["coverage"]] + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + coverage <- SummarizedExperiment::assays(SE)[["coverage"]] rownames(coverage) <- paste0(chromosome_prefix, "_", 1:nrow(coverage), "_", ref_allele, "_A") coverage_A <- coverage[ref_allele != "A",] diff --git a/R/CalculateFisherTestPValue.R b/R/CalculateFisherTestPValue.R index 3c7652e..c59726a 100644 --- a/R/CalculateFisherTestPValue.R +++ b/R/CalculateFisherTestPValue.R @@ -2,7 +2,7 @@ #' #'@description #'We perform the Fisher Test of SNVs and calculate the P values. -#'@import stats +#'@importFrom stats fisher.test #'@param variant_values The fraction values you are analysing. A vector. #'@param other_mutation All other variants you have. A vector of variant names. #'@param all_variants_list List of fraction values for all the variants you want to compare your variant with. 
@@ -39,7 +39,7 @@ CalculateFisherTestPValue <- function(variant_values, other_mutation, all_varian count_matrix[2,1] <- sum(variant_values == 1 & other_variant_values == 0) count_matrix[1,2] <- sum(variant_values == 0 & other_variant_values == 1) count_matrix[2,2] <- sum(variant_values == 0 & other_variant_values == 0) - result <- fisher.test(x = count_matrix) + result <- stats::fisher.test(x = count_matrix) result <- c(result$p.value, result$estimate, count_matrix[1,1], count_matrix[2,1], count_matrix[1,2], count_matrix[2,2]) } else{ #print("We do not have more than 2 cells for the somatic variant.") diff --git a/R/CalculateQuality.R b/R/CalculateQuality.R index 5467d23..b3713f5 100644 --- a/R/CalculateQuality.R +++ b/R/CalculateQuality.R @@ -1,17 +1,21 @@ #'CalculateQuality #'@description #'We calculate the quality per variant. -#'@import MatrixGenerics SummarizedExperiment +# #'@import MatrixGenerics +#'@importFrom SummarizedExperiment assays +#'@importFrom Matrix rowSums +#'@importFrom utils tail #'@param SE SummarizedExperiment object. +#'@param variants The variants you want to get the quality for. #'@param chromosome_prefix List of matrices for the alternative reads. 
#'@export -CalculateQuality <- function(SE, variants = rownames(reads_alt), chromosome_prefix = "chrM"){ +CalculateQuality <- function(SE, variants, chromosome_prefix = "chrM"){ variants <- gsub(paste0(chromosome_prefix, "_"), "", variants) qualities <- lapply(c("A", "T", "C", "G"), function(x){ - fwrev <- cbind(assays(SE)[[paste0(x, "_counts_fw")]], assays(SE)[[paste0(x, "_counts_rev")]]) - qualities_fwrev <- cbind(assays(SE)[[paste0(x, "_qual_fw")]], assays(SE)[[paste0(x, "_qual_rev")]]) + fwrev <- cbind(SummarizedExperiment::assays(SE)[[paste0(x, "_counts_fw")]], SummarizedExperiment::assays(SE)[[paste0(x, "_counts_rev")]]) + qualities_fwrev <- cbind(SummarizedExperiment::assays(SE)[[paste0(x, "_qual_fw")]], SummarizedExperiment::assays(SE)[[paste0(x, "_qual_rev")]]) variants_use <- strsplit(variants, "") - variants_use <- sapply(variants_use, tail, n = 1) + variants_use <- sapply(variants_use, utils::tail, n = 1) variants_use <- variants_use == x variants_use_names <- variants[variants_use] variants_use <- as.numeric(gsub("_.*", "", variants_use_names)) @@ -22,7 +26,7 @@ CalculateQuality <- function(SE, variants = rownames(reads_alt), chromosome_pref rownames(qualities_fwrev) <- variants_use_names fwrev <- fwrev > 0 qualities_fwrev <- qualities_fwrev * fwrev - qualities <- apply(qualities_fwrev, 1, sum) / rowSums(fwrev > 0) + qualities <- apply(qualities_fwrev, 1, sum) / Matrix::rowSums(fwrev > 0) qualities[qualities == 0] <- NA return(qualities) }) diff --git a/R/CalculateStrandCorrelation.R b/R/CalculateStrandCorrelation.R index 1731609..575ec70 100644 --- a/R/CalculateStrandCorrelation.R +++ b/R/CalculateStrandCorrelation.R @@ -1,79 +1,85 @@ #'CalculateStrandCorrelation #'@description #'We calculate the correlation between the amount of forward and reverse reads per variant. 
-#'@import MatrixGenerics SummarizedExperiment data.table +#'@importFrom SummarizedExperiment rowRanges assays +#'@importFrom data.table data.table +#'@importFrom dplyr sym #'@param SE SummarizedExperiment object. #'@param chromosome_prefix List of matrices for the alternative reads. #'@export CalculateStrandCorrelation <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) variants_A <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_A") variants_A <- variants_A[ref_allele != "A"] - reads_A_fw <- assays(SE)[["A_counts_fw"]] - reads_A_rev <- assays(SE)[["A_counts_rev"]] + reads_A_fw <- SummarizedExperiment::assays(SE)[["A_counts_fw"]] + reads_A_rev <- SummarizedExperiment::assays(SE)[["A_counts_rev"]] rownames(reads_A_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A_fw), "_", ref_allele, "_A") rownames(reads_A_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A_rev), "_", ref_allele, "_A") reads_A_fw <- reads_A_fw[ref_allele != "A",] reads_A_rev <- reads_A_rev[ref_allele != "A",] - dt <- merge(data.table(summary(reads_A_fw)), - data.table(summary(reads_A_rev)), + dt <- merge(data.table::data.table(summary(reads_A_fw)), + data.table::data.table(summary(reads_A_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_A[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_A <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_A[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_A <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_C <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_C") 
variants_C <- variants_C[ref_allele != "C"] - reads_C_fw <- assays(SE)[["C_counts_fw"]] - reads_C_rev <- assays(SE)[["C_counts_rev"]] + reads_C_fw <- SummarizedExperiment::assays(SE)[["C_counts_fw"]] + reads_C_rev <- SummarizedExperiment::assays(SE)[["C_counts_rev"]] rownames(reads_C_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C_fw), "_", ref_allele, "_C") rownames(reads_C_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C_rev), "_", ref_allele, "_C") reads_C_fw <- reads_C_fw[ref_allele != "C",] reads_C_rev <- reads_C_rev[ref_allele != "C",] - dt <- merge(data.table(summary(reads_C_fw)), - data.table(summary(reads_C_rev)), +# dt <- merge(data.table::data.table(summary(reads_C_fw)), +# data.table::data.table(summary(reads_C_rev)), +# by.x = c("i", "j"), by.y = c("i", "j"), +# all = TRUE)[x.x > 0 | x.y > 0] + dt <- merge(data.table::data.table(summary(reads_C_fw)), + data.table::data.table(summary(reads_C_rev)), by.x = c("i", "j"), by.y = c("i", "j"), - all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_C[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_C <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + all = TRUE)[!!dplyr::sym("x.x") > 0 | !!dplyr::sym("x.y") > 0] + dt <- data.table::data.table(variant = variants_C[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_C <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_G <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_G") variants_G <- variants_G[ref_allele != "G"] - reads_G_fw <- assays(SE)[["G_counts_fw"]] - reads_G_rev <- assays(SE)[["G_counts_rev"]] + reads_G_fw <- SummarizedExperiment::assays(SE)[["G_counts_fw"]] + reads_G_rev <- SummarizedExperiment::assays(SE)[["G_counts_rev"]] rownames(reads_G_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G_fw), 
"_", ref_allele, "_G") rownames(reads_G_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G_rev), "_", ref_allele, "_G") reads_G_fw <- reads_G_fw[ref_allele != "G",] reads_G_rev <- reads_G_rev[ref_allele != "G",] - dt <- merge(data.table(summary(reads_G_fw)), - data.table(summary(reads_G_rev)), + dt <- merge(data.table::data.table(summary(reads_G_fw)), + data.table::data.table(summary(reads_G_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_G[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_G <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_G[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_G <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_T <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_T") variants_T <- variants_T[ref_allele != "T"] - reads_T_fw <- assays(SE)[["T_counts_fw"]] - reads_T_rev <- assays(SE)[["T_counts_rev"]] + reads_T_fw <- SummarizedExperiment::assays(SE)[["T_counts_fw"]] + reads_T_rev <- SummarizedExperiment::assays(SE)[["T_counts_rev"]] rownames(reads_T_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T_fw), "_", ref_allele, "_T") rownames(reads_T_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T_rev), "_", ref_allele, "_T") reads_T_fw <- reads_T_fw[ref_allele != "T",] reads_T_rev <- reads_T_rev[ref_allele != "T",] - dt <- merge(data.table(summary(reads_T_fw)), - data.table(summary(reads_T_rev)), + dt <- merge(data.table::data.table(summary(reads_T_fw)), + data.table::data.table(summary(reads_T_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_T[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_T <- 
dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_T[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_T <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] cor_results <- rbind(cor_result_A, cor_result_C, cor_result_G, cor_result_T) diff --git a/R/CombineSEobjects.R b/R/CombineSEobjects.R index a5de795..bc8900d 100644 --- a/R/CombineSEobjects.R +++ b/R/CombineSEobjects.R @@ -1,28 +1,29 @@ #'CombineSEobjects #'@description #'We combine two SummarizedExperiment objects. -#'@import SummarizedExperiment BiocGenerics +# #'@import BiocGenerics +#'@importFrom SummarizedExperiment assays colData rowData SummarizedExperiment #'@param se_somatic SummarizedExperiment object for the somatic variants. #'@param se_MT SummarizedExperiment object for the MT variants. #'@param suffixes The suffixes you want to add to the meta data.frame. #'@export CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")){ # We check if the assays are equally named. 
- assay_names_somatic <- names(assays(se_somatic)) - assay_names_MT <- names(assays(se_MT)) + assay_names_somatic <- names(SummarizedExperiment::assays(se_somatic)) + assay_names_MT <- names(SummarizedExperiment::assays(se_MT)) if(!all(assay_names_somatic == assay_names_MT)){ stop("Your assays are not equally named or ordered.") } features <- combine_NAMES(names(se_somatic), names(se_MT)) cells <- combine_NAMES(colnames(se_somatic), colnames(se_MT)) - meta_data_somatic <- colData(se_somatic) - meta_data_MT <- colData(se_MT) + meta_data_somatic <- SummarizedExperiment::colData(se_somatic) + meta_data_MT <- SummarizedExperiment::colData(se_MT) meta_data <- merge(meta_data_somatic, meta_data_MT, by = "Cell", all = TRUE, suffixes = suffixes) meta_data <- meta_data[match(cells, meta_data$Cell),] - meta_row_somatic <- rowData(se_somatic) - meta_row_MT <- rowData(se_MT) + meta_row_somatic <- SummarizedExperiment::rowData(se_somatic) + meta_row_MT <- SummarizedExperiment::rowData(se_MT) if(ncol(meta_row_somatic) > 0 & ncol(meta_row_MT) > 0){ meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] @@ -31,7 +32,7 @@ CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")) meta_row_somatic <- matrix(NA, nrow = nrow(meta_row_somatic), ncol = ncol(meta_row_MT)) rownames(meta_row_somatic) <- rownames(se_somatic) colnames(meta_row_somatic) <- colnames(meta_row_MT) - meta_row_somatic <- DataFrame(meta_row_somatic) + meta_row_somatic <- S4Vectors::DataFrame(meta_row_somatic) meta_row_somatic$VariantName <- rownames(meta_row_somatic) meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] @@ -39,31 +40,20 @@ CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")) meta_row_MT <- matrix(NA, nrow = nrow(meta_row_MT), ncol = 
ncol(meta_row_somatic)) rownames(meta_row_MT) <- rownames(se_MT) colnames(meta_row_MT) <- colnames(meta_row_somatic) - meta_row_MT <- DataFrame(meta_row_MT) + meta_row_MT <- S4Vectors::DataFrame(meta_row_MT) meta_row_MT$VariantName <- rownames(meta_row_MT) meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] } - #assays_somatic <- assays(se_somatic) - #assays_somatic <- lapply(assays_somatic, as.matrix) - #assays_MT <- assays(se_MT) - #assays_MT <- lapply(assays_MT, as.matrix) - #assays_combined <- S4Vectors::mendoapply(BiocGenerics::combine, assays_somatic, assays_MT) - #assays_combined[[1]] <- as(assays_combined[[1]], "dgCMatrix") - #assays_combined[[2]] <- as(assays_combined[[2]], "dgCMatrix") - #assays_combined[[3]] <- as(assays_combined[[3]], "dgCMatrix") - #assays_combined[["consensus"]]@x[is.na(assays_combined[["consensus"]]@x)] <- 0 - #assays_combined[["fraction"]]@x[is.na(assays_combined[["fraction"]]@x)] <- 0 - #assays_combined[["coverage"]]@x[is.na(assays_combined[["coverage"]]@x)] <- 0 assays_combined <- lapply(assay_names_somatic, function(x){ - result <- combine_SparseMatrix(assays(se_somatic)[[x]], assays(se_MT)[[x]]) + result <- combine_SparseMatrix(SummarizedExperiment::assays(se_somatic)[[x]], SummarizedExperiment::assays(se_MT)[[x]]) }) names(assays_combined) <- assay_names_somatic - se_combined <- SummarizedExperiment(assays = assays_combined, - colData = meta_data, rowData = meta_row) + + se_combined <- SummarizedExperiment::SummarizedExperiment(assays = assays_combined, colData = meta_data, rowData = meta_row) return(se_combined) } diff --git a/R/Filtering.R b/R/Filtering.R index 24b1c09..c874611 100644 --- a/R/Filtering.R +++ b/R/Filtering.R @@ -13,7 +13,9 @@ #' \item all variants that are always NoCall, #' \item set variants with a VAF below a threshold to NoCall or Reference. 
#' } -#'@import fastmatch Matrix SummarizedExperiment +#'@importFrom Matrix summary +#'@importFrom SummarizedExperiment assays +#'@importFrom utils read.table #'@param se SummarizedExperiment object. #'@param blacklisted_barcodes_path Barcodes you want to remove. Path to a file with one column without header. #'@param fraction_threshold Variants with an VAF below this threshold are set to 0. Numeric. Default = NULL. @@ -21,6 +23,7 @@ #'@param min_cells_per_variant In how many cells should a variant be present to be included? Numeric. Default = 2. #'@param min_variants_per_cell How many variants should be covered in a cell have to be included? Default = 1. #'@param reject_value Should cells that fall below a threshold (fraction_threshold or alts_threshold) be treated as Reference or NoCall? Default = NoCall. +#'@param verbose Should the function be verbose? Default = TRUE #'@examples #' \dontrun{ #' # Removing all variants that are not detected in at least 2 cells. @@ -28,7 +31,7 @@ #' se_geno <- Filtering(se_geno, min_cells_per_variant = 2, fraction_threshold = 0.05) #' } #'@export -Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, reject_value = "NoCall"){ +Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, reject_value = "NoCall", verbose = TRUE){ # Checking if the reject_value variable is correct. 
if(!reject_value %in% c("Reference", "NoCall")){ stop(paste0("Your reject_value is ", reject_value, ".\nIt should be Reference or NoCall.")) @@ -39,8 +42,8 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = if(!is.null(blacklisted_barcodes_path)){ - print("We remove the unwanted cell barcodes.") - blacklisted_barcodes <- read.table(blacklisted_barcodes_path, header = FALSE) + if(verbose) print("We remove the unwanted cell barcodes.") + blacklisted_barcodes <- utils::read.table(blacklisted_barcodes_path, header = FALSE) blacklisted_barcodes <- blacklisted_barcodes[,1] barcodes_keep <- colnames(se) barcodes_keep <- barcodes_keep[!barcodes_keep %in% blacklisted_barcodes] @@ -50,30 +53,33 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = # If the fraction_threshold is 0, we skip the thresholding. 0 and NULL would be the same. # We might want to use a fraction_threshold of 0 to use the same variable to create file paths later. 
+ if(!is.null(fraction_threshold)){ if(fraction_threshold == 0){ - fraction_threshold <- NULL + fraction_threshold <- NULL + } } - + + if(!is.null(fraction_threshold)){ if(any(fraction_threshold >= 1, fraction_threshold <= 0)){ stop("Your fraction threshold is not 0 < x < 1.") } - print(paste0("We assume that cells with a fraction smaller than our threshold are actually ", reject_value, ".")) - print(paste0("We set consensus values to ", reject_value_numeric, " (", reject_value, ") and fraction values to 0.")) - print(paste0("We do not set fractions between ", fraction_threshold, " and 1 to 1.")) - print("This way, we retain the heterozygous information.") + if(verbose) print(paste0("We assume that cells with a fraction smaller than our threshold are actually ", reject_value, ".")) + if(verbose) print(paste0("We set consensus values to ", reject_value_numeric, " (", reject_value, ") and fraction values to 0.")) + if(verbose) print(paste0("We do not set fractions between ", fraction_threshold, " and 1 to 1.")) + if(verbose) print("This way, we retain the heterozygous information.") # Filtering using sparse matrices. - consensus_matrix <- assays(se)$consensus - fraction_matrix <- assays(se)$fraction - position_matrix <- summary(fraction_matrix) + consensus_matrix <- SummarizedExperiment::assays(se)$consensus + fraction_matrix <- SummarizedExperiment::assays(se)$fraction + position_matrix <- Matrix::summary(fraction_matrix) position_matrix <- subset(position_matrix, x > 0 & x < fraction_threshold) # If no elements fall between 0 and the fraction_threshold, we do not have to change the matrices. 
if(nrow(position_matrix) > 0){ ij <- as.matrix(position_matrix[, 1:2]) consensus_matrix[ij] <- reject_value_numeric fraction_matrix[ij] <- 0 - assays(se)$consensus <- consensus_matrix - assays(se)$fraction <- fraction_matrix + SummarizedExperiment::assays(se)$consensus <- consensus_matrix + SummarizedExperiment::assays(se)$fraction <- fraction_matrix } } @@ -82,16 +88,16 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = if(any(!is.numeric(alts_threshold), is.infinite(alts_threshold))){ stop("Your alts_threshold should be a numeric value.") } - print(paste0("We assume that cells with a number of alternative reads smaller than our threshold are actually ", reject_value, ".")) - print(paste0("We set consensus values to ", reject_value_numeric, " (", reject_value, "), fraction values to 0.")) - if(reject_value == "NoCall") print("We set Alts, Refs and Coverage to 0.") - if(reject_value == "Reference") print("We set Alts to 0 and adjust the Coverage.") + if(verbose) print(paste0("We assume that cells with a number of alternative reads smaller than our threshold are actually ", reject_value, ".")) + if(verbose) print(paste0("We set consensus values to ", reject_value_numeric, " (", reject_value, "), fraction values to 0.")) + if(reject_value == "NoCall") if(verbose) print("We set Alts, Refs and Coverage to 0.") + if(reject_value == "Reference") if(verbose) print("We set Alts to 0 and adjust the Coverage.") # Filtering using sparse matrices. 
- consensus_matrix <- assays(se)$consensus - fraction_matrix <- assays(se)$fraction - coverage_matrix <- assays(se)$coverage - alts_matrix <- assays(se)$alts - refs_matrix <- assays(se)$refs + consensus_matrix <- SummarizedExperiment::assays(se)$consensus + fraction_matrix <- SummarizedExperiment::assays(se)$fraction + coverage_matrix <- SummarizedExperiment::assays(se)$coverage + alts_matrix <- SummarizedExperiment::assays(se)$alts + refs_matrix <- SummarizedExperiment::assays(se)$refs position_matrix <- summary(alts_matrix) position_matrix <- subset(position_matrix, x < alts_threshold) # If no elements fall between 0 and the alts_threshold, we do not have to change the matrices. @@ -109,28 +115,28 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = refs_matrix[ij] <- 0 coverage_matrix[ij] <- 0 } - assays(se)$consensus <- consensus_matrix - assays(se)$fraction <- fraction_matrix - assays(se)$coverage <- coverage_matrix - assays(se)$alts <- alts_matrix - assays(se)$refs <- refs_matrix + SummarizedExperiment::assays(se)$consensus <- consensus_matrix + SummarizedExperiment::assays(se)$fraction <- fraction_matrix + SummarizedExperiment::assays(se)$coverage <- coverage_matrix + SummarizedExperiment::assays(se)$alts <- alts_matrix + SummarizedExperiment::assays(se)$refs <- refs_matrix } } - print("We remove all the variants that are always NoCall.") - consensus_test <- assays(se)$consensus > 0 + if(verbose) print("We remove all the variants that are always NoCall.") + consensus_test <- SummarizedExperiment::assays(se)$consensus > 0 keep_variants <- rowSums(consensus_test) > 0 se <- se[keep_variants,] - print(paste0("We remove variants, that are not at least detected in ", min_cells_per_variant, " cells.")) - keep_variants <- rowSums(assays(se)$consensus >= 1) + if(verbose) print(paste0("We remove variants, that are not at least detected in ", min_cells_per_variant, " cells.")) + keep_variants <- 
rowSums(SummarizedExperiment::assays(se)$consensus >= 1) keep_variants <- keep_variants >= min_cells_per_variant se <- se[keep_variants,] - print(paste0("We remove all cells that are not >= 1 (Ref) for at least ", min_variants_per_cell, " variant.")) - consensus_test <- assays(se)$consensus >= 1 + if(verbose) print(paste0("We remove all cells that are not >= 1 (Ref) for at least ", min_variants_per_cell, " variant.")) + consensus_test <- SummarizedExperiment::assays(se)$consensus >= 1 keep_cells <- colSums(consensus_test) > min_variants_per_cell se <- se[,keep_cells] return(se) diff --git a/R/GetCellInfoPerVariant.R b/R/GetCellInfoPerVariant.R index d3a423b..65362ee 100644 --- a/R/GetCellInfoPerVariant.R +++ b/R/GetCellInfoPerVariant.R @@ -1,20 +1,23 @@ #'We get the variant information per cell. -#'@import dplyr SummarizedExperiment tibble tidyverse +# #'@import dplyr SummarizedExperiment tibble tidyverse +#'@importFrom SummarizedExperiment assays +#'@importFrom dplyr left_join %>% +#'@importFrom tibble tibble as_tibble #'@param se SummarizedExperiment object. #'@param voi_ch Variants of interest. +#'@param verbose Should the function be verbose? 
Default = FALSE #'@export -GetCellInfoPerVariant <- function(se, voi_ch){ - print("Generate matrices with coverage, allele frequency and reference / variant reads") - cov_voi_mat <- assays(se)[["coverage"]][voi_ch,] - af_voi_mat <- assays(se)[["fraction"]][voi_ch,] +GetCellInfoPerVariant <- function(se, voi_ch, verbose = FALSE){ + if(verbose) print("Generate matrices with coverage, allele frequency and reference / variant reads") + cov_voi_mat <- SummarizedExperiment::assays(se)[["coverage"]][voi_ch,] + af_voi_mat <- SummarizedExperiment::assays(se)[["fraction"]][voi_ch,] - print("Add coverage and allele frequency info from variants of interest to cells_tib.") - cells_tib <- tibble(cell = colnames(se), - Mean_Cov = se$depth) + if(verbose) print("Add coverage and allele frequency info from variants of interest to cells_tib.") + cells_tib <- tibble::tibble(cell = colnames(se), Mean_Cov = se$depth) for(voi in voi_ch){ cells_tib <- cells_tib %>% - left_join(as_tibble(assays(se)[["coverage"]][voi,], rownames = "cell"), by = "cell") %>% - left_join(as_tibble(assays(se)[["fraction"]][voi,], rownames = "cell"), by = "cell") + dplyr::left_join(tibble::as_tibble(SummarizedExperiment::assays(se)[["coverage"]][voi,], rownames = "cell"), by = "cell") %>% + dplyr::left_join(tibble::as_tibble(SummarizedExperiment::assays(se)[["fraction"]][voi,], rownames = "cell"), by = "cell") colnames(cells_tib) <- gsub("value.x", paste0("cov_", voi), colnames(cells_tib)) colnames(cells_tib) <- gsub("value.y", paste0("af_", voi), colnames(cells_tib)) } diff --git a/R/GetVariantInfo.R b/R/GetVariantInfo.R index 5b2a02f..373c02d 100644 --- a/R/GetVariantInfo.R +++ b/R/GetVariantInfo.R @@ -2,7 +2,7 @@ #'@description #'We get the genotyping information for a set of variants. #'The function returns a matrix with the values from the specified assay. -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays #'@param SE SummarizedExperiment object. 
#'@param information The assay with the desired information. Default: consensus #'@param variants A vector of variants. @@ -24,11 +24,11 @@ GetVariantInfo <- function(SE, information = "consensus", variants = NULL, cells stop(paste0("Only ", variants_check, " of ", length(variants), " are in the SE object.")) } # We check if the requested assay is actually present. - assay_check <- information %in% names(assays(SE)) + assay_check <- information %in% names(SummarizedExperiment::assays(SE)) if(!assay_check){ stop("The assay you wants is not present in the object.") } - res <- assays(SE)[[information]][variants, , drop = FALSE] + res <- SummarizedExperiment::assays(SE)[[information]][variants, , drop = FALSE] # We subset the result to only include the cells of interest. # We check if the cells vector is not NULL. if(!is.null(cells)){ diff --git a/R/HeatmapVOI.R b/R/HeatmapVOI.R index 0c79a1a..72aa3c4 100644 --- a/R/HeatmapVOI.R +++ b/R/HeatmapVOI.R @@ -1,14 +1,20 @@ #'HeatmapVoi #'@description #'We plot a heatmap of a set of Variants Of Interest using the Variant Allele Frequency values of a SummarizedExperiment object. -#'@import ComplexHeatmap SummarizedExperiment grid circlize scales +#'@importFrom ComplexHeatmap columnAnnotation Heatmap +#'@importFrom SummarizedExperiment assays colData +#'@importFrom grid gpar unit +#'@importFrom circlize colorRamp2 +#'@importFrom scales hue_pal #'@param SE SummarizedExperiment object. #'@param voi Variants Of Interest. -#'@param annotation_trait Cell Annotation at the bottom of the heat map. +#'@param annotation_trait Cell Annotation at the bottom of the heat map. +#'@param column_title The title of the heat map. Default = NULL +#'@param remove_empty_cells Should cells that have a fraction of 0 for all variants be removed? 
Default = FALSE #'@export HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, remove_empty_cells = FALSE){ - fraction <- assays(SE)[["fraction"]][voi,] + fraction <- SummarizedExperiment::assays(SE)[["fraction"]][voi,] fraction[is.na(fraction)] <- 0 if(length(voi) == 1){ fraction <- t(as.matrix(fraction)) @@ -22,9 +28,9 @@ HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, re fraction <- fraction[,cell_check, drop = FALSE] } if(!is.null(annotation_trait)){ - colours_use <- scales::hue_pal(length(unique(colData(SE)[,annotation_trait]))) - names(colours_use) <- unique(colData(SE)[,annotation_trait]) - ha <- ComplexHeatmap::columnAnnotation(annotation_trait = colData(SE)[,annotation_trait], + colours_use <- scales::hue_pal(length(unique(SummarizedExperiment::colData(SE)[,annotation_trait]))) + names(colours_use) <- unique(SummarizedExperiment::colData(SE)[,annotation_trait]) + ha <- ComplexHeatmap::columnAnnotation(annotation_trait = SummarizedExperiment::colData(SE)[,annotation_trait], col = list(annotation_trait = colours_use)) } else if(is.null(annotation_trait)){ ha <- NULL @@ -35,16 +41,16 @@ HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, re column_title <- "Cells" } - heatmap_voi <- Heatmap(fraction, - column_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), - row_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), - row_names_gp = grid::gpar(fontsize = 20, fontface = "bold"), - col = circlize::colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), - c("#FCFCFC","#FFEDB0","#FFDF5F","#FEC510","#FA8E24","#F14C2B","#DA2828","#BE2222","#A31D1D")), - show_row_names = T, show_column_names = F, cluster_columns = T, clustering_method_columns = "complete", cluster_rows = F, name = "VAF", - heatmap_legend_param = list(border = "#000000", grid_height = unit(10, "mm")), - bottom_annotation = ha, border = T, use_raster = T, - column_title = column_title, - 
row_title = "Variants") + heatmap_voi <- ComplexHeatmap::Heatmap(fraction, + column_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), + row_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), + row_names_gp = grid::gpar(fontsize = 20, fontface = "bold"), + col = circlize::colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), + c("#FCFCFC","#FFEDB0","#FFDF5F","#FEC510","#FA8E24","#F14C2B","#DA2828","#BE2222","#A31D1D")), + show_row_names = T, show_column_names = F, cluster_columns = T, clustering_method_columns = "complete", cluster_rows = F, name = "VAF", + heatmap_legend_param = list(border = "#000000", grid_height = grid::unit(10, "mm")), + bottom_annotation = ha, border = T, use_raster = T, + column_title = column_title, + row_title = "Variants") return(heatmap_voi) } diff --git a/R/LoadingMAEGATK_typewise.R b/R/LoadingMAEGATK_typewise.R index 7071597..b2891f8 100755 --- a/R/LoadingMAEGATK_typewise.R +++ b/R/LoadingMAEGATK_typewise.R @@ -3,92 +3,105 @@ #'We load the MAEGATK output and transform it to be compatible with the VarTrix output. #'The input file is a specifically formated csv file with all the necessary information to run the analysis. #'Note that the source column in the input file needs to be one of the following: vartrix, mgaetk, mgatk. -#'This is hard coded and case insensitive. -#'@import Matrix SummarizedExperiment +#'If you want to only load a single sample without the use of an input file, you have to set the following variables. +#' \enumerate{ +#' \item samples_path +#' \item barcodes_path +#' \item patient +#' \item samples_file = NULL +#' } +#'@importFrom utils read.csv read.table +#'@importFrom SummarizedExperiment SummarizedExperiment #'@param samples_path Path to the input folder. #'@param samples_file Path to the csv file with the samples to be loaded. #'@param type_use The type of input. Has to be one of: scRNAseq_MT, Amplicon_MT. Only used if samples_path is not NULL. 
#'@param patient The patient you want to load. #'@param chromosome_prefix The prefix you want use. Default: "chrM" +#'@param min_cells The minimum number of cells with coverage for a variant. Variants with coverage in less than this amount of cells are removed. Default = 2 +#'@param barcodes_path Path to the barcodes file tsv. Default = NULL +#'@param verbose Should the function be verbose? Default = TRUE #'@export LoadingMAEGATK_typewise <- function(samples_file, samples_path = NULL, patient, type_use = "scRNAseq_MT", chromosome_prefix = "chrM", - min_cells = 2, barcodes_path = NULL){ + min_cells = 2, barcodes_path = NULL, verbose = TRUE){ if(all(!is.null(samples_path), !is.null(barcodes_path))){ - print(paste0("Loading the data for patient ", patient, ".")) - samples <- list.files(samples_path) - samples <- grep(patient, samples, value = TRUE) - samples_file <- data.frame(patient = patient, sample = samples, input_folder = samples_path, cells = barcodes_path) + if(verbose) print(paste0("Loading the data for patient ", patient, ".")) + samples <- patient + samples_file <- data.frame(patient = patient, sample = samples, input_path = samples_path, cells = barcodes_path) } else{ - print(paste0("Loading the data for patient ", patient, ".")) - print("We read in the samples file.") - samples_file <- read.csv(samples_file) + if(verbose) print(paste0("Loading the data for patient ", patient, ".")) + if(verbose) print("We read in the samples file.") + samples_file <- utils::read.csv(samples_file) - print("We subset to the patient of interest.") + if(verbose) print("We subset to the patient of interest.") samples_file <- samples_file[grep("maegatk|mgatk", samples_file$source, ignore.case = TRUE),] samples_file <- samples_file[grep(patient, samples_file$patient),] samples_file <- samples_file[grep(type_use, samples_file$type),] - print("We get the different samples.") + if(verbose) print("We get the different samples.") samples <- samples_file$sample } - print("We read in 
the cell barcodes output by CellRanger as a list.") - barcodes <- lapply(samples_file$cells, read.table) + if(verbose) print("We read in the cell barcodes output by CellRanger as a list.") + barcodes <- lapply(samples_file$cells, utils::read.table) names(barcodes) <- samples - print("We load the MAEGATK output files.") + if(verbose) print("We load the MAEGATK output files.") se_ls <- list() for(i in 1:nrow(samples_file)){ - print(paste0("Loading sample ", i, " of ", nrow(samples_file))) - input_folder_use <- samples_file$input_folder[i] - sample_use <- samples_file$sample[i] + if(verbose) print(paste0("Loading sample ", i, " of ", nrow(samples_file))) + input_file_use <- samples_file$input_path[i] + sample_use <- samples_file$sample[i] + + # We check if the file exists. + if(!file.exists(input_file_use)){ + stop(paste0("Error: the file ", input_file_use, " does not exist.")) + } # We get the final output file for either mgatk or maegatk. - final_output_file <- list.files(paste0(input_folder_use, sample_use, "/final/"), full.names = TRUE) - final_output_file <- grep(paste0("maegtk.rds|maegatk.rds|mgatk.rds|", sample_use, ".rds"), final_output_file, value = TRUE) - se_ls[[sample_use]] <- load_object(final_output_file) + se_ls[[sample_use]] <- load_object(input_file_use) colnames(se_ls[[sample_use]]) <- paste0(sample_use, "_", colnames(se_ls[[sample_use]])) - barcodes_use <- paste0(sample_use, "_", barcodes[[sample_use]][,1]) - barcodes_use <- barcodes_use[barcodes_use %in% colnames(se_ls[[sample_use]])] - se_ls[[sample_use]] <- se_ls[[sample_use]][,barcodes_use] + barcodes_use <- paste0(sample_use, "_", barcodes[[sample_use]][,1]) + barcodes_use <- barcodes_use[barcodes_use %in% colnames(se_ls[[sample_use]])] + se_ls[[sample_use]] <- se_ls[[sample_use]][, barcodes_use] } - print("We merge the samples.") + if(verbose) print("We merge the samples.") se_merged <- do.call("cbind", se_ls) rm(se_ls) gc() - print("We get the allele frequency.") - fraction <- 
computeAFMutMatrix(se_merged, chromosome_prefix = chromosome_prefix) + if(verbose) print("We get the allele frequency.") + fraction <- computeAFMutMatrix(SE = se_merged, chromosome_prefix = chromosome_prefix) - print("We get the coverage information.") + if(verbose) print("We get the coverage information.") coverage <- CalculateCoverage(SE = se_merged, chromosome_prefix = chromosome_prefix) if(!all(rownames(fraction) == rownames(coverage))){ coverage <- coverage[match(rownames(fraction), rownames(coverage)),] } - print("We get the number of alternative reads per variant.") + if(verbose) print("We get the number of alternative reads per variant.") reads_alt <- CalculateAltReads(SE = se_merged, chromosome_prefix = chromosome_prefix) - print("We get the quality information.") + if(verbose) print("We get the quality information.") variant_quality <- CalculateQuality(SE = se_merged, variants = rownames(reads_alt), chromosome_prefix = chromosome_prefix) - print("We get the number of reference reads.") + if(verbose) print("We get the number of reference reads.") reads_ref <- coverage - reads_alt - print("Calculating the strand concordance.") + + if(verbose) print("Calculating the strand concordance.") concordance <- CalculateStrandCorrelation(SE = se_merged, chromosome_prefix = chromosome_prefix) - print("We calculate the consensus information.") + if(verbose) print("We calculate the consensus information.") consensus <- CalculateConsensus(SE = se_merged, chromosome_prefix = chromosome_prefix) # We order the consensus matrix like the coverage matrix. 
if(!all(rownames(fraction) == rownames(consensus))){ @@ -96,111 +109,55 @@ LoadingMAEGATK_typewise <- function(samples_file, samples_path = NULL, patient, } - print("We perform some filtering to reduce the memory needed.") - print(paste0("We remove variants, which are not covered in at least ", min_cells, " cells .")) - keep_variants <- rowSums(consensus >= 1) - keep_variants <- keep_variants >= min_cells - # If we only have one cell or one variant, we loose the matrix. - cell_ids <- colnames(consensus) - variant_names <- names(keep_variants[keep_variants]) - # consensus <- consensus[keep_variants,] - # coverage <- coverage[keep_variants,] - # fraction <- fraction[keep_variants,] - # concordance <- concordance[keep_variants] - # reads_alt <- reads_alt[keep_variants,] - # reads_ref <- reads_ref[keep_variants,] - consensus <- consensus[keep_variants,] - consensus <- suppressWarnings(matrix(consensus, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(consensus) <- cell_ids - rownames(consensus) <- variant_names - consensus <- as(consensus, "dgCMatrix") - coverage <- coverage[keep_variants,] - coverage <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(coverage) <- cell_ids - rownames(coverage) <- variant_names - coverage <- as(coverage, "dgCMatrix") - fraction <- fraction[keep_variants,] - fraction <- suppressWarnings(matrix(fraction, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(fraction) <- cell_ids - rownames(fraction) <- variant_names - fraction <- as(fraction, "dgCMatrix") - concordance <- concordance[keep_variants] + if(verbose) print("We perform some filtering to reduce the memory needed.") + if(verbose) print(paste0("We remove variants, which are not covered in at least ", min_cells, " cells .")) + keep_variants <- rowSums(consensus >= 1) + keep_variants <- keep_variants >= min_cells + consensus <- consensus[keep_variants, , drop = FALSE] + coverage <- 
coverage[keep_variants, , drop = FALSE] + fraction <- fraction[keep_variants, , drop = FALSE] + concordance <- concordance[keep_variants] variant_quality <- variant_quality[keep_variants] - reads_alt <- reads_alt[keep_variants,] - reads_alt <- suppressWarnings(matrix(reads_alt, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_alt) <- cell_ids - rownames(reads_alt) <- variant_names - reads_alt <- as(reads_alt, "dgCMatrix") - reads_ref <- reads_ref[keep_variants,] - reads_ref <- suppressWarnings(matrix(reads_ref, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_ref) <- cell_ids - rownames(reads_ref) <- variant_names - reads_ref <- as(reads_ref, "dgCMatrix") - - - print("We remove cells that are always NoCall.") + reads_alt <- reads_alt[keep_variants, , drop = FALSE] + reads_ref <- reads_ref[keep_variants, , drop = FALSE] + + + if(verbose) print("We remove cells that are always NoCall.") consensus_test <- consensus > 0 keep_cells <- colSums(consensus_test) > 0 - # If we only have one cell or one variant, we loose the matrix. 
- cell_ids <- colnames(consensus) - variant_names <- names(keep_variants[keep_variants]) - # consensus <- consensus[,keep_cells] - # coverage <- coverage[,keep_cells] - # fraction <- fraction[,keep_cells] - # reads_alt <- reads_alt[,keep_cells] - # reads_ref <- reads_ref[,keep_cells] - consensus <- consensus[,keep_cells] - consensus <- suppressWarnings(matrix(consensus, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(consensus) <- cell_ids - rownames(consensus) <- variant_names - consensus <- as(consensus, "dgCMatrix") - coverage <- coverage[,keep_cells] - coverage <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(coverage) <- cell_ids - rownames(coverage) <- variant_names - coverage <- as(coverage, "dgCMatrix") - fraction <- fraction[,keep_cells] - fraction <- suppressWarnings(matrix(fraction, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(fraction) <- cell_ids - rownames(fraction) <- variant_names - fraction <- as(fraction, "dgCMatrix") - reads_alt <- reads_alt[,keep_cells] - reads_alt <- suppressWarnings(matrix(reads_alt, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_alt) <- cell_ids - rownames(reads_alt) <- variant_names - reads_alt <- as(reads_alt, "dgCMatrix") - reads_ref <- reads_ref[,keep_cells] - reads_ref <- suppressWarnings(matrix(reads_ref, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_ref) <- cell_ids - rownames(reads_ref) <- variant_names - reads_ref <- as(reads_ref, "dgCMatrix") + consensus <- consensus[, keep_cells, drop = FALSE] + coverage <- coverage[, keep_cells, drop = FALSE] + fraction <- fraction[, keep_cells, drop = FALSE] + reads_alt <- reads_alt[, keep_cells, drop = FALSE] + reads_ref <- reads_ref[, keep_cells, drop = FALSE] # We check if the matrices are empty (0 cells, 0 variants). Then we simply return NULL. 
dim_test <- dim(coverage) if(any(dim_test == 0)){ - print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) - print("Returning NULL.") + if(verbose) print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) + if(verbose) print("Returning NULL.") return(NULL) } else{ - print("We add the information to the merged matrices.") - coverage_depth_per_cell <- rownames(coverage) - coverage_depth_per_cell <- gsub("_._.$", "", coverage_depth_per_cell) - coverage_depth_per_cell <- !duplicated(coverage_depth_per_cell) - coverage_depth_per_cell <- coverage[coverage_depth_per_cell,] - cell_ids <- colnames(coverage_depth_per_cell) - variant_names <- rownames(coverage_depth_per_cell) - coverage_depth_per_cell <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) + if(verbose) print("We add the information to the merged matrices.") + coverage_depth_per_cell <- rownames(coverage) + coverage_depth_per_cell <- gsub("_._.$", "", coverage_depth_per_cell) + coverage_depth_per_cell <- !duplicated(coverage_depth_per_cell) + coverage_depth_per_cell <- coverage[coverage_depth_per_cell,] + cell_ids <- colnames(coverage_depth_per_cell) + variant_names <- rownames(coverage_depth_per_cell) + coverage_depth_per_cell <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) colnames(coverage_depth_per_cell) <- cell_ids rownames(coverage_depth_per_cell) <- variant_names - coverage_depth_per_variant <- rowMeans(coverage) - coverage_depth_per_cell <- colMeans(coverage_depth_per_cell) - meta_data_col <- data.frame(Cell = colnames(consensus), AverageCoverage = coverage_depth_per_cell) - rownames(meta_data_col) <- meta_data_col$Cell - meta_data_row <- data.frame(VariantName = rownames(consensus), Concordance = concordance, VariantQuality = variant_quality, Depth = coverage_depth_per_variant) - rownames(meta_data_row) <- meta_data_row$VariantName - se_output <- 
SummarizedExperiment(assays = list(consensus = consensus, fraction = fraction, coverage = coverage, alts = reads_alt, refs = reads_ref), - colData = meta_data_col, rowData = meta_data_row) + coverage_depth_per_variant <- rowMeans(coverage) + coverage_depth_per_cell <- colMeans(coverage_depth_per_cell) + meta_data_col <- data.frame(Cell = colnames(consensus), AverageCoverage = coverage_depth_per_cell) + rownames(meta_data_col) <- meta_data_col$Cell + meta_data_row <- data.frame(VariantName = rownames(consensus), Concordance = concordance, VariantQuality = variant_quality, Depth = coverage_depth_per_variant) + rownames(meta_data_row) <- meta_data_row$VariantName + + se_output <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = consensus, fraction = fraction, coverage = coverage, alts = reads_alt, refs = reads_ref), + colData = meta_data_col, rowData = meta_data_row) return(se_output) } } diff --git a/R/LoadingVCF_typewise.R b/R/LoadingVCF_typewise.R new file mode 100644 index 0000000..dbab895 --- /dev/null +++ b/R/LoadingVCF_typewise.R @@ -0,0 +1,248 @@ +#'LoadingVCF_typewise +#'@description +#' We load a cellwise pileup result from a VCF file. +#' If you want to only load a single sample without the use of an input file, you have to set the following variables. +#' \enumerate{ +#' \item samples_path +#' \item barcodes_path +#' \item patient +#' \item samples_file = NULL +#' } +#' +#' It has happened that reads with an N allele were aligned. This can cause problems since these variants are typically not in variants lists. +#' We can remove all of these variants by setting remove_N_alternative to TRUE (the default). +#' Set this option to FALSE, if you really want to retain these variants. 
+#'@importFrom GenomeInfoDb seqnames +#'@importFrom BiocGenerics start +#'@importFrom utils read.table read.csv +#'@importFrom VariantAnnotation readVcf info readGeno ref alt +#'@importFrom SummarizedExperiment SummarizedExperiment +#'@importFrom Matrix rowSums colSums rowMeans colMeans +#'@param samples_path Path to the input folder. Must include a barcodes file. +#'@param samples_file Path to the csv file with the samples to be loaded. +#'@param vcf_path Path to the VCF file with the variants. +#'@param patient The patient you want to load. +#'@param type_use The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. +#'@param min_reads The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL. +#'@param min_cells The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2. +#'@param barcodes_path Path to the cell barcodes tsv. Default = NULL +#'@param remove_N_alternative Remove all variants that have N as an alternative, see Description. Default = TRUE +#'@param verbose Should the function be verbose? 
Default = TRUE +#'@export +LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, vcf_path, patient, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, remove_N_alternative = TRUE, verbose = TRUE){ + if(all(!is.null(samples_path), !is.null(barcodes_path))){ + if(verbose) print(paste0("Loading the data for sample ", patient, ".")) + samples_file <- data.frame(patient = patient, sample = patient, input_path = samples_path, cells = barcodes_path) + samples <- samples_file$sample + } else{ + if(verbose) print(paste0("Loading the data for patient ", patient, ".")) + if(verbose) print("We read in the samples file.") + samples_file <- utils::read.csv(samples_file, stringsAsFactors = FALSE) + + + if(verbose) print("We subset to the patient of interest.") + samples_file <- samples_file[grep("vcf", samples_file$source, ignore.case = TRUE),] + samples_file <- samples_file[samples_file$patient == patient,] + samples_file <- samples_file[samples_file$type == type_use,] + + + if(verbose) print("We get the different samples.") + samples <- samples_file$sample + } + + + if(verbose) print("We read in the cell barcodes output by CellRanger as a list.") + barcodes <- lapply(samples_file$cells, utils::read.table) + names(barcodes) <- samples + + + if(verbose) print("We read in the vcf file.") + vcf <- VariantAnnotation::readVcf(vcf_path) + vcf_info <- VariantAnnotation::info(vcf) + + + if(verbose) print("We load the VCF file.") + reads_matrix_total <- c() # The total number of reads + coverage_matrix_total <- c() # The alternative reads + ref_matrix_total <- c() # The reference reads + consensus_matrix_total <- c() + for(i in 1:length(samples)){ + if(verbose) print(paste0("Loading sample ", i, " of ", nrow(samples_file))) + input_folder_use <- samples_file$input_path[i] + sample_use <- samples_file$sample[i] + + # The cell barcodes and variants. + cellbarcodes_use <- barcodes[[sample_use]] + + # We load the VCF file. 
+ # vcf_data <- paste0(input_folder_use, sample_use, "/cellSNP.cells.sorted.vcf.gz") + vcf_data <- paste0(input_folder_use) + depth_to_add <- VariantAnnotation::readGeno(vcf_data, "DP") + depth_to_add[is.na(depth_to_add)] <- 0 + rownames(depth_to_add) <- make.names(rownames(depth_to_add)) + colnames(depth_to_add) <- paste0(sample_use, "_", colnames(depth_to_add)) + depth_to_add <- methods::as(depth_to_add, "sparseMatrix") + reads_matrix_total <- cbind(reads_matrix_total, depth_to_add) + + alts_to_add <- VariantAnnotation::readGeno(vcf_data, "AD") + alts_to_add[is.na(alts_to_add)] <- 0 + rownames(alts_to_add) <- make.names(rownames(alts_to_add)) + colnames(alts_to_add) <- paste0(sample_use, "_", colnames(alts_to_add)) + alts_to_add <- methods::as(alts_to_add, "sparseMatrix") + coverage_matrix_total <- cbind(coverage_matrix_total, alts_to_add) + + consensus_to_add <- VariantAnnotation::readGeno(vcf_data, "GT") + consensus_to_add <- matrix(sapply(consensus_to_add, char_to_numeric), nrow = nrow(consensus_to_add), dimnames = list(make.names(rownames(consensus_to_add)), paste0(sample_use, "_", colnames(consensus_to_add)))) + consensus_to_add <- methods::as(consensus_to_add, "sparseMatrix") + consensus_matrix_total <- cbind(consensus_matrix_total, consensus_to_add) + } + ref_matrix_total <- reads_matrix_total - coverage_matrix_total + rm(consensus_to_add, alts_to_add, depth_to_add) + + + # We can get the N allele as an alternative allele. This happened in a visium data set. + # We remove all variants with the N allele as alternative. 
+ if(remove_N_alternative){ + ref_matrix_total_n <- substr(rownames(ref_matrix_total), start = nchar(rownames(ref_matrix_total)), stop = nchar(rownames(ref_matrix_total))) + ref_matrix_total_n <- ref_matrix_total_n != "N" + ref_matrix_total <- ref_matrix_total[ref_matrix_total_n,] + reads_matrix_total <- reads_matrix_total[ref_matrix_total_n,] + coverage_matrix_total <- coverage_matrix_total[ref_matrix_total_n,] + consensus_matrix_total <- consensus_matrix_total[ref_matrix_total_n,] + rm(ref_matrix_total_n) + } else{ + print("We keep all variants with an N as alternative allele. Please ensure that these variants are in your variant VCF file.") + } + + + if(verbose) print("We generate more accessible names.") + if(all(c("GENE", "AA", "CDS") %in% colnames(vcf_info))){ + new_names <- paste0(vcf_info$GENE, "_", vcf_info$AA, "_", vcf_info$CDS) + names(new_names) <- make.names(paste0(as.character(rep(GenomeInfoDb::seqnames(vcf)@values, GenomeInfoDb::seqnames(vcf)@lengths)), ".", BiocGenerics::start(vcf), "_", as.character(VariantAnnotation::ref(vcf)), ".", as.character(unlist(VariantAnnotation::alt(vcf))))) + new_names <- new_names[rownames(ref_matrix_total)] + } else{ + new_names <- rownames(vcf_info) + new_names <- gsub(":|\\/|\\?", "_", new_names) + names(new_names) <- make.names(paste0(as.character(rep(GenomeInfoDb::seqnames(vcf)@values, GenomeInfoDb::seqnames(vcf)@lengths)), ".", BiocGenerics::start(vcf), "_", as.character(VariantAnnotation::ref(vcf)), ".", as.character(unlist(VariantAnnotation::alt(vcf))))) + # new_names <- new_names[names(new_names) %in% rownames(ref_matrix_total)] + new_names <- new_names[rownames(ref_matrix_total)] + } + + + if(!is.null(min_reads)){ + if(verbose) print(paste0("We set read values below the threshold of ", min_reads, " to 0.")) + if(verbose) print("We then generate the consensus matrix again.") + ref_matrix_total@x[ref_matrix_total@x < min_reads] <- 0 + coverage_matrix_total@x[coverage_matrix_total@x < min_reads] <- 0 + + 
reference_construction <- ref_matrix_total + reference_construction@x[reference_construction@x > 0] <- 1 + + coverage_construction <- coverage_matrix_total + coverage_construction@x[coverage_construction@x > 0] <- 2 + + consensus_matrix_total <- reference_construction + coverage_construction + rm(reference_construction, coverage_construction) + } + + + # We check if number of rows of the matrices are the same as the length of the new names. + if(all(nrow(coverage_matrix_total) != length(new_names))){ + input_rows <- nrow(coverage_matrix_total) + new_names_length <- length(new_names) + stop(paste0("Error: you have ", input_rows, " variants in you matrix and ", new_names_length, " actual variant names.")) + } + + rownames(coverage_matrix_total) <- new_names + rownames(ref_matrix_total) <- new_names + rownames(consensus_matrix_total) <- new_names + + + if(verbose) print(paste0("We remove variants, that are not detected in at least ", min_cells, " cells.")) + keep_variants <- Matrix::rowSums(consensus_matrix_total >= 1) + keep_variants <- keep_variants >= min_cells + # If we only have one cell or one variant, we loose the matrix. 
+ cell_ids <- colnames(consensus_matrix_total) + variant_names <- names(keep_variants[keep_variants]) + consensus_matrix_total <- consensus_matrix_total[keep_variants, ] + consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(consensus_matrix_total) <- cell_ids + rownames(consensus_matrix_total) <- variant_names + consensus_matrix_total <- methods::as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[keep_variants, ] + coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(coverage_matrix_total) <- cell_ids + rownames(coverage_matrix_total) <- variant_names + coverage_matrix_total <- methods::as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[keep_variants, ] + ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(ref_matrix_total) <- cell_ids + rownames(ref_matrix_total) <- variant_names + ref_matrix_total <- methods::as(ref_matrix_total, "dgCMatrix") + + + if(verbose) print("We remove cells that are always NoCall.") + consensus_test <- consensus_matrix_total > 0 + keep_cells <- Matrix::colSums(consensus_test) > 0 + # If we only have one cell or one variant, we loose the matrix. 
+ cell_ids <- names(keep_cells[keep_cells]) + variant_names <- rownames(consensus_matrix_total) + consensus_matrix_total <- consensus_matrix_total[, keep_cells] + consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(consensus_matrix_total) <- cell_ids + rownames(consensus_matrix_total) <- variant_names + consensus_matrix_total <- methods::as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[, keep_cells] + coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(coverage_matrix_total) <- cell_ids + rownames(coverage_matrix_total) <- variant_names + coverage_matrix_total <- methods::as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[, keep_cells] + ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(ref_matrix_total) <- cell_ids + rownames(ref_matrix_total) <- variant_names + ref_matrix_total <- methods::as(ref_matrix_total, "dgCMatrix") + + + if(verbose) print(paste0(type_use, " Variants: ", nrow(consensus_matrix_total))) + if(verbose) print(paste0(type_use, " Cells: ", ncol(consensus_matrix_total))) + + rm(consensus_test, keep_variants, keep_cells) + gc() + + + if(verbose) print("We transform the sparse matrices to matrices, so we can calculate the fraction.") + coverage_matrix_total <- as.matrix(coverage_matrix_total) + ref_matrix_total <- as.matrix(ref_matrix_total) + consensus_matrix_total <- as.matrix(consensus_matrix_total) + reads_total <- coverage_matrix_total + ref_matrix_total + fraction_total <- coverage_matrix_total / reads_total + fraction_total[is.na(fraction_total)] <- 0 + gc() + + + # We check if the matrices are empty (0 cells, 0 variants). Then we simply return NULL. 
+ dim_test <- dim(reads_total) + if(any(dim_test == 0)){ + if(verbose) print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) + if(verbose) print("Returning NULL.") + return(NULL) + } else { + if(verbose) print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.") + # We want an assay for the Consensus information and for the fraction. + # As meta data we add a data frame showing the cell id, the associated patient and the sample. + coverage_depth_per_cell <- Matrix::colMeans(reads_total) + coverage_depth_per_variant <- Matrix::rowMeans(reads_total) + meta_data <- data.frame(Cell = colnames(consensus_matrix_total), Type = type_use, AverageCoverage = coverage_depth_per_cell) + rownames(meta_data) <- meta_data$Cell + meta_row <- data.frame(VariantName = rownames(consensus_matrix_total), Depth = coverage_depth_per_variant) + rownames(meta_row) <- meta_row$VariantName + #se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = methods::as(consensus_matrix_total, "dgCMatrix"), fraction = methods::as(fraction_total, "dgCMatrix"), coverage = methods::as(reads_total, "dgCMatrix")), + # colData = meta_data) + se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = methods::as(consensus_matrix_total, "CsparseMatrix"), fraction = methods::as(fraction_total, "CsparseMatrix"), coverage = methods::as(reads_total, "CsparseMatrix"), + alts = methods::as(coverage_matrix_total, "CsparseMatrix"), refs = methods::as(ref_matrix_total, "CsparseMatrix")), + colData = meta_data, rowData = meta_row) + return(se_merged) + } +} + diff --git a/R/LoadingVarTrix_typewise.R b/R/LoadingVarTrix_typewise.R index 8b4da86..dd531ac 100644 --- a/R/LoadingVarTrix_typewise.R +++ b/R/LoadingVarTrix_typewise.R @@ -7,8 +7,10 @@ #'The input file is a specifically formated csv file with all the necessary information to run the analysis. 
#'Note that the source column in the input file needs to be one of the following: vartrix, mgaetk, mgatk. #'This is hard coded and case insensitive. -#' -#'@import Matrix SummarizedExperiment VariantAnnotation +#'@importFrom utils read.csv read.table +#'@importFrom VariantAnnotation readVcf info +#'@importFrom SummarizedExperiment SummarizedExperiment +#'@importFrom Matrix readMM #'@param samples_path Path to the input folder. Must include a barcodes file. #'@param samples_file Path to the csv file with the samples to be loaded. #'@param vcf_path Path to the VCF file with the variants. @@ -17,61 +19,53 @@ #'@param type_use The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. #'@param min_reads The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL. #'@param min_cells The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2. +#'@param barcodes_path The path to the cell barcodes tsv. Default = NULL +#'@param verbose Should the function be verbose? 
Default = TRUE #'@export -LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, sample = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2){ - if(all(!is.null(samples_path), !is.null(barcodes_path), !is.null(sample), !is.null(snp_path))){ - print(paste0("Loading the data for sample ", sample, ".")) - #samples <- list.files(samples_path) - #samples <- grep(patient, samples, value = TRUE) - - #barcodes_files <- list.files(path = samples_path, pattern = "barcodes") - #barcodes_files <- unlist(lapply(paste0(samples_path, samples, "/"), list.files, pattern = "barcodes", full.names = TRUE)) - - - #samples_file <- data.frame(patient = patient, sample = samples, input_folder = samples_path, cells = barcodes_files) - samples_file <- data.frame(patient = patient, sample = sample, input_folder = samples_path, cells = barcodes_path) +LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, verbose = TRUE){ + if(all(!is.null(samples_path), !is.null(barcodes_path), !is.null(snp_path))){ + if(verbose) print(paste0("Loading the data for sample ", patient, ".")) + samples_file <- data.frame(patient = patient, sample = patient, input_path = samples_path, cells = barcodes_path) samples <- samples_file$sample } else{ - print(paste0("Loading the data for patient ", patient, ".")) - print("We read in the samples file.") - samples_file <- read.csv(samples_file, stringsAsFactors = FALSE) - - - print("We subset to the patient of interest.") + if(verbose) print(paste0("Loading the data for patient ", patient, ".")) + if(verbose) print("We read in the samples file.") + samples_file <- utils::read.csv(samples_file, stringsAsFactors = FALSE) + + if(verbose) print("We subset to the patient of interest.") samples_file <- samples_file[grep("vartrix", samples_file$source, 
ignore.case = TRUE),] samples_file <- samples_file[samples_file$patient == patient,] samples_file <- samples_file[samples_file$type == type_use,] - - - print("We get the different samples.") + + if(verbose) print("We get the different samples.") samples <- samples_file$sample } - print("We load the SNV files.") + if(verbose) print("We load the SNV files.") if(!is.null(snp_path)){ path_snps <- snp_path } else{ - path_snps <- paste0(samples_file$input_folder, "/SNV.loci.txt") + path_snps <- paste0(samples_file$input_path, "/SNV.loci.txt") } - print("We read the variants.") - snps_list <- lapply(path_snps, read.table, header = FALSE) + if(verbose) print("We read the variants.") + snps_list <- lapply(path_snps, utils::read.table, header = FALSE) names(snps_list) <- samples - print("We read in the cell barcodes output by CellRanger as a list.") - barcodes <- lapply(samples_file$cells, read.table) + if(verbose) print("We read in the cell barcodes output by CellRanger as a list.") + barcodes <- lapply(samples_file$cells, utils::read.table) names(barcodes) <- samples - print("We read in the vcf file.") - vcf <- readVcf(vcf_path) - vcf_info <- info(vcf) + if(verbose) print("We read in the vcf file.") + vcf <- VariantAnnotation::readVcf(vcf_path) + vcf_info <- VariantAnnotation::info(vcf) - print("We generate more accessible names.") + if(verbose) print("We generate more accessible names.") if(all(c("GENE", "AA", "CDS") %in% colnames(vcf_info))){ new_names <- paste0(vcf_info$GENE, "_", vcf_info$AA, "_", vcf_info$CDS) } else{ @@ -79,14 +73,14 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ new_names <- gsub(":|\\/|\\?", "_", new_names) } - print("We read in the different sparse genotype matrices as a list.") - print("We have a slot per type of input data.") + if(verbose) print("We read in the different sparse genotype matrices as a list.") + if(verbose) print("We have a slot per type of input data.") coverage_matrices <- list() ref_matrices 
<- list() consensus_matrices <- list() for(i in 1:nrow(samples_file)){ - print(paste0("Loading sample ", i, " of ", nrow(samples_file))) - input_folder_use <- samples_file$input_folder[i] + if(verbose) print(paste0("Loading sample ", i, " of ", nrow(samples_file))) + input_folder_use <- samples_file$input_path[i] sample_use <- samples_file$sample[i] # The cell barcodes and variants. @@ -109,20 +103,20 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ } - print("We generate a large data.frame of all the snv matrices.") + if(verbose) print("We generate a large data.frame of all the snv matrices.") coverage_matrix_total <- do.call("cbind", coverage_matrices) ref_matrix_total <- do.call("cbind", ref_matrices) consensus_matrix_total <- do.call("cbind", consensus_matrices) - print("We remove the matrix lists.") + if(verbose) print("We remove the matrix lists.") rm(coverage_matrices, ref_matrices, consensus_matrices) gc() if(!is.null(min_reads)){ - print(paste0("We set read values below the threshold of ", min_reads, " to 0.")) - print("We then generate the consensus matrix again.") + if(verbose) print(paste0("We set read values below the threshold of ", min_reads, " to 0.")) + if(verbose) print("We then generate the consensus matrix again.") ref_matrix_total@x[ref_matrix_total@x < min_reads] <- 0 coverage_matrix_total@x[coverage_matrix_total@x < min_reads] <- 0 @@ -149,146 +143,77 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ rownames(consensus_matrix_total) <- new_names - print(paste0("We remove variants, that are not detected in at least ", min_cells, " cells.")) + if(verbose) print(paste0("We remove variants, that are not detected in at least ", min_cells, " cells.")) keep_variants <- rowSums(consensus_matrix_total >= 1) keep_variants <- keep_variants >= min_cells # If we only have one cell or one variant, we loose the matrix. 
- cell_ids <- colnames(consensus_matrix_total) - variant_names <- names(keep_variants[keep_variants]) - consensus_matrix_total <- consensus_matrix_total[keep_variants, ] - consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(consensus_matrix_total) <- cell_ids - rownames(consensus_matrix_total) <- variant_names - consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") - coverage_matrix_total <- coverage_matrix_total[keep_variants, ] - coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(coverage_matrix_total) <- cell_ids - rownames(coverage_matrix_total) <- variant_names - coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") - ref_matrix_total <- ref_matrix_total[keep_variants, ] - ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(ref_matrix_total) <- cell_ids - rownames(ref_matrix_total) <- variant_names - ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") - -# if(sum(keep_variants) > 1){ -# consensus_matrix_total <- consensus_matrix_total[keep_variants,] -# coverage_matrix_total <- coverage_matrix_total[keep_variants,] -# ref_matrix_total <- ref_matrix_total[keep_variants,] -# } else if(sum(keep_variants) == 1){ -# cell_ids <- colnames(consensus_matrix_total) -# variant_names <- names(keep_variants[keep_variants]) -# consensus_matrix_total <- consensus_matrix_total[keep_variants,] -# consensus_matrix_total <- matrix(consensus_matrix_total, nrow = 1, ncol = length(consensus_matrix_total)) -# rownames(consensus_matrix_total) <- variant_names -# colnames(consensus_matrix_total) <- cell_ids -# consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") -# coverage_matrix_total <- coverage_matrix_total[keep_variants,] -# coverage_matrix_total <- matrix(coverage_matrix_total, nrow = 1, ncol = length(coverage_matrix_total)) -# 
rownames(coverage_matrix_total) <- variant_names -# colnames(coverage_matrix_total) <- cell_ids -# coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") -# ref_matrix_total <- ref_matrix_total[keep_variants,] -# ref_matrix_total <- matrix(ref_matrix_total, nrow = 1, ncol = length(ref_matrix_total)) -# rownames(ref_matrix_total) <- variant_names -# colnames(ref_matrix_total) <- cell_ids -# ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") -# rm(cell_ids, variant_names) -# } - - - print("We remove cells that are always NoCall.") + #cell_ids <- colnames(consensus_matrix_total) + #variant_names <- names(keep_variants[keep_variants]) + consensus_matrix_total <- consensus_matrix_total[keep_variants, , drop = FALSE] + #consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(consensus_matrix_total) <- cell_ids + #rownames(consensus_matrix_total) <- variant_names + #consensus_matrix_total <- methods::as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[keep_variants, , drop = FALSE] + #coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(coverage_matrix_total) <- cell_ids + #rownames(coverage_matrix_total) <- variant_names + #coverage_matrix_total <- methods::as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[keep_variants, , drop = FALSE] + #ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(ref_matrix_total) <- cell_ids + #rownames(ref_matrix_total) <- variant_names + #ref_matrix_total <- methods::as(ref_matrix_total, "dgCMatrix") + + + if(verbose) print("We remove cells that are always NoCall.") consensus_test <- consensus_matrix_total > 0 keep_cells <- colSums(consensus_test) > 0 # If we only have one cell or one variant, we loose the matrix. 
- cell_ids <- names(keep_cells[keep_cells]) - variant_names <- rownames(consensus_matrix_total) - consensus_matrix_total <- consensus_matrix_total[, keep_cells] - consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(consensus_matrix_total) <- cell_ids - rownames(consensus_matrix_total) <- variant_names - consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") - coverage_matrix_total <- coverage_matrix_total[, keep_cells] - coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(coverage_matrix_total) <- cell_ids - rownames(coverage_matrix_total) <- variant_names - coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") - ref_matrix_total <- ref_matrix_total[, keep_cells] - ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(ref_matrix_total) <- cell_ids - rownames(ref_matrix_total) <- variant_names - ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") - -# if(sum(keep_cells) > 1){ -# consensus_matrix_total <- consensus_matrix_total[, keep_cells] -# coverage_matrix_total <- coverage_matrix_total[, keep_cells] -# ref_matrix_total <- ref_matrix_total[, keep_cells] -# } else if(sum(keep_cells) == 1){ -# cell_ids <- names(keep_cells[keep_cells]) -# variant_names <- rownames(consensus_matrix_total) -# consensus_matrix_total <- consensus_matrix_total[,keep_cells] -# consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(consensus_matrix_total) <- variant_names -# colnames(consensus_matrix_total) <- cell_ids -# consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") -# coverage_matrix_total <- coverage_matrix_total[,keep_cells] -# coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(coverage_matrix_total) <- variant_names -# 
colnames(coverage_matrix_total) <- cell_ids -# coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") -# ref_matrix_total <- ref_matrix_total[, keep_cells] -# ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(ref_matrix_total) <- variant_names -# colnames(ref_matrix_total) <- cell_ids -# ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") -# rm(cell_ids, variant_names) -# } - - print(paste0(type_use, " Variants: ", nrow(consensus_matrix_total))) - print(paste0(type_use, " Cells: ", ncol(consensus_matrix_total))) + #cell_ids <- names(keep_cells[keep_cells]) + #variant_names <- rownames(consensus_matrix_total) + consensus_matrix_total <- consensus_matrix_total[, keep_cells, drop = FALSE] + #consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(consensus_matrix_total) <- cell_ids + #rownames(consensus_matrix_total) <- variant_names + #consensus_matrix_total <- methods::as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[, keep_cells, drop = FALSE] + #coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(coverage_matrix_total) <- cell_ids + #rownames(coverage_matrix_total) <- variant_names + #coverage_matrix_total <- methods::as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[, keep_cells, drop = FALSE] + #ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(ref_matrix_total) <- cell_ids + #rownames(ref_matrix_total) <- variant_names + #ref_matrix_total <- methods::as(ref_matrix_total, "dgCMatrix") + + + if(verbose) print(paste0(type_use, " Variants: ", nrow(consensus_matrix_total))) + if(verbose) print(paste0(type_use, " Cells: ", ncol(consensus_matrix_total))) rm(consensus_test, keep_variants, keep_cells) gc() - print("We transform the sparse matrices to 
matrices, so we can calculate the fraction.") - # For test purposes - #coverage_matrix_total_ori <- coverage_matrix_total - #ref_matrix_total_ori <- ref_matrix_total - #consensus_matrix_total_ori <- consensus_matrix_total - #coverage_matrix_total <- coverage_matrix_total_ori - #ref_matrix_total <- ref_matrix_total_ori - #consensus_matrix_total <- consensus_matrix_total_ori - - #coverage_matrix_total <- coverage_matrix_total[1:5000,1:5000] - #ref_matrix_total <- ref_matrix_total[1:5000,1:5000] - #consensus_matrix_total <- coverage_matrix_total[1:5000,1:5000] - #colnames(coverage_matrix_total) <- make.names(colnames(coverage_matrix_total)) - #rownames(coverage_matrix_total) <- make.names(rownames(coverage_matrix_total)) - #colnames(ref_matrix_total) <- make.names(colnames(ref_matrix_total)) - #rownames(ref_matrix_total) <- make.names(rownames(ref_matrix_total)) - #colnames(consensus_matrix_total) <- make.names(colnames(consensus_matrix_total)) - #rownames(consensus_matrix_total) <- make.names(rownames(consensus_matrix_total)) - coverage_matrix_total <- as.matrix(coverage_matrix_total) - ref_matrix_total <- as.matrix(ref_matrix_total) - consensus_matrix_total <- as.matrix(consensus_matrix_total) - reads_total <- coverage_matrix_total + ref_matrix_total - fraction_total <- coverage_matrix_total / reads_total + if(verbose) print("We transform the sparse matrices to matrices, so we can calculate the fraction.") + coverage_matrix_total <- as.matrix(coverage_matrix_total) + ref_matrix_total <- as.matrix(ref_matrix_total) + consensus_matrix_total <- as.matrix(consensus_matrix_total) + reads_total <- coverage_matrix_total + ref_matrix_total + fraction_total <- coverage_matrix_total / reads_total fraction_total[is.na(fraction_total)] <- 0 - #fraction_total <- sdiv(X = coverage_matrix_total, Y = reads_total, - # names = dimnames(coverage_matrix_total)) - #rm(coverage_matrix_total, ref_matrix_total) gc() # We check if the matrices are empty (0 cells, 0 variants). 
Then we simply return NULL. dim_test <- dim(reads_total) if(any(dim_test == 0)){ - print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) - print("Returning NULL.") + if(verbose) print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) + if(verbose) print("Returning NULL.") return(NULL) } else { - print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.") + if(verbose) print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.") # We want an assay for the Consensus information and for the fraction. # As meta data we add a data frame showing the cell id, the associated patient and the sample. coverage_depth_per_cell <- colMeans(reads_total) @@ -297,11 +222,9 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ rownames(meta_data) <- meta_data$Cell meta_row <- data.frame(VariantName = rownames(consensus_matrix_total), Depth = coverage_depth_per_variant) rownames(meta_row) <- meta_row$VariantName - #se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "dgCMatrix"), fraction = as(fraction_total, "dgCMatrix"), coverage = as(reads_total, "dgCMatrix")), - # colData = meta_data) - se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "CsparseMatrix"), fraction = as(fraction_total, "CsparseMatrix"), coverage = as(reads_total, "CsparseMatrix"), - alts = as(coverage_matrix_total, "CsparseMatrix"), refs = as(ref_matrix_total, "CsparseMatrix")), - colData = meta_data, rowData = meta_row) + se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = methods::as(consensus_matrix_total, "CsparseMatrix"), fraction = methods::as(fraction_total, "CsparseMatrix"), coverage = methods::as(reads_total, "CsparseMatrix"), + alts = methods::as(coverage_matrix_total, "CsparseMatrix"), refs = methods::as(ref_matrix_total, 
"CsparseMatrix")), + colData = meta_data, rowData = meta_row) return(se_merged) } } diff --git a/R/Merging_SE_list.R b/R/Merging_SE_list.R index 2591dc4..94d78c9 100644 --- a/R/Merging_SE_list.R +++ b/R/Merging_SE_list.R @@ -1,7 +1,6 @@ #'Merging list of SummarizedExperiment objects. #'@description #'This function is a wrapper for do.all("cbind", se). -#' #'@import BiocGenerics #'@param se SummarizedExperiment object #'@export diff --git a/R/RowWiseSplit.R b/R/RowWiseSplit.R index bac1734..8114d6a 100644 --- a/R/RowWiseSplit.R +++ b/R/RowWiseSplit.R @@ -3,14 +3,15 @@ #'Performing the correlation or Fisher test association for a SummarizedExperiment object requires extreme amounts of memory. #'To reduce the amount of memory necessary, we instead get the individual rows from the consensus assay. #'We can then remove the NoCalls (no reads) from the individual vectors, further reducing the amount of memory needed. -#'@import Matrix SummarizedExperiment parallel +#'@importFrom parallel mclapply +#'@importFrom SummarizedExperiment assays #'@param se SummarizedExperiment object. #'@param n_cores Number of cores to use. #'@param remove_nocalls Do you want to remove NoCall cells? #'@export RowWiseSplit <- function(se, n_cores = 1, remove_nocalls = TRUE){ - consensus <- assays(se)$consensus - consensus_list <- mclapply(rownames(se), SeparatingMatrixToList, total_matrix = consensus, remove_nocalls = remove_nocalls, mc.cores = n_cores) + consensus <- SummarizedExperiment::assays(se)$consensus + consensus_list <- parallel::mclapply(rownames(se), SeparatingMatrixToList, total_matrix = consensus, remove_nocalls = remove_nocalls, mc.cores = n_cores) names(consensus_list) <- rownames(se) return(consensus_list) } diff --git a/R/SeparatingMatrixToList.R b/R/SeparatingMatrixToList.R index 00970a6..3bb7ff1 100644 --- a/R/SeparatingMatrixToList.R +++ b/R/SeparatingMatrixToList.R @@ -4,13 +4,14 @@ #'Each variant is an entry in the list. 
#'NoCalls (cells with no reads covering a variant) can be removed. #'This function gets called by RowWiseSplit in return. +#'@importFrom stats na.omit #'@param row_use The row the separate. #'@param total_matrix The matrix to be split. #'@param remove_nocalls Do you want to remove NoCall cells? #'@export SeparatingMatrixToList <- function(row_use, total_matrix, remove_nocalls = TRUE){ selected_row <- total_matrix[row_use,] - selected_row <- na.omit(selected_row) + selected_row <- stats::na.omit(selected_row) if(remove_nocalls == TRUE){ # We remove the NoCall cells. diff --git a/R/SetVariantInfo.R b/R/SetVariantInfo.R index 584ea06..53aefa7 100644 --- a/R/SetVariantInfo.R +++ b/R/SetVariantInfo.R @@ -2,7 +2,9 @@ #'@description #'We add the genotyping information for a set of variants to a Seurat object. #'The function returns a matrix with the values from the specified assay. -#'@import SummarizedExperiment Seurat +#'@importFrom SummarizedExperiment assays +#'@importFrom Matrix t +#'@importFrom Seurat AddMetaData #'@param SE SummarizedExperiment object. #'@param seurat_object The Seurat object. #'@param information The assay with the desired information. Default: consensus @@ -28,7 +30,7 @@ SetVariantInfo <- function(SE, seurat_object, information = "consensus", variant if(!assay_check){ stop("The assay you wants is not present in the object.") } - res <- t(assays(SE)[[information]][variants, , drop = FALSE]) + res <- t(SummarizedExperiment::assays(SE)[[information]][variants, , drop = FALSE]) # We check if all the cells are actually in the Seurat object. # If not, we only add the information for the ones present. # We execute an error function if there are zero cells present. diff --git a/R/VariantBurden.R b/R/VariantBurden.R index 208dab1..5fc840b 100644 --- a/R/VariantBurden.R +++ b/R/VariantBurden.R @@ -2,11 +2,12 @@ #'@description #'Calculate the variant burden per cell. #'We simply sum up the MAF values per cell. 
-#'@import Matrix SummarizedExperiment +#'@importFrom SummarizedExperiment assays colData +#'@importFrom Matrix colSums #'@param se SummarizedExperiment object #'@export VariantBurden <- function(se){ - burden <- colSums(assays(se)[["fraction"]]) - colData(se)[,"Burden"] <- burden + burden <- Matrix::colSums(SummarizedExperiment::assays(se)[["fraction"]]) + SummarizedExperiment::colData(se)[,"Burden"] <- burden return(se) } diff --git a/R/VariantCloneSizeThresholding.R b/R/VariantCloneSizeThresholding.R index f1a4c24..3c81e52 100644 --- a/R/VariantCloneSizeThresholding.R +++ b/R/VariantCloneSizeThresholding.R @@ -2,41 +2,32 @@ #'@description #'We get variants of interest using a clone size thresholding. #'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import dplyr Matrix SummarizedExperiment tidyverse +#'@importFrom SummarizedExperiment assays #'@param se SummarizedExperiment object. #'@param min_coverage Minimum coverage a variant needs to have. #'@param fraction_negative_cells The fraction of negative cells needed. #'@param min_clone_size minimum number of cells. #'@param vaf_threshold Variant Allele Threshold. Cells above this threshold are considered mutated. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantCloneSizeThresholding <- function(se, min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, vaf_threshold = 0.5){ - # This function is adapted from the Peter van Galen. 
- print("Get the mean allele frequency and coverage.") - mean_af <- rowMeans(assays(se)[["fraction"]], na.rm = TRUE) - mean_cov <- rowMeans(assays(se)[["coverage"]], na.rm = TRUE) +VariantCloneSizeThresholding <- function(se, min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, vaf_threshold = 0.5, verbose = TRUE){ + if(verbose) print("Get the mean allele frequency and coverage.") + mean_af <- rowMeans(SummarizedExperiment::assays(se)[["fraction"]], na.rm = TRUE) + mean_cov <- rowMeans(SummarizedExperiment::assays(se)[["coverage"]], na.rm = TRUE) - print("Collect all information in a tibble") - #vars_tib <- as_tibble(do.call(cbind, c(list(mean_af), list(mean_cov))), rownames = "var") + if(verbose) print("Collect all information in a tibble") vars <- do.call(cbind, c(list(mean_af), list(mean_cov))) - #colnames(vars_tib)[2] <- "mean_af" - #colnames(vars_tib)[3] <- "mean_cov" colnames(vars) <- c("mean_af", "mean_cov") - print("We add the number of cells that exceed the VAF thresholds.") - #vars_tib <- vars_tib %>% - # mutate(n0 = apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE))) %>% - # mutate(VAF_threshold = apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE))) - n0 <- apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) - VAF_threshold <- apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) + if(verbose) print("We add the number of cells that exceed the VAF thresholds.") + n0 <- apply(SummarizedExperiment::assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) + VAF_threshold <- apply(SummarizedExperiment::assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) vars <- cbind(vars, n0, VAF_threshold) - print("Thresholding using the clone size approach.") - #voi_ch <- subset(vars_tib, mean_cov > min_coverage & - # n0 > ceiling(fraction_negative_cells * ncol(se)) & - # 
VAF_threshold > min_clone_size)$var - voi_ch <- subset(vars, mean_cov > min_coverage & - n0 > ceiling(fraction_negative_cells * ncol(se)) & - VAF_threshold > min_clone_size) + if(verbose) print("Thresholding using the clone size approach.") + voi_ch <- subset(vars, vars$mean_cov > min_coverage & + vars$n0 > ceiling(fraction_negative_cells * ncol(se)) & + vars$VAF_threshold > min_clone_size) voi_ch <- rownames(voi_ch) return(voi_ch) } diff --git a/R/VariantCorrelationHeatmap.R b/R/VariantCorrelationHeatmap.R index 7892988..f278d52 100644 --- a/R/VariantCorrelationHeatmap.R +++ b/R/VariantCorrelationHeatmap.R @@ -1,7 +1,13 @@ #'VariantCorrelationHeatmap #'@description #'We generate a heatmap showing the correlation of somatic variants with the MT variants. -#'@import circlize ComplexHeatmap ggplot2 Matrix parallel rcompanion tidyr grid +#'Packages I want to remove. I cannot see where they are used. +#'ggplot2 parallel rcompanion tidyr +#'@importFrom circlize colorRamp2 +#'@importFrom ComplexHeatmap columnAnnotation rowAnnotation Heatmap draw +#'@importFrom grid gpar +#'@importFrom stats na.omit +#'@importFrom grDevices png dev.off #'@param correlation_results Data.frame with the correlation results. #'@param output_path Path to the output folder. #'@param patient The patient for this heatmap. @@ -10,22 +16,23 @@ #'@param width_use Width of the heatmap in px. #'@param height_use Height of the heatmap in px. #'@param padding_use Space around the heatmap in mm. If this is to low, the variant names might be cut off. +#'@param verbose Should the function be verbose? 
Default = TRUE #'@export VariantCorrelationHeatmap <- function(correlation_results, output_path = NULL, patient, min_alt_cells = 5, min_correlation = 0.5, - width_use = 2000, height_use = 2000, padding_use = c(165,165,2,2)){ + width_use = 2000, height_use = 2000, padding_use = c(165,165,2,2), verbose = TRUE){ correlation_results$P_adj_logged <- -log10(correlation_results$P_adj) - correlation_results <- subset(correlation_results, P_adj_logged > -log10(0.05)) - correlation_results <- subset(correlation_results, Cells_1_Alt >= min_alt_cells & Cells_2_Alt >= min_alt_cells) - correlation_results <- subset(correlation_results, Corr > min_correlation) + correlation_results <- subset(correlation_results, correlation_results$P_adj_logged > -log10(0.05)) + correlation_results <- subset(correlation_results, correlation_results$Cells_1_Alt >= min_alt_cells & correlation_results$Cells_2_Alt >= min_alt_cells) + correlation_results <- subset(correlation_results, correlation_results$Corr > min_correlation) - print("We get the unique variants.") + if(verbose) print("We get the unique variants.") somatic_uniques <- unique(correlation_results$Variant1) mt_uniques <- unique(correlation_results$Variant2) - print("Getting the maximum P value.") - pvalue_max <- as.numeric(na.omit(correlation_results$P_adj_logged)) + if(verbose) print("Getting the maximum P value.") + pvalue_max <- as.numeric(stats::na.omit(correlation_results$P_adj_logged)) if(length(pvalue_max) > 1){ pvalue_max <- pvalue_max[pvalue_max != Inf] if(length(pvalue_max) >= 1){ @@ -38,51 +45,51 @@ VariantCorrelationHeatmap <- function(correlation_results, output_path = NULL, p pvalue_max <- max(pvalue_max, 100) } correlation_results$P_adj_logged[correlation_results$P_adj_logged == Inf] <- pvalue_max - col_fun <- colorRamp2(c(0,pvalue_max), c("white", "red")) + col_fun <- circlize::colorRamp2(c(0,pvalue_max), c("white", "red")) - print("We set insignificant P values to NA.") + if(verbose) print("We set insignificant P values 
to NA.") correlation_results$P_adj_logged <- ifelse(correlation_results$P_adj_logged > -log10(0.05), correlation_results$P_adj_logged, NA) - print("We generate a matrix with the adjusted P values.") + if(verbose) print("We generate a matrix with the adjusted P values.") p_values <- matrix(NA, nrow = length(somatic_uniques), ncol = length(mt_uniques)) rownames(p_values) <- somatic_uniques colnames(p_values) <- mt_uniques for(i in 1:length(somatic_uniques)){ - correlation_results_subset <- subset(correlation_results, Variant1 == somatic_uniques[i]) + correlation_results_subset <- subset(correlation_results, correlation_results$Variant1 == somatic_uniques[i]) p_values_use <- correlation_results_subset$P_adj_logged names(p_values_use) <- correlation_results_subset$Variant2 p_values[somatic_uniques[i],names(p_values_use)] <- p_values_use } - print("Setting the column and row annotations for the heat map.") + if(verbose) print("Setting the column and row annotations for the heat map.") annotation_top <- ComplexHeatmap::columnAnnotation(Mutations = mt_uniques, show_legend = FALSE, show_annotation_name = FALSE) annotation_left <- ComplexHeatmap::rowAnnotation(Mutations = somatic_uniques, show_legend = FALSE, show_annotation_name = FALSE) - print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") + if(verbose) print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") if(all(dim(p_values) > 0)){ - print("Generating the actual heat map.") - p1 <- Heatmap(p_values, name = "-log10(P)", - column_title = paste0("Patient ", patient, "\nLogged adj. 
P values between the mutations"), - row_title = "", show_row_names = TRUE, show_column_names = TRUE, - col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, - column_title_gp = grid::gpar(fontsize = 40), row_title_gp = grid::gpar(fontsize = 40), - column_names_gp = grid::gpar(fontsize = 40), row_names_gp = grid::gpar(fontsize = 40), - column_names_rot = 45, - row_names_side = "left", - heatmap_legend_param = list(labels_gp = gpar(fontsize = 40), title_gp = gpar(fontsize = 40, fontface = "bold")), - cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) + if(verbose) print("Generating the actual heat map.") + p1 <- ComplexHeatmap::Heatmap(p_values, name = "-log10(P)", + column_title = paste0("Patient ", patient, "\nLogged adj. P values between the mutations"), + row_title = "", show_row_names = TRUE, show_column_names = TRUE, + col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, + column_title_gp = grid::gpar(fontsize = 40), row_title_gp = grid::gpar(fontsize = 40), + column_names_gp = grid::gpar(fontsize = 40), row_names_gp = grid::gpar(fontsize = 40), + column_names_rot = 45, + row_names_side = "left", + heatmap_legend_param = list(labels_gp = grid::gpar(fontsize = 40), title_gp = grid::gpar(fontsize = 40, fontface = "bold")), + cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) if(!is.null(output_path)){ - print("Saving the png.") - png(paste0(output_path, "Correlation_Pvalue_", patient, ".png"), width = width_use, height = height_use, units = "px", type = "cairo", antialias = "none") - draw(p1, padding = unit(padding_use, "mm")) - dev.off() + if(verbose) print("Saving the png.") + grDevices::png(paste0(output_path, "Correlation_Pvalue_", patient, ".png"), width = width_use, height = height_use, units = "px", type = "cairo", antialias = "none") + ComplexHeatmap::draw(p1, padding = 
unit(padding_use, "mm")) + grDevices::dev.off() } else{ return(p1) } diff --git a/R/VariantFisherTestHeatmap.R b/R/VariantFisherTestHeatmap.R index 3efe177..131e373 100644 --- a/R/VariantFisherTestHeatmap.R +++ b/R/VariantFisherTestHeatmap.R @@ -1,26 +1,31 @@ #'VariantFisherTestHeatmap #'@description #'We generate a heatmap showing the Fisher test of somatic variants with the MT variants. -#'@import circlize ComplexHeatmap ggplot2 Matrix parallel rcompanion tidyr grid +#'Packages I want to remove. +#'@importFrom ComplexHeatmap columnAnnotation rowAnnotation Heatmap +#'@importFrom circlize colorRamp2 +#'@importFrom grid gpar +#'@importFrom stats na.omit #'@param fisher_results Data.frame with the correlation results. #'@param patient The patient for this heatmap. #'@param min_alt_cells Minimum number of mutated cells needed, otherwise an association will not be plotted. #'@param min_oddsratio Minimum correlation needed. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, min_oddsratio = 1){ +VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, min_oddsratio = 1, verbose = TRUE){ fisher_results$P_adj_logged <- -log10(fisher_results$P_adj) - fisher_results <- subset(fisher_results, P_adj_logged > -log10(0.05)) - fisher_results <- subset(fisher_results, Cells_Alt_1_2 >= min_alt_cells) - fisher_results <- subset(fisher_results, OddsRatio > min_oddsratio) - - - print("We get the unique variants.") + fisher_results <- subset(fisher_results, fisher_results$P_adj_logged > -log10(0.05)) + fisher_results <- subset(fisher_results, fisher_results$Cells_Alt_1_2 >= min_alt_cells) + fisher_results <- subset(fisher_results, fisher_results$OddsRatio > min_oddsratio) + + + if(verbose) print("We get the unique variants.") somatic_uniques <- unique(fisher_results$Variant1) mt_uniques <- unique(fisher_results$Variant2) - - - print("Getting the maximum P 
value.") - pvalue_max <- as.numeric(na.omit(fisher_results$P_adj_logged)) + + + if(verbose) print("Getting the maximum P value.") + pvalue_max <- as.numeric(stats::na.omit(fisher_results$P_adj_logged)) if(length(pvalue_max) > 1){ pvalue_max <- pvalue_max[pvalue_max != Inf] if(length(pvalue_max) >= 1){ @@ -33,41 +38,41 @@ VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, pvalue_max <- max(pvalue_max, 100) } fisher_results$P_adj_logged[fisher_results$P_adj_logged == Inf] <- pvalue_max - col_fun <- colorRamp2(c(0,pvalue_max), c("white", "red")) + col_fun <- circlize::colorRamp2(c(0,pvalue_max), c("white", "red")) - print("We set insignificant P values to NA.") + if(verbose) print("We set insignificant P values to NA.") fisher_results$P_adj_logged <- ifelse(fisher_results$P_adj_logged > -log10(0.05), fisher_results$P_adj_logged, NA) - print("We generate a matrix with the adjusted P values.") + if(verbose) print("We generate a matrix with the adjusted P values.") p_values <- matrix(NA, nrow = length(somatic_uniques), ncol = length(mt_uniques)) rownames(p_values) <- somatic_uniques colnames(p_values) <- mt_uniques for(i in 1:length(somatic_uniques)){ - fisher_results_subset <- subset(fisher_results, Variant1 == somatic_uniques[i]) + fisher_results_subset <- subset(fisher_results, fisher_results$Variant1 == somatic_uniques[i]) p_values_use <- fisher_results_subset$P_adj_logged names(p_values_use) <- fisher_results_subset$Variant2 - p_values[somatic_uniques[i],names(p_values_use)] <- p_values_use + p_values[somatic_uniques[i], names(p_values_use)] <- p_values_use } - print("Setting the column and row annotations for the heat map.") + if(verbose) print("Setting the column and row annotations for the heat map.") annotation_top <- ComplexHeatmap::columnAnnotation(Mutations = mt_uniques, show_legend = FALSE, show_annotation_name = FALSE) annotation_left <- ComplexHeatmap::rowAnnotation(Mutations = somatic_uniques, show_legend = FALSE, 
show_annotation_name = FALSE) - print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") + if(verbose) print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") if(all(dim(p_values) > 0)){ - print("Generating the actual heat map.") - p <- Heatmap(p_values, name = "-log10(P)", - column_title = paste0("Patient ", patient, "\nLogged adj. P values between the variants"), - row_title = "", show_row_names = TRUE, show_column_names = TRUE, - col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, - column_names_rot = 45, row_names_side = "left", - column_names_gp = grid::gpar(hjust = 1), - cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) + if(verbose) print("Generating the actual heat map.") + p <- ComplexHeatmap::Heatmap(p_values, name = "-log10(P)", + column_title = paste0("Patient ", patient, "\nLogged adj. P values between the variants"), + row_title = "", show_row_names = TRUE, show_column_names = TRUE, + col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, + column_names_rot = 45, row_names_side = "left", + column_names_gp = grid::gpar(hjust = 1), + cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) } return(p) } diff --git a/R/VariantQuantileThresholding.R b/R/VariantQuantileThresholding.R index 16b1dfd..a0f7314 100755 --- a/R/VariantQuantileThresholding.R +++ b/R/VariantQuantileThresholding.R @@ -4,7 +4,8 @@ #'If you use top_cells and top_VAF, you have to only supply one quantil value (quantiles = 0.9, thresholds = 0). #'This function is adapted from the Peter van Galen. 
#'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import dplyr SummarizedExperiment +#'@importFrom SummarizedExperiment assays colData rowData +#'@importFrom stats quantile #'@param SE SummarizedExperiment object. #'@param min_coverage Minimum coverage needed. #'@param quantiles The lower and upper quantile you want to use. @@ -17,70 +18,78 @@ #'@param group1 The first group of interest. #'@param group2 The second group of interest. #'@param group_factor How much higher has the mean allele frequency to be in group 1 when compared to group 2? +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = 30, mean_allele_frequency = 0, - group_of_interest = NULL, group1 = NULL, group2 = NULL, group_factor = NULL){ - print("Get the mean allele frequency and coverage.") - mean_af <- rowMeans(assays(SE)[["fraction"]], na.rm = TRUE) - mean_cov <- rowMeans(assays(SE)[["coverage"]], na.rm = TRUE) +VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = NULL, mean_allele_frequency = 0, + group_of_interest = NULL, group1 = NULL, group2 = NULL, group_factor = NULL, verbose = TRUE){ + if(verbose) print("Get the mean allele frequency and coverage.") + mean_af <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]], na.rm = TRUE) + mean_cov <- rowMeans(SummarizedExperiment::assays(SE)[["coverage"]], na.rm = TRUE) + if(all(is.null(min_quality), is.numeric(min_quality))) stop("Error: Your minimum quality is not either NULL or a numeric.") + if(all(is.null(mean_allele_frequency), is.numeric(mean_allele_frequency))) stop("Error: Your mean allele frequency is not either NULL or a numeric.") + if(all(!is.null(group_of_interest), !is.null(group1), !is.null(group2))){ - if(!group_of_interest %in% 
colnames(colData(SE))) stop("Error: Your group_of_interest is not in the colData.") - if(!group1 %in% colData(SE)[,group_of_interest]) stop("Error: Your group1 is not in the group_of_interest.") - if(!group2 %in% colData(SE)[,group_of_interest]) stop("Error: Your group2 is not in the group_of_interest.") - cells_group1 <- colData(SE)[,group_of_interest, drop = FALSE] + if(!group_of_interest %in% colnames(SummarizedExperiment::colData(SE))) stop("Error: Your group_of_interest is not in the colData.") + if(!group1 %in% SummarizedExperiment::colData(SE)[, group_of_interest]) stop("Error: Your group1 is not in the group_of_interest.") + if(!group2 %in% SummarizedExperiment::colData(SE)[, group_of_interest]) stop("Error: Your group2 is not in the group_of_interest.") + cells_group1 <- SummarizedExperiment::colData(SE)[, group_of_interest, drop = FALSE] cells_group1 <- cells_group1[cells_group1[, group_of_interest] == group1, , drop = FALSE] - cells_group2 <- colData(SE)[,group_of_interest, drop = FALSE] + cells_group2 <- SummarizedExperiment::colData(SE)[, group_of_interest, drop = FALSE] cells_group2 <- cells_group2[cells_group2[, group_of_interest] == group2, , drop = FALSE] - mean_af_group1 <- rowMeans(assays(SE)[["fraction"]][,rownames(cells_group1)], na.rm = TRUE) - mean_af_group2 <- rowMeans(assays(SE)[["fraction"]][,rownames(cells_group2)], na.rm = TRUE) + mean_af_group1 <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]][, rownames(cells_group1)], na.rm = TRUE) + mean_af_group2 <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]][, rownames(cells_group2)], na.rm = TRUE) mean_af_group_check <- mean_af_group1 > (group_factor * mean_af_group2) - print("Get the quantiles of the VAFs of each variant.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) - # vars <- do.call(cbind, c(list(mean_af), list(mean_cov), list(rowData(SE)$VariantQuality), quantiles)) - vars <- data.frame(Mean_AF = mean_af, 
Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + if(verbose) print("Get the quantiles of the VAFs of each variant.") + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, stats::quantile, x, na.rm = TRUE)) + if(!is.null(min_quality)){ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, vars$VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + } vars <- vars[mean_af_group_check,] - print("Thresholding using the quantile approach.") - if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") + if(verbose) print("Thresholding using the quantile approach.") + if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") if(length(thresholds) != 2) stop("Your thresholds are not of length 2.") - #voi_ch <- subset(vars, vars[,1] > mean_allele_frequency & vars[,2] > min_coverage & vars[,4] < thresholds[1] & vars[,5] > thresholds[2]) - voi_ch <- subset(vars, Mean_AF > mean_allele_frequency & Mean_Cov > min_coverage & Quantile1 < thresholds[1] & Quantile2 > thresholds[2]) - if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) - } + voi_ch <- subset(vars, vars$Mean_AF > mean_allele_frequency & vars$Mean_Cov > min_coverage & vars$Quantile1 < thresholds[1] & vars$Quantile2 > thresholds[2]) + + } else if(any(is.null(top_cells), is.null(top_VAF))){ - print("Get the quantiles of the VAFs of each variant.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) + if(verbose) print("Get the quantiles of the VAFs of each 
variant.") + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) - # vars <- do.call(cbind, c(list(mean_af), list(mean_cov), list(rowData(SE)$VariantQuality), quantiles)) - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + if(!is.null(min_quality)){ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, vars$VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + } - print("Thresholding using the quantile approach.") + if(verbose) print("Thresholding using the quantile approach.") if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") if(length(thresholds) != 2) stop("Your thresholds are not of length 2.") - #voi_ch <- subset(vars, vars[,1] > mean_allele_frequency & vars[,2] > min_coverage & vars[,4] < thresholds[1] & vars[,5] > thresholds[2]) - voi_ch <- subset(vars, Mean_AF > mean_allele_frequency & Mean_Cov > min_coverage & Quantile1 < thresholds[1] & Quantile2 > thresholds[2]) - if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) - } + voi_ch <- subset(vars, vars$Mean_AF > mean_allele_frequency & vars$Mean_Cov > min_coverage & vars$Quantile1 < thresholds[1] & vars$Quantile2 > thresholds[2]) } else{ - print("Get the quantile of the VAF of each variant.") + if(verbose) print("Get the quantile of the VAF of each variant.") if(length(quantiles) > 1) stop("You are providing more than 1 quantile. You should only provide 1.") if(length(thresholds) > 1) stop("You are providing more than 1 threshold. 
You should only provide 1.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) quantiles <- quantiles[[1]] - top_cells_values <- assays(SE)[["fraction"]] + top_cells_values <- SummarizedExperiment::assays(SE)[["fraction"]] top_cells_values <- top_cells_values > top_VAF top_cells_values <- rowSums(top_cells_values) - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile = quantiles, TopCells = top_cells_values) - voi_ch <- subset(vars, Mean_Cov > min_coverage & Quantile <= thresholds[1] & TopCells >= top_cells) if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile = quantiles, TopCells = top_cells_values) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, vars$VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile = quantiles, TopCells = top_cells_values) } + voi_ch <- subset(vars, vars$Mean_Cov > min_coverage & vars$Quantile <= thresholds[1] & vars$TopCells >= top_cells) } voi_ch <- rownames(voi_ch) return(voi_ch) diff --git a/R/VariantWiseCorrelation.R b/R/VariantWiseCorrelation.R index 658a6a0..cb8431d 100644 --- a/R/VariantWiseCorrelation.R +++ b/R/VariantWiseCorrelation.R @@ -2,12 +2,14 @@ #'@description #'We correlate the variants with each other using the Pearson correlation. #'This function calls CalculateCorrelationPValue to perform the actual correlation. -#'@import Matrix parallel SummarizedExperiment +#'@importFrom parallel mclapply +#'@importFrom stats p.adjust #'@param variants_list List of fraction values. 
#'@param n_cores Number of cores you want to use. Numeric. #'@param p_value_adjustment Method for P value adjustment. See p.adjust for details. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr"){ +VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr", verbose = TRUE){ # We correlate the somatic variants with each other and the MT variants. # Since we have tens of thousands of MT variants, we do not correlate them with each other. variants <- names(variants_list) @@ -18,24 +20,25 @@ VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustmen number_of_variants <- length(variants) for(i in 1:length(variants)){ variant_use <- variants[i] - print(paste0("Correlating Variant: ", variant_use, ", ", i, " out of ", number_of_variants)) + if(verbose) print(paste0("Correlating Variant: ", variant_use, ", ", i, " out of ", number_of_variants)) variants_values_use <- variants_list[[variant_use]] variants_list_use <- variants_list[names(variants_list) != variant_use] all_variants <- names(variants_list_use) - results <- mclapply(X = all_variants, CalculateCorrelationPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) + results <- parallel::mclapply(X = all_variants, CalculateCorrelationPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) results <- do.call("rbind", results) results <- data.frame(Variant1 = variant_use, Variant2 = all_variants, P = results[,1], Corr = results[,2], Cells_1_Alt = results[,3], Cells_1_Ref = results[,4], Cells_2_Alt = results[,5], Cells_2_Ref = results[,6]) results_total <- rbind(results_total, results) } - print("We remove the NA P values.") + if(verbose) print("We remove the NA P values.") results_total <- results_total[!is.na(results_total$P),] - print("We remove the 
negative corrlated SNPs.") + if(verbose) print("We remove the negative corrlated SNPs.") results_total <- subset(results_total, Corr > 0) - print(paste0("Adjusting P values using ", p_value_adjustment, ".")) - results_total$P_adj <- p.adjust(results_total$P, method = p_value_adjustment) + if(verbose) print(paste0("Adjusting P values using ", p_value_adjustment, ".")) + results_total$P_adj <- stats::p.adjust(results_total$P, method = p_value_adjustment) + rownames(results_total) <- NULL return(results_total) } diff --git a/R/VariantWiseFisherTest.R b/R/VariantWiseFisherTest.R index bcfe6cd..beb7f88 100644 --- a/R/VariantWiseFisherTest.R +++ b/R/VariantWiseFisherTest.R @@ -2,12 +2,14 @@ #'@description #'We perform the Fisher test to determine which variants are associated. #'This function calls CalculateFisherTestPValue to perform the actual testing. -#'@import Matrix parallel SummarizedExperiment +#'@importFrom parallel mclapply +#'@importFrom stats p.adjust #'@param variants_list List of fraction values. #'@param n_cores Number of cores you want to use. Numeric. #'@param p_value_adjustment Method for P value adjustment. See p.adjust for details. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantWiseFisherTest <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr"){ +VariantWiseFisherTest <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr", verbose = TRUE){ # We correlate the somatic variants with each other and the MT variants. # Since we have tens of thousands of MT variants, we do not correlate them with each other. 
variants <- names(variants_list) @@ -18,24 +20,25 @@ VariantWiseFisherTest <- function(variants_list, n_cores = 1, p_value_adjustment number_of_variants <- length(variants) for(i in 1:number_of_variants){ variant_use <- variants[i] - print(paste0("Testing Variant: ", variant_use, ", ", i, " out of ", number_of_variants)) + if(verbose) print(paste0("Testing Variant: ", variant_use, ", ", i, " out of ", number_of_variants)) variants_values_use <- variants_list[[variant_use]] variants_list_use <- variants_list[names(variants_list) != variant_use] all_variants <- names(variants_list_use) - results <- mclapply(X = all_variants, CalculateFisherTestPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) + results <- parallel::mclapply(X = all_variants, CalculateFisherTestPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) results <- do.call("rbind", results) results <- data.frame(Variant1 = variant_use, Variant2 = all_variants, P = results[,1], OddsRatio = results[,2], Cells_Alt_1_2 = results[,3], Cells_Alt_1_Ref_2 = results[,4], Cells_Alt_2_Ref_1 = results[,5], Cells_Ref_1_2 = results[,6]) results_total <- rbind(results_total, results) } - print("We remove the NA P values.") + if(verbose) print("We remove the NA P values.") results_total <- results_total[!is.na(results_total$P),] - print("We remove the SNPs with a odds ratio lower than 1.") + if(verbose) print("We remove the SNPs with a odds ratio lower than 1.") results_total <- subset(results_total, OddsRatio > 1) - print(paste0("Adjusting P values using ", p_value_adjustment, ".")) - results_total$P_adj <- p.adjust(results_total$P, method = p_value_adjustment) + if(verbose) print(paste0("Adjusting P values using ", p_value_adjustment, ".")) + results_total$P_adj <- stats::p.adjust(results_total$P, method = p_value_adjustment) + rownames(results_total) <- NULL return(results_total) } diff --git a/R/char_to_numeric.R 
b/R/char_to_numeric.R new file mode 100644 index 0000000..75d4406 --- /dev/null +++ b/R/char_to_numeric.R @@ -0,0 +1,12 @@ +#'char_to_numeric +#'@description +#'A function to convert the heterozygous/homozygous information from the VCF to the consensus information from VarTrix. +#'It is only used in LoadingVCF_typewise.R. +#'@param char_value What is the genotype encoding you want to convert? +#'@export +char_to_numeric <- function(char_value) { + if(char_value == "1/1") return(2) + if(char_value %in% c("1/0", "0/1")) return(2) + if(char_value == "0/0") return(1) + return(0) +} diff --git a/R/combine_NAMES.R b/R/combine_NAMES.R index 46d8450..6f97c1c 100644 --- a/R/combine_NAMES.R +++ b/R/combine_NAMES.R @@ -6,5 +6,6 @@ #'@export combine_NAMES <- function(x, y) { shared_names <- intersect(x, y) - c(x, setdiff(y, shared_names)) + combined_names <- c(x, setdiff(y, shared_names)) + return(combined_names) } diff --git a/R/combine_SparseMatrix.R b/R/combine_SparseMatrix.R index f3ff079..e5ed3ec 100644 --- a/R/combine_SparseMatrix.R +++ b/R/combine_SparseMatrix.R @@ -1,7 +1,7 @@ #'combine_sparseMatrix #'@description #'We combine two sparse matrices -#'@import SummarizedExperiment BiocGenerics Matrix +#'@importFrom Matrix sparseMatrix #'@param matrix_1 Your first sparse matrix. #'@param matrix_2 Your second matrix. 
#'@export @@ -41,8 +41,8 @@ combine_SparseMatrix <- function(matrix_1, matrix_2){ positions_2[,"j"] <- new_cols[positions_2[,"j"]] positions_combined <- rbind(positions_1, positions_2) - result <- sparseMatrix(i = positions_combined[,"i"], j = positions_combined[,"j"], x = positions_combined[,"x"], - dimnames = list(variants_unique, cells_unique), dims = c(length(variants_unique), length(cells_unique))) + result <- Matrix::sparseMatrix(i = positions_combined[,"i"], j = positions_combined[,"j"], x = positions_combined[,"x"], + dimnames = list(variants_unique, cells_unique), dims = c(length(variants_unique), length(cells_unique))) } diff --git a/R/computeAFMutMatrix.R b/R/computeAFMutMatrix.R index a4cf860..ac01b42 100644 --- a/R/computeAFMutMatrix.R +++ b/R/computeAFMutMatrix.R @@ -6,49 +6,22 @@ #'See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected #'and https://github.com/caleblareau/mgatk/issues/1 #'We simply set these values to 1, since that is the actual information we have in this case. -#'This issue can be solved on the MAEGATK/GATK side. -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays rowRanges #'@param SE SummarizedExperiment object. +#'@param chromosome_prefix The prefix of the chromosome. 
#'@export computeAFMutMatrix <- function(SE, chromosome_prefix = "chrM"){ - cov <- assays(SE)[["coverage"]] + 0.000001 - ref_allele <- as.character(rowRanges(SE)$refAllele) + cov <- SummarizedExperiment::assays(SE)[["coverage"]] + 0.000001 + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) - getMutMatrix <- function(letter){ - names_rows <- paste0(chromosome_prefix, "_", 1:nrow(cov), "_", toupper(ref_allele), "_", letter) - names_rows <- names_rows[toupper(ref_allele) != letter] - mat_fow <- assays(SE)[[paste0(letter, "_counts_fw")]] - mat_rev <- assays(SE)[[paste0(letter, "_counts_rev")]] - mat <- mat_fow + mat_rev - mat <- mat[toupper(ref_allele) != letter,] - cov_use <- cov[toupper(ref_allele) != letter,] - mat <- mat / cov_use - gc() - # We can get AF values greater than 1, which is due to uninformative reads. - # See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected - # and https://github.com/caleblareau/mgatk/issues/1 - # We simply set these values to 1, since that is the actual information we have in this case. - # This issue can be solved on the MAEGATK/GATK side. 
- mat[mat > 1] <- 1 - rownames(mat) <- names_rows - #mat <- as(mat, "dgCMatrix") - mat <- as(mat, "CsparseMatrix") - return(mat) - } - - A_matrix <- getMutMatrix("A") - #A_matrix <- as.matrix(A_matrix) + A_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "A", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - C_matrix <- getMutMatrix("C") - #C_matrix <- as.matrix(C_matrix) + C_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "C", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - G_matrix <- getMutMatrix("G") - #G_matrix <- as.matrix(G_matrix) + G_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "G", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - T_matrix <- getMutMatrix("T") - #T_matrix <- as.matrix(T_matrix) + T_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "T", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() result <- rbind(A_matrix, C_matrix, G_matrix, T_matrix) -# result <- as.matrix(result) return(result) } diff --git a/R/getAltMatrix.R b/R/getAltMatrix.R index 0979eee..782d00e 100644 --- a/R/getAltMatrix.R +++ b/R/getAltMatrix.R @@ -2,15 +2,14 @@ #'@description #'We get the alt values from the MAEGATK results. #'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays rowRanges #'@param SE_object SummarizedExperiment object. #'@param letter The base you want to use. Character. -#'@param ref_allele The reference alleles. #'@param chromosome_prefix The chromosome prefix used. 
#'@export getAltMatrix <- function(SE_object, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE_object)$refAllele) - mat <- (assays(SE_object)[[paste0(letter, "_counts_fw")]] + assays(SE_object)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE_object)$refAllele) + mat <- (SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", as.character(1:dim(mat)[1]), "_", toupper(ref_allele), ">", letter) mat <- mat[toupper(ref_allele) != letter,] return(mat) diff --git a/R/getMutMatrix.R b/R/getMutMatrix.R new file mode 100644 index 0000000..ea0481c --- /dev/null +++ b/R/getMutMatrix.R @@ -0,0 +1,27 @@ +#'getMutMatrix +#'@description +#'This function gets the allele frequency for a specific allele. It is used in computeAFMutMatrix. +#'Source: https://github.com/petervangalen/MAESTER-2021 +#'@importFrom SummarizedExperiment assays +#'@importFrom methods as +#'@param SE SummarizedExperiment object. +#'@param cov The coverage matrix from MAEGATK/MGATK. +#'@param letter The base we are interested in. +#'@param ref_allele Vector of reference alleles. +#'@param chromosome_prefix The chromosome prefix used. 
+#'@export +getMutMatrix <- function(SE, cov, letter, ref_allele, chromosome_prefix){ + names_rows <- paste0(chromosome_prefix, "_", 1:nrow(cov), "_", toupper(ref_allele), "_", letter) + names_rows <- names_rows[toupper(ref_allele) != letter] + mat_fow <- SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_fw")]] + mat_rev <- SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_rev")]] + mat <- mat_fow + mat_rev + mat <- mat[toupper(ref_allele) != letter,] + cov_use <- cov[toupper(ref_allele) != letter,] + mat <- mat / cov_use + gc() + mat[mat > 1] <- 1 + rownames(mat) <- names_rows + mat <- methods::as(mat, "CsparseMatrix") + return(mat) +} diff --git a/R/getReadMatrix.R b/R/getReadMatrix.R index 6b5e693..deec5e9 100644 --- a/R/getReadMatrix.R +++ b/R/getReadMatrix.R @@ -1,12 +1,12 @@ #'Get the counts for a specific base over all positions. -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays rowRanges #'@param SE SummarizedExperiment object. #'@param letter The base for which we want the counts. #'@param chromosome_prefix The chromosome name used as a prefix. #'@export getReadMatrix <- function(SE, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - mat <- (assays(SE)[[paste0(letter, "_counts_fw")]] + assays(SE)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + mat <- (SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", 1:nrow(mat), "_", toupper(ref_allele), "_", letter) return(mat) } diff --git a/R/getRefMatrix.R b/R/getRefMatrix.R index e0374d4..15b1da7 100644 --- a/R/getRefMatrix.R +++ b/R/getRefMatrix.R @@ -2,15 +2,14 @@ #'@description #'We get the reference values from the MAEGATK result. 
#'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays rowRanges #'@param SE_object SummarizedExperiment object. #'@param letter The base you are analysing. You get a matrix that shows which cells have how many reference reads for this letter. -#'@param ref_allele The reference alleles. #'@param chromosome_prefix The chromosome prefix used. #'@export getRefMatrix <- function(SE_object, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE_object)$refAllele) - mat <- (assays(SE_object)[[paste0(letter, "_counts_fw")]] + assays(SE_object)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE_object)$refAllele) + mat <- (SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", as.character(1:dim(mat)[1]), "_", toupper(ref_allele), ">", letter) mat <- mat[toupper(ref_allele) %in% letter,] return(mat) diff --git a/R/get_consensus.R b/R/get_consensus.R index 1d5ebd2..2c18557 100644 --- a/R/get_consensus.R +++ b/R/get_consensus.R @@ -1,8 +1,9 @@ #'get_consensus #'@description #'We get the consensus information for a specific matrix. -#'@import dplyr MatrixGenerics SummarizedExperiment -#'@param letter The alternative base. +#'I want to remove some packages if they are not needed. See below which package apperantly wasn't needed. +#'@importFrom methods as +#'@param alt_base The alternative base. #'@param ref_base The reference base. #'@param input_matrix Input matrix with the present reads numerically encoded. #'@param chromosome_prefix The chromosome name used as a prefix. @@ -20,8 +21,6 @@ get_consensus <- function(alt_base, ref_base, input_matrix, chromosome_prefix = # Both is not accurate in this context. Therefore, we set these cases to 0 (NoCall). 
other_homo_values <- base_numeric[!base_numeric %in% base_numeric[c(alt_base, ref_base)]] - - #output_matrix <- input_matrix output_matrix <- matrix(0, nrow = nrow(input_matrix), ncol = ncol(input_matrix)) rownames(output_matrix) <- rownames(input_matrix) colnames(output_matrix) <- colnames(input_matrix) @@ -43,7 +42,7 @@ get_consensus <- function(alt_base, ref_base, input_matrix, chromosome_prefix = output_matrix[input_matrix %in% other_homo_values] <- 0 rownames(output_matrix) <- paste0(chromosome_prefix, "_", gsub("[^[:digit:]., ]", "", rownames(output_matrix)), "_", ref_base, "_", alt_base) - #output_matrix <- as(output_matrix, "dgCMatrix") - output_matrix <- as(output_matrix, "CsparseMatrix") + #output_matrix <- methods::as(output_matrix, "dgCMatrix") + output_matrix <- methods::as(output_matrix, "CsparseMatrix") return(output_matrix) } diff --git a/R/ggsci_pal.R b/R/ggsci_pal.R index 254506d..b632a2c 100644 --- a/R/ggsci_pal.R +++ b/R/ggsci_pal.R @@ -1,8 +1,10 @@ #'ggsci_pal #'@description #'Function to return colours from a ggsci palette. -#'@import assertthat ggsci glue +#'@import ggsci +#'@importFrom glue glue #'@param option Your colour palette of choice. +#'@param ... Further options passed to the palette function. #'@details #'The function returns a colour palette from ggsci. #'Options are: @@ -14,8 +16,7 @@ #'ucscgb: 26 #'@export ggsci_pal <- function(option, ...){ - func_name = glue("pal_{option}") - func_call = glue('{func_name}(...)') - assertthat::assert_that(func_name %in% ls("package:ggsci")) + func_name = glue::glue("pal_{option}") + func_call = glue::glue('{func_name}(...)') return(eval(parse(text=func_call))) } diff --git a/R/load_object.R b/R/load_object.R index 3f840d0..82d7286 100644 --- a/R/load_object.R +++ b/R/load_object.R @@ -6,8 +6,13 @@ #'@param file_name The path to the file. 
#'@export load_object <- function(file_name){ - con <- archive::file_read(file = file_name) - res <- readRDS(file = con) - close(con) - return(res) + if(!file.exists(file_name)) stop(paste0("File '",file_name,"' not found.")) + + if(requireNamespace("archive", quietly = TRUE)){ + con <- archive::file_read(file = file_name) + res <- readRDS(file = con) + close(con) + return(res) + } + res <- readRDS(file = file_name) } diff --git a/R/save_object.R b/R/save_object.R index 00968c7..7618f1f 100644 --- a/R/save_object.R +++ b/R/save_object.R @@ -7,18 +7,25 @@ #'@param file_name The path were the file shall be save. #'@param file_format The format of the save file. Has to be one of: zstd, lz4, gzip, bzip2, xz, nocomp. #'@export -save_object <- function(object, file_name, file_format = NULL){ +save_object <- function(object, file_name, file_format = "zstd"){ stopifnot(file_format %in% c("zstd", "lz4", "gzip", "bzip2", "xz", "nocomp")) - if(file_format %in% "nocomp"){ - saveRDS(object = object, file = file_name, compress = FALSE) - return(invisible(NULL)) - } + stopifnot(length(file_format) == 1) if(file_format %in% c("zstd", "lz4")){ - con <- archive::file_write(file = file_name, filter = file_format) - open(con) - saveRDS(object = object, file = con) - close(con) - }else{ - saveRDS(object = object, file = file_name, compress = file_format) + if(requireNamespace("archive", quietly = TRUE)){ + con <- archive::file_write(file = file_name, filter = file_format) + open(con) + saveRDS(object = object, file = con) + close(con) + }else{ + warning("Package 'archive' needs to be installed to compress files in formats 'zstd' and 'lz4'.\n Saving object with default 'saveRDS()' function instead.") + saveRDS(object = object, file = file_name, compress = TRUE) + } + return(invisible(NULL)) } + saveRDS( + object = object, + file = file_name, + compress = ifelse(file_format %in% "nocomp", FALSE, file_format) + ) } + diff --git a/R/sdiv.R b/R/sdiv.R index 8a8cbde..fcefdee 100644 --- 
a/R/sdiv.R +++ b/R/sdiv.R @@ -1,5 +1,5 @@ #'Division of sparse matrix. -#'@import Matrix +#'@importFrom Matrix sparseMatrix #'@param X First sparse matrix. #'@param Y Second sparse matrix. #'@param names The dimension names (dimnames(X)). @@ -8,6 +8,6 @@ sdiv <- function(X, Y, names = dimnames(X)) { sX <- summary(X) sY <- summary(Y) sRes <- merge(sX, sY, by = c("i", "j")) - sparseMatrix(i = sRes[,1], j = sRes[,2], x = sRes[,3] / sRes[,4], - dimnames = names) + result <- Matrix::sparseMatrix(i = sRes[,1], j = sRes[,2], x = sRes[,3] / sRes[,4], dimnames = names) + return() } diff --git a/README.md b/README.md index 29b2305..f02514c 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.2.23 +# Current Features v0.2.32 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. diff --git a/docs/404.html b/docs/404.html index 78aa6b2..b487215 100644 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -39,7 +39,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/articles/SiGURD.html b/docs/articles/SiGURD.html index 23fae85..7ca8514 100644 --- a/docs/articles/SiGURD.html +++ b/docs/articles/SiGURD.html @@ -6,12 +6,12 @@ -SiGURD vignette • sigurd +SiGURD • sigurd - + Calculating the Minor Allele Frequency. — CalculateAlleleFrequency • sigurdCalculating the Minor Allele Frequency. — CalculateAlleleFrequency • sigurd @@ -10,7 +12,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +26,7 @@ Articles - SiGURD vignette + SiGURD @@ -43,12 +45,13 @@ - We calculate the MAF for the MAEGATK results. + We calculate the MAF from a reference reads matrix and an alternative reads matrix. +This function is intended to be used with the mitochondrial genome and not with other somatic mutations. 
Usage - CalculateAlleleFrequency(reference_reads, alternative_reads) + CalculateAlleleFrequency(reference_reads, alternative_reads, pseudo_count = 0) @@ -60,6 +63,10 @@ ArgumentsOn this page diff --git a/docs/reference/CalculateAltReads.html b/docs/reference/CalculateAltReads.html index 893c4b6..39543ac 100644 --- a/docs/reference/CalculateAltReads.html +++ b/docs/reference/CalculateAltReads.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/CalculateConsensus.html b/docs/reference/CalculateConsensus.html index be4f623..8629475 100644 --- a/docs/reference/CalculateConsensus.html +++ b/docs/reference/CalculateConsensus.html @@ -1,5 +1,25 @@ -We calculate the consensus information from the MAEGATK results. — CalculateConsensus • sigurdCalculateConsensus — CalculateConsensus • sigurd @@ -10,7 +30,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +44,7 @@ Articles - SiGURD vignette + SiGURD @@ -37,18 +57,28 @@ - We calculate the consensus information from the MAEGATK results. + CalculateConsensus CalculateConsensus.Rd - We calculate the consensus information from the MAEGATK results. + We calculate the consensus information from the MAEGATK results. +We set cells that have only alternative reads to 2 (Alternative). +We set cells that have only reference reads to 1 (Reference). +We set cells that have a mixture of alternative and reference reads to 3 (Both). +We set cells that have no reads to 0 (NoCall). +Please note. Cells can have reads for the reference of a specific variant and no reads for the alternative. +The cell can still have a reads for the other alternative alleles. Then the cell is still considered as 0 (NoCall) for this variant. +For example: +A cell has at position 3: 0 A reads, 53 T reads, 63 C reads, 148 T reads. +For the variant chrM_3_T_A, the cell would have 53 reference reads, but also reads for other variants at this position. 
+To make sure that there is no confusion, the cell is set to NoCall. Usage - CalculateConsensus(SE, chromosome_prefix = "chrM") + CalculateConsensus(SE, chromosome_prefix = "chrM", verbose = FALSE) @@ -60,6 +90,10 @@ ArgumentsOn this page diff --git a/docs/reference/CalculateCorrelationPValue.html b/docs/reference/CalculateCorrelationPValue.html index 34240cf..fe5011e 100644 --- a/docs/reference/CalculateCorrelationPValue.html +++ b/docs/reference/CalculateCorrelationPValue.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/CalculateCoverage.html b/docs/reference/CalculateCoverage.html index c3d4c73..946239b 100644 --- a/docs/reference/CalculateCoverage.html +++ b/docs/reference/CalculateCoverage.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/CalculateFisherTestPValue.html b/docs/reference/CalculateFisherTestPValue.html index f570309..a9f2807 100644 --- a/docs/reference/CalculateFisherTestPValue.html +++ b/docs/reference/CalculateFisherTestPValue.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/CalculateQuality.html b/docs/reference/CalculateQuality.html index 70978f8..09d3996 100644 --- a/docs/reference/CalculateQuality.html +++ b/docs/reference/CalculateQuality.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD @@ -48,11 +48,7 @@ Usage - CalculateQuality( - SE, - variants = rownames(reads_alt), - chromosome_prefix = "chrM" -) + CalculateQuality(SE, variants, chromosome_prefix = "chrM") @@ -61,6 +57,10 @@ Argumentssigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/CombineSEobjects.html b/docs/reference/CombineSEobjects.html index 6f13f77..4d661d1 100644 --- a/docs/reference/CombineSEobjects.html +++ 
b/docs/reference/CombineSEobjects.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/Filtering.html b/docs/reference/Filtering.html index e06c381..64998cc 100644 --- a/docs/reference/Filtering.html +++ b/docs/reference/Filtering.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD @@ -53,10 +53,10 @@ Usage blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, - path_seurat = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, - reject_value = "NoCall" + reject_value = "NoCall", + verbose = TRUE ) @@ -78,10 +78,6 @@ ArgumentsArguments Details We do this for one sample at a time. We want to remove:all cells that are blacklisted, -all cells that are not in a Seurat object, all cells that do not have at least one variant with >1 (Reference), all variants that are for alternative transcripts, all variants that are always NoCall, -set variants with a VAF below a threshold to reference. +set variants with a VAF below a threshold to NoCall or Reference. 
diff --git a/docs/reference/GetCellInfoPerVariant.html b/docs/reference/GetCellInfoPerVariant.html index e1ff3a2..037b99f 100644 --- a/docs/reference/GetCellInfoPerVariant.html +++ b/docs/reference/GetCellInfoPerVariant.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD @@ -48,7 +48,7 @@ Usage - GetCellInfoPerVariant(se, voi_ch) + GetCellInfoPerVariant(se, voi_ch, verbose = FALSE) @@ -60,6 +60,10 @@ ArgumentsOn this page diff --git a/docs/reference/GetVariantInfo.html b/docs/reference/GetVariantInfo.html new file mode 100644 index 0000000..6e27b0c --- /dev/null +++ b/docs/reference/GetVariantInfo.html @@ -0,0 +1,95 @@ + +GetVariantInfo — GetVariantInfo • sigurd + Skip to contents + + + + + sigurd + + 0.2.30 + + + + + + + + + Reference + + + Articles + + SiGURD + + + + + + + + + + + + + GetVariantInfo + + GetVariantInfo.Rd + + + + We get the genotyping information for a set of variants. +The function returns a matrix with the values from the specified assay. + + + + Usage + GetVariantInfo(SE, information = "consensus", variants = NULL, cells = NULL) + + + + Arguments + SE +SummarizedExperiment object. + + +information +The assay with the desired information. Default: consensus + + +variants +A vector of variants. + + +cells +A vector of cell IDs. On default all cells are returned. 
Default: NULL + + + + + + + + + + + + + + diff --git a/docs/reference/HeatmapVoi.html b/docs/reference/HeatmapVoi.html index cf703c0..45f63b7 100644 --- a/docs/reference/HeatmapVoi.html +++ b/docs/reference/HeatmapVoi.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD @@ -48,7 +48,13 @@ Usage - HeatmapVoi(SE, voi, annotation_trait = NULL, column_title = NULL) + HeatmapVoi( + SE, + voi, + annotation_trait = NULL, + column_title = NULL, + remove_empty_cells = FALSE +) @@ -64,6 +70,14 @@ ArgumentsOn this page diff --git a/docs/reference/LoadingMAEGATK_typewise.html b/docs/reference/LoadingMAEGATK_typewise.html index 19ff0ed..4c3a0da 100644 --- a/docs/reference/LoadingMAEGATK_typewise.html +++ b/docs/reference/LoadingMAEGATK_typewise.html @@ -2,10 +2,22 @@ LoadingMAEGATK_typewise — LoadingMAEGATK_typewise • sigurdLoadingMAEGATK_typewise — LoadingMAEGATK_typewise • sigurd @@ -16,7 +28,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -30,7 +42,7 @@ Articles - SiGURD vignette + SiGURD @@ -52,8 +64,11 @@ We load the MAEGATK output and transform it to be compatible with the VarTrix output. The input file is a specifically formated csv file with all the necessary information to run the analysis. Note that the source column in the input file needs to be one of the following: vartrix, mgaetk, mgatk. -This is hard coded and case insensitive. - +If you want to only load a single sample without the use of an input file, you have to set the following variables.samples_path +barcodes_path +patient +samples_file = NULL + Usage @@ -64,7 +79,8 @@ Usage type_use = "scRNAseq_MT", chromosome_prefix = "chrM", min_cells = 2, - barcodes_path = NULL + barcodes_path = NULL, + verbose = TRUE ) @@ -89,6 +105,18 @@ Arguments + +min_cells +The minimum number of cells with coverage for a variant. Variants with coverage in less than this amount of cells are removed. Default = 2 + + +barcodes_path +Path to the barcodes file tsv. 
Default = NULL + + +verbose +Should the function be verbose? Default = TRUE + @@ -90,6 +90,10 @@ ArgumentsArgumentsOn this page diff --git a/docs/reference/Merging_SE_list.html b/docs/reference/Merging_SE_list.html index fadff95..7fd9f96 100644 --- a/docs/reference/Merging_SE_list.html +++ b/docs/reference/Merging_SE_list.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/RowWiseSplit.html b/docs/reference/RowWiseSplit.html index 1d0a970..38bd5e2 100644 --- a/docs/reference/RowWiseSplit.html +++ b/docs/reference/RowWiseSplit.html @@ -14,7 +14,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -28,7 +28,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/SeparatingMatrixToList.html b/docs/reference/SeparatingMatrixToList.html index dfb535d..3e63c14 100644 --- a/docs/reference/SeparatingMatrixToList.html +++ b/docs/reference/SeparatingMatrixToList.html @@ -16,7 +16,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -30,7 +30,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/SetVariantInfo.html b/docs/reference/SetVariantInfo.html new file mode 100644 index 0000000..7d6cc31 --- /dev/null +++ b/docs/reference/SetVariantInfo.html @@ -0,0 +1,95 @@ + +GetVariantInfo — SetVariantInfo • sigurd + Skip to contents + + + + + sigurd + + 0.2.30 + + + + + + + + + Reference + + + Articles + + SiGURD + + + + + + + + + + + + + GetVariantInfo + + SetVariantInfo.Rd + + + + We add the genotyping information for a set of variants to a Seurat object. +The function returns a matrix with the values from the specified assay. + + + + Usage + SetVariantInfo(SE, seurat_object, information = "consensus", variants = NULL) + + + + Arguments + SE +SummarizedExperiment object. + + +seurat_object +The Seurat object. + + +information +The assay with the desired information. Default: consensus + + +variants +A vector of variants. 
+ + + + + + + + + + + + + + diff --git a/docs/reference/VariantBurden.html b/docs/reference/VariantBurden.html index a03b092..85f8c6c 100644 --- a/docs/reference/VariantBurden.html +++ b/docs/reference/VariantBurden.html @@ -12,7 +12,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -26,7 +26,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/VariantCloneSizeThresholding.html b/docs/reference/VariantCloneSizeThresholding.html index c1c4f48..e43e1ad 100644 --- a/docs/reference/VariantCloneSizeThresholding.html +++ b/docs/reference/VariantCloneSizeThresholding.html @@ -12,7 +12,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -26,7 +26,7 @@ Articles - SiGURD vignette + SiGURD @@ -56,7 +56,8 @@ Usage min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, - vaf_threshold = 0.5 + vaf_threshold = 0.5, + verbose = TRUE ) @@ -81,6 +82,10 @@ ArgumentsOn this page diff --git a/docs/reference/VariantCorrelationHeatmap.html b/docs/reference/VariantCorrelationHeatmap.html index ec22c38..53b40a3 100644 --- a/docs/reference/VariantCorrelationHeatmap.html +++ b/docs/reference/VariantCorrelationHeatmap.html @@ -1,5 +1,9 @@ -VariantCorrelationHeatmap — VariantCorrelationHeatmap • sigurdVariantCorrelationHeatmap — VariantCorrelationHeatmap • sigurd @@ -10,7 +14,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +28,7 @@ Articles - SiGURD vignette + SiGURD @@ -43,7 +47,9 @@ - We generate a heatmap showing the correlation of somatic variants with the MT variants. + We generate a heatmap showing the correlation of somatic variants with the MT variants. +Packages I want to remove. I cannot see where they are used. 
+ggplot2 parallel rcompanion tidyr @@ -56,7 +62,8 @@ Usage min_correlation = 0.5, width_use = 2000, height_use = 2000, - padding_use = c(165, 165, 2, 2) + padding_use = c(165, 165, 2, 2), + verbose = TRUE ) @@ -93,6 +100,10 @@ ArgumentsOn this page diff --git a/docs/reference/VariantFisherTestHeatmap.html b/docs/reference/VariantFisherTestHeatmap.html index 75c9a4c..4b3c7f6 100644 --- a/docs/reference/VariantFisherTestHeatmap.html +++ b/docs/reference/VariantFisherTestHeatmap.html @@ -1,5 +1,9 @@ -VariantFisherTestHeatmap — VariantFisherTestHeatmap • sigurdVariantFisherTestHeatmap — VariantFisherTestHeatmap • sigurd @@ -10,7 +14,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +28,7 @@ Articles - SiGURD vignette + SiGURD @@ -43,7 +47,9 @@ - We generate a heatmap showing the Fisher test of somatic variants with the MT variants. + We generate a heatmap showing the Fisher test of somatic variants with the MT variants. +Packages I want to remove. +ggplot2 parallel rcompanion tidyr @@ -52,7 +58,8 @@ Usage fisher_results, patient, min_alt_cells = 5, - min_oddsratio = 1 + min_oddsratio = 1, + verbose = TRUE ) @@ -73,6 +80,10 @@ ArgumentsOn this page diff --git a/docs/reference/VariantQuantileThresholding.html b/docs/reference/VariantQuantileThresholding.html index e472815..8bf616e 100644 --- a/docs/reference/VariantQuantileThresholding.html +++ b/docs/reference/VariantQuantileThresholding.html @@ -16,7 +16,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -30,7 +30,7 @@ Articles - SiGURD vignette + SiGURD @@ -64,7 +64,13 @@ Usage thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, - min_quality = 30 + min_quality = NULL, + mean_allele_frequency = 0, + group_of_interest = NULL, + group1 = NULL, + group2 = NULL, + group_factor = NULL, + verbose = TRUE ) @@ -97,6 +103,30 @@ ArgumentsOn this page diff --git a/docs/reference/VariantWiseCorrelation.html b/docs/reference/VariantWiseCorrelation.html index a14a798..74402e0 100644 --- a/docs/reference/VariantWiseCorrelation.html +++ 
b/docs/reference/VariantWiseCorrelation.html @@ -1,7 +1,11 @@ VariantWiseCorrelation — VariantWiseCorrelation • sigurd @@ -12,7 +16,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -26,7 +30,7 @@ Articles - SiGURD vignette + SiGURD @@ -46,12 +50,19 @@ We correlate the variants with each other using the Pearson correlation. -This function calls CalculateCorrelationPValue to perform the actual correlation. +This function calls CalculateCorrelationPValue to perform the actual correlation. +Packages I want to remove. +SummarizedExperiment Usage - VariantWiseCorrelation(variants_list, n_cores = 1, p_value_adjustment = "fdr") + VariantWiseCorrelation( + variants_list, + n_cores = 1, + p_value_adjustment = "fdr", + verbose = TRUE +) @@ -67,6 +78,10 @@ ArgumentsOn this page diff --git a/docs/reference/VariantWiseFisherTest.html b/docs/reference/VariantWiseFisherTest.html index 6f3464c..4190ce6 100644 --- a/docs/reference/VariantWiseFisherTest.html +++ b/docs/reference/VariantWiseFisherTest.html @@ -1,7 +1,11 @@ VariantWiseFisherTest — VariantWiseFisherTest • sigurd @@ -12,7 +16,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -26,7 +30,7 @@ Articles - SiGURD vignette + SiGURD @@ -46,12 +50,19 @@ We perform the Fisher test to determine which variants are associated. -This function calls CalculateFisherTestPValue to perform the actual testing. +This function calls CalculateFisherTestPValue to perform the actual testing. +Packages I want to remove. 
+SummarizedExperiment Usage - VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = "fdr") + VariantWiseFisherTest( + variants_list, + n_cores = 1, + p_value_adjustment = "fdr", + verbose = TRUE +) @@ -67,6 +78,10 @@ ArgumentsOn this page diff --git a/docs/reference/char_to_numeric.html b/docs/reference/char_to_numeric.html new file mode 100644 index 0000000..3af978c --- /dev/null +++ b/docs/reference/char_to_numeric.html @@ -0,0 +1,83 @@ + +char_to_numeric — char_to_numeric • sigurd + Skip to contents + + + + + sigurd + + 0.2.30 + + + + + + + + + Reference + + + Articles + + SiGURD + + + + + + + + + + + + + char_to_numeric + + char_to_numeric.Rd + + + + A function to convert the heterozygous/homozygous information from the VCF to the consensus information from VarTrix. +It is only used in LoadingVCF_typewise.R. + + + + Usage + char_to_numeric(char_value) + + + + Arguments + char_value +What is the genotype encoding you want to convert? + + + + + + + + + + + + + + diff --git a/docs/reference/combine_NAMES.html b/docs/reference/combine_NAMES.html index f5b69b6..6b7c402 100644 --- a/docs/reference/combine_NAMES.html +++ b/docs/reference/combine_NAMES.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/combine_SparseMatrix.html b/docs/reference/combine_SparseMatrix.html index 7391b8a..b58cd9d 100644 --- a/docs/reference/combine_SparseMatrix.html +++ b/docs/reference/combine_SparseMatrix.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/computeAFMutMatrix.html b/docs/reference/computeAFMutMatrix.html index cce0136..56ed031 100644 --- a/docs/reference/computeAFMutMatrix.html +++ b/docs/reference/computeAFMutMatrix.html @@ -4,14 +4,12 @@ We can get AF values greater than 1, which is due to uninformative reads. 
See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected and https://github.com/caleblareau/mgatk/issues/1 -We simply set these values to 1, since that is the actual information we have in this case. -This issue can be solved on the MAEGATK/GATK side.">computeAFMutMatrix — computeAFMutMatrix • sigurdcomputeAFMutMatrix — computeAFMutMatrix • sigurd @@ -22,7 +20,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -36,7 +34,7 @@ Articles - SiGURD vignette + SiGURD @@ -60,8 +58,7 @@ We can get AF values greater than 1, which is due to uninformative reads. See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected and https://github.com/caleblareau/mgatk/issues/1 -We simply set these values to 1, since that is the actual information we have in this case. -This issue can be solved on the MAEGATK/GATK side. +We simply set these values to 1, since that is the actual information we have in this case. @@ -74,6 +71,10 @@ ArgumentsOn this page diff --git a/docs/reference/getAltMatrix.html b/docs/reference/getAltMatrix.html index ddc1493..6215503 100644 --- a/docs/reference/getAltMatrix.html +++ b/docs/reference/getAltMatrix.html @@ -12,7 +12,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -26,7 +26,7 @@ Articles - SiGURD vignette + SiGURD @@ -67,10 +67,6 @@ ArgumentsOn this page diff --git a/docs/reference/getMutMatrix.html b/docs/reference/getMutMatrix.html new file mode 100644 index 0000000..b1f296e --- /dev/null +++ b/docs/reference/getMutMatrix.html @@ -0,0 +1,99 @@ + +getMutMatrix — getMutMatrix • sigurd + Skip to contents + + + + + sigurd + + 0.2.30 + + + + + + + + + Reference + + + Articles + + SiGURD + + + + + + + + + + + + + getMutMatrix + + getMutMatrix.Rd + + + + This function gets the allele frequency for a specific allele. It is used in computeAFMutMatrix. 
+Source: https://github.com/petervangalen/MAESTER-2021 + + + + Usage + getMutMatrix(SE, cov, letter, ref_allele, chromosome_prefix) + + + + Arguments + SE +SummarizedExperiment object. + + +cov +The coverage matrix from MAEGATK/MGATK. + + +letter +The base we are interested in. + + +ref_allele +Vector of reference alleles. + + +chromosome_prefix +The chromosome prefix used. + + + + + + + + + + + + + + diff --git a/docs/reference/getReadMatrix.html b/docs/reference/getReadMatrix.html index ad4ca69..35f1e5b 100644 --- a/docs/reference/getReadMatrix.html +++ b/docs/reference/getReadMatrix.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/getRefMatrix.html b/docs/reference/getRefMatrix.html index ab50cbc..883dc98 100644 --- a/docs/reference/getRefMatrix.html +++ b/docs/reference/getRefMatrix.html @@ -12,7 +12,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -26,7 +26,7 @@ Articles - SiGURD vignette + SiGURD @@ -67,10 +67,6 @@ ArgumentsOn this page diff --git a/docs/reference/get_consensus.html b/docs/reference/get_consensus.html index e130886..2a637b0 100644 --- a/docs/reference/get_consensus.html +++ b/docs/reference/get_consensus.html @@ -1,5 +1,9 @@ -get_consensus — get_consensus • sigurdget_consensus — get_consensus • sigurd @@ -10,7 +14,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +28,7 @@ Articles - SiGURD vignette + SiGURD @@ -43,7 +47,9 @@ - We get the consensus information for a specific matrix. + We get the consensus information for a specific matrix. +I want to remove some packages if they are not needed. See below which package apperantly wasn't needed. +Package to remove: dplyr, SummarizedExperiment @@ -53,7 +59,11 @@ Usage Arguments - ref_base + alt_base +The alternative base. + + +ref_base The reference base. 
@@ -64,10 +74,6 @@ ArgumentsOn this page diff --git a/docs/reference/ggsci_pal.html b/docs/reference/ggsci_pal.html index ebc38b2..1994b54 100644 --- a/docs/reference/ggsci_pal.html +++ b/docs/reference/ggsci_pal.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD @@ -56,6 +56,10 @@ Arguments Details diff --git a/docs/reference/index.html b/docs/reference/index.html index d7b15b2..530e3e7 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD @@ -70,7 +70,7 @@ All functionsCalculateConsensus() - We calculate the consensus information from the MAEGATK results. + CalculateConsensus CalculateCorrelationPValue() @@ -113,6 +113,11 @@ All functionsGetVariantInfo() + + GetVariantInfo + + HeatmapVoi() HeatmapVoi @@ -123,9 +128,9 @@ All functionsLoadingVarTrix() + LoadingVCF_typewise() - Loading VarTrix results for the down stream analysis. + LoadingVCF_typewise LoadingVarTrix_typewise() @@ -148,6 +153,11 @@ All functionsSetVariantInfo() + + GetVariantInfo + + VariantBurden() VariantBurden @@ -183,6 +193,11 @@ All functionschar_to_numeric() + + char_to_numeric + + combine_NAMES() combine_NAMES @@ -203,6 +218,11 @@ All functionsgetMutMatrix() + + getMutMatrix + + getReadMatrix() Get the counts for a specific base over all positions. 
diff --git a/docs/reference/load_object.html b/docs/reference/load_object.html index fa030e6..8748c6c 100644 --- a/docs/reference/load_object.html +++ b/docs/reference/load_object.html @@ -12,7 +12,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -26,7 +26,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/reference/save_object.html b/docs/reference/save_object.html index 12fac08..4cb5524 100644 --- a/docs/reference/save_object.html +++ b/docs/reference/save_object.html @@ -12,7 +12,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -26,7 +26,7 @@ Articles - SiGURD vignette + SiGURD @@ -51,7 +51,7 @@ Usage - save_object(object, file_name, file_format = NULL) + save_object(object, file_name, file_format = "zstd") diff --git a/docs/reference/sdiv.html b/docs/reference/sdiv.html index 5e52e68..60ff92b 100644 --- a/docs/reference/sdiv.html +++ b/docs/reference/sdiv.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 @@ -24,7 +24,7 @@ Articles - SiGURD vignette + SiGURD diff --git a/docs/search.json b/docs/search.json index 46c09fb..161ef3c 100644 --- a/docs/search.json +++ b/docs/search.json @@ -1 +1 @@ -[{"path":"/articles/SiGURD.html","id":"your-input-file-","dir":"Articles","previous_headings":"","what":"Your input file.","title":"SiGURD vignette","text":"","code":"sample_path <- system.file(\"extdata\", \"Input_Example_local.csv\", package = \"sigurd\") sample_file <- read.csv(sample_path) print(sample_file) ## patient sample source type ## 1 Sample1 Minus_Sample1 VarTrix scRNAseq_Somatic ## 2 Sample1 Minus_Sample1 VarTrix scRNAseq_MT ## 3 Sample1 Minus_Sample1 MAEGATK scRNAseq_MT ## 4 Sample1 Plus_Sample1 VarTrix scRNAseq_Somatic ## 5 Sample1 Plus_Sample1 VarTrix scRNAseq_MT ## 6 Sample1 Plus_Sample1 MAEGATK scRNAseq_MT ## 7 SW_CellLineMix_All_mr3 SW_CellLineMix_All_mr3 MAEGATK Amplicon_MT ## 8 SW_CellLineMix_RNAseq_mr3 SW_CellLineMix_RNAseq_mr3 MAEGATK scRNAseq_MT ## 9 TenX_BPDCN712_All_mr3 TenX_BPDCN712_All_mr3 MAEGATK Amplicon_MT ## 10 TenX_BPDCN712_RNAseq_mr3 TenX_BPDCN712_RNAseq_mr3 
MAEGATK scRNAseq_MT ## bam ## 1 ~/test_data/Minus_Sample1/possorted_genome_bam.bam ## 2 ~/test_data/Minus_Sample1/possorted_genome_bam.bam ## 3 ~/test_data/Minus_Sample1/possorted_genome_bam.bam ## 4 ~/test_data/Plus_Sample1/possorted_genome_bam.bam ## 5 ~/test_data/Plus_Sample1/possorted_genome_bam.bam ## 6 ~/test_data/Plus_Sample1/possorted_genome_bam.bam ## 7 NADA ## 8 NADA ## 9 NADA ## 10 NADA ## input_folder ## 1 ~/test_data/VarTrix/Somatic/ ## 2 ~/test_data/VarTrix/MT/ ## 3 ~/test_data/MAEGATK/ ## 4 ~/test_data/VarTrix/Somatic/ ## 5 ~/test_data/VarTrix/MT/ ## 6 ~/test_data/MAEGATK/ ## 7 ~/test_data/MAESTER_data/ ## 8 ~/test_data/MAESTER_data/ ## 9 ~/test_data/MAESTER_data/ ## 10 ~/test_data/MAESTER_data/ ## cells ## 1 ~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv ## 2 ~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv ## 3 ~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv ## 4 ~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv ## 5 ~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv ## 6 ~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv ## 7 NADA ## 8 NADA ## 9 NADA ## 10 NADA"},{"path":"/articles/SiGURD.html","id":"your-vcf-files-","dir":"Articles","previous_headings":"","what":"Your vcf files.","title":"SiGURD vignette","text":"files needed VarTrix MGATK/MAEGATK. Since MAEGATK analyses mitochondrial genome, need decide chromosomal prefix data. See loading data .","code":"vcf_path <- system.file(\"extdata\", \"CosmicSubset_filtered.vcf\", package = \"sigurd\") vcf <- readVcf(vcf_path) vcf_info <- info(vcf) print(vcf) ## class: CollapsedVCF ## dim: 1684 0 ## rowRanges(vcf): ## GRanges with 5 metadata columns: paramRangeID, REF, ALT, QUAL, FILTER ## info(vcf): ## DataFrame with 10 columns: GENE, STRAND, GENOMIC_ID, LEGACY_ID, CDS, AA, H... 
## info(header(vcf)): ## Number Type Description ## GENE 1 String Gene name ## STRAND 1 String Gene strand ## GENOMIC_ID 1 String Genomic Mutation ID ## LEGACY_ID 1 String Legacy Mutation ID ## CDS 1 String CDS annotation ## AA 1 String Peptide annotation ## HGVSC 1 String HGVS cds syntax ## HGVSP 1 String HGVS peptide syntax ## HGVSG 1 String HGVS genomic syntax ## CNT 1 Integer How many samples have this mutation ## geno(vcf): ## List of length 0: print(vcf_info) ## DataFrame with 1684 rows and 10 columns ## GENE STRAND GENOMIC_ID LEGACY_ID CDS ## ## 1 ABL1 + NA COSN17133235 c.136+2107G>C ## 2 ABL1 + NA COSN14774721 c.136+2599C>T ## 3 ABL1 + NA COSN17133236 c.136+3198G>C ## 4 ABL1 + NA COSN17133237 c.136+4488G>C ## 5 ABL1 + NA COSN17133050 c.136+5055C>T ## ... ... ... ... ... ... ## 1680 WT1 - NA COSN6609219 c.872+82G>T ## 1681 WT1 - NA COSN17132919 c.872+16G>A ## 1682 WT1 - NA COSN17134797 c.770-57C>T ## 1683 WT1 - NA COSM5020955 c.594C>T ## 1684 ZRSR2 + NA COSM3035276 c.1338_1343dup ## AA HGVSC HGVSP ## ## 1 p.? ENST00000372348.6:c... NA ## 2 p.? ENST00000372348.6:c... NA ## 3 p.? ENST00000372348.6:c... NA ## 4 p.? ENST00000372348.6:c... NA ## 5 p.? ENST00000372348.6:c... NA ## ... ... ... ... ## 1680 p.? ENST00000332351.7:c... NA ## 1681 p.? ENST00000332351.7:c... NA ## 1682 p.? ENST00000332351.7:c... NA ## 1683 p.N198%3D ENST00000332351.7:c... ENSP00000331327.3:p... ## 1684 p.S447_R448dup ENST00000307771.7:c... ENSP00000303015.7:p... ## HGVSG CNT ## ## 1 9:g.130716562G>C 10 ## 2 9:g.130717054C>T 10 ## 3 9:g.130717653G>C 10 ## 4 9:g.130718943G>C 10 ## 5 9:g.130719510C>T 11 ## ... ... ... ## 1680 11:g.32427874C>A 74 ## 1681 11:g.32427940C>T 132 ## 1682 11:g.32428115G>A 108 ## 1683 11:g.32434752G>A 73 ## 1684 X:g.15823131_1582313.. 
10 vcf_path_mt <- system.file(\"extdata\", \"chrM_Input_VCF_NoMAF_Filtering.vcf\", package = \"sigurd\") vcf_mt <- readVcf(vcf_path_mt) vcf_mt_info <- info(vcf_mt) print(vcf_mt) ## class: CollapsedVCF ## dim: 49708 0 ## rowRanges(vcf): ## GRanges with 5 metadata columns: paramRangeID, REF, ALT, QUAL, FILTER ## info(vcf): ## DataFrame with 1 column: ID ## info(header(vcf)): ## Number Type Description ## ID A Character Mutation ## geno(vcf): ## List of length 0: print(vcf_mt_info) ## DataFrame with 49708 rows and 1 column ## ID ## ## chrM:1_G/A 1_G>A ## chrM:3_T/A 3_T>A ## chrM:4_C/A 4_C>A ## chrM:6_C/A 6_C>A ## chrM:8_G/A 8_G>A ## ... ... ## chrM:16564_A/T 16564_A>T ## chrM:16565_C/T 16565_C>T ## chrM:16566_G/T 16566_G>T ## chrM:16567_A/T 16567_A>T ## chrM:16569_G/T 16569_G>T"},{"path":"/articles/SiGURD.html","id":"loading-and-filtering-the-input-data-","dir":"Articles","previous_headings":"","what":"Loading and filtering the input data.","title":"SiGURD vignette","text":"load data per patient merge associated samples automatically. input file, include software tool used analysis. source can either vartrix maegatk/mgatk. respective loading function load files intended . types data available : - scRNAseq_Somatic: standard 10X results analysed somatic variants. - scRNAseq_MT: standard 10X results analysed MT variants. - Amplicon_Somatic: amplicon data analysed somatic variants. - Amplicon_MT: amplicon data analysed MT variants. 
Since MT results denser, take longer load.","code":"Sample1_scRNAseq_Somatic <- LoadingVarTrix_typewise(samples_file = sample_path, vcf_path = vcf_path, patient = \"Sample1\", type_use = \"scRNAseq_Somatic\") ## [1] \"Loading the data for patient Sample1.\" ## [1] \"We read in the samples file.\" ## [1] \"We subset to the patient of interest.\" ## [1] \"We get the different samples.\" ## [1] \"We load the SNV files.\" ## [1] \"We read the variants.\" ## [1] \"We read in the cell barcodes output by CellRanger as a list.\" ## [1] \"We read in the vcf file.\" ## [1] \"We generate more accessible names.\" ## [1] \"We read in the different sparse genotype matrices as a list.\" ## [1] \"We have a slot per type of input data.\" ## [1] \"Loading sample 1 of 2\" ## [1] \"Loading sample 2 of 2\" ## [1] \"We generate a large data.frame of all the snv matrices.\" ## [1] \"We remove the matrix lists.\" ## [1] \"We remove variants, that are not detected in at least 2 cells.\" ## [1] \"We remove cells that are always NoCall.\" ## [1] \"scRNAseq_Somatic Variants: 73\" ## [1] \"scRNAseq_Somatic Cells: 571\" ## [1] \"We transform the sparse matrices to matrices, so we can calculate the fraction.\" ## [1] \"We generate a SummarizedExperiment object containing the fraction and the consensus matrices.\" Sample1_scRNAseq_MT <- LoadingMAEGATK_typewise(samples_file = sample_path, patient = \"Sample1\", type_use = \"scRNAseq_MT\") ## [1] \"Loading the data for patient Sample1.\" ## [1] \"We read in the samples file.\" ## [1] \"We subset to the patient of interest.\" ## [1] \"We get the different samples.\" ## [1] \"We read in the cell barcodes output by CellRanger as a list.\" ## [1] \"We load the MAEGATK output files.\" ## [1] \"Loading sample 1 of 2\" ## [1] \"Loading sample 2 of 2\" ## [1] \"We merge the samples.\" ## [1] \"We get the allele frequency.\" ## [1] \"We get the coverage information.\" ## [1] \"We get the number of alternative reads per variant.\" ## [1] \"We get the quality 
information.\" ## [1] \"We get the number of reference reads.\" ## [1] \"Calculating the strand concordance.\" ## [1] \"We calculate the consensus information.\" ## [1] \"We get the read information per position.\" ## [1] \"We add the values together.\" ## [1] \"We get the position according to their reference base.\" ## [1] \"Now, we check the consensus value for all positions with the same reference base.\" ## [1] \"A\" ## [1] \"C\" ## [1] \"G\" ## [1] \"T\" ## [1] \"N\" ## [1] \"Binding the matrices.\" ## [1] \"We perform some filtering to reduce the memory needed.\" ## [1] \"We remove variants, which are not covered in at least 2 cells .\" ## [1] \"We remove cells that are always NoCall.\" ## [1] \"We add the information to the merged matrices.\" Sample1_combined <- CombineSEobjects(se_somatic = Sample1_scRNAseq_Somatic, se_MT = Sample1_scRNAseq_MT, suffixes = c(\"_somatic\", \"_MT\")) rm(Sample1_scRNAseq_Somatic, Sample1_scRNAseq_MT) Sample1_combined <- Filtering(Sample1_combined, min_cells_per_variant = 2, fraction_threshold = 0.05) ## [1] \"We assume that cells with a fraction smaller than our threshold are actually NoCall.\" ## [1] \"We set consensus values to 0 (NoCall) and fraction values to 0.\" ## [1] \"We do not set fractions between 0.05 and 1 to 1.\" ## [1] \"This way, we retain the heterozygous information.\" ## [1] \"We remove all the variants that are always NoCall.\" ## [1] \"We remove variants, that are not at least detected in 2 cells.\" ## [1] \"We remove all cells that are not >= 1 (Ref) for at least 1 variant.\" Sample1_combined <- VariantBurden(Sample1_combined)"},{"path":"/articles/SiGURD.html","id":"determing-mt-variants-of-interest-","dir":"Articles","previous_headings":"","what":"Determing MT variants of interest.","title":"SiGURD vignette","text":"thresholding adapted Miller et al. 
https://github.com/petervangalen/MAESTER-2021 https://www.nature.com/articles/s41587-022-01210-8 heatmap needs time plot, since cells clustered.","code":"voi_ch <- VariantQuantileThresholding(SE = Sample1_combined, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9)) ## [1] \"Get the mean allele frequency and coverage.\" ## [1] \"Get the quantiles of the VAFs of each variant.\" ## [1] \"Collect all information in a tibble\" ## [1] \"Thresholding using the quantile approach.\" hm <- HeatmapVoi(SE = Sample1_combined, voi = voi_ch) print(hm)"},{"path":"/articles/SiGURD.html","id":"association-of-variants","dir":"Articles","previous_headings":"","what":"Association of Variants","title":"SiGURD vignette","text":"Using Fisher’s Exact test, find co-present variants. can also use correlation variants. , combine somatic MT results. Since possible number tests/correlations quite large, can use multiple cores perform calculations.","code":"#Sample1_split_rows <- RowWiseSplit(Sample1_combined, remove_nocalls = FALSE) #results_fishertest <- VariantWiseFisherTest(Sample1_split_rows, n_cores = 8) #rm(Sample1_split_rows) #variant_association_heatmap <- VariantFisherTestHeatmap(results_fishertest, patient = \"Sample1\", min_alt_cells = 3) #print(variant_association_heatmap)"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Martin Grasshoff. Author, maintainer. Ivan Costa Gesteira. Author.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Grasshoff M, Gesteira (2023). sigurd: Single cell Genotyping Using RNA Data. 
R package version 0.2.15, https://costalab.github.io/sigurd/.","code":"@Manual{, title = {sigurd: Single cell Genotyping Using RNA Data}, author = {Martin Grasshoff and Ivan Costa Gesteira}, year = {2023}, note = {R package version 0.2.15}, url = {https://costalab.github.io/sigurd/}, }"},{"path":"/index.html","id":"single-cell-genotyping-using-rna-data-sigurd","dir":"","previous_headings":"","what":"Single cell Genotyping Using RNA Data","title":"Single cell Genotyping Using RNA Data","text":"Martin Graßhoff1 Ivan G. Costa1 1Institute Computational Genomics, Faculty Medicine, RWTH Aachen University, Aachen, 52074 Germany Motivation: advent single RNA seq assays, became possible determine mutational status individual cell. Single cell RNA seq data nature sparse probability hitting specific variants interest therefore low. issue can overcome using modified amplicon assays, also possible impute mutational status using correlation detected mitochondrial somatic variants. Results: Sigurd R package analysis single cell data. determine overall variant burden per cell also number interesting mitochondrial variants using previously published approaches. employ imputation approach utilizes correlation mitochondrial variants somatic variants. Mitochondrial mutations significantly associated somatic mutations used stand-ins.","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Single cell Genotyping Using RNA Data","text":"can install sigurd using following code. vignette requires data currently published, provided reference.","code":"install.packages(\"devtools\") devtools::install_github(\"https://github.com/CostaLab/sigurd.git\", build_vignettes = FALSE) require(sigurd)"},{"path":"/index.html","id":"sigurd","dir":"","previous_headings":"","what":"SiGURD","title":"Single cell Genotyping Using RNA Data","text":"provided small example data set SiGURD. consists chromosome 9 MT one MPN sample. 
mutation data obtained Sanger Institute Catalogue Somatic Mutations Cancer web site, http://cancer.sanger.ac.uk/cosmic Bamford et al (2004) COSMIC (Catalogue Somatic Mutations Cancer) database website. Br J Cancer, 91,355-358.","code":"# This will be included for published data. # vignette('sigurd')"},{"path":"/index.html","id":"current-features-v0215","dir":"","previous_headings":"","what":"Current Features v0.2.15","title":"Single cell Genotyping Using RNA Data","text":"Loading data VarTrix MAEGATK. Transforming data compatible joint analysis. Calculating variant burden per cell. Thresholding variants using approach described Miller et al. [2] Finding associated variants using correlation Fisher Test.","code":""},{"path":"/index.html","id":"sources","dir":"","previous_headings":"","what":"Sources","title":"Single cell Genotyping Using RNA Data","text":"package implements approaches following packages respositories: - https://github.com/petervangalen/MAESTER-2021 – Variant Thresholding functions loading MAEGATK data. - https://github.com/CostaLab/CimpleG – loading saving function.","code":""},{"path":"/index.html","id":"future","dir":"","previous_headings":"","what":"Future","title":"Single cell Genotyping Using RNA Data","text":"Memory optimization Loading CB sniffer results Providing data vignette","code":""},{"path":"/index.html","id":"references","dir":"","previous_headings":"","what":"References","title":"Single cell Genotyping Using RNA Data","text":"[1] VarTrix. github [2] Miller, T.E., et al. Mitochondrial variant enrichment high-throughput single-cell RNA sequencing resolves clonal populations. Nat Biotechnol (2022). link. 
See also: MAEGATK Analysis, Data","code":""},{"path":"/reference/AmpliconSupplementing.html","id":null,"dir":"Reference","previous_headings":"","what":"Supplementing scRNAseq values with Amplicon values — AmpliconSupplementing","title":"Supplementing scRNAseq values with Amplicon values — AmpliconSupplementing","text":"replace values scRNAseq experiment values amplicon experiment.","code":""},{"path":"/reference/AmpliconSupplementing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supplementing scRNAseq values with Amplicon values — AmpliconSupplementing","text":"","code":"AmpliconSupplementing(scRNAseq, amplicon)"},{"path":"/reference/AmpliconSupplementing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supplementing scRNAseq values with Amplicon values — AmpliconSupplementing","text":"scRNAseq SummarizedExperiment object containing scRNAseq data. amplicon SummarizedExperiment object containing amplicon data.","code":""},{"path":"/reference/AmpliconSupplementing_big.html","id":null,"dir":"Reference","previous_headings":"","what":"Supplementing scRNAseq values with Amplicon values using big.matrix — AmpliconSupplementing_big","title":"Supplementing scRNAseq values with Amplicon values using big.matrix — AmpliconSupplementing_big","text":"replace values scRNAseq experiment values amplicon experiment.","code":""},{"path":"/reference/AmpliconSupplementing_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supplementing scRNAseq values with Amplicon values using big.matrix — AmpliconSupplementing_big","text":"","code":"AmpliconSupplementing_big(scRNAseq, amplicon)"},{"path":"/reference/AmpliconSupplementing_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supplementing scRNAseq values with Amplicon values using big.matrix — AmpliconSupplementing_big","text":"scRNAseq SummarizedExperiment object 
containing scRNAseq data. amplicon SummarizedExperiment object containing amplicon data.","code":""},{"path":"/reference/CalculateAlleleFrequency.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculating the Minor Allele Frequency. — CalculateAlleleFrequency","title":"Calculating the Minor Allele Frequency. — CalculateAlleleFrequency","text":"calculate MAF MAEGATK results.","code":""},{"path":"/reference/CalculateAlleleFrequency.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculating the Minor Allele Frequency. — CalculateAlleleFrequency","text":"","code":"CalculateAlleleFrequency(reference_reads, alternative_reads)"},{"path":"/reference/CalculateAlleleFrequency.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculating the Minor Allele Frequency. — CalculateAlleleFrequency","text":"reference_reads Reference reads matrix. alternative_reads List matrices alternative reads.","code":""},{"path":"/reference/CalculateAltReads.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateAltReads — CalculateAltReads","title":"CalculateAltReads — CalculateAltReads","text":"calculate number reads covering variant using forward reverse reads.","code":""},{"path":"/reference/CalculateAltReads.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateAltReads — CalculateAltReads","text":"","code":"CalculateAltReads(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/CalculateAltReads.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateAltReads — CalculateAltReads","text":"SE SummarizedExperiment object. chromosome_prefix List matrices alternative reads.","code":""},{"path":"/reference/CalculateConsensus.html","id":null,"dir":"Reference","previous_headings":"","what":"We calculate the consensus information from the MAEGATK results. 
— CalculateConsensus","title":"We calculate the consensus information from the MAEGATK results. — CalculateConsensus","text":"calculate consensus information MAEGATK results.","code":""},{"path":"/reference/CalculateConsensus.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"We calculate the consensus information from the MAEGATK results. — CalculateConsensus","text":"","code":"CalculateConsensus(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/CalculateConsensus.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"We calculate the consensus information from the MAEGATK results. — CalculateConsensus","text":"SE SummarizedExperiment object. chromosome_prefix chromosome name used prefix.","code":""},{"path":"/reference/CalculateCorrelationPValue.html","id":null,"dir":"Reference","previous_headings":"","what":"Correlating the SNVs — CalculateCorrelationPValue","title":"Correlating the SNVs — CalculateCorrelationPValue","text":"perform correlation SNVs calculate P values.","code":""},{"path":"/reference/CalculateCorrelationPValue.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Correlating the SNVs — CalculateCorrelationPValue","text":"","code":"CalculateCorrelationPValue( variant_values, other_mutation, all_variants_list, min_intersecting_cells = 5 )"},{"path":"/reference/CalculateCorrelationPValue.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Correlating the SNVs — CalculateCorrelationPValue","text":"variant_values fraction values analysing. vector. other_mutation variants . vector variant names. all_variants_list List fraction values variants want compare variant . min_intersecting_cells Minimum number intersecting cells. 
Correlations less performed.","code":""},{"path":"/reference/CalculateCoverage.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateCoverage — CalculateCoverage","title":"CalculateCoverage — CalculateCoverage","text":"calculate coverage information per variant MAEGATK results.","code":""},{"path":"/reference/CalculateCoverage.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateCoverage — CalculateCoverage","text":"","code":"CalculateCoverage(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/CalculateCoverage.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateCoverage — CalculateCoverage","text":"SE SummarizedExperiment object. chromosome_prefix List matrices alternative reads.","code":""},{"path":"/reference/CalculateFisherTestPValue.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateFisherTestPValue — CalculateFisherTestPValue","title":"CalculateFisherTestPValue — CalculateFisherTestPValue","text":"perform Fisher Test SNVs calculate P values.","code":""},{"path":"/reference/CalculateFisherTestPValue.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateFisherTestPValue — CalculateFisherTestPValue","text":"","code":"CalculateFisherTestPValue( variant_values, other_mutation, all_variants_list, min_intersecting_cells = 5 )"},{"path":"/reference/CalculateFisherTestPValue.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateFisherTestPValue — CalculateFisherTestPValue","text":"variant_values fraction values analysing. vector. other_mutation variants . vector variant names. all_variants_list List fraction values variants want compare variant . min_intersecting_cells Minimum number intersecting cells. 
Correlations less performed.","code":""},{"path":"/reference/CalculateFisherTestPValue2.html","id":null,"dir":"Reference","previous_headings":"","what":"We perform the Fisher test of SNVs and calculate the P values. — CalculateFisherTestPValue2","title":"We perform the Fisher test of SNVs and calculate the P values. — CalculateFisherTestPValue2","text":"perform Fisher test SNVs calculate P values.","code":""},{"path":"/reference/CalculateFisherTestPValue2.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"We perform the Fisher test of SNVs and calculate the P values. — CalculateFisherTestPValue2","text":"","code":"CalculateFisherTestPValue2( variant_values, other_mutation, all_variants_list, min_intersecting_cells = 5 )"},{"path":"/reference/CalculateFisherTestPValue2.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"We perform the Fisher test of SNVs and calculate the P values. — CalculateFisherTestPValue2","text":"variant_values fraction values analysing. vector. other_mutation variants . vector variant names. all_variants_list List fraction values variants want compare variant . min_intersecting_cells Minimum number intersecting cells. Correlations less performed.","code":""},{"path":"/reference/CalculateQuality.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateQuality — CalculateQuality","title":"CalculateQuality — CalculateQuality","text":"calculate quality per variant.","code":""},{"path":"/reference/CalculateQuality.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateQuality — CalculateQuality","text":"","code":"CalculateQuality( SE, variants = rownames(reads_alt), chromosome_prefix = \"chrM\" )"},{"path":"/reference/CalculateQuality.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateQuality — CalculateQuality","text":"SE SummarizedExperiment object. 
chromosome_prefix List matrices alternative reads.","code":""},{"path":"/reference/CalculateStrandCorrelation.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateStrandCorrelation — CalculateStrandCorrelation","title":"CalculateStrandCorrelation — CalculateStrandCorrelation","text":"calculate correlation amount forward reverse reads per variant.","code":""},{"path":"/reference/CalculateStrandCorrelation.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateStrandCorrelation — CalculateStrandCorrelation","text":"","code":"CalculateStrandCorrelation(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/CalculateStrandCorrelation.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateStrandCorrelation — CalculateStrandCorrelation","text":"SE SummarizedExperiment object. chromosome_prefix List matrices alternative reads.","code":""},{"path":"/reference/CombineSEobjects.html","id":null,"dir":"Reference","previous_headings":"","what":"CombineSEobjects — CombineSEobjects","title":"CombineSEobjects — CombineSEobjects","text":"combine two SummarizedExperiment objects.","code":""},{"path":"/reference/CombineSEobjects.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CombineSEobjects — CombineSEobjects","text":"","code":"CombineSEobjects(se_somatic, se_MT, suffixes = c(\"_somatic\", \"_MT\"))"},{"path":"/reference/CombineSEobjects.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CombineSEobjects — CombineSEobjects","text":"se_somatic SummarizedExperiment object somatic variants. se_MT SummarizedExperiment object MT variants. 
suffixes suffixes want add meta data.frame.","code":""},{"path":"/reference/CombineSEobjects_big.html","id":null,"dir":"Reference","previous_headings":"","what":"CombineSEobjects_big — CombineSEobjects_big","title":"CombineSEobjects_big — CombineSEobjects_big","text":"combine two SummarizedExperiment objects big matrices.","code":""},{"path":"/reference/CombineSEobjects_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CombineSEobjects_big — CombineSEobjects_big","text":"","code":"CombineSEobjects_big(se_somatic, se_MT, suffixes = c(\"_somatic\", \"_MT\"))"},{"path":"/reference/CombineSEobjects_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CombineSEobjects_big — CombineSEobjects_big","text":"se_somatic SummarizedExperiment object somatic variants. se_MT SummarizedExperiment object MT variants. suffixes suffixes want add meta data.frame.","code":""},{"path":"/reference/Filtering.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering the loaded genotyping data. — Filtering","title":"Filtering the loaded genotyping data. — Filtering","text":"filter SummarizedExperiment object exclude variants cells.","code":""},{"path":"/reference/Filtering.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering the loaded genotyping data. — Filtering","text":"","code":"Filtering( se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, path_seurat = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, reject_value = \"NoCall\" )"},{"path":"/reference/Filtering.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering the loaded genotyping data. — Filtering","text":"se SummarizedExperiment object. blacklisted_barcodes_path Barcodes want remove. Path file one column without header. fraction_threshold Variants VAF threshold set 0. Numeric. Default = NULL. 
alts_threshold Variants number alt reads less threshold set 0. Numeric. Default = NULL. path_seurat Path Seurat object. Cells present object removed. min_cells_per_variant many cells variant present included? Numeric. Default = 2. min_variants_per_cell many variants covered cell included? Default = 1. reject_value cells fall threshold (fraction_threshold alts_threshold) treated Reference NoCall? Default = NoCall.","code":""},{"path":"/reference/Filtering.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering the loaded genotyping data. — Filtering","text":"one sample time. want remove: cells blacklisted, cells Seurat object, cells least one variant >1 (Reference), variants alternative transcripts, variants always NoCall, set variants VAF threshold reference.","code":""},{"path":"/reference/Filtering.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Filtering the loaded genotyping data. — Filtering","text":"","code":"if (FALSE) { # Removing all variants that are not detected in at least 2 cells. # Before we remove the variants, we set fraction value of variants below 0.05 to 0. se_geno <- Filtering(se_geno, min_cells_per_variant = 2, fraction_threshold = 0.05) }"},{"path":"/reference/Filtering_big.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering the loaded genotyping data using big matrices. — Filtering_big","title":"Filtering the loaded genotyping data using big matrices. — Filtering_big","text":"filter SummarizedExperiment object exclude variants cells.","code":""},{"path":"/reference/Filtering_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering the loaded genotyping data using big matrices. 
— Filtering_big","text":"","code":"Filtering_big( se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, path_seurat = NULL, min_cells_per_variant = 2 )"},{"path":"/reference/Filtering_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering the loaded genotyping data using big matrices. — Filtering_big","text":"se SummarizedExperiment object. blacklisted_barcodes_path Barcodes want remove. Path file one column without header. fraction_threshold Variants VAF threshold set 0. Numeric. path_seurat Path Seurat object. Cells present object removed. min_cells_per_variant many cells variant present included? Numeric. Default = 2. min_variants_per_cell many variants covered cell included? Default = 1.","code":""},{"path":"/reference/Filtering_big.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering the loaded genotyping data using big matrices. — Filtering_big","text":"one sample time. want remove: cells blacklisted, cells Seurat object, cells least one variant >1 (Reference), variants alternative transcripts, variants always NoCall, set variants VAF threshold reference.","code":""},{"path":"/reference/Filtering_big.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Filtering the loaded genotyping data using big matrices. — Filtering_big","text":"","code":"if (FALSE) { # Removing all variants that are not detected in at least 2 cells. # Before we remove the variants, we set fraction value of variants below 0.05 to 0. se_geno <- Filtering(se_geno, min_cells_per_variant = 2, fraction_threshold = 0.05) }"},{"path":"/reference/GetCellInfoPerVariant.html","id":null,"dir":"Reference","previous_headings":"","what":"We get the variant information per cell. — GetCellInfoPerVariant","title":"We get the variant information per cell. 
— GetCellInfoPerVariant","text":"get variant information per cell.","code":""},{"path":"/reference/GetCellInfoPerVariant.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"We get the variant information per cell. — GetCellInfoPerVariant","text":"","code":"GetCellInfoPerVariant(se, voi_ch)"},{"path":"/reference/GetCellInfoPerVariant.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"We get the variant information per cell. — GetCellInfoPerVariant","text":"se SummarizedExperiment object. voi_ch Variants interest.","code":""},{"path":"/reference/HeatmapVoi.html","id":null,"dir":"Reference","previous_headings":"","what":"HeatmapVoi — HeatmapVoi","title":"HeatmapVoi — HeatmapVoi","text":"plot heatmap set Variants Interest using Variant Allele Frequency values SummarizedExperiment object.","code":""},{"path":"/reference/HeatmapVoi.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"HeatmapVoi — HeatmapVoi","text":"","code":"HeatmapVoi(SE, voi, annotation_trait = NULL, column_title = NULL)"},{"path":"/reference/HeatmapVoi.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"HeatmapVoi — HeatmapVoi","text":"SE SummarizedExperiment object. voi Variants Interest. annotation_trait Cell Annotation bottom heat map.","code":""},{"path":"/reference/LoadingMAEGATK_typewise.html","id":null,"dir":"Reference","previous_headings":"","what":"LoadingMAEGATK_typewise — LoadingMAEGATK_typewise","title":"LoadingMAEGATK_typewise — LoadingMAEGATK_typewise","text":"load MAEGATK output transform compatible VarTrix output. input file specifically formated csv file necessary information run analysis. Note source column input file needs one following: vartrix, mgaetk, mgatk. 
hard coded case insensitive.","code":""},{"path":"/reference/LoadingMAEGATK_typewise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"LoadingMAEGATK_typewise — LoadingMAEGATK_typewise","text":"","code":"LoadingMAEGATK_typewise( samples_file, samples_path = NULL, patient, type_use = \"scRNAseq_MT\", chromosome_prefix = \"chrM\", min_cells = 2, barcodes_path = NULL )"},{"path":"/reference/LoadingMAEGATK_typewise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"LoadingMAEGATK_typewise — LoadingMAEGATK_typewise","text":"samples_file Path csv file samples loaded. samples_path Path input folder. patient patient want load. type_use type input. one : scRNAseq_MT, Amplicon_MT. used samples_path NULL. chromosome_prefix prefix want use. Default: \"chrM\"","code":""},{"path":"/reference/LoadingVarTrix.html","id":null,"dir":"Reference","previous_headings":"","what":"We load the VarTrix results for one patient and merge all types together. — LoadingVarTrix","title":"We load the VarTrix results for one patient and merge all types together. — LoadingVarTrix","text":"load VarTrix results one patient merge types together.","code":""},{"path":"/reference/LoadingVarTrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"We load the VarTrix results for one patient and merge all types together. — LoadingVarTrix","text":"","code":"LoadingVarTrix(samples_path, vcf_path, vcf_path_MT, patient)"},{"path":"/reference/LoadingVarTrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"We load the VarTrix results for one patient and merge all types together. — LoadingVarTrix","text":"samples_path Path csv file samples loaded. vcf_path Path VCF file somatic variants. vcf_path_MT Path VCF file MT variants. 
patient Patient loaded.","code":""},{"path":"/reference/LoadingVarTrix_ori.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"function loads VarTrix results outputs list two SummarizedExperiments objects, one somatic variants accompanying mitochondrial variants.","code":""},{"path":"/reference/LoadingVarTrix_ori.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"","code":"LoadingVarTrix(samples_file, vcf_path, vcf_path_MT, patient)"},{"path":"/reference/LoadingVarTrix_ori.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"samples_file Path input file. One sample per row. contain following columns:patient: patient sample. patient can multiple samples (treated/untreated). samples merged per patient.sample: ID sample.resource: VarTrix MAEGATK input? Must either VarTrix MAEGATK.type: type data? Must one following: scRNAseq_Somatic, scRNAseq_MT, Amplicon_Somatic, Amplicon_MT.input_folder: Path folder, VarTrix output stored.cells: Path barcodes file CellRanger output. vcf_path Path VCF file contains somatic variants interested . vcf_path_MT Path VCF file contains MT variants interested . patient patient want load.","code":""},{"path":"/reference/LoadingVarTrix_ori.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"function loads VarTrix one patients input file. patient multiple samples, samples merged. two types input data, single cell RNA sequencing amplicon data. 
scRNAseq data result something like 10X assay, amplicon data focuses specific part genome. output CellRanger pipeline. Amplicon data much higher coverage specific area therefore much better areas. scRNAseq data overwritten amplicon data available. output list two SummarizedExperiment objects. object contains two assays:consensus_somatic: consensus information per somatic variant cell.consensus_MT: consensus information per MT variant cell.fraction_somatic: variant allele frequency per somatic variant cell.fraction_MT: variant allele frequency per MT variant cell. consensus information encoded following way:0: Call, reads covering position.1: Reference, reference reads cover position.2: Alternative, mutated reads cover position.3: , alt ref reads cover position.","code":""},{"path":"/reference/LoadingVarTrix_ori.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"return list two objects. SE_Somatic somatic variants results. SE_MT MT variants results.","code":""},{"path":[]},{"path":[]},{"path":[]},{"path":[]},{"path":[]},{"path":"/reference/LoadingVarTrix_typewise.html","id":null,"dir":"Reference","previous_headings":"","what":"LoadingVarTrix_typewise — LoadingVarTrix_typewise","title":"LoadingVarTrix_typewise — LoadingVarTrix_typewise","text":"load different types results (scRNAseq/amplicon MT/amplicon), might need extreme amounts memory. solve issue, load type separately. following function (AmpliconSupplementing), can add amplicon information scRNAseq information. input file specifically formated csv file necessary information run analysis. Note source column input file needs one following: vartrix, mgaetk, mgatk. 
hard coded case insensitive.","code":""},{"path":"/reference/LoadingVarTrix_typewise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"LoadingVarTrix_typewise — LoadingVarTrix_typewise","text":"","code":"LoadingVarTrix_typewise( samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, sample = NULL, type_use = \"scRNAseq_Somatic\", min_reads = NULL, min_cells = 2 )"},{"path":"/reference/LoadingVarTrix_typewise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"LoadingVarTrix_typewise — LoadingVarTrix_typewise","text":"samples_file Path csv file samples loaded. samples_path Path input folder. Must include barcodes file. snp_path Path SNP file used VarTrix (SNV.loci.txt). vcf_path Path VCF file variants. patient patient want load. type_use type input. one : scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. min_reads minimum number reads want. Otherwise treat NoCall. Default = NULL. min_cells minimum number cells variant. Otherwise, remove variant. Default = 2.","code":""},{"path":"/reference/LoadingVarTrix_typewise_big.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading VarTrix results using big.matrix — LoadingVarTrix_typewise_big","title":"Loading VarTrix results using big.matrix — LoadingVarTrix_typewise_big","text":"load different types results (scRNAseq/amplicon MT/amplicon), might need extreme amounts memory. solve issue, load type separately. 
following function (AmpliconSupplementing), can add amplicon information scRNAseq information.","code":""},{"path":"/reference/LoadingVarTrix_typewise_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading VarTrix results using big.matrix — LoadingVarTrix_typewise_big","text":"","code":"LoadingVarTrix_typewise_big( samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, sample = NULL, type_use = \"scRNAseq_Somatic\", min_reads = 3, min_cells = 2 )"},{"path":"/reference/LoadingVarTrix_typewise_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading VarTrix results using big.matrix — LoadingVarTrix_typewise_big","text":"samples_file Path csv file samples loaded. samples_path Path input folder. barcodes_path Path barcodes file. vcf_path Path VCF file variants. patient patient want load. type_use type input. one : scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. min_reads minimum number reads want. Otherwise treat NoCall. yet implemented. min_cells minimum number cells variant. Otherwise, remove variant.","code":""},{"path":"/reference/Merging_SE_list.html","id":null,"dir":"Reference","previous_headings":"","what":"Merging list of SummarizedExperiment objects. — Merging_SE_list","title":"Merging list of SummarizedExperiment objects. — Merging_SE_list","text":"function wrapper .(\"cbind\", se).","code":""},{"path":"/reference/Merging_SE_list.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Merging list of SummarizedExperiment objects. — Merging_SE_list","text":"","code":"Merging_SE_list(se)"},{"path":"/reference/Merging_SE_list.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Merging list of SummarizedExperiment objects. 
— Merging_SE_list","text":"se SummarizedExperiment object","code":""},{"path":"/reference/RowWiseSplit.html","id":null,"dir":"Reference","previous_headings":"","what":"RowWiseSplit — RowWiseSplit","title":"RowWiseSplit — RowWiseSplit","text":"Performing correlation Fisher test association SummarizedExperiment object requires extreme amounts memory. reduce amount memory necessary, instead get individual rows consensus assay. can remove NoCalls (reads) individual vectors, reducing amount memory needed.","code":""},{"path":"/reference/RowWiseSplit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"RowWiseSplit — RowWiseSplit","text":"","code":"RowWiseSplit(se, n_cores = 1, remove_nocalls = TRUE)"},{"path":"/reference/RowWiseSplit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"RowWiseSplit — RowWiseSplit","text":"se SummarizedExperiment object. n_cores Number cores use. remove_nocalls want remove NoCall cells?","code":""},{"path":"/reference/SeparatingMatrixToList.html","id":null,"dir":"Reference","previous_headings":"","what":"SeparatingMatrixToList — SeparatingMatrixToList","title":"SeparatingMatrixToList — SeparatingMatrixToList","text":"separate matrix variant information list. variant entry list. NoCalls (cells reads covering variant) can removed. function gets called RowWiseSplit return.","code":""},{"path":"/reference/SeparatingMatrixToList.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"SeparatingMatrixToList — SeparatingMatrixToList","text":"","code":"SeparatingMatrixToList(row_use, total_matrix, remove_nocalls = TRUE)"},{"path":"/reference/SeparatingMatrixToList.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"SeparatingMatrixToList — SeparatingMatrixToList","text":"row_use row separate. total_matrix matrix split. 
remove_nocalls want remove NoCall cells?","code":""},{"path":"/reference/VariantBurden.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantBurden — VariantBurden","title":"VariantBurden — VariantBurden","text":"Calculate variant burden per cell. simply sum MAF values per cell.","code":""},{"path":"/reference/VariantBurden.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantBurden — VariantBurden","text":"","code":"VariantBurden(se)"},{"path":"/reference/VariantBurden.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantBurden — VariantBurden","text":"se SummarizedExperiment object","code":""},{"path":"/reference/VariantBurden_big.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate the variant burden per cell using big matrices. — VariantBurden_big","title":"Calculate the variant burden per cell using big matrices. — VariantBurden_big","text":"Calculate variant burden per cell using big matrices.","code":""},{"path":"/reference/VariantBurden_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate the variant burden per cell using big matrices. — VariantBurden_big","text":"","code":"VariantBurden_big(se)"},{"path":"/reference/VariantBurden_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate the variant burden per cell using big matrices. — VariantBurden_big","text":"se SummarizedExperiment object","code":""},{"path":"/reference/VariantCloneSizeThresholding.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantCloneSizeThresholding — VariantCloneSizeThresholding","title":"VariantCloneSizeThresholding — VariantCloneSizeThresholding","text":"get variants interest using clone size thresholding. 
Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/VariantCloneSizeThresholding.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantCloneSizeThresholding — VariantCloneSizeThresholding","text":"","code":"VariantCloneSizeThresholding( se, min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, vaf_threshold = 0.5 )"},{"path":"/reference/VariantCloneSizeThresholding.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantCloneSizeThresholding — VariantCloneSizeThresholding","text":"se SummarizedExperiment object. min_coverage Minimum coverage variant needs . fraction_negative_cells fraction negative cells needed. min_clone_size minimum number cells. vaf_threshold Variant Allele Threshold. Cells threshold considered mutated.","code":""},{"path":"/reference/VariantCorrelationHeatmap.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantCorrelationHeatmap — VariantCorrelationHeatmap","title":"VariantCorrelationHeatmap — VariantCorrelationHeatmap","text":"generate heatmap showing correlation somatic variants MT variants.","code":""},{"path":"/reference/VariantCorrelationHeatmap.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantCorrelationHeatmap — VariantCorrelationHeatmap","text":"","code":"VariantCorrelationHeatmap( correlation_results, output_path = NULL, patient, min_alt_cells = 5, min_correlation = 0.5, width_use = 2000, height_use = 2000, padding_use = c(165, 165, 2, 2) )"},{"path":"/reference/VariantCorrelationHeatmap.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantCorrelationHeatmap — VariantCorrelationHeatmap","text":"correlation_results Data.frame correlation results. output_path Path output folder. patient patient heatmap. min_alt_cells Minimum number mutated cells needed, otherwise correlation plotted. 
min_correlation Minimum correlation needed. width_use Width heatmap px. height_use Height heatmap px. padding_use Space around heatmap mm. low, variant names might cut .","code":""},{"path":"/reference/VariantFisherTestHeatmap.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantFisherTestHeatmap — VariantFisherTestHeatmap","title":"VariantFisherTestHeatmap — VariantFisherTestHeatmap","text":"generate heatmap showing Fisher test somatic variants MT variants.","code":""},{"path":"/reference/VariantFisherTestHeatmap.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantFisherTestHeatmap — VariantFisherTestHeatmap","text":"","code":"VariantFisherTestHeatmap( fisher_results, patient, min_alt_cells = 5, min_oddsratio = 1 )"},{"path":"/reference/VariantFisherTestHeatmap.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantFisherTestHeatmap — VariantFisherTestHeatmap","text":"fisher_results Data.frame correlation results. patient patient heatmap. min_alt_cells Minimum number mutated cells needed, otherwise association plotted. min_oddsratio Minimum correlation needed.","code":""},{"path":"/reference/VariantQuantileThresholding.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantQuantileThresholding — VariantQuantileThresholding","title":"VariantQuantileThresholding — VariantQuantileThresholding","text":"get variants interest using quantile thresholding. use top_cells top_VAF, supply one quantil value (quantiles = 0.9, thresholds = 0). function adapted Peter van Galen. 
Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/VariantQuantileThresholding.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantQuantileThresholding — VariantQuantileThresholding","text":"","code":"VariantQuantileThresholding( SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = 30 )"},{"path":"/reference/VariantQuantileThresholding.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantQuantileThresholding — VariantQuantileThresholding","text":"SE SummarizedExperiment object. min_coverage Minimum coverage needed. quantiles lower upper quantile want use. thresholds VAF thresholds want use quantiles. top_cells number cells least top_VAF percent variant. top_VAF VAF top cells. min_quality minimum quality want Variants Interest. Can ignored setting NULL.","code":""},{"path":"/reference/VariantWiseCorrelation.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantWiseCorrelation — VariantWiseCorrelation","title":"VariantWiseCorrelation — VariantWiseCorrelation","text":"correlate variants using Pearson correlation. function calls CalculateCorrelationPValue perform actual correlation.","code":""},{"path":"/reference/VariantWiseCorrelation.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantWiseCorrelation — VariantWiseCorrelation","text":"","code":"VariantWiseCorrelation(variants_list, n_cores = 1, p_value_adjustment = \"fdr\")"},{"path":"/reference/VariantWiseCorrelation.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantWiseCorrelation — VariantWiseCorrelation","text":"variants_list List fraction values. n_cores Number cores want use. Numeric. p_value_adjustment Method P value adjustment. 
See p.adjust details.","code":""},{"path":"/reference/VariantWiseFisherTest.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantWiseFisherTest — VariantWiseFisherTest","title":"VariantWiseFisherTest — VariantWiseFisherTest","text":"perform Fisher test determine variants associated. function calls CalculateFisherTestPValue perform actual testing.","code":""},{"path":"/reference/VariantWiseFisherTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantWiseFisherTest — VariantWiseFisherTest","text":"","code":"VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = \"fdr\")"},{"path":"/reference/VariantWiseFisherTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantWiseFisherTest — VariantWiseFisherTest","text":"variants_list List fraction values. n_cores Number cores want use. Numeric. p_value_adjustment Method P value adjustment. See p.adjust details.","code":""},{"path":"/reference/combine_NAMES.html","id":null,"dir":"Reference","previous_headings":"","what":"combine_NAMES — combine_NAMES","title":"combine_NAMES — combine_NAMES","text":"combine two vectors names.","code":""},{"path":"/reference/combine_NAMES.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"combine_NAMES — combine_NAMES","text":"","code":"combine_NAMES(x, y)"},{"path":"/reference/combine_NAMES.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"combine_NAMES — combine_NAMES","text":"x First vector names. 
y Second vector names.","code":""},{"path":"/reference/combine_SparseMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"combine_sparseMatrix — combine_SparseMatrix","title":"combine_sparseMatrix — combine_SparseMatrix","text":"combine two sparse matrices","code":""},{"path":"/reference/combine_SparseMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"combine_sparseMatrix — combine_SparseMatrix","text":"","code":"combine_SparseMatrix(matrix_1, matrix_2)"},{"path":"/reference/combine_SparseMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"combine_sparseMatrix — combine_SparseMatrix","text":"matrix_1 first sparse matrix. matrix_2 second matrix.","code":""},{"path":"/reference/computeAFMutMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"computeAFMutMatrix — computeAFMutMatrix","title":"computeAFMutMatrix — computeAFMutMatrix","text":"Calculate allele frequency per variant. Source: https://github.com/petervangalen/MAESTER-2021 can get AF values greater 1, due uninformative reads. See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD--lower--expected https://github.com/caleblareau/mgatk/issues/1 simply set values 1, since actual information case. 
issue can solved MAEGATK/GATK side.","code":""},{"path":"/reference/computeAFMutMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"computeAFMutMatrix — computeAFMutMatrix","text":"","code":"computeAFMutMatrix(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/computeAFMutMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"computeAFMutMatrix — computeAFMutMatrix","text":"SE SummarizedExperiment object.","code":""},{"path":"/reference/getAltMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"getAltMatrix — getAltMatrix","title":"getAltMatrix — getAltMatrix","text":"get alt values MAEGATK results. Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/getAltMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"getAltMatrix — getAltMatrix","text":"","code":"getAltMatrix(SE_object, letter, chromosome_prefix = \"chrM\")"},{"path":"/reference/getAltMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"getAltMatrix — getAltMatrix","text":"SE_object SummarizedExperiment object. letter base want use. Character. chromosome_prefix chromosome prefix used. ref_allele reference alleles.","code":""},{"path":"/reference/getReadMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the counts for a specific base over all positions. — getReadMatrix","title":"Get the counts for a specific base over all positions. — getReadMatrix","text":"Get counts specific base positions.","code":""},{"path":"/reference/getReadMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the counts for a specific base over all positions. 
— getReadMatrix","text":"","code":"getReadMatrix(SE, letter, chromosome_prefix = \"chrM\")"},{"path":"/reference/getReadMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get the counts for a specific base over all positions. — getReadMatrix","text":"SE SummarizedExperiment object. letter base want counts. chromosome_prefix chromosome name used prefix.","code":""},{"path":"/reference/getRefMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"getRefMatrix — getRefMatrix","title":"getRefMatrix — getRefMatrix","text":"get reference values MAEGATK result. Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/getRefMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"getRefMatrix — getRefMatrix","text":"","code":"getRefMatrix(SE_object, letter, chromosome_prefix = \"chrM\")"},{"path":"/reference/getRefMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"getRefMatrix — getRefMatrix","text":"SE_object SummarizedExperiment object. letter base analysing. get matrix shows cells many reference reads letter. chromosome_prefix chromosome prefix used. ref_allele reference alleles.","code":""},{"path":"/reference/get_consensus.html","id":null,"dir":"Reference","previous_headings":"","what":"get_consensus — get_consensus","title":"get_consensus — get_consensus","text":"get consensus information specific matrix.","code":""},{"path":"/reference/get_consensus.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"get_consensus — get_consensus","text":"","code":"get_consensus(alt_base, ref_base, input_matrix, chromosome_prefix = \"chrM\")"},{"path":"/reference/get_consensus.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"get_consensus — get_consensus","text":"ref_base reference base. 
input_matrix Input matrix present reads numerically encoded. chromosome_prefix chromosome name used prefix. letter alternative base.","code":""},{"path":"/reference/ggsci_pal.html","id":null,"dir":"Reference","previous_headings":"","what":"ggsci_pal — ggsci_pal","title":"ggsci_pal — ggsci_pal","text":"Function return colours ggsci palette.","code":""},{"path":"/reference/ggsci_pal.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"ggsci_pal — ggsci_pal","text":"","code":"ggsci_pal(option, ...)"},{"path":"/reference/ggsci_pal.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"ggsci_pal — ggsci_pal","text":"option colour palette choice.","code":""},{"path":"/reference/ggsci_pal.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"ggsci_pal — ggsci_pal","text":"function returns colour palette ggsci. Options : aaas: 10 d3: 10 futurama: 12 gsea: 12 igv: 51 jama: 7 jco: 10 npg: 10 lancet: 9 locuszoom: 7 material: 10 nejm: 8 rickandmorty: 12 simpsons: 16 startrek: 7 tron: 7 uchicago: 9 ucscgb: 26","code":""},{"path":"/reference/load_object.html","id":null,"dir":"Reference","previous_headings":"","what":"load_object — load_object","title":"load_object — load_object","text":"loading function load RDS files quicker. Source: https://github.com/CostaLab/CimpleG","code":""},{"path":"/reference/load_object.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"load_object — load_object","text":"","code":"load_object(file_name)"},{"path":"/reference/load_object.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"load_object — load_object","text":"file_name path file.","code":""},{"path":"/reference/save_object.html","id":null,"dir":"Reference","previous_headings":"","what":"save_object — save_object","title":"save_object — save_object","text":"Saving function save RDS files quicker. 
Source:https://github.com/CostaLab/CimpleG","code":""},{"path":"/reference/save_object.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"save_object — save_object","text":"","code":"save_object(object, file_name, file_format = NULL)"},{"path":"/reference/save_object.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"save_object — save_object","text":"object R object save. file_name path file shall save. file_format format save file. one : zstd, lz4, gzip, bzip2, xz, nocomp.","code":""},{"path":"/reference/sdiv.html","id":null,"dir":"Reference","previous_headings":"","what":"Division of sparse matrix. — sdiv","title":"Division of sparse matrix. — sdiv","text":"Division sparse matrix.","code":""},{"path":"/reference/sdiv.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Division of sparse matrix. — sdiv","text":"","code":"sdiv(X, Y, names = dimnames(X))"},{"path":"/reference/sdiv.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Division of sparse matrix. — sdiv","text":"X First sparse matrix. Y Second sparse matrix. 
names dimension names (dimnames(X)).","code":""}] +[{"path":"/articles/SiGURD.html","id":"your-input-file-","dir":"Articles","previous_headings":"","what":"Your input file.","title":"SiGURD","text":"","code":"sample_path <- system.file(\"extdata\", \"Input_Example_local.csv\", package = \"sigurd\") sample_file <- read.csv(sample_path) print(sample_file) ## patient sample source type ## 1 Sample1 Minus_Sample1 VarTrix scRNAseq_Somatic ## 2 Sample1 Minus_Sample1 VarTrix scRNAseq_MT ## 3 Sample1 Minus_Sample1 MAEGATK scRNAseq_MT ## 4 Sample1 Plus_Sample1 VarTrix scRNAseq_Somatic ## 5 Sample1 Plus_Sample1 VarTrix scRNAseq_MT ## 6 Sample1 Plus_Sample1 MAEGATK scRNAseq_MT ## 7 SW_CellLineMix_All_mr3 SW_CellLineMix_All_mr3 MAEGATK Amplicon_MT ## 8 SW_CellLineMix_RNAseq_mr3 SW_CellLineMix_RNAseq_mr3 MAEGATK scRNAseq_MT ## 9 TenX_BPDCN712_All_mr3 TenX_BPDCN712_All_mr3 MAEGATK Amplicon_MT ## 10 TenX_BPDCN712_RNAseq_mr3 TenX_BPDCN712_RNAseq_mr3 MAEGATK scRNAseq_MT ## bam ## 1 ~/test_data/Minus_Sample1/possorted_genome_bam.bam ## 2 ~/test_data/Minus_Sample1/possorted_genome_bam.bam ## 3 ~/test_data/Minus_Sample1/possorted_genome_bam.bam ## 4 ~/test_data/Plus_Sample1/possorted_genome_bam.bam ## 5 ~/test_data/Plus_Sample1/possorted_genome_bam.bam ## 6 ~/test_data/Plus_Sample1/possorted_genome_bam.bam ## 7 NADA ## 8 NADA ## 9 NADA ## 10 NADA ## input_path ## 1 ~/test_data/VarTrix/Somatic/ ## 2 ~/test_data/VarTrix/MT/ ## 3 ~/test_data/MAEGATK/Minus_Sample1/final/maegatk.rds ## 4 ~/test_data/VarTrix/Somatic/ ## 5 ~/test_data/VarTrix/MT/ ## 6 ~/test_data/MAEGATK/Plus_Sample1/final/maegatk.rds ## 7 ~/test_data/MAESTER_data/SW_CellLineMix_All_mr3/final/SW_CellLineMix_All_mr3_maegatk.rds ## 8 ~/test_data/MAESTER_data/SW_CellLineMix_RNAseq_mr3/final/SW_CellLineMix_RNAseq_mr3_maegatk.rds ## 9 ~/test_data/MAESTER_data/TenX_BPDCN712_All_mr3/final/TenX_BPDCN712_All_mr3_maegatk.rds ## 10 ~/test_data/MAESTER_data/TenX_BPDCN712_RNAseq_mr3/final/TenX_BPDCN712_RNAseq_mr3_maegatk.rds ## cells ## 
1 ~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv ## 2 ~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv ## 3 ~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv ## 4 ~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv ## 5 ~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv ## 6 ~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv ## 7 NADA ## 8 NADA ## 9 NADA ## 10 NADA"},{"path":"/articles/SiGURD.html","id":"your-vcf-files-","dir":"Articles","previous_headings":"","what":"Your vcf files.","title":"SiGURD","text":"files needed VarTrix MGATK/MAEGATK. Since MAEGATK analyses mitochondrial genome, need decide chromosomal prefix data. See loading data .","code":"vcf_path <- system.file(\"extdata\", \"CosmicSubset_filtered.vcf\", package = \"sigurd\") vcf <- VariantAnnotation::readVcf(vcf_path) vcf_info <- VariantAnnotation::info(vcf) print(vcf) ## class: CollapsedVCF ## dim: 1684 0 ## rowRanges(vcf): ## GRanges with 5 metadata columns: paramRangeID, REF, ALT, QUAL, FILTER ## info(vcf): ## DataFrame with 10 columns: GENE, STRAND, GENOMIC_ID, LEGACY_ID, CDS, AA, H... ## info(header(vcf)): ## Number Type Description ## GENE 1 String Gene name ## STRAND 1 String Gene strand ## GENOMIC_ID 1 String Genomic Mutation ID ## LEGACY_ID 1 String Legacy Mutation ID ## CDS 1 String CDS annotation ## AA 1 String Peptide annotation ## HGVSC 1 String HGVS cds syntax ## HGVSP 1 String HGVS peptide syntax ## HGVSG 1 String HGVS genomic syntax ## CNT 1 Integer How many samples have this mutation ## geno(vcf): ## List of length 0: print(vcf_info) ## DataFrame with 1684 rows and 10 columns ## GENE STRAND GENOMIC_ID LEGACY_ID CDS ## ## 1 ABL1 + NA COSN17133235 c.136+2107G>C ## 2 ABL1 + NA COSN14774721 c.136+2599C>T ## 3 ABL1 + NA COSN17133236 c.136+3198G>C ## 4 ABL1 + NA COSN17133237 c.136+4488G>C ## 5 ABL1 + NA COSN17133050 c.136+5055C>T ## ... ... ... ... ... ... 
## 1680 WT1 - NA COSN6609219 c.872+82G>T ## 1681 WT1 - NA COSN17132919 c.872+16G>A ## 1682 WT1 - NA COSN17134797 c.770-57C>T ## 1683 WT1 - NA COSM5020955 c.594C>T ## 1684 ZRSR2 + NA COSM3035276 c.1338_1343dup ## AA HGVSC HGVSP ## ## 1 p.? ENST00000372348.6:c... NA ## 2 p.? ENST00000372348.6:c... NA ## 3 p.? ENST00000372348.6:c... NA ## 4 p.? ENST00000372348.6:c... NA ## 5 p.? ENST00000372348.6:c... NA ## ... ... ... ... ## 1680 p.? ENST00000332351.7:c... NA ## 1681 p.? ENST00000332351.7:c... NA ## 1682 p.? ENST00000332351.7:c... NA ## 1683 p.N198%3D ENST00000332351.7:c... ENSP00000331327.3:p... ## 1684 p.S447_R448dup ENST00000307771.7:c... ENSP00000303015.7:p... ## HGVSG CNT ## ## 1 9:g.130716562G>C 10 ## 2 9:g.130717054C>T 10 ## 3 9:g.130717653G>C 10 ## 4 9:g.130718943G>C 10 ## 5 9:g.130719510C>T 11 ## ... ... ... ## 1680 11:g.32427874C>A 74 ## 1681 11:g.32427940C>T 132 ## 1682 11:g.32428115G>A 108 ## 1683 11:g.32434752G>A 73 ## 1684 X:g.15823131_1582313.. 10 vcf_path_mt <- system.file(\"extdata\", \"chrM_Input_VCF_NoMAF_Filtering.vcf\", package = \"sigurd\") vcf_mt <- VariantAnnotation::readVcf(vcf_path_mt) vcf_mt_info <- VariantAnnotation::info(vcf_mt) print(vcf_mt) ## class: CollapsedVCF ## dim: 49708 0 ## rowRanges(vcf): ## GRanges with 5 metadata columns: paramRangeID, REF, ALT, QUAL, FILTER ## info(vcf): ## DataFrame with 1 column: ID ## info(header(vcf)): ## Number Type Description ## ID A Character Mutation ## geno(vcf): ## List of length 0: print(vcf_mt_info) ## DataFrame with 49708 rows and 1 column ## ID ## ## chrM:1_G/A 1_G>A ## chrM:3_T/A 3_T>A ## chrM:4_C/A 4_C>A ## chrM:6_C/A 6_C>A ## chrM:8_G/A 8_G>A ## ... ... 
## chrM:16564_A/T 16564_A>T ## chrM:16565_C/T 16565_C>T ## chrM:16566_G/T 16566_G>T ## chrM:16567_A/T 16567_A>T ## chrM:16569_G/T 16569_G>T"},{"path":"/articles/SiGURD.html","id":"loading-and-filtering-the-input-data-","dir":"Articles","previous_headings":"","what":"Loading and filtering the input data.","title":"SiGURD","text":"load data per patient merge associated samples automatically. input file, include software tool used analysis. source can either vartrix maegatk/mgatk. respective loading function load files intended . types data available : - scRNAseq_Somatic: standard 10X results analysed somatic variants. - scRNAseq_MT: standard 10X results analysed MT variants. - Amplicon_Somatic: amplicon data analysed somatic variants. - Amplicon_MT: amplicon data analysed MT variants. Since MT results denser, take longer load.","code":"Sample1_scRNAseq_Somatic <- LoadingVarTrix_typewise(samples_file = sample_path, vcf_path = vcf_path, patient = \"Sample1\", type_use = \"scRNAseq_Somatic\") ## [1] \"Loading the data for patient Sample1.\" ## [1] \"We read in the samples file.\" ## [1] \"We subset to the patient of interest.\" ## [1] \"We get the different samples.\" ## [1] \"We load the SNV files.\" ## [1] \"We read the variants.\" ## [1] \"We read in the cell barcodes output by CellRanger as a list.\" ## [1] \"We read in the vcf file.\" ## [1] \"We generate more accessible names.\" ## [1] \"We read in the different sparse genotype matrices as a list.\" ## [1] \"We have a slot per type of input data.\" ## [1] \"Loading sample 1 of 2\" ## [1] \"Loading sample 2 of 2\" ## [1] \"We generate a large data.frame of all the snv matrices.\" ## [1] \"We remove the matrix lists.\" ## [1] \"We remove variants, that are not detected in at least 2 cells.\" ## [1] \"We remove cells that are always NoCall.\" ## [1] \"scRNAseq_Somatic Variants: 73\" ## [1] \"scRNAseq_Somatic Cells: 571\" ## [1] \"We transform the sparse matrices to matrices, so we can calculate the fraction.\" ## [1] 
\"We generate a SummarizedExperiment object containing the fraction and the consensus matrices.\" Sample1_scRNAseq_MT <- LoadingMAEGATK_typewise(samples_file = sample_path, patient = \"Sample1\", type_use = \"scRNAseq_MT\") ## [1] \"Loading the data for patient Sample1.\" ## [1] \"We read in the samples file.\" ## [1] \"We subset to the patient of interest.\" ## [1] \"We get the different samples.\" ## [1] \"We read in the cell barcodes output by CellRanger as a list.\" ## [1] \"We load the MAEGATK output files.\" ## [1] \"Loading sample 1 of 2\" ## [1] \"Loading sample 2 of 2\" ## [1] \"We merge the samples.\" ## [1] \"We get the allele frequency.\" ## [1] \"We get the coverage information.\" ## [1] \"We get the number of alternative reads per variant.\" ## [1] \"We get the quality information.\" ## [1] \"We get the number of reference reads.\" ## [1] \"Calculating the strand concordance.\" ## [1] \"We calculate the consensus information.\" ## [1] \"We perform some filtering to reduce the memory needed.\" ## [1] \"We remove variants, which are not covered in at least 2 cells .\" ## [1] \"We remove cells that are always NoCall.\" ## [1] \"We add the information to the merged matrices.\" samples_file = sample_path patient = \"Sample1\" type_use = \"scRNAseq_MT\" samples_path = NULL chromosome_prefix = \"chrM\" min_cells = 2 barcodes_path = NULL verbose = TRUE Sample1_combined <- CombineSEobjects(se_somatic = Sample1_scRNAseq_Somatic, se_MT = Sample1_scRNAseq_MT, suffixes = c(\"_somatic\", \"_MT\")) se_somatic = Sample1_scRNAseq_Somatic se_MT = Sample1_scRNAseq_MT suffixes = c(\"_somatic\", \"_MT\") rm(Sample1_scRNAseq_Somatic, Sample1_scRNAseq_MT) Sample1_combined <- Filtering(Sample1_combined, min_cells_per_variant = 2, fraction_threshold = 0.05) ## [1] \"We assume that cells with a fraction smaller than our threshold are actually NoCall.\" ## [1] \"We set consensus values to 0 (NoCall) and fraction values to 0.\" ## [1] \"We do not set fractions between 0.05 and 1 
to 1.\" ## [1] \"This way, we retain the heterozygous information.\" ## [1] \"We remove all the variants that are always NoCall.\" ## [1] \"We remove variants, that are not at least detected in 2 cells.\" ## [1] \"We remove all cells that are not >= 1 (Ref) for at least 1 variant.\" Sample1_combined <- VariantBurden(Sample1_combined)"},{"path":"/articles/SiGURD.html","id":"determing-mt-variants-of-interest-","dir":"Articles","previous_headings":"","what":"Determing MT variants of interest.","title":"SiGURD","text":"thresholding adapted Miller et al. https://github.com/petervangalen/MAESTER-2021 https://www.nature.com/articles/s41587-022-01210-8 heatmap needs time plot, since cells clustered.","code":"voi_ch <- VariantQuantileThresholding(SE = Sample1_combined, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9)) ## [1] \"Get the mean allele frequency and coverage.\" ## [1] \"Get the quantiles of the VAFs of each variant.\" ## [1] \"Thresholding using the quantile approach.\" hm <- HeatmapVoi(SE = Sample1_combined, voi = voi_ch) print(hm)"},{"path":"/articles/SiGURD.html","id":"association-of-variants","dir":"Articles","previous_headings":"","what":"Association of Variants","title":"SiGURD","text":"Using Fisher’s Exact test, find co-present variants. can also use correlation variants. , combine somatic MT results. Since possible number tests/correlations quite large, can use multiple cores perform calculations.","code":"#Sample1_split_rows <- RowWiseSplit(Sample1_combined, remove_nocalls = FALSE, n_cores = 19) #results_fishertest <- VariantWiseFisherTest(Sample1_split_rows, n_cores = 19) #rm(Sample1_split_rows) #variant_association_heatmap <- VariantFisherTestHeatmap(results_fishertest, patient = \"Sample1\", min_alt_cells = 3) #print(variant_association_heatmap)"},{"path":"/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Martin Grasshoff. Author, maintainer. Ivan Costa Gesteira. 
Author.","code":""},{"path":"/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Grasshoff M, Gesteira (2023). sigurd: Single cell Genotyping Using RNA Data. R package version 0.2.30, https://costalab.github.io/sigurd/.","code":"@Manual{, title = {sigurd: Single cell Genotyping Using RNA Data}, author = {Martin Grasshoff and Ivan Costa Gesteira}, year = {2023}, note = {R package version 0.2.30}, url = {https://costalab.github.io/sigurd/}, }"},{"path":"/index.html","id":"single-cell-genotyping-using-rna-data-sigurd","dir":"","previous_headings":"","what":"Single cell Genotyping Using RNA Data","title":"Single cell Genotyping Using RNA Data","text":"Martin Graßhoff1 Ivan G. Costa1 1Institute Computational Genomics, Faculty Medicine, RWTH Aachen University, Aachen, 52074 Germany Motivation: advent single RNA seq assays, became possible determine mutational status individual cell. Single cell RNA seq data nature sparse probability hitting specific variants interest therefore low. issue can overcome using modified amplicon assays, also possible impute mutational status using correlation detected mitochondrial somatic variants. Results: Sigurd R package analysis single cell data. determine overall variant burden per cell also number interesting mitochondrial variants using previously published approaches. employ imputation approach utilizes correlation mitochondrial variants somatic variants. Mitochondrial mutations significantly associated somatic mutations used stand-ins.","code":""},{"path":"/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Single cell Genotyping Using RNA Data","text":"can install sigurd using following code. 
vignette requires data currently published, provided reference.","code":"install.packages(\"devtools\") devtools::install_github(\"https://github.com/CostaLab/sigurd.git\", build_vignettes = FALSE) require(sigurd)"},{"path":"/index.html","id":"sigurd","dir":"","previous_headings":"","what":"SiGURD","title":"Single cell Genotyping Using RNA Data","text":"provided small example data set SiGURD. consists chromosome 9 MT one MPN sample. mutation data obtained Sanger Institute Catalogue Somatic Mutations Cancer web site, http://cancer.sanger.ac.uk/cosmic Bamford et al (2004) COSMIC (Catalogue Somatic Mutations Cancer) database website. Br J Cancer, 91,355-358.","code":"# This will be included for published data. # vignette('sigurd')"},{"path":"/index.html","id":"current-features-v0230","dir":"","previous_headings":"","what":"Current Features v0.2.30","title":"Single cell Genotyping Using RNA Data","text":"Loading data VarTrix MAEGATK. Transforming data compatible joint analysis. Calculating variant burden per cell. Thresholding variants using approach described Miller et al. [2] Finding associated variants using correlation Fisher Test.","code":""},{"path":"/index.html","id":"sources","dir":"","previous_headings":"","what":"Sources","title":"Single cell Genotyping Using RNA Data","text":"package implements approaches following packages respositories: - https://github.com/petervangalen/MAESTER-2021 – Variant Thresholding functions loading MAEGATK data. - https://github.com/CostaLab/CimpleG – loading saving function.","code":""},{"path":"/index.html","id":"future","dir":"","previous_headings":"","what":"Future","title":"Single cell Genotyping Using RNA Data","text":"Memory optimization Loading CB sniffer results Providing data vignette","code":""},{"path":"/index.html","id":"references","dir":"","previous_headings":"","what":"References","title":"Single cell Genotyping Using RNA Data","text":"[1] VarTrix. github [2] Miller, T.E., et al. 
Mitochondrial variant enrichment high-throughput single-cell RNA sequencing resolves clonal populations. Nat Biotechnol (2022). link. See also: MAEGATK Analysis, Data","code":""},{"path":"/reference/AmpliconSupplementing.html","id":null,"dir":"Reference","previous_headings":"","what":"Supplementing scRNAseq values with Amplicon values — AmpliconSupplementing","title":"Supplementing scRNAseq values with Amplicon values — AmpliconSupplementing","text":"replace values scRNAseq experiment values amplicon experiment.","code":""},{"path":"/reference/AmpliconSupplementing.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supplementing scRNAseq values with Amplicon values — AmpliconSupplementing","text":"","code":"AmpliconSupplementing(scRNAseq, amplicon, verbose = TRUE)"},{"path":"/reference/AmpliconSupplementing.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supplementing scRNAseq values with Amplicon values — AmpliconSupplementing","text":"scRNAseq SummarizedExperiment object containing scRNAseq data. amplicon SummarizedExperiment object containing amplicon data. verbose function verbose? 
Default = TRUE","code":""},{"path":"/reference/AmpliconSupplementing_big.html","id":null,"dir":"Reference","previous_headings":"","what":"Supplementing scRNAseq values with Amplicon values using big.matrix — AmpliconSupplementing_big","title":"Supplementing scRNAseq values with Amplicon values using big.matrix — AmpliconSupplementing_big","text":"replace values scRNAseq experiment values amplicon experiment.","code":""},{"path":"/reference/AmpliconSupplementing_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Supplementing scRNAseq values with Amplicon values using big.matrix — AmpliconSupplementing_big","text":"","code":"AmpliconSupplementing_big(scRNAseq, amplicon)"},{"path":"/reference/AmpliconSupplementing_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Supplementing scRNAseq values with Amplicon values using big.matrix — AmpliconSupplementing_big","text":"scRNAseq SummarizedExperiment object containing scRNAseq data. amplicon SummarizedExperiment object containing amplicon data.","code":""},{"path":"/reference/CalculateAlleleFrequency.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculating the Minor Allele Frequency. — CalculateAlleleFrequency","title":"Calculating the Minor Allele Frequency. — CalculateAlleleFrequency","text":"calculate MAF reference reads matrix alternative reads matrix. function intended used mitochondrial genome somatic mutations.","code":""},{"path":"/reference/CalculateAlleleFrequency.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculating the Minor Allele Frequency. — CalculateAlleleFrequency","text":"","code":"CalculateAlleleFrequency(reference_reads, alternative_reads, pseudo_count = 0)"},{"path":"/reference/CalculateAlleleFrequency.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculating the Minor Allele Frequency. 
— CalculateAlleleFrequency","text":"reference_reads Reference reads matrix. alternative_reads List matrices alternative reads. pseudo_count = pseudo count want add reference_reads matrix. Default = 0","code":""},{"path":"/reference/CalculateAltReads.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateAltReads — CalculateAltReads","title":"CalculateAltReads — CalculateAltReads","text":"calculate number reads covering variant using forward reverse reads.","code":""},{"path":"/reference/CalculateAltReads.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateAltReads — CalculateAltReads","text":"","code":"CalculateAltReads(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/CalculateAltReads.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateAltReads — CalculateAltReads","text":"SE SummarizedExperiment object. chromosome_prefix List matrices alternative reads.","code":""},{"path":"/reference/CalculateConsensus.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateConsensus — CalculateConsensus","title":"CalculateConsensus — CalculateConsensus","text":"calculate consensus information MAEGATK results. set cells alternative reads 2 (Alternative). set cells reference reads 1 (Reference). set cells mixture alternative reference reads 3 (). set cells reads 0 (NoCall). Please note. Cells can reads reference specific variant reads alternative. cell can still reads alternative alleles. cell still considered 0 (NoCall) variant. example: cell position 3: 0 reads, 53 T reads, 63 C reads, 148 T reads. variant chrM_3_T_A, cell 53 reference reads, also reads variants position. 
make sure confusion, cell set NoCall.","code":""},{"path":"/reference/CalculateConsensus.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateConsensus — CalculateConsensus","text":"","code":"CalculateConsensus(SE, chromosome_prefix = \"chrM\", verbose = FALSE)"},{"path":"/reference/CalculateConsensus.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateConsensus — CalculateConsensus","text":"SE SummarizedExperiment object. chromosome_prefix chromosome name used prefix. verbose function verbose? Default = FALSE","code":""},{"path":"/reference/CalculateCorrelationPValue.html","id":null,"dir":"Reference","previous_headings":"","what":"Correlating the SNVs — CalculateCorrelationPValue","title":"Correlating the SNVs — CalculateCorrelationPValue","text":"perform correlation SNVs calculate P values.","code":""},{"path":"/reference/CalculateCorrelationPValue.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Correlating the SNVs — CalculateCorrelationPValue","text":"","code":"CalculateCorrelationPValue( variant_values, other_mutation, all_variants_list, min_intersecting_cells = 5 )"},{"path":"/reference/CalculateCorrelationPValue.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Correlating the SNVs — CalculateCorrelationPValue","text":"variant_values fraction values analysing. vector. other_mutation variants . vector variant names. all_variants_list List fraction values variants want compare variant . min_intersecting_cells Minimum number intersecting cells. 
Correlations less performed.","code":""},{"path":"/reference/CalculateCoverage.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateCoverage — CalculateCoverage","title":"CalculateCoverage — CalculateCoverage","text":"calculate coverage information per variant MAEGATK results.","code":""},{"path":"/reference/CalculateCoverage.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateCoverage — CalculateCoverage","text":"","code":"CalculateCoverage(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/CalculateCoverage.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateCoverage — CalculateCoverage","text":"SE SummarizedExperiment object. chromosome_prefix List matrices alternative reads.","code":""},{"path":"/reference/CalculateFisherTestPValue.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateFisherTestPValue — CalculateFisherTestPValue","title":"CalculateFisherTestPValue — CalculateFisherTestPValue","text":"perform Fisher Test SNVs calculate P values.","code":""},{"path":"/reference/CalculateFisherTestPValue.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateFisherTestPValue — CalculateFisherTestPValue","text":"","code":"CalculateFisherTestPValue( variant_values, other_mutation, all_variants_list, min_intersecting_cells = 5 )"},{"path":"/reference/CalculateFisherTestPValue.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateFisherTestPValue — CalculateFisherTestPValue","text":"variant_values fraction values analysing. vector. other_mutation variants . vector variant names. all_variants_list List fraction values variants want compare variant . min_intersecting_cells Minimum number intersecting cells. 
Correlations less performed.","code":""},{"path":"/reference/CalculateFisherTestPValue2.html","id":null,"dir":"Reference","previous_headings":"","what":"We perform the Fisher test of SNVs and calculate the P values. — CalculateFisherTestPValue2","title":"We perform the Fisher test of SNVs and calculate the P values. — CalculateFisherTestPValue2","text":"perform Fisher test SNVs calculate P values.","code":""},{"path":"/reference/CalculateFisherTestPValue2.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"We perform the Fisher test of SNVs and calculate the P values. — CalculateFisherTestPValue2","text":"","code":"CalculateFisherTestPValue2( variant_values, other_mutation, all_variants_list, min_intersecting_cells = 5 )"},{"path":"/reference/CalculateFisherTestPValue2.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"We perform the Fisher test of SNVs and calculate the P values. — CalculateFisherTestPValue2","text":"variant_values fraction values analysing. vector. other_mutation variants . vector variant names. all_variants_list List fraction values variants want compare variant . min_intersecting_cells Minimum number intersecting cells. Correlations less performed.","code":""},{"path":"/reference/CalculateQuality.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateQuality — CalculateQuality","title":"CalculateQuality — CalculateQuality","text":"calculate quality per variant.","code":""},{"path":"/reference/CalculateQuality.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateQuality — CalculateQuality","text":"","code":"CalculateQuality(SE, variants, chromosome_prefix = \"chrM\")"},{"path":"/reference/CalculateQuality.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateQuality — CalculateQuality","text":"SE SummarizedExperiment object. variants variants want get quality . 
chromosome_prefix List matrices alternative reads.","code":""},{"path":"/reference/CalculateStrandCorrelation.html","id":null,"dir":"Reference","previous_headings":"","what":"CalculateStrandCorrelation — CalculateStrandCorrelation","title":"CalculateStrandCorrelation — CalculateStrandCorrelation","text":"calculate correlation amount forward reverse reads per variant.","code":""},{"path":"/reference/CalculateStrandCorrelation.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CalculateStrandCorrelation — CalculateStrandCorrelation","text":"","code":"CalculateStrandCorrelation(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/CalculateStrandCorrelation.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CalculateStrandCorrelation — CalculateStrandCorrelation","text":"SE SummarizedExperiment object. chromosome_prefix List matrices alternative reads.","code":""},{"path":"/reference/CombineSEobjects.html","id":null,"dir":"Reference","previous_headings":"","what":"CombineSEobjects — CombineSEobjects","title":"CombineSEobjects — CombineSEobjects","text":"combine two SummarizedExperiment objects.","code":""},{"path":"/reference/CombineSEobjects.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CombineSEobjects — CombineSEobjects","text":"","code":"CombineSEobjects(se_somatic, se_MT, suffixes = c(\"_somatic\", \"_MT\"))"},{"path":"/reference/CombineSEobjects.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CombineSEobjects — CombineSEobjects","text":"se_somatic SummarizedExperiment object somatic variants. se_MT SummarizedExperiment object MT variants. 
suffixes suffixes want add meta data.frame.","code":""},{"path":"/reference/CombineSEobjects_big.html","id":null,"dir":"Reference","previous_headings":"","what":"CombineSEobjects_big — CombineSEobjects_big","title":"CombineSEobjects_big — CombineSEobjects_big","text":"combine two SummarizedExperiment objects big matrices.","code":""},{"path":"/reference/CombineSEobjects_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"CombineSEobjects_big — CombineSEobjects_big","text":"","code":"CombineSEobjects_big(se_somatic, se_MT, suffixes = c(\"_somatic\", \"_MT\"))"},{"path":"/reference/CombineSEobjects_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"CombineSEobjects_big — CombineSEobjects_big","text":"se_somatic SummarizedExperiment object somatic variants. se_MT SummarizedExperiment object MT variants. suffixes suffixes want add meta data.frame.","code":""},{"path":"/reference/Filtering.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering the loaded genotyping data. — Filtering","title":"Filtering the loaded genotyping data. — Filtering","text":"filter SummarizedExperiment object exclude variants cells.","code":""},{"path":"/reference/Filtering.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering the loaded genotyping data. — Filtering","text":"","code":"Filtering( se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, reject_value = \"NoCall\", verbose = TRUE )"},{"path":"/reference/Filtering.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering the loaded genotyping data. — Filtering","text":"se SummarizedExperiment object. blacklisted_barcodes_path Barcodes want remove. Path file one column without header. fraction_threshold Variants VAF threshold set 0. Numeric. Default = NULL. 
alts_threshold Variants number alt reads less threshold set 0. Numeric. Default = NULL. min_cells_per_variant many cells variant present included? Numeric. Default = 2. min_variants_per_cell many variants covered cell included? Default = 1. reject_value cells fall threshold (fraction_threshold alts_threshold) treated Reference NoCall? Default = NoCall. verbose function verbose? Default = TRUE","code":""},{"path":"/reference/Filtering.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering the loaded genotyping data. — Filtering","text":"one sample time. want remove: cells blacklisted, cells least one variant >1 (Reference), variants alternative transcripts, variants always NoCall, set variants VAF threshold NoCall Reference.","code":""},{"path":"/reference/Filtering.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Filtering the loaded genotyping data. — Filtering","text":"","code":"if (FALSE) { # Removing all variants that are not detected in at least 2 cells. # Before we remove the variants, we set fraction value of variants below 0.05 to 0. se_geno <- Filtering(se_geno, min_cells_per_variant = 2, fraction_threshold = 0.05) }"},{"path":"/reference/Filtering_big.html","id":null,"dir":"Reference","previous_headings":"","what":"Filtering the loaded genotyping data using big matrices. — Filtering_big","title":"Filtering the loaded genotyping data using big matrices. — Filtering_big","text":"filter SummarizedExperiment object exclude variants cells.","code":""},{"path":"/reference/Filtering_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Filtering the loaded genotyping data using big matrices. 
— Filtering_big","text":"","code":"Filtering_big( se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, path_seurat = NULL, min_cells_per_variant = 2 )"},{"path":"/reference/Filtering_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Filtering the loaded genotyping data using big matrices. — Filtering_big","text":"se SummarizedExperiment object. blacklisted_barcodes_path Barcodes want remove. Path file one column without header. fraction_threshold Variants VAF threshold set 0. Numeric. path_seurat Path Seurat object. Cells present object removed. min_cells_per_variant many cells variant present included? Numeric. Default = 2. min_variants_per_cell many variants covered cell included? Default = 1.","code":""},{"path":"/reference/Filtering_big.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Filtering the loaded genotyping data using big matrices. — Filtering_big","text":"one sample time. want remove: cells blacklisted, cells Seurat object, cells least one variant >1 (Reference), variants alternative transcripts, variants always NoCall, set variants VAF threshold reference.","code":""},{"path":"/reference/Filtering_big.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Filtering the loaded genotyping data using big matrices. — Filtering_big","text":"","code":"if (FALSE) { # Removing all variants that are not detected in at least 2 cells. # Before we remove the variants, we set fraction value of variants below 0.05 to 0. se_geno <- Filtering(se_geno, min_cells_per_variant = 2, fraction_threshold = 0.05) }"},{"path":"/reference/GetCellInfoPerVariant.html","id":null,"dir":"Reference","previous_headings":"","what":"We get the variant information per cell. — GetCellInfoPerVariant","title":"We get the variant information per cell. 
— GetCellInfoPerVariant","text":"get variant information per cell.","code":""},{"path":"/reference/GetCellInfoPerVariant.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"We get the variant information per cell. — GetCellInfoPerVariant","text":"","code":"GetCellInfoPerVariant(se, voi_ch, verbose = FALSE)"},{"path":"/reference/GetCellInfoPerVariant.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"We get the variant information per cell. — GetCellInfoPerVariant","text":"se SummarizedExperiment object. voi_ch Variants interest. verbose function verbose? Default = FALSE","code":""},{"path":"/reference/GetVariantInfo.html","id":null,"dir":"Reference","previous_headings":"","what":"GetVariantInfo — GetVariantInfo","title":"GetVariantInfo — GetVariantInfo","text":"get genotyping information set variants. function returns matrix values specified assay.","code":""},{"path":"/reference/GetVariantInfo.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"GetVariantInfo — GetVariantInfo","text":"","code":"GetVariantInfo(SE, information = \"consensus\", variants = NULL, cells = NULL)"},{"path":"/reference/GetVariantInfo.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"GetVariantInfo — GetVariantInfo","text":"SE SummarizedExperiment object. information assay desired information. Default: consensus variants vector variants. cells vector cell IDs. default cells returned. 
Default: NULL","code":""},{"path":"/reference/HeatmapVoi.html","id":null,"dir":"Reference","previous_headings":"","what":"HeatmapVoi — HeatmapVoi","title":"HeatmapVoi — HeatmapVoi","text":"plot heatmap set Variants Interest using Variant Allele Frequency values SummarizedExperiment object.","code":""},{"path":"/reference/HeatmapVoi.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"HeatmapVoi — HeatmapVoi","text":"","code":"HeatmapVoi( SE, voi, annotation_trait = NULL, column_title = NULL, remove_empty_cells = FALSE )"},{"path":"/reference/HeatmapVoi.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"HeatmapVoi — HeatmapVoi","text":"SE SummarizedExperiment object. voi Variants Interest. annotation_trait Cell Annotation bottom heat map. column_title title heat map. Default = NULL remove_empty_cells cells fraction 0 variants removed? Default = FALSE","code":""},{"path":"/reference/LoadingMAEGATK_typewise.html","id":null,"dir":"Reference","previous_headings":"","what":"LoadingMAEGATK_typewise — LoadingMAEGATK_typewise","title":"LoadingMAEGATK_typewise — LoadingMAEGATK_typewise","text":"load MAEGATK output transform compatible VarTrix output. input file specifically formated csv file necessary information run analysis. Note source column input file needs one following: vartrix, mgaetk, mgatk. want load single sample without use input file, set following variables. 
samples_path barcodes_path patient samples_file = NULL","code":""},{"path":"/reference/LoadingMAEGATK_typewise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"LoadingMAEGATK_typewise — LoadingMAEGATK_typewise","text":"","code":"LoadingMAEGATK_typewise( samples_file, samples_path = NULL, patient, type_use = \"scRNAseq_MT\", chromosome_prefix = \"chrM\", min_cells = 2, barcodes_path = NULL, verbose = TRUE )"},{"path":"/reference/LoadingMAEGATK_typewise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"LoadingMAEGATK_typewise — LoadingMAEGATK_typewise","text":"samples_file Path csv file samples loaded. samples_path Path input folder. patient patient want load. type_use type input. one : scRNAseq_MT, Amplicon_MT. used samples_path NULL. chromosome_prefix prefix want use. Default: \"chrM\" min_cells minimum number cells coverage variant. Variants coverage less amount cells removed. Default = 2 barcodes_path Path barcodes file tsv. Default = NULL verbose function verbose? Default = TRUE","code":""},{"path":"/reference/LoadingVCF_typewise.html","id":null,"dir":"Reference","previous_headings":"","what":"LoadingVCF_typewise — LoadingVCF_typewise","title":"LoadingVCF_typewise — LoadingVCF_typewise","text":"load cellwise pileup result VCF file. want load single sample without use input file, set following variables. 
samples_path barcodes_path patient samples_file = NULL","code":""},{"path":"/reference/LoadingVCF_typewise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"LoadingVCF_typewise — LoadingVCF_typewise","text":"","code":"LoadingVCF_typewise( samples_file, samples_path = NULL, barcodes_path = NULL, vcf_path, patient, type_use = \"scRNAseq_Somatic\", min_reads = NULL, min_cells = 2, verbose = TRUE )"},{"path":"/reference/LoadingVCF_typewise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"LoadingVCF_typewise — LoadingVCF_typewise","text":"samples_file Path csv file samples loaded. samples_path Path input folder. Must include barcodes file. barcodes_path Path cell barcodes tsv. Default = NULL vcf_path Path VCF file variants. patient patient want load. type_use type input. one : scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. min_reads minimum number reads want. Otherwise treat NoCall. Default = NULL. min_cells minimum number cells variant. Otherwise, remove variant. Default = 2. verbose function verbose? Default = TRUE","code":""},{"path":"/reference/LoadingVarTrix.html","id":null,"dir":"Reference","previous_headings":"","what":"We load the VarTrix results for one patient and merge all types together. — LoadingVarTrix","title":"We load the VarTrix results for one patient and merge all types together. — LoadingVarTrix","text":"load VarTrix results one patient merge types together.","code":""},{"path":"/reference/LoadingVarTrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"We load the VarTrix results for one patient and merge all types together. 
— LoadingVarTrix","text":"","code":"LoadingVarTrix(samples_path, vcf_path, vcf_path_MT, patient)"},{"path":"/reference/LoadingVarTrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"We load the VarTrix results for one patient and merge all types together. — LoadingVarTrix","text":"samples_path Path csv file samples loaded. vcf_path Path VCF file somatic variants. vcf_path_MT Path VCF file MT variants. patient Patient loaded.","code":""},{"path":"/reference/LoadingVarTrix_ori.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"function loads VarTrix results outputs list two SummarizedExperiments objects, one somatic variants accompanying mitochondrial variants.","code":""},{"path":"/reference/LoadingVarTrix_ori.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"","code":"LoadingVarTrix(samples_file, vcf_path, vcf_path_MT, patient)"},{"path":"/reference/LoadingVarTrix_ori.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"samples_file Path input file. One sample per row. contain following columns:patient: patient sample. patient can multiple samples (treated/untreated). samples merged per patient.sample: ID sample.resource: VarTrix MAEGATK input? Must either VarTrix MAEGATK.type: type data? Must one following: scRNAseq_Somatic, scRNAseq_MT, Amplicon_Somatic, Amplicon_MT.input_folder: Path folder, VarTrix output stored.cells: Path barcodes file CellRanger output. vcf_path Path VCF file contains somatic variants interested . vcf_path_MT Path VCF file contains MT variants interested . 
patient patient want load.","code":""},{"path":"/reference/LoadingVarTrix_ori.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"function loads VarTrix one patients input file. patient multiple samples, samples merged. two types input data, single cell RNA sequencing amplicon data. scRNAseq data result something like 10X assay, amplicon data focuses specific part genome. output CellRanger pipeline. Amplicon data much higher coverage specific area therefore much better areas. scRNAseq data overwritten amplicon data available. output list two SummarizedExperiment objects. object contains two assays:consensus_somatic: consensus information per somatic variant cell.consensus_MT: consensus information per MT variant cell.fraction_somatic: variant allele frequency per somatic variant cell.fraction_MT: variant allele frequency per MT variant cell. consensus information encoded following way:0: Call, reads covering position.1: Reference, reference reads cover position.2: Alternative, mutated reads cover position.3: , alt ref reads cover position.","code":""},{"path":"/reference/LoadingVarTrix_ori.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Loading VarTrix results for the down stream analysis. — LoadingVarTrix","text":"return list two objects. SE_Somatic somatic variants results. SE_MT MT variants results.","code":""},{"path":[]},{"path":[]},{"path":[]},{"path":[]},{"path":[]},{"path":"/reference/LoadingVarTrix_typewise.html","id":null,"dir":"Reference","previous_headings":"","what":"LoadingVarTrix_typewise — LoadingVarTrix_typewise","title":"LoadingVarTrix_typewise — LoadingVarTrix_typewise","text":"load different types results (scRNAseq/amplicon MT/amplicon), might need extreme amounts memory. solve issue, load type separately. 
following function (AmpliconSupplementing), can add amplicon information scRNAseq information. input file specifically formated csv file necessary information run analysis. Note source column input file needs one following: vartrix, mgaetk, mgatk. hard coded case insensitive.","code":""},{"path":"/reference/LoadingVarTrix_typewise.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"LoadingVarTrix_typewise — LoadingVarTrix_typewise","text":"","code":"LoadingVarTrix_typewise( samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, type_use = \"scRNAseq_Somatic\", min_reads = NULL, min_cells = 2, verbose = TRUE )"},{"path":"/reference/LoadingVarTrix_typewise.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"LoadingVarTrix_typewise — LoadingVarTrix_typewise","text":"samples_file Path csv file samples loaded. samples_path Path input folder. Must include barcodes file. barcodes_path path cell barcodes tsv. Default = NULL snp_path Path SNP file used VarTrix (SNV.loci.txt). vcf_path Path VCF file variants. patient patient want load. type_use type input. one : scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. min_reads minimum number reads want. Otherwise treat NoCall. Default = NULL. min_cells minimum number cells variant. Otherwise, remove variant. Default = 2. verbose function verbose? Default = TRUE","code":""},{"path":"/reference/LoadingVarTrix_typewise_big.html","id":null,"dir":"Reference","previous_headings":"","what":"Loading VarTrix results using big.matrix — LoadingVarTrix_typewise_big","title":"Loading VarTrix results using big.matrix — LoadingVarTrix_typewise_big","text":"load different types results (scRNAseq/amplicon MT/amplicon), might need extreme amounts memory. solve issue, load type separately. 
following function (AmpliconSupplementing), can add amplicon information scRNAseq information.","code":""},{"path":"/reference/LoadingVarTrix_typewise_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Loading VarTrix results using big.matrix — LoadingVarTrix_typewise_big","text":"","code":"LoadingVarTrix_typewise_big( samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, sample = NULL, type_use = \"scRNAseq_Somatic\", min_reads = 3, min_cells = 2 )"},{"path":"/reference/LoadingVarTrix_typewise_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Loading VarTrix results using big.matrix — LoadingVarTrix_typewise_big","text":"samples_file Path csv file samples loaded. samples_path Path input folder. barcodes_path Path barcodes file. vcf_path Path VCF file variants. patient patient want load. type_use type input. one : scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. min_reads minimum number reads want. Otherwise treat NoCall. yet implemented. min_cells minimum number cells variant. Otherwise, remove variant.","code":""},{"path":"/reference/Merging_SE_list.html","id":null,"dir":"Reference","previous_headings":"","what":"Merging list of SummarizedExperiment objects. — Merging_SE_list","title":"Merging list of SummarizedExperiment objects. — Merging_SE_list","text":"function wrapper .(\"cbind\", se).","code":""},{"path":"/reference/Merging_SE_list.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Merging list of SummarizedExperiment objects. — Merging_SE_list","text":"","code":"Merging_SE_list(se)"},{"path":"/reference/Merging_SE_list.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Merging list of SummarizedExperiment objects. 
— Merging_SE_list","text":"se SummarizedExperiment object","code":""},{"path":"/reference/RowWiseSplit.html","id":null,"dir":"Reference","previous_headings":"","what":"RowWiseSplit — RowWiseSplit","title":"RowWiseSplit — RowWiseSplit","text":"Performing correlation Fisher test association SummarizedExperiment object requires extreme amounts memory. reduce amount memory necessary, instead get individual rows consensus assay. can remove NoCalls (reads) individual vectors, reducing amount memory needed.","code":""},{"path":"/reference/RowWiseSplit.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"RowWiseSplit — RowWiseSplit","text":"","code":"RowWiseSplit(se, n_cores = 1, remove_nocalls = TRUE)"},{"path":"/reference/RowWiseSplit.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"RowWiseSplit — RowWiseSplit","text":"se SummarizedExperiment object. n_cores Number cores use. remove_nocalls want remove NoCall cells?","code":""},{"path":"/reference/SeparatingMatrixToList.html","id":null,"dir":"Reference","previous_headings":"","what":"SeparatingMatrixToList — SeparatingMatrixToList","title":"SeparatingMatrixToList — SeparatingMatrixToList","text":"separate matrix variant information list. variant entry list. NoCalls (cells reads covering variant) can removed. function gets called RowWiseSplit return.","code":""},{"path":"/reference/SeparatingMatrixToList.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"SeparatingMatrixToList — SeparatingMatrixToList","text":"","code":"SeparatingMatrixToList(row_use, total_matrix, remove_nocalls = TRUE)"},{"path":"/reference/SeparatingMatrixToList.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"SeparatingMatrixToList — SeparatingMatrixToList","text":"row_use row separate. total_matrix matrix split. 
remove_nocalls want remove NoCall cells?","code":""},{"path":"/reference/SetVariantInfo.html","id":null,"dir":"Reference","previous_headings":"","what":"GetVariantInfo — SetVariantInfo","title":"GetVariantInfo — SetVariantInfo","text":"add genotyping information set variants Seurat object. function returns matrix values specified assay.","code":""},{"path":"/reference/SetVariantInfo.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"GetVariantInfo — SetVariantInfo","text":"","code":"SetVariantInfo(SE, seurat_object, information = \"consensus\", variants = NULL)"},{"path":"/reference/SetVariantInfo.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"GetVariantInfo — SetVariantInfo","text":"SE SummarizedExperiment object. seurat_object Seurat object. information assay desired information. Default: consensus variants vector variants.","code":""},{"path":"/reference/VariantBurden.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantBurden — VariantBurden","title":"VariantBurden — VariantBurden","text":"Calculate variant burden per cell. simply sum MAF values per cell.","code":""},{"path":"/reference/VariantBurden.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantBurden — VariantBurden","text":"","code":"VariantBurden(se)"},{"path":"/reference/VariantBurden.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantBurden — VariantBurden","text":"se SummarizedExperiment object","code":""},{"path":"/reference/VariantBurden_big.html","id":null,"dir":"Reference","previous_headings":"","what":"Calculate the variant burden per cell using big matrices. — VariantBurden_big","title":"Calculate the variant burden per cell using big matrices. 
— VariantBurden_big","text":"Calculate variant burden per cell using big matrices.","code":""},{"path":"/reference/VariantBurden_big.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Calculate the variant burden per cell using big matrices. — VariantBurden_big","text":"","code":"VariantBurden_big(se)"},{"path":"/reference/VariantBurden_big.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Calculate the variant burden per cell using big matrices. — VariantBurden_big","text":"se SummarizedExperiment object","code":""},{"path":"/reference/VariantCloneSizeThresholding.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantCloneSizeThresholding — VariantCloneSizeThresholding","title":"VariantCloneSizeThresholding — VariantCloneSizeThresholding","text":"get variants interest using clone size thresholding. Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/VariantCloneSizeThresholding.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantCloneSizeThresholding — VariantCloneSizeThresholding","text":"","code":"VariantCloneSizeThresholding( se, min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, vaf_threshold = 0.5, verbose = TRUE )"},{"path":"/reference/VariantCloneSizeThresholding.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantCloneSizeThresholding — VariantCloneSizeThresholding","text":"se SummarizedExperiment object. min_coverage Minimum coverage variant needs . fraction_negative_cells fraction negative cells needed. min_clone_size minimum number cells. vaf_threshold Variant Allele Threshold. Cells threshold considered mutated. verbose function verbose? 
Default = TRUE","code":""},{"path":"/reference/VariantCorrelationHeatmap.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantCorrelationHeatmap — VariantCorrelationHeatmap","title":"VariantCorrelationHeatmap — VariantCorrelationHeatmap","text":"generate heatmap showing correlation somatic variants MT variants. Packages want remove. see used. ggplot2 parallel rcompanion tidyr","code":""},{"path":"/reference/VariantCorrelationHeatmap.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantCorrelationHeatmap — VariantCorrelationHeatmap","text":"","code":"VariantCorrelationHeatmap( correlation_results, output_path = NULL, patient, min_alt_cells = 5, min_correlation = 0.5, width_use = 2000, height_use = 2000, padding_use = c(165, 165, 2, 2), verbose = TRUE )"},{"path":"/reference/VariantCorrelationHeatmap.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantCorrelationHeatmap — VariantCorrelationHeatmap","text":"correlation_results Data.frame correlation results. output_path Path output folder. patient patient heatmap. min_alt_cells Minimum number mutated cells needed, otherwise correlation plotted. min_correlation Minimum correlation needed. width_use Width heatmap px. height_use Height heatmap px. padding_use Space around heatmap mm. low, variant names might cut . verbose function verbose? Default = TRUE","code":""},{"path":"/reference/VariantFisherTestHeatmap.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantFisherTestHeatmap — VariantFisherTestHeatmap","title":"VariantFisherTestHeatmap — VariantFisherTestHeatmap","text":"generate heatmap showing Fisher test somatic variants MT variants. Packages want remove. 
ggplot2 parallel rcompanion tidyr","code":""},{"path":"/reference/VariantFisherTestHeatmap.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantFisherTestHeatmap — VariantFisherTestHeatmap","text":"","code":"VariantFisherTestHeatmap( fisher_results, patient, min_alt_cells = 5, min_oddsratio = 1, verbose = TRUE )"},{"path":"/reference/VariantFisherTestHeatmap.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantFisherTestHeatmap — VariantFisherTestHeatmap","text":"fisher_results Data.frame correlation results. patient patient heatmap. min_alt_cells Minimum number mutated cells needed, otherwise association plotted. min_oddsratio Minimum correlation needed. verbose function verbose? Default = TRUE","code":""},{"path":"/reference/VariantQuantileThresholding.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantQuantileThresholding — VariantQuantileThresholding","title":"VariantQuantileThresholding — VariantQuantileThresholding","text":"get variants interest using quantile thresholding. use top_cells top_VAF, supply one quantil value (quantiles = 0.9, thresholds = 0). function adapted Peter van Galen. 
Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/VariantQuantileThresholding.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantQuantileThresholding — VariantQuantileThresholding","text":"","code":"VariantQuantileThresholding( SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = NULL, mean_allele_frequency = 0, group_of_interest = NULL, group1 = NULL, group2 = NULL, group_factor = NULL, verbose = TRUE )"},{"path":"/reference/VariantQuantileThresholding.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantQuantileThresholding — VariantQuantileThresholding","text":"SE SummarizedExperiment object. min_coverage Minimum coverage needed. quantiles lower upper quantile want use. thresholds VAF thresholds want use quantiles. top_cells number cells least top_VAF percent variant. top_VAF VAF top cells. min_quality minimum quality want Variants Interest. Can ignored setting NULL. mean_allele_frequency minimum mean allele frequency. Default = 0 group_of_interest column data divides cells. group1 first group interest. group2 second group interest. group_factor much higher mean allele frequency group 1 compared group 2? verbose function verbose? Default = TRUE","code":""},{"path":"/reference/VariantWiseCorrelation.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantWiseCorrelation — VariantWiseCorrelation","title":"VariantWiseCorrelation — VariantWiseCorrelation","text":"correlate variants using Pearson correlation. function calls CalculateCorrelationPValue perform actual correlation. Packages want remove. 
SummarizedExperiment","code":""},{"path":"/reference/VariantWiseCorrelation.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantWiseCorrelation — VariantWiseCorrelation","text":"","code":"VariantWiseCorrelation( variants_list, n_cores = 1, p_value_adjustment = \"fdr\", verbose = TRUE )"},{"path":"/reference/VariantWiseCorrelation.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantWiseCorrelation — VariantWiseCorrelation","text":"variants_list List fraction values. n_cores Number cores want use. Numeric. p_value_adjustment Method P value adjustment. See p.adjust details. verbose function verbose? Default = TRUE","code":""},{"path":"/reference/VariantWiseFisherTest.html","id":null,"dir":"Reference","previous_headings":"","what":"VariantWiseFisherTest — VariantWiseFisherTest","title":"VariantWiseFisherTest — VariantWiseFisherTest","text":"perform Fisher test determine variants associated. function calls CalculateFisherTestPValue perform actual testing. Packages want remove. SummarizedExperiment","code":""},{"path":"/reference/VariantWiseFisherTest.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"VariantWiseFisherTest — VariantWiseFisherTest","text":"","code":"VariantWiseFisherTest( variants_list, n_cores = 1, p_value_adjustment = \"fdr\", verbose = TRUE )"},{"path":"/reference/VariantWiseFisherTest.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"VariantWiseFisherTest — VariantWiseFisherTest","text":"variants_list List fraction values. n_cores Number cores want use. Numeric. p_value_adjustment Method P value adjustment. See p.adjust details. verbose function verbose? 
Default = TRUE","code":""},{"path":"/reference/char_to_numeric.html","id":null,"dir":"Reference","previous_headings":"","what":"char_to_numeric — char_to_numeric","title":"char_to_numeric — char_to_numeric","text":"function convert heterozygous/homozygous information VCF consensus information VarTrix. used LoadingVCF_typewise.R.","code":""},{"path":"/reference/char_to_numeric.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"char_to_numeric — char_to_numeric","text":"","code":"char_to_numeric(char_value)"},{"path":"/reference/char_to_numeric.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"char_to_numeric — char_to_numeric","text":"char_value genotype encoding want convert?","code":""},{"path":"/reference/combine_NAMES.html","id":null,"dir":"Reference","previous_headings":"","what":"combine_NAMES — combine_NAMES","title":"combine_NAMES — combine_NAMES","text":"combine two vectors names.","code":""},{"path":"/reference/combine_NAMES.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"combine_NAMES — combine_NAMES","text":"","code":"combine_NAMES(x, y)"},{"path":"/reference/combine_NAMES.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"combine_NAMES — combine_NAMES","text":"x First vector names. 
y Second vector names.","code":""},{"path":"/reference/combine_SparseMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"combine_sparseMatrix — combine_SparseMatrix","title":"combine_sparseMatrix — combine_SparseMatrix","text":"combine two sparse matrices","code":""},{"path":"/reference/combine_SparseMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"combine_sparseMatrix — combine_SparseMatrix","text":"","code":"combine_SparseMatrix(matrix_1, matrix_2)"},{"path":"/reference/combine_SparseMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"combine_sparseMatrix — combine_SparseMatrix","text":"matrix_1 first sparse matrix. matrix_2 second matrix.","code":""},{"path":"/reference/computeAFMutMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"computeAFMutMatrix — computeAFMutMatrix","title":"computeAFMutMatrix — computeAFMutMatrix","text":"Calculate allele frequency per variant. Source: https://github.com/petervangalen/MAESTER-2021 can get AF values greater 1, due uninformative reads. See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD--lower--expected https://github.com/caleblareau/mgatk/issues/1 simply set values 1, since actual information case.","code":""},{"path":"/reference/computeAFMutMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"computeAFMutMatrix — computeAFMutMatrix","text":"","code":"computeAFMutMatrix(SE, chromosome_prefix = \"chrM\")"},{"path":"/reference/computeAFMutMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"computeAFMutMatrix — computeAFMutMatrix","text":"SE SummarizedExperiment object. 
chromosome_prefix prefix chromosome.","code":""},{"path":"/reference/getAltMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"getAltMatrix — getAltMatrix","title":"getAltMatrix — getAltMatrix","text":"get alt values MAEGATK results. Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/getAltMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"getAltMatrix — getAltMatrix","text":"","code":"getAltMatrix(SE_object, letter, chromosome_prefix = \"chrM\")"},{"path":"/reference/getAltMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"getAltMatrix — getAltMatrix","text":"SE_object SummarizedExperiment object. letter base want use. Character. chromosome_prefix chromosome prefix used.","code":""},{"path":"/reference/getMutMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"getMutMatrix — getMutMatrix","title":"getMutMatrix — getMutMatrix","text":"function gets allele frequency specific allele. used computeAFMutMatrix. Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/getMutMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"getMutMatrix — getMutMatrix","text":"","code":"getMutMatrix(SE, cov, letter, ref_allele, chromosome_prefix)"},{"path":"/reference/getMutMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"getMutMatrix — getMutMatrix","text":"SE SummarizedExperiment object. cov coverage matrix MAEGATK/MGATK. letter base interested . ref_allele Vector reference alleles. chromosome_prefix chromosome prefix used.","code":""},{"path":"/reference/getReadMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"Get the counts for a specific base over all positions. — getReadMatrix","title":"Get the counts for a specific base over all positions. 
— getReadMatrix","text":"Get counts specific base positions.","code":""},{"path":"/reference/getReadMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Get the counts for a specific base over all positions. — getReadMatrix","text":"","code":"getReadMatrix(SE, letter, chromosome_prefix = \"chrM\")"},{"path":"/reference/getReadMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Get the counts for a specific base over all positions. — getReadMatrix","text":"SE SummarizedExperiment object. letter base want counts. chromosome_prefix chromosome name used prefix.","code":""},{"path":"/reference/getRefMatrix.html","id":null,"dir":"Reference","previous_headings":"","what":"getRefMatrix — getRefMatrix","title":"getRefMatrix — getRefMatrix","text":"get reference values MAEGATK result. Source: https://github.com/petervangalen/MAESTER-2021","code":""},{"path":"/reference/getRefMatrix.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"getRefMatrix — getRefMatrix","text":"","code":"getRefMatrix(SE_object, letter, chromosome_prefix = \"chrM\")"},{"path":"/reference/getRefMatrix.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"getRefMatrix — getRefMatrix","text":"SE_object SummarizedExperiment object. letter base analysing. get matrix shows cells many reference reads letter. chromosome_prefix chromosome prefix used.","code":""},{"path":"/reference/get_consensus.html","id":null,"dir":"Reference","previous_headings":"","what":"get_consensus — get_consensus","title":"get_consensus — get_consensus","text":"get consensus information specific matrix. want remove packages needed. See package apperantly needed. 
Package remove: dplyr, SummarizedExperiment","code":""},{"path":"/reference/get_consensus.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"get_consensus — get_consensus","text":"","code":"get_consensus(alt_base, ref_base, input_matrix, chromosome_prefix = \"chrM\")"},{"path":"/reference/get_consensus.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"get_consensus — get_consensus","text":"alt_base alternative base. ref_base reference base. input_matrix Input matrix present reads numerically encoded. chromosome_prefix chromosome name used prefix.","code":""},{"path":"/reference/ggsci_pal.html","id":null,"dir":"Reference","previous_headings":"","what":"ggsci_pal — ggsci_pal","title":"ggsci_pal — ggsci_pal","text":"Function return colours ggsci palette.","code":""},{"path":"/reference/ggsci_pal.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"ggsci_pal — ggsci_pal","text":"","code":"ggsci_pal(option, ...)"},{"path":"/reference/ggsci_pal.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"ggsci_pal — ggsci_pal","text":"option colour palette choice. ... options passed palette function.","code":""},{"path":"/reference/ggsci_pal.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"ggsci_pal — ggsci_pal","text":"function returns colour palette ggsci. Options : aaas: 10 d3: 10 futurama: 12 gsea: 12 igv: 51 jama: 7 jco: 10 npg: 10 lancet: 9 locuszoom: 7 material: 10 nejm: 8 rickandmorty: 12 simpsons: 16 startrek: 7 tron: 7 uchicago: 9 ucscgb: 26","code":""},{"path":"/reference/load_object.html","id":null,"dir":"Reference","previous_headings":"","what":"load_object — load_object","title":"load_object — load_object","text":"loading function load RDS files quicker. 
Source: https://github.com/CostaLab/CimpleG","code":""},{"path":"/reference/load_object.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"load_object — load_object","text":"","code":"load_object(file_name)"},{"path":"/reference/load_object.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"load_object — load_object","text":"file_name path file.","code":""},{"path":"/reference/save_object.html","id":null,"dir":"Reference","previous_headings":"","what":"save_object — save_object","title":"save_object — save_object","text":"Saving function save RDS files quicker. Source:https://github.com/CostaLab/CimpleG","code":""},{"path":"/reference/save_object.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"save_object — save_object","text":"","code":"save_object(object, file_name, file_format = \"zstd\")"},{"path":"/reference/save_object.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"save_object — save_object","text":"object R object save. file_name path file shall save. file_format format save file. one : zstd, lz4, gzip, bzip2, xz, nocomp.","code":""},{"path":"/reference/sdiv.html","id":null,"dir":"Reference","previous_headings":"","what":"Division of sparse matrix. — sdiv","title":"Division of sparse matrix. — sdiv","text":"Division sparse matrix.","code":""},{"path":"/reference/sdiv.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Division of sparse matrix. — sdiv","text":"","code":"sdiv(X, Y, names = dimnames(X))"},{"path":"/reference/sdiv.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Division of sparse matrix. — sdiv","text":"X First sparse matrix. Y Second sparse matrix. 
names dimension names (dimnames(X)).","code":""}] diff --git a/docs/sitemap.xml b/docs/sitemap.xml index b929382..09248c0 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -66,12 +66,18 @@ /reference/GetCellInfoPerVariant.html + + /reference/GetVariantInfo.html + /reference/HeatmapVoi.html /reference/LoadingMAEGATK_typewise.html + + /reference/LoadingVCF_typewise.html + /reference/LoadingVarTrix.html @@ -93,6 +99,9 @@ /reference/SeparatingMatrixToList.html + + /reference/SetVariantInfo.html + /reference/VariantBurden.html @@ -117,6 +126,9 @@ /reference/VariantWiseFisherTest.html + + /reference/char_to_numeric.html + /reference/combine_NAMES.html @@ -129,6 +141,9 @@ /reference/getAltMatrix.html + + /reference/getMutMatrix.html + /reference/getReadMatrix.html diff --git a/inst/extdata/Input_Example_local.csv b/inst/extdata/Input_Example_local.csv index fa74a3e..a7662d8 100644 --- a/inst/extdata/Input_Example_local.csv +++ b/inst/extdata/Input_Example_local.csv @@ -1,11 +1,11 @@ -patient,sample,source,type,bam,input_folder,cells +patient,sample,source,type,bam,input_path,cells Sample1,Minus_Sample1,VarTrix,scRNAseq_Somatic,~/test_data/Minus_Sample1/possorted_genome_bam.bam,~/test_data/VarTrix/Somatic/,~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv Sample1,Minus_Sample1,VarTrix,scRNAseq_MT,~/test_data/Minus_Sample1/possorted_genome_bam.bam,~/test_data/VarTrix/MT/,~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv -Sample1,Minus_Sample1,MAEGATK,scRNAseq_MT,~/test_data/Minus_Sample1/possorted_genome_bam.bam,~/test_data/MAEGATK/,~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv +Sample1,Minus_Sample1,MAEGATK,scRNAseq_MT,~/test_data/Minus_Sample1/possorted_genome_bam.bam,~/test_data/MAEGATK/Minus_Sample1/final/maegatk.rds,~/test_data/Minus_Sample1/Minus_Sample1_barcodes.tsv Sample1,Plus_Sample1,VarTrix,scRNAseq_Somatic,~/test_data/Plus_Sample1/possorted_genome_bam.bam,~/test_data/VarTrix/Somatic/,~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv 
Sample1,Plus_Sample1,VarTrix,scRNAseq_MT,~/test_data/Plus_Sample1/possorted_genome_bam.bam,~/test_data/VarTrix/MT/,~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv -Sample1,Plus_Sample1,MAEGATK,scRNAseq_MT,~/test_data/Plus_Sample1/possorted_genome_bam.bam,~/test_data/MAEGATK/,~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv -SW_CellLineMix_All_mr3,SW_CellLineMix_All_mr3,MAEGATK,Amplicon_MT,NADA,~/test_data/MAESTER_data/,NADA -SW_CellLineMix_RNAseq_mr3,SW_CellLineMix_RNAseq_mr3,MAEGATK,scRNAseq_MT,NADA,~/test_data/MAESTER_data/,NADA -TenX_BPDCN712_All_mr3,TenX_BPDCN712_All_mr3,MAEGATK,Amplicon_MT,NADA,~/test_data/MAESTER_data/,NADA -TenX_BPDCN712_RNAseq_mr3,TenX_BPDCN712_RNAseq_mr3,MAEGATK,scRNAseq_MT,NADA,~/test_data/MAESTER_data/,NADA +Sample1,Plus_Sample1,MAEGATK,scRNAseq_MT,~/test_data/Plus_Sample1/possorted_genome_bam.bam,~/test_data/MAEGATK/Plus_Sample1/final/maegatk.rds,~/test_data/Plus_Sample1/Plus_Sample1_barcodes.tsv +SW_CellLineMix_All_mr3,SW_CellLineMix_All_mr3,MAEGATK,Amplicon_MT,NADA,~/test_data/MAESTER_data/SW_CellLineMix_All_mr3/final/SW_CellLineMix_All_mr3_maegatk.rds,NADA +SW_CellLineMix_RNAseq_mr3,SW_CellLineMix_RNAseq_mr3,MAEGATK,scRNAseq_MT,NADA,~/test_data/MAESTER_data/SW_CellLineMix_RNAseq_mr3/final/SW_CellLineMix_RNAseq_mr3_maegatk.rds,NADA +TenX_BPDCN712_All_mr3,TenX_BPDCN712_All_mr3,MAEGATK,Amplicon_MT,NADA,~/test_data/MAESTER_data/TenX_BPDCN712_All_mr3/final/TenX_BPDCN712_All_mr3_maegatk.rds,NADA +TenX_BPDCN712_RNAseq_mr3,TenX_BPDCN712_RNAseq_mr3,MAEGATK,scRNAseq_MT,NADA,~/test_data/MAESTER_data/TenX_BPDCN712_RNAseq_mr3/final/TenX_BPDCN712_RNAseq_mr3_maegatk.rds,NADA diff --git a/man/AmpliconSupplementing.Rd b/man/AmpliconSupplementing.Rd index d3742a1..313db21 100644 --- a/man/AmpliconSupplementing.Rd +++ b/man/AmpliconSupplementing.Rd @@ -4,12 +4,14 @@ \alias{AmpliconSupplementing} \title{Supplementing scRNAseq values with Amplicon values} \usage{ -AmpliconSupplementing(scRNAseq, amplicon) +AmpliconSupplementing(scRNAseq, amplicon, 
verbose = TRUE) } \arguments{ \item{scRNAseq}{The SummarizedExperiment object containing the scRNAseq data.} \item{amplicon}{The SummarizedExperiment object containing the amplicon data.} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We replace the values from an scRNAseq experiment with values we have from an amplicon experiment. diff --git a/man/CalculateAlleleFrequency.Rd b/man/CalculateAlleleFrequency.Rd index 87d9d80..7336256 100644 --- a/man/CalculateAlleleFrequency.Rd +++ b/man/CalculateAlleleFrequency.Rd @@ -4,13 +4,16 @@ \alias{CalculateAlleleFrequency} \title{Calculating the Minor Allele Frequency.} \usage{ -CalculateAlleleFrequency(reference_reads, alternative_reads) +CalculateAlleleFrequency(reference_reads, alternative_reads, pseudo_count = 0) } \arguments{ \item{reference_reads}{Reference reads matrix.} \item{alternative_reads}{List of matrices for the alternative reads.} + +\item{pseudo_count}{The pseudo count you want to add to the reference_reads matrix. Default = 0} } \description{ -We calculate the MAF for the MAEGATK results. +We calculate the MAF from a reference reads matrix and an alternative reads matrix. +This function is intended to be used with the mitochondrial genome and not with other somatic mutations. } diff --git a/man/CalculateConsensus.Rd b/man/CalculateConsensus.Rd index 33d93a7..5980e5b 100644 --- a/man/CalculateConsensus.Rd +++ b/man/CalculateConsensus.Rd @@ -2,15 +2,28 @@ % Please edit documentation in R/CalculateConsensus.R \name{CalculateConsensus} \alias{CalculateConsensus} -\title{We calculate the consensus information from the MAEGATK results.} +\title{CalculateConsensus} \usage{ -CalculateConsensus(SE, chromosome_prefix = "chrM") +CalculateConsensus(SE, chromosome_prefix = "chrM", verbose = FALSE) } \arguments{ \item{SE}{SummarizedExperiment object.} \item{chromosome_prefix}{The chromosome name used as a prefix.} + +\item{verbose}{Should the function be verbose? 
Default = FALSE} } \description{ We calculate the consensus information from the MAEGATK results. +We set cells that have only alternative reads to 2 (Alternative). +We set cells that have only reference reads to 1 (Reference). +We set cells that have a mixture of alternative and reference reads to 3 (Both). +We set cells that have no reads to 0 (NoCall). + +Please note. Cells can have reads for the reference of a specific variant and no reads for the alternative. +The cell can still have reads for the other alternative alleles. Then the cell is still considered as 0 (NoCall) for this variant. +For example: +A cell has at position 3: 0 A reads, 53 T reads, 63 C reads, 148 G reads. +For the variant chrM_3_T_A, the cell would have 53 reference reads, but also reads for other variants at this position. +To make sure that there is no confusion, the cell is set to NoCall. } diff --git a/man/CalculateQuality.Rd b/man/CalculateQuality.Rd index ff62dcd..4a614f6 100644 --- a/man/CalculateQuality.Rd +++ b/man/CalculateQuality.Rd @@ -4,15 +4,13 @@ \alias{CalculateQuality} \title{CalculateQuality} \usage{ -CalculateQuality( - SE, - variants = rownames(reads_alt), - chromosome_prefix = "chrM" -) +CalculateQuality(SE, variants, chromosome_prefix = "chrM") } \arguments{ \item{SE}{SummarizedExperiment object.} +\item{variants}{The variants you want to get the quality for.} + \item{chromosome_prefix}{List of matrices for the alternative reads.} } \description{ diff --git a/man/Filtering.Rd b/man/Filtering.Rd index a55a1a9..b707850 100644 --- a/man/Filtering.Rd +++ b/man/Filtering.Rd @@ -11,7 +11,8 @@ Filtering( alts_threshold = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, - reject_value = "NoCall" + reject_value = "NoCall", + verbose = TRUE ) } \arguments{ @@ -28,6 +29,8 @@ Filtering( \item{min_variants_per_cell}{How many variants should be covered in a cell have to be included? 
Default = 1.} \item{reject_value}{Should cells that fall below a threshold (fraction_threshold or alts_threshold) be treated as Reference or NoCall? Default = NoCall.} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We filter a SummarizedExperiment object to exclude variants and cells. diff --git a/man/GetCellInfoPerVariant.Rd b/man/GetCellInfoPerVariant.Rd index b1b90aa..c641fa2 100644 --- a/man/GetCellInfoPerVariant.Rd +++ b/man/GetCellInfoPerVariant.Rd @@ -4,12 +4,14 @@ \alias{GetCellInfoPerVariant} \title{We get the variant information per cell.} \usage{ -GetCellInfoPerVariant(se, voi_ch) +GetCellInfoPerVariant(se, voi_ch, verbose = FALSE) } \arguments{ \item{se}{SummarizedExperiment object.} \item{voi_ch}{Variants of interest.} + +\item{verbose}{Should the function be verbose? Default = FALSE} } \description{ We get the variant information per cell. diff --git a/man/HeatmapVoi.Rd b/man/HeatmapVoi.Rd index f6fab8a..46100df 100644 --- a/man/HeatmapVoi.Rd +++ b/man/HeatmapVoi.Rd @@ -18,6 +18,10 @@ HeatmapVoi( \item{voi}{Variants Of Interest.} \item{annotation_trait}{Cell Annotation at the bottom of the heat map.} + +\item{column_title}{The title of the heat map. Default = NULL} + +\item{remove_empty_cells}{Should cells that have a fraction of 0 for all variants be removed? Default = FALSE} } \description{ We plot a heatmap of a set of Variants Of Interest using the Variant Allele Frequency values of a SummarizedExperiment object. diff --git a/man/LoadingMAEGATK_typewise.Rd b/man/LoadingMAEGATK_typewise.Rd index 7c76fe5..4c4415a 100644 --- a/man/LoadingMAEGATK_typewise.Rd +++ b/man/LoadingMAEGATK_typewise.Rd @@ -11,7 +11,8 @@ LoadingMAEGATK_typewise( type_use = "scRNAseq_MT", chromosome_prefix = "chrM", min_cells = 2, - barcodes_path = NULL + barcodes_path = NULL, + verbose = TRUE ) } \arguments{ @@ -24,10 +25,22 @@ LoadingMAEGATK_typewise( \item{type_use}{The type of input. Has to be one of: scRNAseq_MT, Amplicon_MT. 
Only used if samples_path is not NULL.} \item{chromosome_prefix}{The prefix you want use. Default: "chrM"} + +\item{min_cells}{The minimum number of cells with coverage for a variant. Variants with coverage in less than this amount of cells are removed. Default = 2} + +\item{barcodes_path}{Path to the barcodes file tsv. Default = NULL} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We load the MAEGATK output and transform it to be compatible with the VarTrix output. The input file is a specifically formated csv file with all the necessary information to run the analysis. Note that the source column in the input file needs to be one of the following: vartrix, mgaetk, mgatk. -This is hard coded and case insensitive. +If you want to only load a single sample without the use of an input file, you have to set the following variables. +\enumerate{ + \item samples_path + \item barcodes_path + \item patient + \item samples_file = NULL +} } diff --git a/man/LoadingVCF_typewise.Rd b/man/LoadingVCF_typewise.Rd new file mode 100644 index 0000000..70a2acb --- /dev/null +++ b/man/LoadingVCF_typewise.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LoadingVCF_typewise.R +\name{LoadingVCF_typewise} +\alias{LoadingVCF_typewise} +\title{LoadingVCF_typewise} +\usage{ +LoadingVCF_typewise( + samples_file, + samples_path = NULL, + barcodes_path = NULL, + vcf_path, + patient, + type_use = "scRNAseq_Somatic", + min_reads = NULL, + min_cells = 2, + remove_N_alternative = TRUE, + verbose = TRUE +) +} +\arguments{ +\item{samples_file}{Path to the csv file with the samples to be loaded.} + +\item{samples_path}{Path to the input folder. Must include a barcodes file.} + +\item{barcodes_path}{Path to the cell barcodes tsv. Default = NULL} + +\item{vcf_path}{Path to the VCF file with the variants.} + +\item{patient}{The patient you want to load.} + +\item{type_use}{The type of input. 
Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT.} + +\item{min_reads}{The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL.} + +\item{min_cells}{The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2.} + +\item{remove_N_alternative}{Remove all variants that have N as an alternative, see Description. Default = TRUE} + +\item{verbose}{Should the function be verbose? Default = TRUE} +} +\description{ +We load a cellwise pileup result from a VCF file. +If you want to only load a single sample without the use of an input file, you have to set the following variables. +\enumerate{ + \item samples_path + \item barcodes_path + \item patient + \item samples_file = NULL +} + +It has happened that reads with an N allele were aligned. This can cause problems since these variants are typically not in variants lists. +We can remove all of these variants by setting remove_N_alternative to TRUE (the default). +Set this option to FALSE, if you really want to retain these variants. +} diff --git a/man/LoadingVarTrix_ori.Rd b/man/LoadingVarTrix_ori.Rd deleted file mode 100644 index 488aacb..0000000 --- a/man/LoadingVarTrix_ori.Rd +++ /dev/null @@ -1,73 +0,0 @@ -\name{LoadingVarTrix} -\alias{LoadingVarTrix} -\title{ -Loading VarTrix results for the down stream analysis. -} -\description{ -This function loads the VarTrix results and outputs a list of two SummarizedExperiments objects, one for the somatic variants and on accompanying for the mitochondrial variants. -} -\usage{ -LoadingVarTrix(samples_file, vcf_path, vcf_path_MT, patient) -} -\arguments{ -\item{\bold{samples_file}}{Path to the input file. One sample per row. It has to contain the following columns:\cr\cr -\bold{patient}: The patient of the sample. A patient can have multiple samples (treated/untreated). 
All samples are merged per patient.\cr\cr -\bold{sample}: The ID of the sample.\cr\cr -\bold{resource}: Is this a VarTrix or MAEGATK input? Must be either VarTrix or MAEGATK.\cr\cr -\bold{type}: What type of data? Must be one of the following: scRNAseq_Somatic, scRNAseq_MT, Amplicon_Somatic, Amplicon_MT.\cr\cr -\bold{input_folder}: Path to the folder, where the VarTrix output is stored.\cr\cr -\bold{cells}: Path to the barcodes file from the CellRanger output. -} -\item{\bold{vcf_path}}{Path to the VCF file that contains the somatic variants you are interested in.} -\item{\bold{vcf_path_MT}}{Path to the VCF file that contains the MT variants you are interested in.} -\item{\bold{patient}}{The patient you want to load.} -} -\details{ -The function loads the VarTrix for one of the patients in the input file.\cr -If a patient has multiple samples, then all samples are merged.\cr -There are two types of input data, single cell RNA sequencing and amplicon data.\cr -scRNAseq data is the result from something like a 10X assay, while amplicon data focuses on a specific part of the genome.\cr -Both are the output of the CellRanger pipeline. Amplicon data has a much higher coverage in the specific area and therefore is much better for these areas. The scRNAseq data is overwritten if amplicon data is available.\cr\cr -The output is a list of two SummarizedExperiment objects. 
Each object contains two assays:\cr\cr -\bold{consensus_somatic}: The consensus information per somatic variant and cell.\cr -\bold{consensus_MT}: The consensus information per MT variant and cell.\cr -\bold{fraction_somatic}: The variant allele frequency per somatic variant and cell.\cr -\bold{fraction_MT}: The variant allele frequency per MT variant and cell.\cr\cr -The consensus information is encoded in the following way:\cr\cr -\bold{0}: No Call, no reads are covering this position.\cr -\bold{1}: Reference, only reference reads cover this position.\cr -\bold{2}: Alternative, only mutated reads cover this position.\cr -\bold{3}: Both, both alt and ref reads cover this position.\cr -} -\value{ -The return list has two objects.\cr\cr -\item{\bold{SE_Somatic}}{The somatic variants results.} -\item{\bold{SE_MT}}{The MT variants results.} -} -\references{ -%% ~put references to the literature/web site here ~ -} -\author{ -%% ~~who you are~~ -} -\note{ -%% ~~further notes~~ -} - -%% ~Make other sections like Warning with \section{Warning }{....} ~ - -\seealso{ -%% ~~objects to See Also as \code{\link{help}}, ~~~ -} -\examples{ - -} -% Add one or more standard keywords, see file 'KEYWORDS' in the -% R documentation directory (show via RShowDoc("KEYWORDS")): -% \keyword{ ~kwd1 } -% \keyword{ ~kwd2 } -% Use only one keyword per line. -% For non-standard keywords, use \concept instead of \keyword: -% \concept{ ~cpt1 } -% \concept{ ~cpt2 } -% Use only one concept per line. diff --git a/man/LoadingVarTrix_typewise.Rd b/man/LoadingVarTrix_typewise.Rd index c01b670..372ca60 100644 --- a/man/LoadingVarTrix_typewise.Rd +++ b/man/LoadingVarTrix_typewise.Rd @@ -11,10 +11,10 @@ LoadingVarTrix_typewise( snp_path = NULL, vcf_path, patient, - sample = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, - min_cells = 2 + min_cells = 2, + verbose = TRUE ) } \arguments{ @@ -22,6 +22,8 @@ LoadingVarTrix_typewise( \item{samples_path}{Path to the input folder. 
Must include a barcodes file.} +\item{barcodes_path}{The path to the cell barcodes tsv. Default = NULL} + \item{snp_path}{Path to the SNP file used for VarTrix (SNV.loci.txt).} \item{vcf_path}{Path to the VCF file with the variants.} @@ -33,6 +35,8 @@ LoadingVarTrix_typewise( \item{min_reads}{The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL.} \item{min_cells}{The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2.} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ When we load all the different types of results (scRNAseq/amplicon and MT/amplicon), diff --git a/man/VariantCloneSizeThresholding.Rd b/man/VariantCloneSizeThresholding.Rd index 7f0c149..3f34efe 100644 --- a/man/VariantCloneSizeThresholding.Rd +++ b/man/VariantCloneSizeThresholding.Rd @@ -9,7 +9,8 @@ VariantCloneSizeThresholding( min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, - vaf_threshold = 0.5 + vaf_threshold = 0.5, + verbose = TRUE ) } \arguments{ @@ -22,6 +23,8 @@ VariantCloneSizeThresholding( \item{min_clone_size}{minimum number of cells.} \item{vaf_threshold}{Variant Allele Threshold. Cells above this threshold are considered mutated.} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We get variants of interest using a clone size thresholding. diff --git a/man/VariantCorrelationHeatmap.Rd b/man/VariantCorrelationHeatmap.Rd index 8853630..c1535cb 100644 --- a/man/VariantCorrelationHeatmap.Rd +++ b/man/VariantCorrelationHeatmap.Rd @@ -12,7 +12,8 @@ VariantCorrelationHeatmap( min_correlation = 0.5, width_use = 2000, height_use = 2000, - padding_use = c(165, 165, 2, 2) + padding_use = c(165, 165, 2, 2), + verbose = TRUE ) } \arguments{ @@ -31,7 +32,11 @@ VariantCorrelationHeatmap( \item{height_use}{Height of the heatmap in px.} \item{padding_use}{Space around the heatmap in mm. 
If this is to low, the variant names might be cut off.} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We generate a heatmap showing the correlation of somatic variants with the MT variants. +Packages I want to remove. I cannot see where they are used. +ggplot2 parallel rcompanion tidyr } diff --git a/man/VariantFisherTestHeatmap.Rd b/man/VariantFisherTestHeatmap.Rd index 68fde4d..ca9b9f2 100644 --- a/man/VariantFisherTestHeatmap.Rd +++ b/man/VariantFisherTestHeatmap.Rd @@ -8,7 +8,8 @@ VariantFisherTestHeatmap( fisher_results, patient, min_alt_cells = 5, - min_oddsratio = 1 + min_oddsratio = 1, + verbose = TRUE ) } \arguments{ @@ -19,7 +20,10 @@ VariantFisherTestHeatmap( \item{min_alt_cells}{Minimum number of mutated cells needed, otherwise an association will not be plotted.} \item{min_oddsratio}{Minimum correlation needed.} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We generate a heatmap showing the Fisher test of somatic variants with the MT variants. +Packages I want to remove. } diff --git a/man/VariantQuantileThresholding.Rd b/man/VariantQuantileThresholding.Rd index 5d3d435..83ce2a5 100644 --- a/man/VariantQuantileThresholding.Rd +++ b/man/VariantQuantileThresholding.Rd @@ -11,12 +11,13 @@ VariantQuantileThresholding( thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, - min_quality = 30, + min_quality = NULL, mean_allele_frequency = 0, group_of_interest = NULL, group1 = NULL, group2 = NULL, - group_factor = NULL + group_factor = NULL, + verbose = TRUE ) } \arguments{ @@ -43,6 +44,8 @@ VariantQuantileThresholding( \item{group2}{The second group of interest.} \item{group_factor}{How much higher has the mean allele frequency to be in group 1 when compared to group 2?} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We get variants of interest using the quantile thresholding. 
diff --git a/man/VariantWiseCorrelation.Rd b/man/VariantWiseCorrelation.Rd index f64e738..410335b 100644 --- a/man/VariantWiseCorrelation.Rd +++ b/man/VariantWiseCorrelation.Rd @@ -4,7 +4,12 @@ \alias{VariantWiseCorrelation} \title{VariantWiseCorrelation} \usage{ -VariantWiseCorrelation(variants_list, n_cores = 1, p_value_adjustment = "fdr") +VariantWiseCorrelation( + variants_list, + n_cores = 1, + p_value_adjustment = "fdr", + verbose = TRUE +) } \arguments{ \item{variants_list}{List of fraction values.} @@ -12,6 +17,8 @@ VariantWiseCorrelation(variants_list, n_cores = 1, p_value_adjustment = "fdr") \item{n_cores}{Number of cores you want to use. Numeric.} \item{p_value_adjustment}{Method for P value adjustment. See p.adjust for details.} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We correlate the variants with each other using the Pearson correlation. diff --git a/man/VariantWiseFisherTest.Rd b/man/VariantWiseFisherTest.Rd index eaf33d0..f8a6373 100644 --- a/man/VariantWiseFisherTest.Rd +++ b/man/VariantWiseFisherTest.Rd @@ -4,7 +4,12 @@ \alias{VariantWiseFisherTest} \title{VariantWiseFisherTest} \usage{ -VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = "fdr") +VariantWiseFisherTest( + variants_list, + n_cores = 1, + p_value_adjustment = "fdr", + verbose = TRUE +) } \arguments{ \item{variants_list}{List of fraction values.} @@ -12,6 +17,8 @@ VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = "fdr") \item{n_cores}{Number of cores you want to use. Numeric.} \item{p_value_adjustment}{Method for P value adjustment. See p.adjust for details.} + +\item{verbose}{Should the function be verbose? Default = TRUE} } \description{ We perform the Fisher test to determine which variants are associated. 
diff --git a/man/char_to_numeric.Rd b/man/char_to_numeric.Rd new file mode 100644 index 0000000..692dcf9 --- /dev/null +++ b/man/char_to_numeric.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/char_to_numeric.R +\name{char_to_numeric} +\alias{char_to_numeric} +\title{char_to_numeric} +\usage{ +char_to_numeric(char_value) +} +\arguments{ +\item{char_value}{What is the genotype encoding you want to convert?} +} +\description{ +A function to convert the heterozygous/homozygous information from the VCF to the consensus information from VarTrix. +It is only used in LoadingVCF_typewise.R. +} diff --git a/man/computeAFMutMatrix.Rd b/man/computeAFMutMatrix.Rd index 9c499e4..7179476 100644 --- a/man/computeAFMutMatrix.Rd +++ b/man/computeAFMutMatrix.Rd @@ -8,6 +8,8 @@ computeAFMutMatrix(SE, chromosome_prefix = "chrM") } \arguments{ \item{SE}{SummarizedExperiment object.} + +\item{chromosome_prefix}{The prefix of the chromosome.} } \description{ Calculate the allele frequency per variant. @@ -16,5 +18,4 @@ We can get AF values greater than 1, which is due to uninformative reads. See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected and https://github.com/caleblareau/mgatk/issues/1 We simply set these values to 1, since that is the actual information we have in this case. -This issue can be solved on the MAEGATK/GATK side. } diff --git a/man/getAltMatrix.Rd b/man/getAltMatrix.Rd index fd8c844..69a70c6 100644 --- a/man/getAltMatrix.Rd +++ b/man/getAltMatrix.Rd @@ -12,8 +12,6 @@ getAltMatrix(SE_object, letter, chromosome_prefix = "chrM") \item{letter}{The base you want to use. Character.} \item{chromosome_prefix}{The chromosome prefix used.} - -\item{ref_allele}{The reference alleles.} } \description{ We get the alt values from the MAEGATK results. 
diff --git a/man/getMutMatrix.Rd b/man/getMutMatrix.Rd new file mode 100644 index 0000000..1b8eefe --- /dev/null +++ b/man/getMutMatrix.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getMutMatrix.R +\name{getMutMatrix} +\alias{getMutMatrix} +\title{getMutMatrix} +\usage{ +getMutMatrix(SE, cov, letter, ref_allele, chromosome_prefix) +} +\arguments{ +\item{SE}{SummarizedExperiment object.} + +\item{cov}{The coverage matrix from MAEGATK/MGATK.} + +\item{letter}{The base we are interested in.} + +\item{ref_allele}{Vector of reference alleles.} + +\item{chromosome_prefix}{The chromosome prefix used.} +} +\description{ +This function gets the allele frequency for a specific allele. It is used in computeAFMutMatrix. +Source: https://github.com/petervangalen/MAESTER-2021 +} diff --git a/man/getRefMatrix.Rd b/man/getRefMatrix.Rd index 90de365..bdba64e 100644 --- a/man/getRefMatrix.Rd +++ b/man/getRefMatrix.Rd @@ -12,8 +12,6 @@ getRefMatrix(SE_object, letter, chromosome_prefix = "chrM") \item{letter}{The base you are analysing. You get a matrix that shows which cells have how many reference reads for this letter.} \item{chromosome_prefix}{The chromosome prefix used.} - -\item{ref_allele}{The reference alleles.} } \description{ We get the reference values from the MAEGATK result. diff --git a/man/get_consensus.Rd b/man/get_consensus.Rd index 22177c7..dea9b43 100644 --- a/man/get_consensus.Rd +++ b/man/get_consensus.Rd @@ -7,14 +7,15 @@ get_consensus(alt_base, ref_base, input_matrix, chromosome_prefix = "chrM") } \arguments{ +\item{alt_base}{The alternative base.} + \item{ref_base}{The reference base.} \item{input_matrix}{Input matrix with the present reads numerically encoded.} \item{chromosome_prefix}{The chromosome name used as a prefix.} - -\item{letter}{The alternative base.} } \description{ We get the consensus information for a specific matrix. +I want to remove some packages if they are not needed. 
See below which package apparently wasn't needed. } diff --git a/man/ggsci_pal.Rd b/man/ggsci_pal.Rd index 4aa77f9..4725dee 100644 --- a/man/ggsci_pal.Rd +++ b/man/ggsci_pal.Rd @@ -8,6 +8,8 @@ ggsci_pal(option, ...) } \arguments{ \item{option}{Your colour palette of choice.} + +\item{...}{Further options passed to the palette function.} } \description{ Function to return colours from a ggsci palette. diff --git a/man/save_object.Rd b/man/save_object.Rd index b92aca0..de75e20 100644 --- a/man/save_object.Rd +++ b/man/save_object.Rd @@ -4,7 +4,7 @@ \alias{save_object} \title{save_object} \usage{ -save_object(object, file_name, file_format = NULL) +save_object(object, file_name, file_format = "zstd") } \arguments{ \item{object}{The R object to be save.} diff --git a/preprocessing/MAESTER_preprocessing.sh b/preprocessing/MAESTER_preprocessing.sh index b317f4f..302220a 100644 --- a/preprocessing/MAESTER_preprocessing.sh +++ b/preprocessing/MAESTER_preprocessing.sh @@ -16,7 +16,7 @@ BASE_QUALITY=30 MIN_BARCODE_READS=3 sample_use="SampleID" -cells_use="/Path/To/Your/CellRanger/barcodes.tsv" # Not necessary, but I recommend it. +cells_use="/Path/To/Your/CellRanger/barcodes.tsv" # Not necessary, but it is recommended. bams_use="/Path/To/Your/CellRanger/possorted_genome_bam.bam" mkdir -p $OUTPUT/$sample_use cd ${OUTPUT}${sample_use} @@ -35,7 +35,7 @@ maegatk bcall \ --base-qual $BASE_QUALITY \ --mito-genome $BWA_INDEX -# I automatically remove the temporary files. +# This automatically removes the temporary files. # This is not necessary, but once overthing works reliably it removes clutter.
# rm -r .snakemake # rm -r logs diff --git a/preprocessing/UMI_Preprocessing/ReadMoleculeInfo.R b/preprocessing/UMI_Preprocessing/ReadMoleculeInfo.R index 0478cef..5d86c9b 100644 --- a/preprocessing/UMI_Preprocessing/ReadMoleculeInfo.R +++ b/preprocessing/UMI_Preprocessing/ReadMoleculeInfo.R @@ -11,12 +11,12 @@ suppressPackageStartupMessages(library(optparse)) print("Variables.") option_list = list( - make_option("--molecule_info", type = "character", default = "", help = "The molecule info input file.", metavar = "character"), - make_option("--barcodes_path", type = "character", default = "", help = "The barcodes.", metavar = "character"), - make_option("--output", type = "character", default = "", help = "The output path.", metavar = "character"), - make_option("--sample", type = "character", default = "", help = "The sample used.", metavar = "character"), + make_option("--molecule_info", type = "character", default = "", help = "The molecule info input file.", metavar = "character"), + make_option("--barcodes_path", type = "character", default = "", help = "The barcodes.", metavar = "character"), + make_option("--output", type = "character", default = "", help = "The output path.", metavar = "character"), + make_option("--sample", type = "character", default = "", help = "The sample used.", metavar = "character"), make_option("--min_reads_per_umi", type = "character", default = 10, help = "The minimum number of reads a UMI needs.", metavar = "numeric"), - make_option("--umi_length", type = "character", default = 12, help = "The length of a UMI.", metavar = "numeric") + make_option("--umi_length", type = "character", default = 12, help = "The length of a UMI.", metavar = "numeric") ) opt_parser <- OptionParser(option_list = option_list) opt <- parse_args(opt_parser) diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..0937896 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,12 @@ +# This file is part of the standard setup for 
testthat. +# It is recommended that you do not modify it. +# +# Where should you do additional test configuration? +# Learn more about the roles of various files in: +# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview +# * https://testthat.r-lib.org/articles/special-files.html + +library(testthat) +library(sigurd) + +test_check("sigurd") diff --git a/tests/testthat/test-AmpliconSupplementing.R b/tests/testthat/test-AmpliconSupplementing.R new file mode 100644 index 0000000..3118fba --- /dev/null +++ b/tests/testthat/test-AmpliconSupplementing.R @@ -0,0 +1,67 @@ +test_that("Testing AmpliconSupplementing.", { + # This is the test data. + scrna_data <- rbind(consensus = c( 0, 3, 2, 1, 3, 1, 2, 0, 0, 1, 1, 2, 0, 3, 3, 2), + fraction = c( 0, 0.54, 1, 0, 0.61, 0, 1, 0, 0, 0, 0, 1, 0, 0.95, 0.95, 1), + coverage = c( 0, 100, 16, 32, 1000, 64, 128, 0, 0, 256, 512, 1024, 0, 2000, 4000, 8192), + alts = c( 0, 54, 16, 0, 610, 0, 128, 0, 0, 0, 0, 1024, 0, 1900, 3800, 8192), + refs = c( 0, 46, 0, 32, 390, 64, 0, 0, 0, 256, 512, 0, 0, 100, 200, 0)) + amplicon_data <- rbind(consensus = c( 2, 3, 3, 1, 3, 1, 2, 0, 0, 1, 1, 2, 0, 3, 3, 2), + fraction = c( 1, 0.54, 0.8, 0, 0.91, 0, 1, 0, 0, 0, 0, 1, 0, 0.95, 0.95, 1), + coverage = c(1000, 100, 500, 32, 1000, 500, 128, 0, 0, 256, 512, 1024, 0, 3000, 8000, 8192), + alts = c(1000, 54, 400, 0, 910, 0, 128, 0, 0, 0, 0, 1024, 0, 2850, 7600, 8192), + refs = c( 0, 46, 100, 32, 90, 500, 0, 0, 0, 256, 512, 0, 0, 150, 400, 0)) + # Generating sparse matrices to generate a SummarizedExperiment object. 
+ scrna_consensus <- Matrix::sparseMatrix(i = c(2,1,2,3,4,1,2,3,4,1,3,4), j = c(1,2,2,2,2,3,3,3,3,4,4,4), x = c(3,3,1,1,3,2,2,1,3,1,2,2), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + scrna_fraction <- Matrix::sparseMatrix(i = c(2,1,4,1,2,4,3,4), j = c(1,2,2,3,3,3,4,4), x = c(0.61, 0.54, 0.95, 1, 1, 0.95, 1, 1), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + scrna_coverage <- Matrix::sparseMatrix(i = c(2,1,2,3,4,1,2,3,4,1,3,4), j = c(1,2,2,2,2,3,3,3,3,4,4,4), x = c(1000,100,64,256,2000,16,128,512,4000,32,1024,8192), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + scrna_alts <- Matrix::sparseMatrix(i = c(2,1,4,1,2,4,3,4), j = c(1,2,2,3,3,3,4,4), x = c(610,54,1900,16,128,3800,1024,8192), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + scrna_refs <- Matrix::sparseMatrix(i = c(2,1,2,3,4,3,4,1), j = c(1,2,2,2,2,3,3,4), x = c(390,46,64,256,100,512,200,32), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + scrna_coldata <- S4Vectors::DataFrame(Cell = paste0("Cell_", 1:4), Type = "scRNAseq_Somatic", AverageCoverage = Matrix::rowMeans(scrna_coverage)) + names(scrna_coldata$AverageCoverage) <- NULL + scrna_rowdata <- S4Vectors::DataFrame(VariantName = paste0("Variant_", 1:4), Concordance = 0:3, Depth = Matrix::colMeans(scrna_coverage)) + names(scrna_rowdata$Depth) <- NULL + scRNAseq <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = scrna_consensus, fraction = scrna_fraction, coverage = scrna_coverage, alts = scrna_alts, refs = scrna_refs), + colData = scrna_coldata, rowData = scrna_rowdata) + + amplicon_consensus <- Matrix::sparseMatrix(i = c(1,2,1,2,3,4,1,2,3,4,1,3,4), j = c(1,1,2,2,2,2,3,3,3,3,4,4,4), x = c(2,3,3,1,1,3,3,2,1,3,1,2,2), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + amplicon_fraction <- Matrix::sparseMatrix(i = c(1,2,1,4,1,2,4,3,4), j = c(1,1,2,2,3,3,3,4,4), x = c(1,0.91,0.54,0.95,0.8,1,0.95,1,1), + dimnames = 
list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + + amplicon_coverage <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4, 4), x = amplicon_data["coverage",], + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + amplicon_coverage <- Matrix::sparseMatrix(i = c(1,2,1,2,3,4,1,2,3,4,1,3,4), j = c(1,1,2,2,2,2,3,3,3,3,4,4,4), x = c(1000,1000,100,500,256,3000,500,128,512,8000,32,1024,8192), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + amplicon_alts <- Matrix::sparseMatrix(i = c(1,2,1,4,1,2,4,3,4), j = c(1,1,2,2,3,3,3,4,4), x = c(1000,910,54,2850,400,128,7600,1024,8192), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + amplicon_refs <- Matrix::sparseMatrix(i = c(2,1,2,3,4,1,3,4,1), j = c(1,2,2,2,2,3,3,3,4), x = c(90,46,500,256,150,100,512,400,32), + dimnames = list(paste0("Variant_", 1:4), paste0("Cell_", 1:4))) + + amplicon_coldata <- S4Vectors::DataFrame(Cell = paste0("Cell_", 1:4), Type = "scRNAseq_Amplicon", AverageCoverage = Matrix::rowMeans(amplicon_coverage)) + names(amplicon_coldata$AverageCoverage) <- NULL + amplicon_rowdata <- S4Vectors::DataFrame(VariantName = paste0("Variant_", 1:4), Concordance = 3:0, VariantQuality = 1:4, Depth = Matrix::colMeans(amplicon_coverage)) + names(amplicon_rowdata$Depth) <- NULL + amplicon <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = amplicon_consensus, fraction = amplicon_fraction, coverage = amplicon_coverage, alts = amplicon_alts, refs = amplicon_refs), + colData = amplicon_coldata, rowData = amplicon_rowdata) + # The objects for the test. 
+ test_result <- sigurd::AmpliconSupplementing(scRNAseq, amplicon, verbose = FALSE) + test_coldata <- S4Vectors::DataFrame(Cell = scrna_coldata$Cell, TypescRNAseq = scrna_coldata$Type, AverageCoveragescRNAseq = scrna_coldata$AverageCoverage, + TypeAmplicon = amplicon_coldata$Type, AverageCoverageAmplicon = amplicon_coldata$AverageCoverage, AverageCoverage = amplicon_coldata$AverageCoverage, + row.names = scrna_coldata$Cell) + test_rowdata <- S4Vectors::DataFrame(VariantName = paste0("Variant_", 1:4), ConcordancescRNAseq = 0:3, DepthscRNAseq = c(250, 605, 1164, 2312), ConcordanceAmplicon = 3:0, VariantQuality = 1:4, DepthAmplicon = c(500, 964, 2285, 2312), Concordance = 3:0, Depth = c(500, 964, 2285, 2312), + row.names = paste0("Variant_", 1:4)) + + expect_equal(names(assays(test_result)), c("consensus", "fraction", "coverage", "alts", "refs")) + expect_equal(colData(test_result), test_coldata) + expect_equal(rowData(test_result), test_rowdata) + expect_equal(assays(test_result)[["consensus"]], amplicon_consensus) + expect_equal(assays(test_result)[["fraction"]], amplicon_fraction) + expect_equal(assays(test_result)[["coverage"]], amplicon_coverage) + expect_equal(assays(test_result)[["alts"]], amplicon_alts) + expect_equal(assays(test_result)[["refs"]], amplicon_refs) +}) diff --git a/tests/testthat/test-CalculateAlleleFrequency.R b/tests/testthat/test-CalculateAlleleFrequency.R new file mode 100644 index 0000000..400cb15 --- /dev/null +++ b/tests/testthat/test-CalculateAlleleFrequency.R @@ -0,0 +1,31 @@ +test_that("Testing CalculateAlleleFrequency.R", { + # We generate the input matrix. 
+ variants <- c("chrM_1_G_A", "chrM_3_T_A", "chrM_4_C_A", + "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", + "chrM_1_G_T", "chrM_2_A_T", "chrM_4_C_T", + "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C") + sample(seq(from = 0, to = 100, by = 10), size = 48, replace = TRUE) + ref_reads <- matrix(c( 0, 90, 80, 100, 90, 80, 0, 100, 80, 0, 100, 90, + 0, 50, 40, 50, 50, 40, 0, 50, 40, 0, 50, 50, + 80, 50, 90, 40, 50, 90, 80, 40, 90, 80, 40, 50, + 70, 20, 80, 0, 20, 80, 70, 0, 80, 70, 0, 20), + ncol = 4, nrow = 12, dimnames = list(variants, paste0("Cell_", 1:4))) + alt_reads <- matrix(c(90, 10, 10, 50, 0, 0, 0, 0, 0, 0, 0, 0, + 90, 50, 0, 0, 100, 0, 0, 0, 0, 0, 40, 60, + 0, 0, 0, 0, 0, 0, 0, 0, 90, 10, 0, 80, + 60, 70, 30, 10, 80, 100, 100, 20, 0, 100, 0, 0), + ncol = 4, nrow = 12, dimnames = list(variants, paste0("Cell_", 1:4))) + + expected_result1 <- matrix(c(1, 0.1, 1/9, 1/3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.5, 0, 0, 2/3, 0, 0, 0, 0, 0, 0.4444444, 0.5454545, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 1/9, 0, 0.6153846, 0.4615385, 0.7777778, 0.2727273, 1, 0.8, 0.5555556, + 0.5882353, 1, 0, 0.5882353, 0, 0), + ncol = 4, nrow = 12, dimnames = list(variants, paste0("Cell_", 1:4))) + expected_result2 <- matrix(c(1, 0.1, 1/9, 1/3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0.5, 0, 0, 2/3, 0, 0, 0, 0, 0, 0.4444444, 0.5454545, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 1/9, 0, 0.6153846, 0.4615385, 0.7777778, 0.2727273, 0.9999999, 0.8, + 0.5555556, 0.5882353, 1, 0, 0.5882353, 0, 0), + ncol = 4, nrow = 12, dimnames = list(variants, paste0("Cell_", 1:4))) + expected_result3 <- matrix(c(0.989011, 0.0990099, 0.1098901, 0.3311258, 0, 0, 0, 0, 0, 0, 0, 0, 0.989011, 0.4950495, 0, 0, 0.6622517, 0, 0, 0, 0, 0, 0.4395604, 0.5405405, 0, 0, 0, 0, 0, 0, 0, 0, 0.4972376, 0.1098901, + 0, 0.6106870, 0.4580153, 0.7692308, 0.2702703, 0.9090909, 0.7920792, 0.5524862, 0.5847953, 0.952381, 0, 0.5847953, 0, 0), + ncol = 4, nrow = 12, dimnames = list(variants, paste0("Cell_", 1:4))) + expect_equal(sigurd::CalculateAlleleFrequency(reference_reads = ref_reads, 
alternative_reads = alt_reads, pseudo_count = 0), expected_result1, tolerance = 1e-6) + expect_equal(sigurd::CalculateAlleleFrequency(reference_reads = ref_reads, alternative_reads = alt_reads, pseudo_count = 0.000001), expected_result2, tolerance = 1e-6) + expect_equal(sigurd::CalculateAlleleFrequency(reference_reads = ref_reads, alternative_reads = alt_reads, pseudo_count = 1), expected_result3, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-CalculateAltReads.R b/tests/testthat/test-CalculateAltReads.R new file mode 100644 index 0000000..3b38d16 --- /dev/null +++ b/tests/testthat/test-CalculateAltReads.R @@ -0,0 +1,69 @@ +test_that("Testing CalculateAltReads.R", { + # These are the reference alleles for the first 4 positions. + ref_allele <- c("G", "A", "T", "C") + # All possible combinations. The first allele means the reference allele at this position. + # The second allele means the reads we are observing. + variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A", + "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", + "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T", + "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C") + # We generate a matrix to have an overview over the reads we observe per cell and variant. + reads_per_variant <- matrix(0, nrow = 16, ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4))) + reads_per_variant[,1] <- c(0,20,0,0, 23,0,0,0, 0,0,45,0, 0,0,0,25) + reads_per_variant[,2] <- c(0,0,0,0, 20,20,0,0, 40,0,0,0, 0,0,0,0) + reads_per_variant[,3] <- c(0,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + reads_per_variant[,4] <- c(20,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + # We generate sparse matrices for the SummarizedExperimentObject. 
+ A_counts_fw <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + A_counts_rev <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_fw <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_rev <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_fw <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_rev <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_fw <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_rev <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + # We add the sparse matrices together to get the coverage. + As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev) + Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev) + Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev) + Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev) + coverage <- As + Cs + coverage <- coverage + Gs + coverage <- coverage + Ts + coverage <- as(coverage, "CsparseMatrix") + + # We generate a GRanges object for the SummarizedExperimentObject. + rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = 1:4, end = 1:4, width = 1), strand = "*", refAllele = ref_allele) + # We generate the actual SummarizedExperimentObject. 
+ se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev, + C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev, + G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev, + T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev, + coverage = coverage), + rowRanges = rowRanges) + + # We generate the expected results. + expected_result <- Matrix::sparseMatrix(i = c(7,10,12,1,12), j = c(2,2,3,4,4), x = c(40,80,60,40,60), dims = c(12,4), + dimnames = list(c("chrM_1_G_A", "chrM_3_T_A", "chrM_4_C_A", "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", "chrM_1_G_T", "chrM_2_A_T", "chrM_4_C_T"), paste0("Cell_", 1:4))) + expect_equal(sigurd::CalculateAltReads(SE = se, chromosome_prefix = "chrM"), expected_result, tolerance = 1e-6) +}) + + + + + + + + + + + + diff --git a/tests/testthat/test-CalculateConsensus.R b/tests/testthat/test-CalculateConsensus.R new file mode 100644 index 0000000..8e28151 --- /dev/null +++ b/tests/testthat/test-CalculateConsensus.R @@ -0,0 +1,109 @@ +test_that("Testing CalculateConsensus.R", { + # These are the reference alleles for the first 4 positions. + ref_allele <- c("G", "A", "T", "C") + # All possible combinations. The first allele means the reference allele at this position. + # The second allele means the reads we are observing. + variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A", + "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", + "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T", + "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C") + # We generate a matrix to have an overview over the reads we observe per cell and variant. 
+ reads_per_variant <- matrix(0, nrow = length(variants), ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4))) + reads_per_variant[,1] <- c(0,20,0,0, 23,0,0,0, 0,0,45,0, 0,0,0,25) + reads_per_variant[,2] <- c(0,0,0,0, 20,20,0,0, 40,0,0,0, 0,0,0,0) + reads_per_variant[,3] <- c(0,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + reads_per_variant[,4] <- c(20,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + # We generate sparse matrices for the SummarizedExperimentObject. + A_counts_fw <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + A_counts_rev <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_fw <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_rev <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_fw <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_rev <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_fw <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_rev <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + # We add the sparse matrices together to get the coverage. 
+ As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev) + Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev) + Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev) + Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev) + coverage <- As + Cs + coverage <- coverage + Gs + coverage <- coverage + Ts + coverage <- as(coverage, "CsparseMatrix") + + # We generate a GRanges object for the SummarizedExperimentObject. + rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = 1:4, end = 1:4, width = 1), strand = "*", refAllele = ref_allele) + # We generate the actual SummarizedExperimentObject. + se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev, + C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev, + G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev, + T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev, + coverage = coverage), + rowRanges = rowRanges) + # We generate the expected results. + expected_result <- Matrix::sparseMatrix(i = c(1:12,2,9,6,6,7), j = c(rep(1,12),2,2,3,4,4), x = c(rep(1,12),2,3,2,2,2), dims = c(12,4), + dimnames = list(c("chrM_2_A_C","chrM_2_A_G","chrM_2_A_T","chrM_4_C_A","chrM_4_C_G","chrM_4_C_T","chrM_1_G_A","chrM_1_G_C","chrM_1_G_T","chrM_3_T_A","chrM_3_T_C","chrM_3_T_G"), paste0("Cell_", 1:4))) + expect_equal(sigurd::CalculateConsensus(SE = se, chromosome_prefix = "chrM", verbose = FALSE), expected_result, tolerance = 1e-6) + + + # These are the reference alleles for the first 4 positions and the position 3107. + ref_allele <- c("G", "A", "T", "C", "N") + # All possible combinations. The first allele means the reference allele at this position. + # The second allele means the reads we are observing. 
+ variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A", + "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", + "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T", + "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C", + "chrM_3107_N_A", "chrM_3107_N_G", "chrM_3107_N_T", "chrM_3107_N_C") + # We generate a matrix to have an overview over the reads we observe per cell and variant. + reads_per_variant <- matrix(0, nrow = length(variants), ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4))) + reads_per_variant[,1] <- c(0,20,0,0, 23,0,0,0, 0,0,45,0, 0,0,0,25, 10,0,0,0) + reads_per_variant[,2] <- c(0,0,0,0, 20,20,0,0, 40,0,0,0, 0,0,0,0, 0,10,0,0) + reads_per_variant[,3] <- c(0,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0, 0,0,10,0) + reads_per_variant[,4] <- c(20,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0, 0,0,0,10) + # We generate sparse matrices for the SummarizedExperimentObject. + A_counts_fw <- Matrix::sparseMatrix(i = c(2,1,5), j = c(1,4,1), x = c(20,20,10), dims = c(5,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + A_counts_rev <- Matrix::sparseMatrix(i = c(2,1,5), j = c(1,4,1), x = c(20,20,10), dims = c(5,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_fw <- Matrix::sparseMatrix(i = c(4,5), j = c(1,2), x = c(25,10), dims = c(5,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_rev <- Matrix::sparseMatrix(i = c(4,5), j = c(1,2), x = c(25,10), dims = c(5,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_fw <- Matrix::sparseMatrix(i = c(1,1,2,5), j = c(1,2,2,3), x = c(23,20,20,10), dims = c(5,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_rev <- Matrix::sparseMatrix(i = c(1,1,2,5), j = c(1,2,2,3), x = c(23,20,20,10), dims = c(5,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_fw <- Matrix::sparseMatrix(i = c(3,1,4,4,5), j = c(1,2,3,4,4), x = c(45,40,30,30,10), dims = c(5,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_rev <- Matrix::sparseMatrix(i = c(3,1,4,4,5), j = c(1,2,3,4,4), x 
= c(45,40,30,30,10), dims = c(5,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + # We add the sparse matrices together to get the coverage. + As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev) + Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev) + Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev) + Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev) + coverage <- As + Cs + coverage <- coverage + Gs + coverage <- coverage + Ts + coverage <- as(coverage, "CsparseMatrix") + + # We generate a GRanges object for the SummarizedExperimentObject. + rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = c(1:4,3107), end = c(1:4,3107), width = 1), strand = "*", refAllele = ref_allele) + # We generate the actual SummarizedExperimentObject. + se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev, + C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev, + G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev, + T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev, + coverage = coverage), + rowRanges = rowRanges) + # We generate the expected results. 
+  expected_result <- Matrix::sparseMatrix(i = c(1:13,2,9,14,6,15,6,7,16), j = c(rep(1,13),2,2,2,3,3,4,4,4), x = c(rep(1,12),2,2,3,2,2,2,2,2,2), dims = c(16,4),
+                                          dimnames = list(c("chrM_2_A_C","chrM_2_A_G","chrM_2_A_T","chrM_4_C_A","chrM_4_C_G","chrM_4_C_T","chrM_1_G_A","chrM_1_G_C","chrM_1_G_T","chrM_3_T_A","chrM_3_T_C","chrM_3_T_G",
+                                                                "chrM_3107_N_A","chrM_3107_N_C","chrM_3107_N_G","chrM_3107_N_T"), paste0("Cell_", 1:4)))
+  expect_equal(sigurd::CalculateConsensus(SE = se, chromosome_prefix = "chrM", verbose = FALSE), expected_result, tolerance = 1e-6)
+})
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/testthat/test-CalculateCorrelationPValue.R b/tests/testthat/test-CalculateCorrelationPValue.R
new file mode 100644
index 0000000..d0dd139
--- /dev/null
+++ b/tests/testthat/test-CalculateCorrelationPValue.R
@@ -0,0 +1,15 @@
+test_that("Testing CalculateCorrelationPValue.R", {
+  # Input: one 0/1 vector of length 20 per variant; the loop below names the entries Cell_1 ... Cell_20.
+  variants_list <- list(JAK2_V617F = c(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1),
+                        chr11_61796992_G_C = c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0),
+                        chrM_1_G_A = c(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1),
+                        chrM_2_A_G = c(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0),
+                        chrM_3_T_G = c(0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0),
+                        chrM_4_C_T = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1))
+  for(i in seq_along(variants_list)) names(variants_list[[i]]) <- paste0("Cell_", 1:20)
+
+  # Expected output: six values, of which only the second carries a name ("cor").
+  expected_result1 <- c(0.4175949, 0.1919192, 11, 9, 11, 9)
+  names(expected_result1) <- c("", "cor", "", "", "", "")
+  expect_equal(sigurd::CalculateCorrelationPValue(variant_values = variants_list[["JAK2_V617F"]], other_mutation = "chr11_61796992_G_C", all_variants_list = variants_list[names(variants_list) != "JAK2_V617F"]), expected_result1, tolerance = 1e-6)
+})
diff --git a/tests/testthat/test-CalculateCoverage.R b/tests/testthat/test-CalculateCoverage.R
new file mode 100644
index 0000000..7e7e340
--- /dev/null
+++ b/tests/testthat/test-CalculateCoverage.R
@@ -0,0 +1,61 @@
+test_that("Testing CalculateCoverage.R", {
+  # These are the reference alleles for the first 4 positions.
+  ref_allele <- c("G", "A", "T", "C")
+  # All possible combinations. The first allele means the reference allele at this position.
+  # The second allele means the reads we are observing.
+  variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A",
+                "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G",
+                "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T",
+                "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C")
+  # We generate a matrix to have an overview over the reads we observe per cell and variant.
+  reads_per_variant <- matrix(0, nrow = 16, ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4)))
+  reads_per_variant[,1] <- c(0,20,0,0, 23,0,0,0, 0,0,45,0, 0,0,0,25)
+  reads_per_variant[,2] <- c(0,0,0,0, 20,20,0,0, 40,0,0,0, 0,0,0,0)
+  reads_per_variant[,3] <- c(0,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0)
+  reads_per_variant[,4] <- c(20,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0)
+  # We generate sparse matrices for the SummarizedExperimentObject.
+ A_counts_fw <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + A_counts_rev <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_fw <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_rev <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_fw <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_rev <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_fw <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_rev <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + # We add the sparse matrices together to get the coverage. + As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev) + Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev) + Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev) + Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev) + coverage <- As + Cs + coverage <- coverage + Gs + coverage <- coverage + Ts + coverage <- as(coverage, "CsparseMatrix") + + # We generate a GRanges object for the SummarizedExperimentObject. + rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = 1:4, end = 1:4, width = 1), strand = "*", refAllele = ref_allele) + # We generate the actual SummarizedExperimentObject. 
+  se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev,
+                                                                 C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev,
+                                                                 G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev,
+                                                                 T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev,
+                                                                 coverage = coverage),
+                                                   rowRanges = rowRanges)
+  # We generate the expected results.
+  expected_result <- Matrix::sparseMatrix(i = c(1:12,1,4,5,7,10,11,3,9,12,1,3,4,9,10,12), j = c(rep(1,12),rep(2,6),rep(3,3),rep(4,6)), x = c(46,90,50,46,40,90,40,90,50,46,40,50,120,120,40,40,120,40,60,60,60,40,60,40,60,40,60), dims = c(12,4),
+                                          dimnames = list(c("chrM_1_G_A", "chrM_3_T_A", "chrM_4_C_A", "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", "chrM_1_G_T", "chrM_2_A_T", "chrM_4_C_T"), paste0("Cell_", 1:4)))
+  expect_equal(sigurd::CalculateCoverage(SE = se, chromosome_prefix = "chrM"), expected_result, tolerance = 1e-6)
+})
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/testthat/test-CalculateFisherTestPValue.R b/tests/testthat/test-CalculateFisherTestPValue.R
new file mode 100644
index 0000000..9b065e9
--- /dev/null
+++ b/tests/testthat/test-CalculateFisherTestPValue.R
@@ -0,0 +1,15 @@
+test_that("Testing CalculateFisherTestPValue.R", {
+  # Input: one 0/1 vector of length 20 per variant; the loop below names the entries Cell_1 ... Cell_20.
+  variants_list <- list(JAK2_V617F = c(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1),
+                        chr11_61796992_G_C = c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0),
+                        chrM_1_G_A = c(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1),
+                        chrM_2_A_G = c(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0),
+                        chrM_3_T_G = c(0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0),
+                        chrM_4_C_T = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1))
+  for(i in seq_along(variants_list)) names(variants_list[[i]]) <- paste0("Cell_", 1:20)
+
+  # Expected output: six values, of which only the second carries a name ("odds ratio").
+  expected_result1 <- c(0.6534175, 2.1011776, 7, 4, 4, 5)
+  names(expected_result1) <- c("", "odds ratio", "", "", "", "")
+  expect_equal(sigurd::CalculateFisherTestPValue(variant_values = variants_list[["JAK2_V617F"]], other_mutation = "chr11_61796992_G_C", all_variants_list = variants_list[names(variants_list) != "JAK2_V617F"]), expected_result1, tolerance = 1e-6)
+})
diff --git a/tests/testthat/test-CalculateQuality.R b/tests/testthat/test-CalculateQuality.R
new file mode 100644
index 0000000..ebaa54d
--- /dev/null
+++ b/tests/testthat/test-CalculateQuality.R
@@ -0,0 +1,59 @@
+test_that("Testing CalculateQuality.R", {
+  # These are the reference alleles for the first 4 positions.
+  ref_allele <- c("G", "A", "T", "C")
+  # All possible combinations. The first allele means the reference allele at this position.
+  # The second allele means the reads we are observing.
+  variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A",
+                "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G",
+                "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T",
+                "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C")
+  # We generate a matrix to have an overview over the reads we observe per cell and variant.
+  reads_per_variant <- matrix(0, nrow = 16, ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4)))
+  reads_per_variant[,1] <- c(0,20,0,0, 23,0,0,0, 0,0,45,0, 0,0,0,25)
+  reads_per_variant[,2] <- c(0,0,0,0, 20,20,0,0, 40,0,0,0, 0,0,0,0)
+  reads_per_variant[,3] <- c(0,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0)
+  reads_per_variant[,4] <- c(20,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0)
+  # We generate sparse matrices for the SummarizedExperimentObject.
+  A_counts_fw <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  A_qual_fw <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(34,24), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  A_counts_rev <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  A_qual_rev <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(28,18), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  C_counts_fw <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  C_qual_fw <- Matrix::sparseMatrix(i = 4, j = 1, x = 34, dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  C_counts_rev <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  C_qual_rev <- Matrix::sparseMatrix(i = 4, j = 1, x = 38, dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  G_counts_fw <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  G_qual_fw <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(34,35,36), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  G_counts_rev <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  G_qual_rev <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(36,37,38), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  T_counts_fw <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  T_qual_fw <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(12,13,14,15), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  T_counts_rev <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+  T_qual_rev <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(34,34,34,34), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4)))
+
+  # Coverage is the sum of the forward and reverse count matrices of all four bases (the *_qual_* matrices are not added).
+  As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev)
+  Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev)
+  Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev)
+  Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev)
+  coverage <- As + Cs
+  coverage <- coverage + Gs
+  coverage <- coverage + Ts
+  coverage <- as(coverage, "CsparseMatrix")
+
+  # We generate a GRanges object for the SummarizedExperimentObject.
+  rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = 1:4, end = 1:4, width = 1), strand="*", refAllele = ref_allele)
+  # We generate the actual SummarizedExperimentObject (counts and qualities per base and strand, plus coverage).
+  se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev, A_qual_fw = A_qual_fw, A_qual_rev = A_qual_rev,
+                                                                 C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev, C_qual_fw = C_qual_fw, C_qual_rev = C_qual_rev,
+                                                                 G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev, G_qual_fw = G_qual_fw, G_qual_rev = G_qual_rev,
+                                                                 T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev, T_qual_fw = T_qual_fw, T_qual_rev = T_qual_rev,
+                                                                 coverage = coverage),
+                                                   rowRanges = rowRanges)
+  reads_alt <- sigurd::CalculateAltReads(SE = se, chromosome_prefix = "chrM")
+  # We generate the expected results.
+ expected_result <- c(21, NaN, NaN, NaN, NaN, NaN, 37, NaN, NaN, 23.5, NaN, 24.25) + names(expected_result) <- c("chrM_1_G_A", "chrM_3_T_A", "chrM_4_C_A", "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", "chrM_1_G_T", + "chrM_2_A_T", "chrM_4_C_T") + expect_equal(sigurd::CalculateQuality(SE = se, variants = rownames(reads_alt), chromosome_prefix = "chrM"), expected_result, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-CalculateStrandCorrelation.R b/tests/testthat/test-CalculateStrandCorrelation.R new file mode 100644 index 0000000..380eebf --- /dev/null +++ b/tests/testthat/test-CalculateStrandCorrelation.R @@ -0,0 +1,53 @@ +test_that("Testing CalculateStrandCorrelation.R", { + # These are the reference alleles for the first 4 positions. + ref_allele <- c("G", "A", "T", "C") + # All possible combinations. The first allele means the reference allele at this position. + # The second allele means the reads we are observing. + variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A", + "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", + "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T", + "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C") + # We generate a matrix to have an overview over the reads we observe per cell and variant. + reads_per_variant <- matrix(0, nrow = 16, ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4))) + reads_per_variant[,1] <- c(0,20,0,0, 23,0,0,0, 0,0,45,0, 0,0,0,25) + reads_per_variant[,2] <- c(0,0,0,0, 20,20,0,0, 40,0,0,0, 0,0,0,0) + reads_per_variant[,3] <- c(0,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + reads_per_variant[,4] <- c(20,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + # We generate sparse matrices for the SummarizedExperimentObject. 
+ A_counts_fw <- Matrix::sparseMatrix(i = c(1,2,3,4,1,1,2,3,4,1,3,4), j = c(1,1,1,1,2,3,3,3,3,4,4,4), x = c(10,20,30,40,50,10,20,30,40,10,20,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + A_counts_rev <- Matrix::sparseMatrix(i = c(1,2,3,4,2,1,2,3,4,2,2,4), j = c(1,1,1,1,2,3,3,3,3,4,4,4), x = c(10,20,30,40,50,40,30,20,10,10,20,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + + C_counts_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(26, 58, 59, 81, 18, 87, 27, 39, 34, 62, 86, 47, 85, 78, 25, 43), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(7, 83, 5, 87, 58, 54, 8, 9, 2, 1, 3, 44, 29, 90, 64, 71), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + + G_counts_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(90, 6, 17, 74, 35, 76, 61, 98, 81, 70, 33, 59, 12, 77, 27, 48), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(67, 19, 58, 42, 80, 0, 26, 12, 72, 78, 17, 45, 24, 91, 96, 62), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + + T_counts_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(31, 70, 38, 4, 50, 77, 95, 41, 28, 20, 92, 65, 2, 90, 6, 17), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(38, 42, 3, 2, 95, 7, 71, 45, 58, 33, 30, 51, 91, 8, 79, 81), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + + # We add the sparse matrices together to get the coverage. 
+  As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev)
+  Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev)
+  Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev)
+  Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev)
+  coverage <- As + Cs
+  coverage <- coverage + Gs
+  coverage <- coverage + Ts
+  coverage <- as(coverage, "CsparseMatrix")
+
+  # We generate a GRanges object for the SummarizedExperimentObject.
+  rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = 1:4, end = 1:4, width = 1), strand = "*", refAllele = ref_allele)
+  # We generate the actual SummarizedExperimentObject.
+  se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev,# A_qual_fw = A_qual_fw, A_qual_rev = A_qual_rev,
+                                                                 C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev,# C_qual_fw = C_qual_fw, C_qual_rev = C_qual_rev,
+                                                                 G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev,# G_qual_fw = G_qual_fw, G_qual_rev = G_qual_rev,
+                                                                 T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev,# T_qual_fw = T_qual_fw, T_qual_rev = T_qual_rev,
+                                                                 coverage = coverage),
+                                                   rowRanges = rowRanges)
+  # We generate the expected results.
+  expected_result <- c(NA, NA, -0.1889822, 0.6921127, 0.3135761, -0.2888473, -0.8573596, 0.9442110, 0.5970276, 0.6795482, -0.1622359, -0.9911656)
+  names(expected_result) <- c("chrM_1_G_A", "chrM_3_T_A", "chrM_4_C_A", "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", "chrM_1_G_T",
+                              "chrM_2_A_T", "chrM_4_C_T")
+  expect_equal(sigurd::CalculateStrandCorrelation(SE = se, chromosome_prefix = "chrM"), expected_result, tolerance = 1e-6)
+})
diff --git a/tests/testthat/test-Filtering.R b/tests/testthat/test-Filtering.R
new file mode 100644
index 0000000..37c7b2a
--- /dev/null
+++ b/tests/testthat/test-Filtering.R
@@ -0,0 +1,37 @@
+test_that("Testing Filtering.R", {
+  # Testing if blacklisting works. Fixtures are located with test_path() so the test does not depend on the working directory.
+  test_blacklist <- readRDS(test_path("test_data", "LoadingMAEGATK_typewise_Test_ExpectedResults.rds"))
+  blacklist <- test_path("test_data", "Filtering_Blacklisted_Barcodes.tsv")
+  test_blacklist <- sigurd::Filtering(se = test_blacklist, blacklisted_barcodes_path = blacklist, verbose = FALSE)
+  # saveRDS(test_blacklist, test_path("test_data", "Filtering_Blacklist_ExpectedResults.rds")) # uncomment to regenerate the reference
+  expected_result_blacklist <- readRDS(test_path("test_data", "Filtering_Blacklist_ExpectedResults.rds"))
+  expect_equal(test_blacklist, expected_result_blacklist, tolerance = 1e-6)
+
+  # Testing if fraction thresholding works.
+  test_fraction_threshold <- readRDS(test_path("test_data", "LoadingMAEGATK_typewise_Test_ExpectedResults.rds"))
+  test_fraction_threshold <- sigurd::Filtering(se = test_fraction_threshold, fraction_threshold = 0.25, verbose = FALSE)
+  # saveRDS(test_fraction_threshold, test_path("test_data", "Filtering_Fraction_Threshold_ExpectedResults.rds")) # uncomment to regenerate the reference
+  expected_result_fraction_threshold <- readRDS(test_path("test_data", "Filtering_Fraction_Threshold_ExpectedResults.rds"))
+  expect_equal(test_fraction_threshold, expected_result_fraction_threshold, tolerance = 1e-6)
+
+  # Testing if alt reads thresholding works.
+  test_alts_threshold <- readRDS(test_path("test_data", "LoadingMAEGATK_typewise_Test_ExpectedResults.rds"))
+  test_alts_threshold <- sigurd::Filtering(se = test_alts_threshold, alts_threshold = 50, verbose = FALSE)
+  # saveRDS(test_alts_threshold, test_path("test_data", "Filtering_Alts_Threshold_ExpectedResults.rds")) # uncomment to regenerate the reference
+  expected_result_alts_threshold <- readRDS(test_path("test_data", "Filtering_Alts_Threshold_ExpectedResults.rds"))
+  expect_equal(test_alts_threshold, expected_result_alts_threshold, tolerance = 1e-6)
+
+  # Testing if min cells per variant thresholding works.
+  test_min_cells_per_variant <- readRDS(test_path("test_data", "LoadingMAEGATK_typewise_Test_ExpectedResults.rds"))
+  test_min_cells_per_variant <- sigurd::Filtering(se = test_min_cells_per_variant, min_cells_per_variant = 4, verbose = FALSE)
+  # saveRDS(test_min_cells_per_variant, test_path("test_data", "Filtering_CellThreshold_ExpectedResults.rds")) # uncomment to regenerate the reference
+  expected_result_min_cells_per_variant <- readRDS(test_path("test_data", "Filtering_CellThreshold_ExpectedResults.rds"))
+  expect_equal(test_min_cells_per_variant, expected_result_min_cells_per_variant, tolerance = 1e-6)
+
+  # Testing if min variants per cell thresholding works.
+  test_min_variants_per_cell <- readRDS(test_path("test_data", "LoadingMAEGATK_typewise_Test_ExpectedResults.rds"))
+  test_min_variants_per_cell <- sigurd::Filtering(se = test_min_variants_per_cell, min_variants_per_cell = 4, verbose = FALSE)
+  # saveRDS(test_min_variants_per_cell, test_path("test_data", "Filtering_VariantThreshold_ExpectedResults.rds")) # uncomment to regenerate the reference
+  expected_result_min_variants_per_cell <- readRDS(test_path("test_data", "Filtering_VariantThreshold_ExpectedResults.rds"))
+  expect_equal(test_min_variants_per_cell, expected_result_min_variants_per_cell, tolerance = 1e-6)
+})
diff --git a/tests/testthat/test-GetCellInfoPerVariant.R b/tests/testthat/test-GetCellInfoPerVariant.R
new file mode 100644
index 0000000..ddb7273
--- /dev/null
+++ b/tests/testthat/test-GetCellInfoPerVariant.R
@@ -0,0 +1,10 @@
+test_that("Testing GetCellInfoPerVariant.R", {
+  # Loading the test object (located with test_path() so the working directory does not matter).
+  se <- readRDS(test_path("test_data", "LoadingMAEGATK_typewise_Test_ExpectedResults.rds"))
+  vois <- c("chrM_1_G_A", "chrM_3_T_C")
+  test <- sigurd::GetCellInfoPerVariant(se = se, voi_ch = vois, verbose = FALSE)
+  # We generate the expected result.
+  expected_result <- tibble::tibble(cell = paste0("Test_Cell_", 1:4), cov_chrM_1_G_A = c(279,328,230,300), af_chrM_1_G_A = c(0.07168459, 0.15243902, 0.21739130, 1/30),
+                                    cov_chrM_3_T_C = c(335,264,311,331), af_chrM_3_T_C = c(0.1074627, 0.2386364, 0.2861736, 0.2749245))
+  expect_equal(test, expected_result, tolerance = 1e-6)
+})
diff --git a/tests/testthat/test-GetVariantInfo.R b/tests/testthat/test-GetVariantInfo.R
new file mode 100644
index 0000000..1066e32
--- /dev/null
+++ b/tests/testthat/test-GetVariantInfo.R
@@ -0,0 +1,21 @@
+test_that("Testing GetVariantInfo.R", {
+  # Loading the input object (located with test_path() so the working directory does not matter).
+  input_object <- readRDS(test_path("test_data", "LoadingMAEGATK_typewise_Test_ExpectedResults.rds"))
+  test_consensus <- sigurd::GetVariantInfo(SE = input_object, information = "consensus", variants = "chrM_1_G_A", cells = NULL)
+  test_fraction <- sigurd::GetVariantInfo(SE = input_object, information = "fraction", variants = "chrM_1_G_A", cells = "Test_Cell_1")
+  test_coverage <- sigurd::GetVariantInfo(SE = input_object, information = "coverage", variants = "chrM_1_G_A", cells = c("Test_Cell_1", "Test_Cell_3"))
+  test_alts <- sigurd::GetVariantInfo(SE = input_object, information = "alts", variants = "chrM_1_G_A", cells = NULL)
+  test_refs <- sigurd::GetVariantInfo(SE = input_object, information = "refs", variants = "chrM_1_G_A", cells = NULL)
+  # Expected output: one sparse row for chrM_1_G_A per requested information type, restricted to the requested cells.
+  expected_output_consensus <- Matrix::sparseMatrix(i = c(1,1,1,1), j = 1:4, x = rep(3,4), dims = c(1,4), dimnames = list("chrM_1_G_A", paste0("Test_Cell_", 1:4)))
+  expected_output_fraction <- Matrix::sparseMatrix(i = 1, j = 1, x = 0.07168459, dims = c(1,1), dimnames = list("chrM_1_G_A", "Test_Cell_1"))
+  expected_output_coverage <- Matrix::sparseMatrix(i = c(1,1), j = 1:2, x = c(279,230), dims = c(1,2), dimnames = list("chrM_1_G_A", paste0("Test_Cell_", c(1,3))))
+  expected_output_alts <- Matrix::sparseMatrix(i = c(1,1,1,1), j = 1:4, x = c(20,50,50,10), dims = c(1,4), dimnames = list("chrM_1_G_A", paste0("Test_Cell_", 1:4)))
+  expected_output_refs <- Matrix::sparseMatrix(i = c(1,1,1,1), j = 1:4, x = c(259,278,180,290), dims = c(1,4), dimnames = list("chrM_1_G_A", paste0("Test_Cell_", 1:4)))
+  # We perform the tests.
+  expect_equal(test_consensus, expected_output_consensus, tolerance = 1e-6)
+  expect_equal(test_fraction, expected_output_fraction, tolerance = 1e-6)
+  expect_equal(test_coverage, expected_output_coverage, tolerance = 1e-6)
+  expect_equal(test_alts, expected_output_alts, tolerance = 1e-6)
+  expect_equal(test_refs, expected_output_refs, tolerance = 1e-6)
+})
diff --git a/tests/testthat/test-LoadingMAEGATK_typewise.R b/tests/testthat/test-LoadingMAEGATK_typewise.R
new file mode 100644
index 0000000..21a1f6e
--- /dev/null
+++ b/tests/testthat/test-LoadingMAEGATK_typewise.R
@@ -0,0 +1,67 @@
+test_that("Testing LoadingMAEGATK_typewise.R", {
+  # These are the reference alleles for the first 4 positions.
+  ref_allele <- c("G", "A", "T", "C")
+  # All possible combinations. The first allele means the reference allele at this position.
+  # The second allele means the reads we are observing.
+ variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A", + "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", + "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T", + "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C") + # We generate a matrix to have an overview over the reads we observe per cell and variant. + reads_per_variant <- matrix(0, nrow = 16, ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4))) + reads_per_variant[,1] <- c( 0,20,0,0, 23, 0,0,0, 0,0,45, 0, 0,0,0,25) + reads_per_variant[,2] <- c( 0, 0,0,0, 20,20,0,0, 40,0, 0, 0, 0,0,0, 0) + reads_per_variant[,3] <- c( 0, 0,0,0, 0, 0,0,0, 0,0, 0,30, 0,0,0, 0) + reads_per_variant[,4] <- c(20, 0,0,0, 0, 0,0,0, 0,0, 0,30, 0,0,0, 0) + # We generate sparse matrices for the SummarizedExperimentObject. + A_counts_fw <- Matrix::sparseMatrix(i = c(1,2,3,4,1,1,2,3,4,1,3,4), j = c(1,1,1,1,2,3,3,3,3,4,4,4), x = c(10,20,30,40,50,10,20,30,40,10,20,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + A_qual_fw <- Matrix::sparseMatrix(i = c(1,2,3,4,1,1,2,3,4,1,3,4), j = c(1,1,1,1,2,3,3,3,3,4,4,4), x = c(23,37,20,31,17,15,22,40,30,10,16,14), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + A_counts_rev <- Matrix::sparseMatrix(i = c(1,2,3,4,2,1,2,3,4,2,2,4), j = c(1,1,1,1,2,3,3,3,3,4,4,4), x = c(10,20,30,40,50,40,30,20,10,10,20,30), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + A_qual_rev <- Matrix::sparseMatrix(i = c(1,2,3,4,1,1,2,3,4,1,3,4), j = c(1,1,1,1,2,3,3,3,3,4,4,4), x = c(17,31,23,27,37,5,24,8,32,26,33,28), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(26,58,59,81,18,87,27,39,34,62,86,47,85,78,25,43), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_qual_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(17,36,37,38,40,26,29,35,11,32,22,19,20,21,25,10), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 
1:4))) + C_counts_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(7,83,5,87,58,54,8,9,2,1,3,44,29,90,64,71), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + C_qual_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(27,25,12,22,23,14,17,5,15,26,30,33,11,16,9,40), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(90,6,17,74,35,76,61,98,81,70,33,59,12,77,27,48), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_qual_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(7,35,36,40,13,5,37,12,15,25,26,9,17,18,10,31), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(67,19,58,42,80,0,26,12,72,78,17,45,24,91,96,62), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + G_qual_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(40,39,15,5,30,21,14,33,36,27,18,28,10,11,6,35), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(31, 70, 38, 4, 50, 77, 95, 41, 28, 20, 92, 65, 2, 90, 6, 17), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_qual_fw <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(32,35,19,5,23,12,25,6,9,18,24,34,28,21,14,37), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(38, 42, 3, 2, 95, 7, 71, 45, 58, 33, 30, 51, 91, 8, 79, 81), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + T_qual_rev <- Matrix::sparseMatrix(i = rep(1:4, each = 4), j = rep(1:4,4), x = c(22,8,13,7,36,28,31,18,30,19,17,27,35,16,23,40), dims = c(4,4), dimnames = list(NULL, paste0("Cell_", 1:4))) + + # We add the sparse matrices together to get the 
coverage. + As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev) + Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev) + Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev) + Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev) + coverage <- As + Cs + coverage <- coverage + Gs + coverage <- coverage + Ts + coverage <- as(coverage, "CsparseMatrix") + + # We generate a GRanges object for the SummarizedExperimentObject. + rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = 1:4, end = 1:4, width = 1), strand = "*", refAllele = ref_allele) + # We generate the actual SummarizedExperimentObject. + se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev, A_qual_fw = A_qual_fw, A_qual_rev = A_qual_rev, + C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev, C_qual_fw = C_qual_fw, C_qual_rev = C_qual_rev, + G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev, G_qual_fw = G_qual_fw, G_qual_rev = G_qual_rev, + T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev, T_qual_fw = T_qual_fw, T_qual_rev = T_qual_rev, + coverage = coverage), + rowRanges = rowRanges) + # saveRDS(se, paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Data.rds")) + # se <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Data.rds")) + barcodes <- data.frame(paste0("Cell_", 1:4)) + # write.table(barcodes, paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv"), sep = "\t", quote = FALSE, col.names = FALSE, row.names = FALSE) + test <- sigurd::LoadingMAEGATK_typewise(samples_file = NULL, samples_path = paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Data.rds"), patient = "Test", type_use = "scRNAseq_MT", + chromosome_prefix = "chrM", min_cells = 2, barcodes_path = paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv"), verbose = FALSE) + # saveRDS(test, paste0(getwd(), 
"/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds")) + expected_result <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds")) + # We perform the test for a single sample. + expect_equal(test, expected_result, tolerance = 1e-6) + # We perform the test for a sample input file. + test <- sigurd::LoadingMAEGATK_typewise(samples_file = paste0(getwd(), "/test_data/MAEGATK_inputfile_test.csv"), samples_path = NULL, patient = "Test", type_use = "scRNAseq_MT", + chromosome_prefix = "chrM", min_cells = 2, barcodes_path = paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv"), verbose = FALSE) + expect_equal(test, expected_result, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-LoadingVCF_typewise.R b/tests/testthat/test-LoadingVCF_typewise.R new file mode 100644 index 0000000..8cc7445 --- /dev/null +++ b/tests/testthat/test-LoadingVCF_typewise.R @@ -0,0 +1,14 @@ +test_that("Testing LoadingVCF_typewise.R", { + # We test for a sample input file. + test <- LoadingVCF_typewise(samples_file = paste0(getwd(), "/test_data/VCF_Test/VCF_inputfile_test.csv"), vcf_path = paste0(getwd(), "/test_data/VCF_Test/test.vcf"), patient = "Test", + samples_path = NULL, barcodes_path = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, verbose = FALSE) + # saveRDS(test, paste0(getwd(), "/test_data/VCF_Test/VCF_ExpectedResults.rds")) + expected_result <- readRDS(paste0(getwd(), "/test_data/VCF_Test/VCF_ExpectedResults.rds")) + expect_equal(test, expected_result, tolerance = 1e-6) + + # We test for a single sample. 
+ test <- LoadingVCF_typewise(samples_file = NULL, vcf_path = paste0(getwd(), "/test_data/VCF_Test/test.vcf"), patient = "Test", samples_path = paste0(getwd(), "/test_data/VCF_Test/cellSNP.cells.vcf"), + barcodes_path = paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv"), type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, verbose = FALSE) + expected_result <- readRDS(paste0(getwd(), "/test_data/VCF_Test/VCF_ExpectedResults.rds")) + expect_equal(test, expected_result, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-LoadingVarTrix_typewise.R b/tests/testthat/test-LoadingVarTrix_typewise.R new file mode 100644 index 0000000..ced1e8d --- /dev/null +++ b/tests/testthat/test-LoadingVarTrix_typewise.R @@ -0,0 +1,15 @@ +test_that("Testing LoadingVarTrix_typewise.R", { + # We perform the test for a single sample. + input_vatrix <- sigurd::LoadingVarTrix_typewise(samples_file = NULL, samples_path = paste0(getwd(), "/test_data/VarTrix_Test/"), barcodes_path = paste0(getwd(), "/test_data/VarTrix_Test/barcodes.tsv"), + snp_path = paste0(getwd(), "/test_data/VarTrix_Test/SNV.loci.txt"), vcf_path = paste0(getwd(), "/test_data/VarTrix_Test/test.vcf"), + patient = "Test", type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, verbose = FALSE) + # saveRDS(input_vatrix, paste0(getwd(), "/test_data/VarTrix_Test/test.rds")) + expected_results <- readRDS(paste0(getwd(), "/test_data/VarTrix_Test/test.rds")) + expect_equal(input_vatrix, expected_results, tolerance = 1e-6) + # We perform the test for a sample input file.
+ input_vatrix <- sigurd::LoadingVarTrix_typewise(samples_file = paste0(getwd(), "/test_data/VarTrix_Test/inputfile_test.csv"), samples_path = NULL, barcodes_path = NULL, + snp_path = NULL, vcf_path = paste0(getwd(), "/test_data/VarTrix_Test/test.vcf"), + patient = "Test", type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, verbose = FALSE) + expected_results <- readRDS(paste0(getwd(), "/test_data/VarTrix_Test/test.rds")) + expect_equal(input_vatrix, expected_results, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-Merging_SE_list.R b/tests/testthat/test-Merging_SE_list.R new file mode 100644 index 0000000..8a68315 --- /dev/null +++ b/tests/testthat/test-Merging_SE_list.R @@ -0,0 +1,8 @@ +test_that("Testing Merging_SE_list.R", { + inputobject_1 <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds")) + inputobject_2 <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds")) + combined_object <- sigurd::Merging_SE_list(se = list(inputobject_1, inputobject_2)) + # saveRDS(combined_object, paste0(getwd(), "/test_data/Merging_SE_list_ExpectedResults.rds")) + expected_result <- readRDS(paste0(getwd(), "/test_data/Merging_SE_list_ExpectedResults.rds")) + expect_equal(combined_object, expected_result, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-RowWiseSplit.R b/tests/testthat/test-RowWiseSplit.R new file mode 100644 index 0000000..6912f3b --- /dev/null +++ b/tests/testthat/test-RowWiseSplit.R @@ -0,0 +1,53 @@ +test_that("Testing RowWiseSplit.R", { + inputobject <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds")) + test <- sigurd::RowWiseSplit(se = inputobject, n_cores = 1, remove_nocalls = FALSE) + test_parallel <- sigurd::RowWiseSplit(se = inputobject, n_cores = 2, remove_nocalls = FALSE) + test_removed_nocalls <- sigurd::RowWiseSplit(se = inputobject, n_cores = 1, remove_nocalls = TRUE) + test_removed_nocalls_parallel <- 
sigurd::RowWiseSplit(se = inputobject, n_cores = 2, remove_nocalls = TRUE) + # We generate the expected results. + expected_result <- list("chrM_1_G_A" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_3_T_A" = c("Test_Cell_1" = 1, "Test_Cell_2" = 0, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_4_C_A" = c("Test_Cell_1" = 1, "Test_Cell_2" = 0, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_1_G_C" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_2_A_C" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_3_T_C" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_2_A_G" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_3_T_G" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_4_C_G" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_1_G_T" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_2_A_T" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_4_C_T" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1)) + expect_equal(test, expected_result, tolerance = 1e-6) + expect_equal(test_parallel, expected_result, tolerance = 1e-6) + expected_result <- list("chrM_1_G_A" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_3_T_A" = c("Test_Cell_1" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_4_C_A" = c("Test_Cell_1" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_1_G_C" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_2_A_C" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_3_T_C" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_2_A_G" = 
c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_3_T_G" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_4_C_G" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_1_G_T" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_2_A_T" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1), + "chrM_4_C_T" = c("Test_Cell_1" = 1, "Test_Cell_2" = 1, "Test_Cell_3" = 1, "Test_Cell_4" = 1)) + expect_equal(test_removed_nocalls, expected_result, tolerance = 1e-6) + expect_equal(test_removed_nocalls_parallel, expected_result, tolerance = 1e-6) +}) + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/test-SeparatingMatrixToList.R b/tests/testthat/test-SeparatingMatrixToList.R new file mode 100644 index 0000000..8d49ffd --- /dev/null +++ b/tests/testthat/test-SeparatingMatrixToList.R @@ -0,0 +1,31 @@ +test_that("Testing SeparatingMatrixToList.R", { + input_matrix <- Matrix::sparseMatrix(i = c(rep(1,4),2,2,2,3,3,3), j = c(1:4,1,2,3,2,3,4), x = c(1:3,1,1,2,3,3,2,1), + dims = c(4,4), dimnames = list(c("chrM_1_G_A", "chrM_2_A_G", "chrM_2_A_T", "chrM_4_C_G"), paste0("Cell_", 1:4))) + test1 <- sigurd::SeparatingMatrixToList("chrM_1_G_A", input_matrix, remove_nocalls = FALSE) + test2 <- sigurd::SeparatingMatrixToList("chrM_2_A_G", input_matrix, remove_nocalls = FALSE) + test3 <- sigurd::SeparatingMatrixToList("chrM_2_A_T", input_matrix, remove_nocalls = FALSE) + test4 <- sigurd::SeparatingMatrixToList("chrM_4_C_G", input_matrix, remove_nocalls = FALSE) + test1_removed_nocalls <- sigurd::SeparatingMatrixToList("chrM_1_G_A", input_matrix, remove_nocalls = TRUE) + test2_removed_nocalls <- sigurd::SeparatingMatrixToList("chrM_2_A_G", input_matrix, remove_nocalls = TRUE) + test3_removed_nocalls <- sigurd::SeparatingMatrixToList("chrM_2_A_T", input_matrix, remove_nocalls = TRUE) + test4_removed_nocalls <- 
sigurd::SeparatingMatrixToList("chrM_4_C_G", input_matrix, remove_nocalls = TRUE) + # We generate the expected outputs. + expected_result1 <- c(Cell_1 = 0, Cell_2 = 1, Cell_3 = 1, Cell_4 = 0) + expected_result2 <- c(Cell_1 = 0, Cell_2 = 1, Cell_3 = 1, Cell_4 = 0) + expected_result3 <- c(Cell_1 = 0, Cell_2 = 1, Cell_3 = 1, Cell_4 = 0) + expected_result4 <- c(Cell_1 = 0, Cell_2 = 0, Cell_3 = 0, Cell_4 = 0) + expected_result1_removed_nocalls <- c(Cell_1 = 0, Cell_2 = 1, Cell_3 = 1, Cell_4 = 0) + expected_result2_removed_nocalls <- c(Cell_1 = 0, Cell_2 = 1, Cell_3 = 1) + expected_result3_removed_nocalls <- c(Cell_2 = 1, Cell_3 = 1, Cell_4 = 0) + expected_result4_removed_nocalls <- numeric() + names(expected_result4_removed_nocalls) <- character() + # We perform the tests. + expect_equal(test1, expected_result1, tolerance = 1e-6) + expect_equal(test2, expected_result2, tolerance = 1e-6) + expect_equal(test3, expected_result3, tolerance = 1e-6) + expect_equal(test4, expected_result4, tolerance = 1e-6) + expect_equal(test1_removed_nocalls, expected_result1_removed_nocalls, tolerance = 1e-6) + expect_equal(test2_removed_nocalls, expected_result2_removed_nocalls, tolerance = 1e-6) + expect_equal(test3_removed_nocalls, expected_result3_removed_nocalls, tolerance = 1e-6) + expect_equal(test4_removed_nocalls, expected_result4_removed_nocalls, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-SetVariantInfo.R b/tests/testthat/test-SetVariantInfo.R new file mode 100644 index 0000000..fab1fb5 --- /dev/null +++ b/tests/testthat/test-SetVariantInfo.R @@ -0,0 +1,11 @@ +test_that("Testing SetVariantInfo.R", { + inputobject <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds")) + counts_matrix <- Matrix::sparseMatrix(i = rep(1:4,6), j = rep(1:6,each=4), x = c(118,112,85,85,98,96,106,86,103,80,87,91,75,120,78,110,98,87,109,82,122,114,91,111), + dims = c(4,6), dimnames = list(paste0("Gene_", 1:4), paste0("Test_Cell_", 1:6))) + seurat_object <- 
suppressWarnings(SeuratObject::CreateSeuratObject(counts = counts_matrix, assay = "RNA")) + test <- SetVariantInfo(SE = inputobject, seurat_object = seurat_object, information = "consensus", variants = "chrM_3_T_A") + # We generate the expected result. + expected_result <- seurat_object + expected_result <- Seurat::AddMetaData(object = seurat_object, metadata = c(3,0,3,3,NA,NA), col.name = "chrM_3_T_A_consensus") + expect_equal(test, expected_result, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-VariantBurden.R b/tests/testthat/test-VariantBurden.R new file mode 100644 index 0000000..42b778b --- /dev/null +++ b/tests/testthat/test-VariantBurden.R @@ -0,0 +1,8 @@ +test_that("Testing VariantBurden.R", { + inputobject <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds")) + test <- sigurd::VariantBurden(inputobject) + # We generate the expected result. + expected_result <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds")) + expected_result$Burden <- c(Test_Cell_1 = 2.72123540439267, Test_Cell_2 = 3.19347598570439, Test_Cell_3 = 2.87721693179241, Test_Cell_4 = 2.85496178156784) + expect_equal(test, expected_result, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-VariantCloneSizeThresholding.R b/tests/testthat/test-VariantCloneSizeThresholding.R new file mode 100644 index 0000000..624ad80 --- /dev/null +++ b/tests/testthat/test-VariantCloneSizeThresholding.R @@ -0,0 +1,10 @@ +# We check if the MAESTER input file is present. 
+test_input_file <- file.exists(paste0(getwd(), "/testthat/test_data/MAEGATK_inputfile_test.csv")) +test_maegatk_sw <- file.exists(paste0(getwd(), "/testthat/test_data/MAESTER_data/SW/SW_CellLineMix_All_mr3_maegatk.rds")) +test_maegatk_sw_cbs <- file.exists(paste0(getwd(), "/testthat/test_data/MAESTER_data/SW/SW_CellLineMix_All_mr3_maegatk_CellBarcodes.tsv")) +test_maegatk_tenx <- file.exists(paste0(getwd(), "/testthat/test_data/MAESTER_data/TenX/TenX_CellLineMix_All_mr3_maegatk.rds")) +test_maegatk_tenx_cbs <- file.exists(paste0(getwd(), "/testthat/test_data/MAESTER_data/TenX/TenX_CellLineMix_All_mr3_maegatk_CellBarcodes.tsv")) + +test_that("Testing VariantCloneSizeThresholding.R", { + expect_equal(2 * 2, 4) +}) diff --git a/tests/testthat/test-VariantWiseCorrelation.R b/tests/testthat/test-VariantWiseCorrelation.R new file mode 100644 index 0000000..77d5256 --- /dev/null +++ b/tests/testthat/test-VariantWiseCorrelation.R @@ -0,0 +1,26 @@ +test_that("Testing VariantWiseCorrelation.R", { + # We generate the input lists. + variants_list <- list(JAK2_V617F = c(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1), + chr11_61796992_G_C = c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0), + chrM_1_G_A = c(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1), + chrM_2_A_G = c(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0), + chrM_3_T_G = c(0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0), + chrM_4_C_T = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1)) + for(i in 1:length(variants_list)) names(variants_list[[i]]) <- paste0("Cell_", 1:20) + # We generate the expected output. 
+ expected_result1 <- data.frame(Variant1 = c(rep("JAK2_V617F", 3), rep("chr11_61796992_G_C", 3)), + Variant2 = c("chr11_61796992_G_C", "chrM_1_G_A", "chrM_4_C_T", "JAK2_V617F", "chrM_1_G_A", "chrM_4_C_T"), + P = c(0.4175949, 0, 0.4629459, 0.4175949, 0.4175949, 0.4629459), + Corr = c(0.1919192, 1, 0.1740777, 0.1919192, 0.1919192, 0.1740777), + Cells_1_Alt = 11, Cells_1_Ref = 9, Cells_2_Alt = c(11,11,15,11,11,15), + Cells_2_Ref = c(9, 9, 5, 9, 9, 5), P_adj = c(0.4629459, 0, 0.4629459, 0.4629459, 0.4629459, 0.4629459)) + expected_result2 <- data.frame(Variant1 = c(rep("JAK2_V617F", 3), rep("chr11_61796992_G_C", 3)), + Variant2 = c("chr11_61796992_G_C", "chrM_1_G_A", "chrM_4_C_T", "JAK2_V617F", "chrM_1_G_A", "chrM_4_C_T"), + P = c(0.4175949, 0, 0.4629459, 0.4175949, 0.4175949, 0.4629459), + Corr = c(0.1919192, 1, 0.1740777, 0.1919192, 0.1919192, 0.1740777), + Cells_1_Alt = 11, Cells_1_Ref = 9, Cells_2_Alt = c(11,11,15,11,11,15), + Cells_2_Ref = c(9, 9, 5, 9, 9, 5), P_adj = c(1, 0, 1, 1, 1, 1)) + expect_equal(sigurd::VariantWiseCorrelation(variants_list, n_cores = 1, p_value_adjustment = "fdr", verbose = FALSE), expected_result1, tolerance = 1e-6) + expect_equal(sigurd::VariantWiseCorrelation(variants_list, n_cores = 2, p_value_adjustment = "fdr", verbose = FALSE), expected_result1, tolerance = 1e-6) + expect_equal(sigurd::VariantWiseCorrelation(variants_list, n_cores = 1, p_value_adjustment = "bonferroni", verbose = FALSE), expected_result2, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-VariantWiseFisherTest.R b/tests/testthat/test-VariantWiseFisherTest.R new file mode 100644 index 0000000..b35a78f --- /dev/null +++ b/tests/testthat/test-VariantWiseFisherTest.R @@ -0,0 +1,29 @@ +test_that("Testing VariantWiseFisherTest.R", { + # We generate the input lists. 
+ variants_list <- list(JAK2_V617F = c(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1), + chr11_61796992_G_C = c(1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0), + chrM_1_G_A = c(0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1), + chrM_2_A_G = c(0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0), + chrM_3_T_G = c(0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0), + chrM_4_C_T = c(1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1)) + for(i in 1:length(variants_list)) names(variants_list[[i]]) <- paste0("Cell_", 1:20) + # We generate the expected output. + test1 <- sigurd::VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = "fdr", verbose = FALSE) + test2 <- sigurd::VariantWiseFisherTest(variants_list, n_cores = 2, p_value_adjustment = "fdr", verbose = FALSE) + test3 <- sigurd::VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = "bonferroni", verbose = FALSE) + expected_result1 <- data.frame(Variant1 = c(rep("JAK2_V617F", 3), rep("chr11_61796992_G_C", 3)), + Variant2 = c("chr11_61796992_G_C", "chrM_1_G_A", "chrM_4_C_T", "JAK2_V617F", "chrM_1_G_A", "chrM_4_C_T"), + P = c(0.6534174804, 0.0000059538, 0.6168730650, 0.6534174804, 0.6534174804, 0.6168730650), + OddsRatio = c(2.1011776181, Inf, 2.1582973857, 2.1011776181, 2.1011776181, 2.1582973857), + Cells_Alt_1_2 = c(7,11,9,7,7,9), Cells_Alt_1_Ref_2 = c(4,0,2,4,4,2), Cells_Alt_2_Ref_1 = c(4,0,6,4,4,6), + Cells_Ref_1_2 = c(5,9,3,5,5,3), P_adj = c(0.6534174804, 0.0000357228, 0.6534174804, 0.6534174804, 0.6534174804, 0.6534174804)) + expected_result2 <- data.frame(Variant1 = c(rep("JAK2_V617F", 3), rep("chr11_61796992_G_C", 3)), + Variant2 = c("chr11_61796992_G_C", "chrM_1_G_A", "chrM_4_C_T", "JAK2_V617F", "chrM_1_G_A", "chrM_4_C_T"), + P = c(0.6534174804, 0.0000059538, 0.6168730650, 0.6534174804, 0.6534174804, 0.6168730650), + OddsRatio = c(2.1011776181, Inf, 2.1582973857, 2.1011776181, 2.1011776181, 2.1582973857), + 
Cells_Alt_1_2 = c(7,11,9,7,7,9), Cells_Alt_1_Ref_2 = c(4,0,2,4,4,2), Cells_Alt_2_Ref_1 = c(4,0,6,4,4,6), + Cells_Ref_1_2 = c(5,9,3,5,5,3), P_adj = c(1, 0.0000357228, 1, 1, 1, 1)) + expect_equal(sigurd::VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = "fdr", verbose = FALSE), expected_result1, tolerance = 1e-6) + expect_equal(sigurd::VariantWiseFisherTest(variants_list, n_cores = 2, p_value_adjustment = "fdr", verbose = FALSE), expected_result1, tolerance = 1e-6) + expect_equal(sigurd::VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = "bonferroni", verbose = FALSE), expected_result2, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-char_to_numeric.R b/tests/testthat/test-char_to_numeric.R new file mode 100644 index 0000000..e57adab --- /dev/null +++ b/tests/testthat/test-char_to_numeric.R @@ -0,0 +1,7 @@ +test_that("Checking if genotyping conversion to numeric works.", { + expect_equal(sigurd::char_to_numeric("1/1"), 2) + expect_equal(sigurd::char_to_numeric("1/0"), 2) + expect_equal(sigurd::char_to_numeric("0/1"), 2) + expect_equal(sigurd::char_to_numeric("0/0"), 1) + expect_equal(sigurd::char_to_numeric("asdf"), 0) +}) diff --git a/tests/testthat/test-combine_NAMES.R b/tests/testthat/test-combine_NAMES.R new file mode 100644 index 0000000..8a16516 --- /dev/null +++ b/tests/testthat/test-combine_NAMES.R @@ -0,0 +1,6 @@ +test_that("multiplication works", { + x <- c("JAK2_p.V617I_c.1849G>A", "ABL1_p.E274K_c.820G>A") + y <- c("chrM_1_G_A", "chrM_3_T_A") + expected_result <- c(x, y) + expect_equal(sigurd::combine_NAMES(x = x, y = y), expected_result) +}) diff --git a/tests/testthat/test-computeAFMutMatrix.R b/tests/testthat/test-computeAFMutMatrix.R new file mode 100644 index 0000000..da4743f --- /dev/null +++ b/tests/testthat/test-computeAFMutMatrix.R @@ -0,0 +1,83 @@ +test_that("Testing if computeAFMutMatrix works.", { + # These are the reference alleles for the first 4 positions. 
+ ref_allele <- c("G", "A", "T", "C") + # All possible combinations. The first allele means the reference allele at this position. + # The second allele means the reads we are observing. + variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A", + "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", + "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T", + "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C") + # We generate a matrix to have an overview over the reads we observe per cell and variant. + reads_per_variant <- matrix(0, nrow = 16, ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4))) + reads_per_variant[,1] <- c(0,20,0,0, 23,0,0,0, 0,0,45,0, 0,0,0,25) + reads_per_variant[,2] <- c(0,0,0,0, 20,20,0,0, 40,0,0,0, 0,0,0,0) + reads_per_variant[,3] <- c(0,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + reads_per_variant[,4] <- c(20,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + # We generate sparse matrices for the SummarizedExperimentObject. + A_counts_fw <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + A_counts_rev <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_fw <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_rev <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_fw <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_rev <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_fw <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_rev <- Matrix::sparseMatrix(i = c(3,1,4,4), j = 
c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + # We add the sparse matrices together to get the coverage. + As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev) + Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev) + Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev) + Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev) + coverage <- As + Cs + coverage <- coverage + Gs + coverage <- coverage + Ts + coverage <- as(coverage, "CsparseMatrix") + + # We generate a GRanges object for the SummarizedExperimentObject. + rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = 1:4, end = 1:4, width = 1), strand = "*", refAllele = ref_allele) + # We generate the actual SummarizedExperimentObject. + se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev, + C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev, + G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev, + T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev, + coverage = coverage), + rowRanges = rowRanges) + # We generate the expected results. + expected_result <- Matrix::sparseMatrix(i = c(7,10,12,1,12), j = c(2,2,3,4,4), x = c(20/(20+0.000001),40/(60+0.000001),30/(30+0.000001),30/(30+0.000001),30/(30+0.000001)), dims = c(12,4), + dimnames = list(c("chrM_1_G_A", "chrM_3_T_A", "chrM_4_C_A", "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", "chrM_1_G_T", "chrM_2_A_T", "chrM_4_C_T"), paste0("Cell_", 1:4))) + expect_equal(sigurd::computeAFMutMatrix(se), expected_result, tolerance = 1e-6) +}) + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/testthat/test-getAltMatrix.R b/tests/testthat/test-getAltMatrix.R new file mode 100644 index 0000000..5982306 --- /dev/null +++ b/tests/testthat/test-getAltMatrix.R @@ -0,0 +1,21 @@ +test_that("Testing getAltMatrix.R", { + # Loading the test SE object. 
+ input_object <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Data.rds")) + test_A <- getAltMatrix(SE_object = input_object, letter = "A", chromosome_prefix = "chrM") + test_T <- getAltMatrix(SE_object = input_object, letter = "T", chromosome_prefix = "chrM") + test_C <- getAltMatrix(SE_object = input_object, letter = "C", chromosome_prefix = "chrM") + test_G <- getAltMatrix(SE_object = input_object, letter = "G", chromosome_prefix = "chrM") + # We generate the expected results. + expected_result_A <- Matrix::sparseMatrix(i = c(1,2,3,1,1:3,1:3), j = c(rep(1,3),2,rep(3,3),rep(4,3)), x = c(20,60,80,50,50,50,50,10,20,60), dims = c(3,4), + dimnames = list(c("chrM_1_G>A", "chrM_3_T>A", "chrM_4_C>A"), paste0("Cell_", 1:4))) + expected_result_T <- Matrix::sparseMatrix(i = rep(1:3, 4), j = rep(1:4,each=3), x = c(69,145,93,112,84,98,41,166,85,6,86,98), dims = c(3,4), + dimnames = list(c("chrM_1_G>T", "chrM_2_A>T", "chrM_4_C>T"), paste0("Cell_", 1:4))) + expected_result_C <- Matrix::sparseMatrix(i = rep(1:3, 4), j = rep(1:4,each=3), x = c(33,76,36,141,141,63,64,35,89,168,48,91), dims = c(3,4), + dimnames = list(c("chrM_1_G>C", "chrM_2_A>C", "chrM_3_T>C"), paste0("Cell_", 1:4))) + expected_result_G <- Matrix::sparseMatrix(i = rep(1:3, 4), j = rep(1:4,each=3), x = c(115,153,36,76,148,168,87,50,123,110,104,110), dims = c(3,4), + dimnames = list(c("chrM_2_A>G", "chrM_3_T>G", "chrM_4_C>G"), paste0("Cell_", 1:4))) + expect_equal(test_A, expected_result_A, tolerance = 1e-6) + expect_equal(test_T, expected_result_T, tolerance = 1e-6) + expect_equal(test_C, expected_result_C, tolerance = 1e-6) + expect_equal(test_G, expected_result_G, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-getMutMatrix.R b/tests/testthat/test-getMutMatrix.R new file mode 100644 index 0000000..476a1f5 --- /dev/null +++ b/tests/testthat/test-getMutMatrix.R @@ -0,0 +1,60 @@ +test_that("Testing if getMutMatrix.R works.", { + # These are the reference alleles for the first 4 positions. 
+ ref_allele <- c("G", "A", "T", "C") + # All possible combinations. The first allele means the reference allele at this position. + # The second allele means the reads we are observing. + variants <- c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A", + "chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G", + "chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T", + "chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C") + # We generate a matrix to have an overview over the reads we observe per cell and variant. + reads_per_variant <- matrix(0, nrow = 16, ncol = 4, dimnames = list(variants, paste0("Cell_", 1:4))) + reads_per_variant[,1] <- c(0,20,0,0, 23,0,0,0, 0,0,45,0, 0,0,0,25) + reads_per_variant[,2] <- c(0,0,0,0, 20,20,0,0, 40,0,0,0, 0,0,0,0) + reads_per_variant[,3] <- c(0,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + reads_per_variant[,4] <- c(20,0,0,0, 0,0,0,0, 0,0,0,30, 0,0,0,0) + # We generate sparse matrices for the SummarizedExperimentObject. + A_counts_fw <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + A_counts_rev <- Matrix::sparseMatrix(i = c(2,1), j = c(1,4), x = c(20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_fw <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + C_counts_rev <- Matrix::sparseMatrix(i = 4, j = 1, x = 25, dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_fw <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + G_counts_rev <- Matrix::sparseMatrix(i = c(1,1,2), j = c(1,2,2), x = c(23,20,20), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_fw <- Matrix::sparseMatrix(i = c(3,1,4,4), j = c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + T_counts_rev <- Matrix::sparseMatrix(i = c(3,1,4,4), j = 
c(1,2,3,4), x = c(45,40,30,30), dims = c(4,4), + dimnames = list(NULL, paste0("Cell_", 1:4))) + # We add the sparse matrices together to get the coverage. + As <- as.matrix(A_counts_fw) + as.matrix(A_counts_rev) + Cs <- as.matrix(C_counts_fw) + as.matrix(C_counts_rev) + Gs <- as.matrix(G_counts_fw) + as.matrix(G_counts_rev) + Ts <- as.matrix(T_counts_fw) + as.matrix(T_counts_rev) + coverage <- As + Cs + coverage <- coverage + Gs + coverage <- coverage + Ts + coverage <- as(coverage, "CsparseMatrix") + # We generate a GRanges object for the SummarizedExperimentObject. + rowRanges <- GenomicRanges::GRanges(seqnames = "chrM", ranges = IRanges::IRanges(start = 1:4, end = 1:4, width = 1), strand = "*", refAllele = ref_allele) + # We generate the actual SummarizedExperimentObject. + se <- SummarizedExperiment::SummarizedExperiment(assays = list(A_counts_fw = A_counts_fw, A_counts_rev = A_counts_rev, + C_counts_fw = C_counts_fw, C_counts_rev = C_counts_rev, + G_counts_fw = G_counts_fw, G_counts_rev = G_counts_rev, + T_counts_fw = T_counts_fw, T_counts_rev = T_counts_rev, + coverage = coverage), + rowRanges = rowRanges) + # The expected result. 
+ expected_result_A <- Matrix::sparseMatrix(i = 1, j = 4, x = 1, dims = c(3,4), dimnames = list(c("chrM_1_G_A", "chrM_3_T_A", "chrM_4_C_A"), paste0("Cell_", 1:4))) + expected_result_C <- Matrix::sparseMatrix(i = integer(0), j = integer(0), x = double(0), dims = c(3,4), dimnames = list(c("chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C"), paste0("Cell_", 1:4))) + expected_result_G <- Matrix::sparseMatrix(i = 1, j = 2, x = 1, dims = c(3,4), dimnames = list(c("chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G"), paste0("Cell_", 1:4))) + expected_result_T <- Matrix::sparseMatrix(i = c(1,3,3), j = c(2,3,4), x = c(2/3,1,1), dims = c(3,4), dimnames = list(c("chrM_1_G_T", "chrM_2_A_T", "chrM_4_C_T"), paste0("Cell_", 1:4))) + expect_equal(sigurd::getMutMatrix(SE = se, cov = coverage + 0.000001, letter = "A", ref_allele = ref_allele, chromosome_prefix = "chrM"), expected_result_A, tolerance = 1e-6) + expect_equal(sigurd::getMutMatrix(SE = se, cov = coverage + 0.000001, letter = "C", ref_allele = ref_allele, chromosome_prefix = "chrM"), expected_result_C, tolerance = 1e-6) + expect_equal(sigurd::getMutMatrix(SE = se, cov = coverage + 0.000001, letter = "G", ref_allele = ref_allele, chromosome_prefix = "chrM"), expected_result_G, tolerance = 1e-6) + expect_equal(sigurd::getMutMatrix(SE = se, cov = coverage + 0.000001, letter = "T", ref_allele = ref_allele, chromosome_prefix = "chrM"), expected_result_T, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-getReadMatrix.R b/tests/testthat/test-getReadMatrix.R new file mode 100644 index 0000000..56d31b9 --- /dev/null +++ b/tests/testthat/test-getReadMatrix.R @@ -0,0 +1,26 @@ +test_that("Testing getReadMatrix.R", { + # We load the input object. 
+ input_object <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Data.rds")) + test_A <- sigurd::getReadMatrix(SE = input_object, letter = "A", chromosome_prefix = "chrM") + test_C <- sigurd::getReadMatrix(SE = input_object, letter = "C", chromosome_prefix = "chrM") + test_G <- sigurd::getReadMatrix(SE = input_object, letter = "G", chromosome_prefix = "chrM") + test_T <- sigurd::getReadMatrix(SE = input_object, letter = "T", chromosome_prefix = "chrM") + # We generate the expected results. + expected_result_A <- Matrix::sparseMatrix(i = c(1:4,1:2,rep(1:4,2)), j = c(rep(1,4),rep(2,2),rep(3,4),rep(4,4)), x = c(20,40,60,80,50,50,50,50,50,50,10,30,20,60), dims = c(4,4), dimnames = list(c("chrM_1_G_A", "chrM_2_A_A", "chrM_3_T_A", "chrM_4_C_A"), paste0("Cell_", 1:4))) + expected_result_C <- Matrix::sparseMatrix(i = rep(1:4,4), j = rep(1:4,each=4), x = c(33,76,36,114,141,141,63,168,64,35,89,89,168,48,91,114), dims = c(4,4), dimnames = list(c("chrM_1_G_C", "chrM_2_A_C", "chrM_3_T_C", "chrM_4_C_C"), paste0("Cell_", 1:4))) + expected_result_G <- Matrix::sparseMatrix(i = rep(1:4,4), j = rep(1:4,each=4), x = c(157,115,153,36,25,76,148,168,75,87,50,123,116,110,104,110), dims = c(4,4), dimnames = list(c("chrM_1_G_G", "chrM_2_A_G", "chrM_3_T_G", "chrM_4_C_G"), paste0("Cell_", 1:4))) + expected_result_T <- Matrix::sparseMatrix(i = rep(1:4,4), j = rep(1:4,each=4), x = c(69,145,86,93,112,84,53,98,41,166,122,85,6,86,116,98), dims = c(4,4), dimnames = list(c("chrM_1_G_T", "chrM_2_A_T", "chrM_3_T_T", "chrM_4_C_T"), paste0("Cell_", 1:4))) + # We perform the tests. 
+ expect_equal(test_A, expected_result_A, tolerance = 1e-6) + expect_equal(test_C, expected_result_C, tolerance = 1e-6) + expect_equal(test_G, expected_result_G, tolerance = 1e-6) + expect_equal(test_T, expected_result_T, tolerance = 1e-6) +}) + + + + + + + + diff --git a/tests/testthat/test-getRefMatrix.R b/tests/testthat/test-getRefMatrix.R new file mode 100644 index 0000000..54ca6c7 --- /dev/null +++ b/tests/testthat/test-getRefMatrix.R @@ -0,0 +1,18 @@ +test_that("Testing getRefMatrix.R", { + # We load the input object. + input_object <- readRDS(paste0(getwd(), "/test_data/LoadingMAEGATK_typewise_Test_Data.rds")) + test_A <- sigurd::getRefMatrix(SE = input_object, letter = "A", chromosome_prefix = "chrM") + test_C <- sigurd::getRefMatrix(SE = input_object, letter = "C", chromosome_prefix = "chrM") + test_G <- sigurd::getRefMatrix(SE = input_object, letter = "G", chromosome_prefix = "chrM") + test_T <- sigurd::getRefMatrix(SE = input_object, letter = "T", chromosome_prefix = "chrM") + # We generate the expected results. + expected_result_A <- c(Cell_1 = 40, Cell_2 = 50, Cell_3 = 50, Cell_4 = 30) + expected_result_C <- c(Cell_1 = 114, Cell_2 = 168, Cell_3 = 89, Cell_4 = 114) + expected_result_G <- c(Cell_1 = 157, Cell_2 = 25, Cell_3 = 75, Cell_4 = 116) + expected_result_T <- c(Cell_1 = 86, Cell_2 = 53, Cell_3 = 122, Cell_4 = 116) + # We perform the tests. 
+ expect_equal(test_A, expected_result_A, tolerance = 1e-6) + expect_equal(test_C, expected_result_C, tolerance = 1e-6) + expect_equal(test_G, expected_result_G, tolerance = 1e-6) + expect_equal(test_T, expected_result_T, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-get_consensus.R b/tests/testthat/test-get_consensus.R new file mode 100644 index 0000000..3cfcb61 --- /dev/null +++ b/tests/testthat/test-get_consensus.R @@ -0,0 +1,58 @@ +test_that("Testing get_consensus.R", { + input_object <- list(A = Matrix::sparseMatrix(i = rep(1,2), j = c(1,2), x = c(8,2), dims = c(1,4), dimnames = list("chrM_2_A_A", paste0("Cell_", 1:4))), + C = Matrix::sparseMatrix(i = rep(1,3), j = c(1,3,4), x = c(4,1,1), dims = c(1,4), dimnames = list("chrM_4_C_A", paste0("Cell_", 1:4))), + G = Matrix::sparseMatrix(i = rep(1,3), j = c(1,2,4), x = c(2,3,8), dims = c(1,4), dimnames = list("chrM_1_G_A", paste0("Cell_", 1:4))), + T = Matrix::sparseMatrix(i = rep(1,1), j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_3_T_A", paste0("Cell_", 1:4))), + N = Matrix::sparseMatrix(i = rep(1,4), j = 1:4, x = c(8,4,2,1), dims = c(1,4), dimnames = list("chrM_3107_N_A", paste0("Cell_", 1:4)))) + test_A_C <- get_consensus(alt_base = "C", ref_base = "A", input_matrix = as.matrix(input_object[[1]]), chromosome_prefix = "chrM") + test_A_G <- get_consensus(alt_base = "G", ref_base = "A", input_matrix = as.matrix(input_object[[1]]), chromosome_prefix = "chrM") + test_A_T <- get_consensus(alt_base = "T", ref_base = "A", input_matrix = as.matrix(input_object[[1]]), chromosome_prefix = "chrM") + test_C_A <- get_consensus(alt_base = "A", ref_base = "C", input_matrix = as.matrix(input_object[[2]]), chromosome_prefix = "chrM") + test_C_G <- get_consensus(alt_base = "G", ref_base = "C", input_matrix = as.matrix(input_object[[2]]), chromosome_prefix = "chrM") + test_C_T <- get_consensus(alt_base = "T", ref_base = "C", input_matrix = as.matrix(input_object[[2]]), chromosome_prefix = "chrM") + test_G_A <- 
get_consensus(alt_base = "A", ref_base = "G", input_matrix = as.matrix(input_object[[3]]), chromosome_prefix = "chrM") + test_G_C <- get_consensus(alt_base = "C", ref_base = "G", input_matrix = as.matrix(input_object[[3]]), chromosome_prefix = "chrM") + test_G_T <- get_consensus(alt_base = "T", ref_base = "G", input_matrix = as.matrix(input_object[[3]]), chromosome_prefix = "chrM") + test_T_A <- get_consensus(alt_base = "A", ref_base = "T", input_matrix = as.matrix(input_object[[4]]), chromosome_prefix = "chrM") + test_T_C <- get_consensus(alt_base = "C", ref_base = "T", input_matrix = as.matrix(input_object[[4]]), chromosome_prefix = "chrM") + test_T_G <- get_consensus(alt_base = "G", ref_base = "T", input_matrix = as.matrix(input_object[[4]]), chromosome_prefix = "chrM") + test_N_A <- get_consensus(alt_base = "A", ref_base = "N", input_matrix = as.matrix(input_object[[5]]), chromosome_prefix = "chrM") + test_N_C <- get_consensus(alt_base = "C", ref_base = "N", input_matrix = as.matrix(input_object[[5]]), chromosome_prefix = "chrM") + test_N_G <- get_consensus(alt_base = "G", ref_base = "N", input_matrix = as.matrix(input_object[[5]]), chromosome_prefix = "chrM") + test_N_T <- get_consensus(alt_base = "T", ref_base = "N", input_matrix = as.matrix(input_object[[5]]), chromosome_prefix = "chrM") + # We generate the expected results. 
+ expected_result_A_C <- Matrix::sparseMatrix(i = 1, j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_2_A_C", paste0("Cell_", 1:4))) + expected_result_A_G <- Matrix::sparseMatrix(i = c(1,1), j = c(1,2), x = 1:2, dims = c(1,4), dimnames = list("chrM_2_A_G", paste0("Cell_", 1:4))) + expected_result_A_T <- Matrix::sparseMatrix(i = 1, j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_2_A_T", paste0("Cell_", 1:4))) + expected_result_C_A <- Matrix::sparseMatrix(i = 1, j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_4_C_A", paste0("Cell_", 1:4))) + expected_result_C_G <- Matrix::sparseMatrix(i = 1, j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_4_C_G", paste0("Cell_", 1:4))) + expected_result_C_T <- Matrix::sparseMatrix(i = c(1,1,1), j = c(1,3,4), x = c(1,2,2), dims = c(1,4), dimnames = list("chrM_4_C_T", paste0("Cell_", 1:4))) + expected_result_G_A <- Matrix::sparseMatrix(i = c(1,1), j = c(1,4), x = c(1,2), dims = c(1,4), dimnames = list("chrM_1_G_A", paste0("Cell_", 1:4))) + expected_result_G_C <- Matrix::sparseMatrix(i = 1, j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_1_G_C", paste0("Cell_", 1:4))) + expected_result_G_T <- Matrix::sparseMatrix(i = c(1,1), j = c(1,2), x = c(1,3), dims = c(1,4), dimnames = list("chrM_1_G_T", paste0("Cell_", 1:4))) + expected_result_T_A <- Matrix::sparseMatrix(i = 1, j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_3_T_A", paste0("Cell_", 1:4))) + expected_result_T_C <- Matrix::sparseMatrix(i = 1, j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_3_T_C", paste0("Cell_", 1:4))) + expected_result_T_G <- Matrix::sparseMatrix(i = 1, j = 1, x = 1, dims = c(1,4), dimnames = list("chrM_3_T_G", paste0("Cell_", 1:4))) + expected_result_N_A <- Matrix::sparseMatrix(i = 1, j = 1, x = 2, dims = c(1,4), dimnames = list("chrM_3107_N_A", paste0("Cell_", 1:4))) + expected_result_N_C <- Matrix::sparseMatrix(i = 1, j = 2, x = 2, dims = c(1,4), dimnames = list("chrM_3107_N_C", paste0("Cell_", 1:4))) + expected_result_N_G <- 
Matrix::sparseMatrix(i = 1, j = 3, x = 2, dims = c(1,4), dimnames = list("chrM_3107_N_G", paste0("Cell_", 1:4))) + expected_result_N_T <- Matrix::sparseMatrix(i = 1, j = 4, x = 2, dims = c(1,4), dimnames = list("chrM_3107_N_T", paste0("Cell_", 1:4))) + + # We perform the tests. + expect_equal(test_A_C, expected_result_A_C, tolerance = 1e-6) + expect_equal(test_A_G, expected_result_A_G, tolerance = 1e-6) + expect_equal(test_A_T, expected_result_A_T, tolerance = 1e-6) + expect_equal(test_C_A, expected_result_C_A, tolerance = 1e-6) + expect_equal(test_C_G, expected_result_C_G, tolerance = 1e-6) + expect_equal(test_C_T, expected_result_C_T, tolerance = 1e-6) + expect_equal(test_G_A, expected_result_G_A, tolerance = 1e-6) + expect_equal(test_G_C, expected_result_G_C, tolerance = 1e-6) + expect_equal(test_G_T, expected_result_G_T, tolerance = 1e-6) + expect_equal(test_T_A, expected_result_T_A, tolerance = 1e-6) + expect_equal(test_T_C, expected_result_T_C, tolerance = 1e-6) + expect_equal(test_T_G, expected_result_T_G, tolerance = 1e-6) + expect_equal(test_N_A, expected_result_N_A, tolerance = 1e-6) + expect_equal(test_N_C, expected_result_N_C, tolerance = 1e-6) + expect_equal(test_N_G, expected_result_N_G, tolerance = 1e-6) + expect_equal(test_N_T, expected_result_N_T, tolerance = 1e-6) +}) diff --git a/tests/testthat/test-ggsci_pal.R b/tests/testthat/test-ggsci_pal.R new file mode 100644 index 0000000..5787d80 --- /dev/null +++ b/tests/testthat/test-ggsci_pal.R @@ -0,0 +1,5 @@ +testthat::test_that("ggsci_pal returns a valid color palette for aaas", { + colors <- sigurd::ggsci_pal("aaas")(10) + colors_expected <- c("#3B4992FF", "#EE0000FF", "#008B45FF", "#631879FF", "#008280FF", "#BB0021FF", "#5F559BFF", "#A20056FF", "#808180FF", "#1B1919FF") + testthat::expect_true(all(colors == colors_expected), "Testing if the colors of the aaas palette are as expected.") +}) diff --git a/tests/testthat/test_data/Filtering_Alts_Threshold_ExpectedResults.rds 
b/tests/testthat/test_data/Filtering_Alts_Threshold_ExpectedResults.rds new file mode 100644 index 0000000..4b94614 Binary files /dev/null and b/tests/testthat/test_data/Filtering_Alts_Threshold_ExpectedResults.rds differ diff --git a/tests/testthat/test_data/Filtering_Blacklist_ExpectedResults.rds b/tests/testthat/test_data/Filtering_Blacklist_ExpectedResults.rds new file mode 100644 index 0000000..030127f Binary files /dev/null and b/tests/testthat/test_data/Filtering_Blacklist_ExpectedResults.rds differ diff --git a/tests/testthat/test_data/Filtering_Blacklisted_Barcodes.tsv b/tests/testthat/test_data/Filtering_Blacklisted_Barcodes.tsv new file mode 100644 index 0000000..6cf0504 --- /dev/null +++ b/tests/testthat/test_data/Filtering_Blacklisted_Barcodes.tsv @@ -0,0 +1 @@ +Test_Cell_3 diff --git a/tests/testthat/test_data/Filtering_CellThreshold_ExpectedResults.rds b/tests/testthat/test_data/Filtering_CellThreshold_ExpectedResults.rds new file mode 100644 index 0000000..9cc50c3 Binary files /dev/null and b/tests/testthat/test_data/Filtering_CellThreshold_ExpectedResults.rds differ diff --git a/tests/testthat/test_data/Filtering_Fraction_Threshold_ExpectedResults.rds b/tests/testthat/test_data/Filtering_Fraction_Threshold_ExpectedResults.rds new file mode 100644 index 0000000..99e2d81 Binary files /dev/null and b/tests/testthat/test_data/Filtering_Fraction_Threshold_ExpectedResults.rds differ diff --git a/tests/testthat/test_data/Filtering_VariantThreshold_ExpectedResults.rds b/tests/testthat/test_data/Filtering_VariantThreshold_ExpectedResults.rds new file mode 100644 index 0000000..101b961 Binary files /dev/null and b/tests/testthat/test_data/Filtering_VariantThreshold_ExpectedResults.rds differ diff --git a/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv b/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv new file mode 100644 index 0000000..b77bba0 --- /dev/null +++ 
b/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv @@ -0,0 +1,4 @@ +Cell_1 +Cell_2 +Cell_3 +Cell_4 diff --git a/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_Data.rds b/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_Data.rds new file mode 100644 index 0000000..f39f52d Binary files /dev/null and b/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_Data.rds differ diff --git a/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds b/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds new file mode 100644 index 0000000..101b961 Binary files /dev/null and b/tests/testthat/test_data/LoadingMAEGATK_typewise_Test_ExpectedResults.rds differ diff --git a/tests/testthat/test_data/MAEGATK_inputfile_test.csv b/tests/testthat/test_data/MAEGATK_inputfile_test.csv new file mode 100644 index 0000000..c936a91 --- /dev/null +++ b/tests/testthat/test_data/MAEGATK_inputfile_test.csv @@ -0,0 +1,2 @@ +patient,sample,source,type,bam,input_path,cells +Test,Test,MAEGATK,scRNAseq_MT,NA,test_data/LoadingMAEGATK_typewise_Test_Data.rds,test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv diff --git a/tests/testthat/test_data/Merging_SE_list_ExpectedResults.rds b/tests/testthat/test_data/Merging_SE_list_ExpectedResults.rds new file mode 100644 index 0000000..322043d Binary files /dev/null and b/tests/testthat/test_data/Merging_SE_list_ExpectedResults.rds differ diff --git a/tests/testthat/test_data/VCF_Test/VCF_ExpectedResults.rds b/tests/testthat/test_data/VCF_Test/VCF_ExpectedResults.rds new file mode 100644 index 0000000..64eab09 Binary files /dev/null and b/tests/testthat/test_data/VCF_Test/VCF_ExpectedResults.rds differ diff --git a/tests/testthat/test_data/VCF_Test/VCF_inputfile_test.csv b/tests/testthat/test_data/VCF_Test/VCF_inputfile_test.csv new file mode 100644 index 0000000..8f240fe --- /dev/null +++ b/tests/testthat/test_data/VCF_Test/VCF_inputfile_test.csv @@ -0,0 +1,2 @@ 
+patient,sample,source,type,bam,input_path,cells +Test,Test,VCF,scRNAseq_Somatic,NA,test_data/VCF_Test/cellSNP.cells.vcf,test_data/LoadingMAEGATK_typewise_Test_Barcodes.tsv diff --git a/tests/testthat/test_data/VCF_Test/cellSNP.base.vcf b/tests/testthat/test_data/VCF_Test/cellSNP.base.vcf new file mode 100644 index 0000000..ad3d52d --- /dev/null +++ b/tests/testthat/test_data/VCF_Test/cellSNP.base.vcf @@ -0,0 +1,5 @@ +##fileformat=VCFv4.2 +#CHROM POS ID REF ALT QUAL FILTER INFO +1 200 . C G . PASS AD=0;DP=1;OTH=0 +17 200 . G A . PASS AD=0;DP=2;OTH=0 +2 200 . G C . PASS AD=1;DP=1;OTH=0 diff --git a/tests/testthat/test_data/VCF_Test/cellSNP.cells.vcf b/tests/testthat/test_data/VCF_Test/cellSNP.cells.vcf new file mode 100644 index 0000000..f27f7e3 --- /dev/null +++ b/tests/testthat/test_data/VCF_Test/cellSNP.cells.vcf @@ -0,0 +1,41 @@ +##fileformat=VCFv4.2 +##source=cellSNP_v1.2.3 +##FILTER= +##FILTER= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT AAACGGGCATTTGCCC-1 AAGCCGCGTTAGGGTG-1 ACACCAAAGCATGGCA-2 ACGCCAGTCACTTCAT-2 ACTGATGCATGCCTAA-1 AGATTGCCACCCATTC-2 AGTCTTTAGAGTGAGA-2 AGTGTCATCATGTCTT-1 ATAACGCGTCGAACAG-2 CACAGTAAGTGAAGTT-2 CAGATCACATACTCTT-1 CGAGCCAAGGCGATAC-1 CTCCTAGTCGGTGTCG-2 CTTAGGATCTCTTATG-2 GAAACTCCAAGTCTGT-2 GAACATCAGTCCATAC-1 GACTGCGCAGTAAGCG-1 TACTTACCACCCATGG-2 TACTTACTCTATCCCG-2 TGAGCATAGCGATATA-2 +1 200 . C G . PASS AD=0;DP=1;OTH=0 GT:AD:DP:OTH:PL:ALL .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. 
0/0:0:1:0:0,3,42:0,1,0,0,0 .:.:.:.:.:. +17 200 . G A . PASS AD=0;DP=2;OTH=0 GT:AD:DP:OTH:PL:ALL .:.:.:.:.:. 0/0:0:1:0:0,3,42:0,0,1,0,0 .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. 0/0:0:1:0:0,3,42:0,0,1,0,0 .:.:.:.:.:. +2 200 . G C . PASS AD=1;DP=1;OTH=0 GT:AD:DP:OTH:PL:ALL .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. 1/1:1:1:0:42,3,0:0,1,0,0,0 .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. .:.:.:.:.:. diff --git a/tests/testthat/test_data/VCF_Test/cellSNP.samples.tsv b/tests/testthat/test_data/VCF_Test/cellSNP.samples.tsv new file mode 100644 index 0000000..208a990 --- /dev/null +++ b/tests/testthat/test_data/VCF_Test/cellSNP.samples.tsv @@ -0,0 +1,20 @@ +AAACGGGCATTTGCCC-1 +AAGCCGCGTTAGGGTG-1 +ACACCAAAGCATGGCA-2 +ACGCCAGTCACTTCAT-2 +ACTGATGCATGCCTAA-1 +AGATTGCCACCCATTC-2 +AGTCTTTAGAGTGAGA-2 +AGTGTCATCATGTCTT-1 +ATAACGCGTCGAACAG-2 +CACAGTAAGTGAAGTT-2 +CAGATCACATACTCTT-1 +CGAGCCAAGGCGATAC-1 +CTCCTAGTCGGTGTCG-2 +CTTAGGATCTCTTATG-2 +GAAACTCCAAGTCTGT-2 +GAACATCAGTCCATAC-1 +GACTGCGCAGTAAGCG-1 +TACTTACCACCCATGG-2 +TACTTACTCTATCCCG-2 +TGAGCATAGCGATATA-2 diff --git a/tests/testthat/test_data/VCF_Test/cellSNP.tag.AD.mtx b/tests/testthat/test_data/VCF_Test/cellSNP.tag.AD.mtx new file mode 100644 index 0000000..63b6819 --- /dev/null +++ b/tests/testthat/test_data/VCF_Test/cellSNP.tag.AD.mtx @@ -0,0 +1,4 @@ +%%MatrixMarket matrix coordinate integer general +% +3 20 1 +3 7 1 diff --git a/tests/testthat/test_data/VCF_Test/cellSNP.tag.DP.mtx b/tests/testthat/test_data/VCF_Test/cellSNP.tag.DP.mtx new file mode 100644 index 0000000..a78ea30 --- /dev/null +++ b/tests/testthat/test_data/VCF_Test/cellSNP.tag.DP.mtx @@ -0,0 +1,7 @@ +%%MatrixMarket matrix coordinate integer general +% +3 20 4 +1 19 1 +2 2 1 +2 19 1 +3 7 
1 diff --git a/tests/testthat/test_data/VCF_Test/cellSNP.tag.OTH.mtx b/tests/testthat/test_data/VCF_Test/cellSNP.tag.OTH.mtx new file mode 100644 index 0000000..11e5c12 --- /dev/null +++ b/tests/testthat/test_data/VCF_Test/cellSNP.tag.OTH.mtx @@ -0,0 +1,3 @@ +%%MatrixMarket matrix coordinate integer general +% +3 20 0 diff --git a/tests/testthat/test_data/VCF_Test/test.vcf b/tests/testthat/test_data/VCF_Test/test.vcf new file mode 100644 index 0000000..82eccd8 --- /dev/null +++ b/tests/testthat/test_data/VCF_Test/test.vcf @@ -0,0 +1,11 @@ +##fileformat=VCFv4.2 +##FORMAT= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 200 . C G . . . GT ./. +17 200 . G A . . . GT ./. +2 200 . G C . . . GT ./. +7 200 . A G . . . GT ./. diff --git a/tests/testthat/test_data/VarTrix_Test/SNV.loci.txt b/tests/testthat/test_data/VarTrix_Test/SNV.loci.txt new file mode 100644 index 0000000..d3fe71e --- /dev/null +++ b/tests/testthat/test_data/VarTrix_Test/SNV.loci.txt @@ -0,0 +1,11 @@ +##fileformat=VCFv4.2 +##FORMAT= +##contig= +##contig= +##contig= +##contig= +#CHROM POS REF ALT +1 200 C G +17 200 G A +2 200 G C +7 200 A G diff --git a/tests/testthat/test_data/VarTrix_Test/Test/out_matrix_consensus.mtx b/tests/testthat/test_data/VarTrix_Test/Test/out_matrix_consensus.mtx new file mode 100644 index 0000000..114e54c --- /dev/null +++ b/tests/testthat/test_data/VarTrix_Test/Test/out_matrix_consensus.mtx @@ -0,0 +1,7 @@ +%%MatrixMarket matrix coordinate real general +% written by sprs +4 20 4 +1 20 1 +2 15 1 +2 20 1 +3 18 2 diff --git a/tests/testthat/test_data/VarTrix_Test/Test/out_matrix_coverage.mtx b/tests/testthat/test_data/VarTrix_Test/Test/out_matrix_coverage.mtx new file mode 100644 index 0000000..da96b2c --- /dev/null +++ b/tests/testthat/test_data/VarTrix_Test/Test/out_matrix_coverage.mtx @@ -0,0 +1,7 @@ +%%MatrixMarket matrix coordinate real general +% written by sprs +4 20 4 +1 20 0 +2 20 0 +2 15 0 +3 18 7 diff --git 
a/tests/testthat/test_data/VarTrix_Test/Test/ref_matrix_coverage.mtx b/tests/testthat/test_data/VarTrix_Test/Test/ref_matrix_coverage.mtx new file mode 100644 index 0000000..0526b6a --- /dev/null +++ b/tests/testthat/test_data/VarTrix_Test/Test/ref_matrix_coverage.mtx @@ -0,0 +1,7 @@ +%%MatrixMarket matrix coordinate real general +% written by sprs +4 20 4 +1 20 1 +2 20 6 +2 15 1 +3 18 0 diff --git a/tests/testthat/test_data/VarTrix_Test/barcodes.tsv b/tests/testthat/test_data/VarTrix_Test/barcodes.tsv new file mode 100644 index 0000000..e794cf9 --- /dev/null +++ b/tests/testthat/test_data/VarTrix_Test/barcodes.tsv @@ -0,0 +1,20 @@ +CGAGCCAAGGCGATAC-1 +GAAACTCCAAGTCTGT-2 +ATAACGCGTCGAACAG-2 +AAACGGGCATTTGCCC-1 +AGTGTCATCATGTCTT-1 +CTTAGGATCTCTTATG-2 +ACGCCAGTCACTTCAT-2 +ACACCAAAGCATGGCA-2 +CTCCTAGTCGGTGTCG-2 +CACAGTAAGTGAAGTT-2 +TACTTACCACCCATGG-2 +TGAGCATAGCGATATA-2 +GAACATCAGTCCATAC-1 +AGATTGCCACCCATTC-2 +AAGCCGCGTTAGGGTG-1 +CAGATCACATACTCTT-1 +ACTGATGCATGCCTAA-1 +AGTCTTTAGAGTGAGA-2 +GACTGCGCAGTAAGCG-1 +TACTTACTCTATCCCG-2 diff --git a/tests/testthat/test_data/VarTrix_Test/inputfile_test.csv b/tests/testthat/test_data/VarTrix_Test/inputfile_test.csv new file mode 100644 index 0000000..c859097 --- /dev/null +++ b/tests/testthat/test_data/VarTrix_Test/inputfile_test.csv @@ -0,0 +1,2 @@ +patient,sample,source,type,bam,input_path,cells +Test,Test,VarTrix,scRNAseq_Somatic,test_data/VarTrix_Test/Test/test.bam,test_data/VarTrix_Test/,test_data/VarTrix_Test/barcodes.tsv diff --git a/tests/testthat/test_data/VarTrix_Test/test.rds b/tests/testthat/test_data/VarTrix_Test/test.rds new file mode 100644 index 0000000..50c72ec Binary files /dev/null and b/tests/testthat/test_data/VarTrix_Test/test.rds differ diff --git a/tests/testthat/test_data/VarTrix_Test/test.vcf b/tests/testthat/test_data/VarTrix_Test/test.vcf new file mode 100644 index 0000000..82eccd8 --- /dev/null +++ b/tests/testthat/test_data/VarTrix_Test/test.vcf @@ -0,0 +1,11 @@ +##fileformat=VCFv4.2 +##FORMAT= 
+##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 200 . C G . . . GT ./. +17 200 . G A . . . GT ./. +2 200 . G C . . . GT ./. +7 200 . A G . . . GT ./. diff --git a/tests/testthat/test_data/test.bam b/tests/testthat/test_data/test.bam new file mode 100644 index 0000000..9a19bb8 Binary files /dev/null and b/tests/testthat/test_data/test.bam differ diff --git a/tests/testthat/test_data/test.bam.bai b/tests/testthat/test_data/test.bam.bai new file mode 100644 index 0000000..aab23a7 Binary files /dev/null and b/tests/testthat/test_data/test.bam.bai differ diff --git a/vignettes/SiGURD.Rmd b/vignettes/SiGURD.Rmd index bd3d3aa..4c5cb51 100644 --- a/vignettes/SiGURD.Rmd +++ b/vignettes/SiGURD.Rmd @@ -1,5 +1,5 @@ --- -title: "SiGURD vignette" +title: "SiGURD" author: - name: Martin Grasshoff affilitation: @@ -13,7 +13,11 @@ author: Faculty of Medicine, RWTH Aachen University, Aachen, 52074 Germany -output: html_document +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{SiGURD} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8] --- ```{r setup, include=FALSE} @@ -45,14 +49,14 @@ These files are needed for VarTrix and not MGATK/MAEGATK. Since MAEGATK only ana ```{r InputVCF, warning = FALSE, message = FALSE, output = FALSE} vcf_path <- system.file("extdata", "CosmicSubset_filtered.vcf", package = "sigurd") -vcf <- readVcf(vcf_path) -vcf_info <- info(vcf) +vcf <- VariantAnnotation::readVcf(vcf_path) +vcf_info <- VariantAnnotation::info(vcf) print(vcf) print(vcf_info) vcf_path_mt <- system.file("extdata", "chrM_Input_VCF_NoMAF_Filtering.vcf", package = "sigurd") -vcf_mt <- readVcf(vcf_path_mt) -vcf_mt_info <- info(vcf_mt) +vcf_mt <- VariantAnnotation::readVcf(vcf_path_mt) +vcf_mt_info <- VariantAnnotation::info(vcf_mt) print(vcf_mt) print(vcf_mt_info) @@ -76,8 +80,20 @@ Since the MT results are denser, they take longer to load. 
Sample1_scRNAseq_Somatic <- LoadingVarTrix_typewise(samples_file = sample_path, vcf_path = vcf_path, patient = "Sample1", type_use = "scRNAseq_Somatic") Sample1_scRNAseq_MT <- LoadingMAEGATK_typewise(samples_file = sample_path, patient = "Sample1", type_use = "scRNAseq_MT") +samples_file = sample_path +patient = "Sample1" +type_use = "scRNAseq_MT" +samples_path = NULL +chromosome_prefix = "chrM" +min_cells = 2 +barcodes_path = NULL +verbose = TRUE Sample1_combined <- CombineSEobjects(se_somatic = Sample1_scRNAseq_Somatic, se_MT = Sample1_scRNAseq_MT, suffixes = c("_somatic", "_MT")) +se_somatic = Sample1_scRNAseq_Somatic +se_MT = Sample1_scRNAseq_MT +suffixes = c("_somatic", "_MT") + rm(Sample1_scRNAseq_Somatic, Sample1_scRNAseq_MT) Sample1_combined <- Filtering(Sample1_combined, min_cells_per_variant = 2, fraction_threshold = 0.05) Sample1_combined <- VariantBurden(Sample1_combined) @@ -92,7 +108,7 @@ https://www.nature.com/articles/s41587-022-01210-8 The heatmap needs some time to plot, since the cells are clustered. 
-```{r VariantsOfInterest, warning = FALSE, message = FALSE, output = FALSE} +```{r VariantsOfInterest, warning = FALSE, message = FALSE, output = FALSE, fig.width = 6} voi_ch <- VariantQuantileThresholding(SE = Sample1_combined, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9)) hm <- HeatmapVoi(SE = Sample1_combined, voi = voi_ch) print(hm) @@ -106,9 +122,9 @@ Since the possible number of tests/correlations is quite large, you can use mult ```{r VariantAssociation, warning = FALSE, message = FALSE, output = FALSE} -#Sample1_split_rows <- RowWiseSplit(Sample1_combined, remove_nocalls = FALSE) +#Sample1_split_rows <- RowWiseSplit(Sample1_combined, remove_nocalls = FALSE, n_cores = 19) -#results_fishertest <- VariantWiseFisherTest(Sample1_split_rows, n_cores = 8) +#results_fishertest <- VariantWiseFisherTest(Sample1_split_rows, n_cores = 19) #rm(Sample1_split_rows) #variant_association_heatmap <- VariantFisherTestHeatmap(results_fishertest, patient = "Sample1", min_alt_cells = 3)
We calculate the MAF for the MAEGATK results.
We calculate the MAF from a reference reads matrix and an alternative reads matrix. +This function is intended to be used with the mitochondrial genome and not with other somatic mutations.
CalculateAlleleFrequency(reference_reads, alternative_reads)
CalculateAlleleFrequency(reference_reads, alternative_reads, pseudo_count = 0)
CalculateConsensus.Rd
We calculate the consensus information from the MAEGATK results.
We calculate the consensus information from the MAEGATK results. +We set cells that have only alternative reads to 2 (Alternative). +We set cells that have only reference reads to 1 (Reference). +We set cells that have a mixture of alternative and reference reads to 3 (Both). +We set cells that have no reads to 0 (NoCall).
Please note. Cells can have reads for the reference of a specific variant and no reads for the alternative. +The cell can still have reads for the other alternative alleles. Then the cell is still considered as 0 (NoCall) for this variant. +For example: +A cell has at position 3: 0 A reads, 53 T reads, 63 C reads, 148 G reads. +For the variant chrM_3_T_A, the cell would have 53 reference reads, but also reads for other variants at this position. +To make sure that there is no confusion, the cell is set to NoCall.
CalculateConsensus(SE, chromosome_prefix = "chrM")
CalculateConsensus(SE, chromosome_prefix = "chrM", verbose = FALSE)
CalculateQuality( - SE, - variants = rownames(reads_alt), - chromosome_prefix = "chrM" -)
CalculateQuality(SE, variants, chromosome_prefix = "chrM")
We do this for one sample at a time. We want to remove:
all cells that are blacklisted,
all cells that are not in a Seurat object,
all cells that do not have at least one variant with >1 (Reference),
all variants that are for alternative transcripts,
all variants that are always NoCall,
set variants with a VAF below a threshold to reference.
set variants with a VAF below a threshold to NoCall or Reference.
GetCellInfoPerVariant(se, voi_ch)
GetCellInfoPerVariant(se, voi_ch, verbose = FALSE)
GetVariantInfo.Rd
We get the genotyping information for a set of variants. +The function returns a matrix with the values from the specified assay.
GetVariantInfo(SE, information = "consensus", variants = NULL, cells = NULL)
SummarizedExperiment object.
The assay with the desired information. Default: consensus
A vector of variants.
A vector of cell IDs. By default, all cells are returned. Default: NULL
HeatmapVoi(SE, voi, annotation_trait = NULL, column_title = NULL)
HeatmapVoi( + SE, + voi, + annotation_trait = NULL, + column_title = NULL, + remove_empty_cells = FALSE +)
We load the MAEGATK output and transform it to be compatible with the VarTrix output. The input file is a specifically formatted CSV file with all the necessary information to run the analysis. Note that the source column in the input file needs to be one of the following: vartrix, maegatk, mgatk. -This is hard coded and case insensitive.
samples_path
barcodes_path
patient
samples_file = NULL
The minimum number of cells with coverage for a variant. Variants with coverage in fewer than this number of cells are removed. Default = 2
Path to the barcodes file tsv. Default = NULL
Should the function be verbose? Default = TRUE
SetVariantInfo.Rd
We add the genotyping information for a set of variants to a Seurat object. +The function returns a matrix with the values from the specified assay.
SetVariantInfo(SE, seurat_object, information = "consensus", variants = NULL)
The Seurat object.
We generate a heatmap showing the correlation of somatic variants with the MT variants.
We generate a heatmap showing the correlation of somatic variants with the MT variants. +Packages I want to remove. I cannot see where they are used. +ggplot2 parallel rcompanion tidyr
We generate a heatmap showing the Fisher test of somatic variants with the MT variants.
We generate a heatmap showing the Fisher test of somatic variants with the MT variants. +Packages I want to remove. +ggplot2 parallel rcompanion tidyr
We correlate the variants with each other using the Pearson correlation. -This function calls CalculateCorrelationPValue to perform the actual correlation.
VariantWiseCorrelation(variants_list, n_cores = 1, p_value_adjustment = "fdr")
VariantWiseCorrelation( + variants_list, + n_cores = 1, + p_value_adjustment = "fdr", + verbose = TRUE +)
We perform the Fisher test to determine which variants are associated. -This function calls CalculateFisherTestPValue to perform the actual testing.
VariantWiseFisherTest(variants_list, n_cores = 1, p_value_adjustment = "fdr")
VariantWiseFisherTest( + variants_list, + n_cores = 1, + p_value_adjustment = "fdr", + verbose = TRUE +)
char_to_numeric.Rd
A function to convert the heterozygous/homozygous information from the VCF to the consensus information from VarTrix. +It is only used in LoadingVCF_typewise.R.
char_to_numeric(char_value)
What is the genotype encoding you want to convert?
getMutMatrix.Rd
This function gets the allele frequency for a specific allele. It is used in computeAFMutMatrix. +Source: https://github.com/petervangalen/MAESTER-2021
getMutMatrix(SE, cov, letter, ref_allele, chromosome_prefix)
The coverage matrix from MAEGATK/MGATK.
The base we are interested in.
Vector of reference alleles.
The chromosome prefix used.
We get the consensus information for a specific matrix.
We get the consensus information for a specific matrix. +I want to remove some packages if they are not needed. See below which package apparently wasn't needed. +Package to remove: dplyr, SummarizedExperiment
The alternative base.
The reference base.
CalculateCorrelationPValue()
HeatmapVoi()
LoadingVCF_typewise()
LoadingVarTrix_typewise()
VariantBurden()
combine_NAMES()
getReadMatrix()
save_object(object, file_name, file_format = NULL)
save_object(object, file_name, file_format = "zstd")