diff --git a/DESCRIPTION b/DESCRIPTION index 3b8532d..4d3decd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: sigurd Type: Package Title: Single cell Genotyping Using RNA Data -Version: 0.2.23 +Version: 0.2.32 Authors@R: c( person(given = "Martin", family = "Grasshoff", @@ -20,21 +20,38 @@ Encoding: UTF-8 LazyData: true Imports: archive, - bigmemory, - circlize, + BiocGenerics, ComplexHeatmap, + circlize, + data.table, dplyr, fastmatch, + GenomeInfoDb, ggplot2, + ggsci, + glue, grid, - magrittr, Matrix, + MatrixGenerics, + magrittr, + methods, parallel, rcompanion, S4Vectors, Seurat, SummarizedExperiment, + scales, + tibble, tidyr, tidyverse, VariantAnnotation RoxygenNote: 7.2.3 +Suggests: + GenomicRanges, + IRanges, + knitr, + rmarkdown, + SeuratObject, + testthat (>= 3.0.0) +VignetteBuilder: knitr +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index d71eb89..48515c2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ export(GetCellInfoPerVariant) export(GetVariantInfo) export(HeatmapVoi) export(LoadingMAEGATK_typewise) +export(LoadingVCF_typewise) export(LoadingVarTrix_typewise) export(Merging_SE_list) export(RowWiseSplit) @@ -27,10 +28,12 @@ export(VariantFisherTestHeatmap) export(VariantQuantileThresholding) export(VariantWiseCorrelation) export(VariantWiseFisherTest) +export(char_to_numeric) export(combine_NAMES) export(combine_SparseMatrix) export(computeAFMutMatrix) export(getAltMatrix) +export(getMutMatrix) export(getReadMatrix) export(getRefMatrix) export(get_consensus) @@ -39,26 +42,54 @@ export(load_object) export(save_object) export(sdiv) import(BiocGenerics) -import(ComplexHeatmap) -import(Matrix) -import(MatrixGenerics) -import(Seurat) -import(SummarizedExperiment) -import(VariantAnnotation) import(archive) -import(assertthat) -import(circlize) -import(data.table) -import(dplyr) -import(fastmatch) -import(ggplot2) import(ggsci) -import(glue) -import(grid) -import(parallel) -import(rcompanion) -import(scales) -import(stats) -import(tibble) -import(tidyr) -import(tidyverse) +importFrom(BiocGenerics,start) +importFrom(ComplexHeatmap,Heatmap) +importFrom(ComplexHeatmap,columnAnnotation) +importFrom(ComplexHeatmap,draw) +importFrom(ComplexHeatmap,rowAnnotation) +importFrom(GenomeInfoDb,seqnames) +importFrom(Matrix,colMeans) +importFrom(Matrix,colSums) +importFrom(Matrix,readMM) +importFrom(Matrix,rowMeans) +importFrom(Matrix,rowSums) +importFrom(Matrix,sparseMatrix) +importFrom(Matrix,summary) +importFrom(Matrix,t) +importFrom(S4Vectors,merge) +importFrom(Seurat,AddMetaData) +importFrom(SummarizedExperiment,SummarizedExperiment) +importFrom(SummarizedExperiment,assays) +importFrom(SummarizedExperiment,colData) +importFrom(SummarizedExperiment,rowData) +importFrom(SummarizedExperiment,rowRanges) +importFrom(VariantAnnotation,alt) +importFrom(VariantAnnotation,info) +importFrom(VariantAnnotation,readGeno) +importFrom(VariantAnnotation,readVcf) +importFrom(VariantAnnotation,ref) +importFrom(circlize,colorRamp2) +importFrom(data.table,data.table) +importFrom(dplyr,"%>%") +importFrom(dplyr,left_join) +importFrom(dplyr,sym) +importFrom(glue,glue) +importFrom(grDevices,dev.off) +importFrom(grDevices,png) +importFrom(grid,gpar) +importFrom(grid,unit) +importFrom(methods,as) +importFrom(parallel,mclapply) +importFrom(scales,hue_pal) +importFrom(stats,cor.test) +importFrom(stats,fisher.test) +importFrom(stats,na.omit) +importFrom(stats,p.adjust) +importFrom(stats,quantile) +importFrom(tibble,as_tibble) +importFrom(tibble,tibble) +importFrom(utils,read.csv) +importFrom(utils,read.table) +importFrom(utils,tail) diff --git a/R/AmpliconSupplementing.R b/R/AmpliconSupplementing.R index 28eb7d2..bcf7507 100644 --- a/R/AmpliconSupplementing.R +++ b/R/AmpliconSupplementing.R @@ -2,83 +2,89 @@ #' #'@description #'We replace the values from an scRNAseq experiment with values we have from an amplicon experiment. -#' -#'@import archive Matrix SummarizedExperiment VariantAnnotation +#'@importFrom S4Vectors merge +#'@importFrom SummarizedExperiment colData rowData assays SummarizedExperiment +#'@importFrom stats na.omit #'@param scRNAseq The SummarizedExperiment object containing the scRNAseq data. #'@param amplicon The SummarizedExperiment object containing the amplicon data. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -AmpliconSupplementing <- function(scRNAseq, amplicon){ +AmpliconSupplementing <- function(scRNAseq, amplicon, verbose = TRUE){ # We supplement the scRNAseq data with the amplicon data. - print("We get the new meta data.") - new_meta_data <- merge(colData(scRNAseq), colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE, - suffixes = c("scRNAseq", "Amplicon")) + if(verbose) print("We get the new meta data.") + new_meta_data <- S4Vectors::merge(SummarizedExperiment::colData(scRNAseq), SummarizedExperiment::colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE, + suffixes = c("scRNAseq", "Amplicon")) rownames(new_meta_data) <- new_meta_data$Cell # We add an AverageCoverage column to the new meta data. new_meta_data$AverageCoverage <- new_meta_data$AverageCoveragescRNAseq amplicon_value <- new_meta_data$AverageCoverageAmplicon names(amplicon_value) <- colnames(amplicon) - amplicon_value <- na.omit(amplicon_value) + amplicon_value <- stats::na.omit(amplicon_value) new_meta_data[names(amplicon_value), "AverageCoverage"] <- amplicon_value - new_row_data <- merge(rowData(scRNAseq), rowData(amplicon), by = "VariantName", all.x = TRUE, all.y = TRUE, + new_row_data <- merge(SummarizedExperiment::rowData(scRNAseq), SummarizedExperiment::rowData(amplicon), by = "VariantName", all.x = TRUE, all.y = TRUE, suffixes = c("scRNAseq", "Amplicon")) rownames(new_row_data) <- new_row_data$VariantName - # We add a VariantQuality column to the row data, showing the scRNAseq quality with the supplemented amplicon quality. - # We do the same for the concordance and the depth. - new_row_data$VariantQuality <- new_row_data$VariantQualityscRNAseq - amplicon_value <- rowData(amplicon)$VariantQuality - names(amplicon_value) <- rownames(amplicon) - amplicon_value <- na.omit(amplicon_value) - new_row_data[names(amplicon_value), "VariantQuality"] <- amplicon_value - - new_row_data$Concordance <- new_row_data$ConcordancescRNAseq - amplicon_value <- rowData(amplicon)$Concordance - names(amplicon_value) <- rownames(amplicon) - amplicon_value <- na.omit(amplicon_value) - new_row_data[names(amplicon_value), "Concordance"] <- amplicon_value - + if("VariantQualityscRNAseq" %in% colnames(new_row_data)){ + # We add a VariantQuality column to the row data, showing the scRNAseq quality with the supplemented amplicon quality. + # We do the same for the concordance and the depth. + new_row_data$VariantQuality <- new_row_data$VariantQualityscRNAseq + amplicon_value <- SummarizedExperiment::rowData(amplicon)$VariantQuality + names(amplicon_value) <- rownames(amplicon) + amplicon_value <- stats::na.omit(amplicon_value) + new_row_data[names(amplicon_value), "VariantQuality"] <- amplicon_value + } + + if("ConcordancescRNAseq" %in% colnames(new_row_data)){ + new_row_data$Concordance <- new_row_data$ConcordancescRNAseq + amplicon_value <- SummarizedExperiment::rowData(amplicon)$Concordance + names(amplicon_value) <- rownames(amplicon) + amplicon_value <- stats::na.omit(amplicon_value) + new_row_data[names(amplicon_value), "Concordance"] <- amplicon_value + } + new_row_data$Depth <- new_row_data$DepthscRNAseq - amplicon_value <- rowData(amplicon)$Depth + amplicon_value <- SummarizedExperiment::rowData(amplicon)$Depth names(amplicon_value) <- rownames(amplicon) - amplicon_value <- na.omit(amplicon_value) + amplicon_value <- stats::na.omit(amplicon_value) new_row_data[names(amplicon_value), "Depth"] <- amplicon_value - print("We get all cells and variants.") + if(verbose) print("We get all cells and variants.") all_cells <- unique(c(colnames(scRNAseq), colnames(amplicon))) all_variants <- unique(c(rownames(scRNAseq), rownames(amplicon))) new_meta_data <- new_meta_data[all_cells,] new_row_data <- new_row_data[all_variants,] - print("We generate our output matrices.") + if(verbose) print("We generate our output matrices.") consensus <- matrix(0, ncol = length(all_cells), nrow = length(all_variants)) rownames(consensus) <- all_variants colnames(consensus) <- all_cells - #consensus <- sparseMatrix(i = 1, j = 1, dims = c(length(all_variants), length(all_cells)), repr = "C") + #consensus <- Matrix::sparseMatrix(i = 1, j = 1, dims = c(length(all_variants), length(all_cells)), repr = "C") fraction <- consensus reads <- consensus alts <- consensus refs <- consensus - print("We fill the output matrices.") - consensus[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$consensus) - fraction[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$fraction) - reads[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$coverage) - alts[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$alts) - refs[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(assays(scRNAseq)$refs) + if(verbose) print("We fill the output matrices.") + consensus[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$consensus) + fraction[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$fraction) + reads[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$coverage) + alts[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$alts) + refs[rownames(scRNAseq), colnames(scRNAseq)] <- as.matrix(SummarizedExperiment::assays(scRNAseq)$refs) - print("We add the the amplicon information.") - consensus[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$consensus) - fraction[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$fraction) - reads[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$coverage) - alts[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$alts) - refs[rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$refs) + if(verbose) print("We add the the amplicon information.") + consensus[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$consensus) + fraction[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$fraction) + reads[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$coverage) + alts[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$alts) + refs[rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$refs) - #print("We add the the amplicon information.") - #assays(scRNAseq)[["consensus"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$consensus) - #assays(scRNAseq)[["fraction"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$fraction) - #assays(scRNAseq)[["coverage"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$coverage) + #if(verbose) print("We add the the amplicon information.") + #SummarizedExperiment::assays(scRNAseq)[["consensus"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$consensus) + #SummarizedExperiment::assays(scRNAseq)[["fraction"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$fraction) + #SummarizedExperiment::assays(scRNAseq)[["coverage"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(SummarizedExperiment::assays(amplicon)$coverage) - se <- SummarizedExperiment(assays = list(consensus = as(consensus, "dgCMatrix"), fraction = as(fraction, "dgCMatrix"), coverage = as(reads, "dgCMatrix"), alts = as(alts, "dgCMatrix"), refs = as(refs, "dgCMatrix")), - colData = new_meta_data, rowData = new_row_data) + se <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = methods::as(consensus, "dgCMatrix"), fraction = methods::as(fraction, "dgCMatrix"), coverage = methods::as(reads, "dgCMatrix"), alts = methods::as(alts, "dgCMatrix"), refs = methods::as(refs, "dgCMatrix")), + colData = new_meta_data, rowData = new_row_data) return(se) } diff --git a/R/CalculateAlleleFrequency.R b/R/CalculateAlleleFrequency.R index 1e79c2d..6a6f36c 100644 --- a/R/CalculateAlleleFrequency.R +++ b/R/CalculateAlleleFrequency.R @@ -1,11 +1,13 @@ #'Calculating the Minor Allele Frequency. #'@description -#'We calculate the MAF for the MAEGATK results. -#'@import MatrixGenerics SummarizedExperiment +#'We calculate the MAF from a reference reads matrix and an alternative reads matrix. +#'This function is intended to be used with the mitochondrial genome and not with other somatic mutations. +# #'@import MatrixGenerics SummarizedExperiment #'@param reference_reads Reference reads matrix. #'@param alternative_reads List of matrices for the alternative reads. +#'@param pseudo_count = What is the pseudo count you want to add to the reference_reads matrix. Default = 0 #'@export -CalculateAlleleFrequency <- function(reference_reads, alternative_reads){ +CalculateAlleleFrequency <- function(reference_reads, alternative_reads, pseudo_count = 0){ # We remove the potential N at position 3107 of the human genome. alternative_reads <- alternative_reads[!grepl("_N", rownames(alternative_reads)),] # We get the first part of the ref row name. This includes the position and the ref allele. @@ -19,7 +21,7 @@ CalculateAlleleFrequency <- function(reference_reads, alternative_reads){ # We match the new ref reads matrices to be the same order as the alt read matrices. reference_reads <- reference_reads[match(rows_alt_reads, rows_ref_reads),] # We divide the alt matrix by alt + ref matrix. - allelefrequency <- as.matrix(alternative_reads / (alternative_reads + reference_reads)) + allelefrequency <- as.matrix(alternative_reads / (alternative_reads + reference_reads + pseudo_count)) allelefrequency[is.na(allelefrequency)] <- 0 rownames(allelefrequency) <- gsub(">", "_", rownames(allelefrequency)) return(allelefrequency) diff --git a/R/CalculateAltReads.R b/R/CalculateAltReads.R index 0b0c165..c8042ec 100644 --- a/R/CalculateAltReads.R +++ b/R/CalculateAltReads.R @@ -1,25 +1,26 @@ #'CalculateAltReads #'@description #'We calculate the number of reads covering a variant using forward and reverse reads. -#'@import MatrixGenerics SummarizedExperiment +# #'@import MatrixGenerics +#'@importFrom SummarizedExperiment SummarizedExperiment assays rowRanges #'@param SE SummarizedExperiment object. #'@param chromosome_prefix List of matrices for the alternative reads. #'@export CalculateAltReads <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - reads_A <- assays(SE)[["A_counts_fw"]] + assays(SE)[["A_counts_rev"]] + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + reads_A <- SummarizedExperiment::assays(SE)[["A_counts_fw"]] + SummarizedExperiment::assays(SE)[["A_counts_rev"]] rownames(reads_A) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A), "_", ref_allele, "_A") reads_A <- reads_A[ref_allele != "A",] - reads_C <- assays(SE)[["C_counts_fw"]] + assays(SE)[["C_counts_rev"]] + reads_C <- SummarizedExperiment::assays(SE)[["C_counts_fw"]] + SummarizedExperiment::assays(SE)[["C_counts_rev"]] rownames(reads_C) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C), "_", ref_allele, "_C") reads_C <- reads_C[ref_allele != "C",] - reads_G <- assays(SE)[["G_counts_fw"]] + assays(SE)[["G_counts_rev"]] + reads_G <- SummarizedExperiment::assays(SE)[["G_counts_fw"]] + SummarizedExperiment::assays(SE)[["G_counts_rev"]] rownames(reads_G) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G), "_", ref_allele, "_G") reads_G <- reads_G[ref_allele != "G",] - reads_T <- assays(SE)[["T_counts_fw"]] + assays(SE)[["T_counts_rev"]] + reads_T <- SummarizedExperiment::assays(SE)[["T_counts_fw"]] + SummarizedExperiment::assays(SE)[["T_counts_rev"]] rownames(reads_T) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T), "_", ref_allele, "_T") reads_T <- reads_T[ref_allele != "T",] diff --git a/R/CalculateConsensus.R b/R/CalculateConsensus.R index 84172fb..7bedf90 100644 --- a/R/CalculateConsensus.R +++ b/R/CalculateConsensus.R @@ -1,17 +1,32 @@ +#'CalculateConsensus +#'@description #'We calculate the consensus information from the MAEGATK results. -#'@import dplyr MatrixGenerics SummarizedExperiment +#'We set cells that have only alternative reads to 2 (Alternative). +#'We set cells that have only reference reads to 1 (Reference). +#'We set cells that have a mixture of alternative and reference reads to 3 (Both). +#'We set cells that have no reads to 0 (NoCall). +#' +#'Please note. Cells can have reads for the reference of a specific variant and no reads for the alternative. +#'The cell can still have a reads for the other alternative alleles. Then the cell is still considered as 0 (NoCall) for this variant. +#'For example: +#'A cell has at position 3: 0 A reads, 53 T reads, 63 C reads, 148 T reads. +#'For the variant chrM_3_T_A, the cell would have 53 reference reads, but also reads for other variants at this position. +#'To make sure that there is no confusion, the cell is set to NoCall. +# #'@import MatrixGenerics +#'@importFrom SummarizedExperiment rowRanges #'@param SE SummarizedExperiment object. #'@param chromosome_prefix The chromosome name used as a prefix. +#'@param verbose Should the function be verbose? Default = FALSE #'@export -CalculateConsensus <- function(SE, chromosome_prefix = "chrM"){ +CalculateConsensus <- function(SE, chromosome_prefix = "chrM", verbose = FALSE){ # 0 NoCall = coverage is 0. # 1 Reference = only reference reads. # 2 Alternative = only alternative reads of one variant. # 3 Both = reads for reference and one or more variants. - - print("We get the read information per position.") + + if(verbose) print("We get the read information per position.") letter <- c("A", "C", "G", "T") - ref_allele <- as.character(rowRanges(SE)$refAllele) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) reads <- lapply(letter, getReadMatrix, SE = SE, chromosome_prefix = chromosome_prefix) # Since we have always the same 4 bases, we get all possible combinations by assigning numeric values. # A = 8, C = 4, G = 2, T = 1. @@ -21,44 +36,54 @@ CalculateConsensus <- function(SE, chromosome_prefix = "chrM"){ reads[[2]][reads[[2]] > 0] <- 4 reads[[3]][reads[[3]] > 0] <- 2 reads[[4]][reads[[4]] > 0] <- 1 - print("We add the values together.") + if(verbose) print("We add the values together.") # The row names are the names from the first matrix and not accurate any more. # The only relevant parts are the position and the reference base. variants_matrix <- reads[[1]] + reads[[2]] + reads[[3]] + reads[[4]] rm(reads) gc() - - print("We get the position according to their reference base.") + + if(verbose) print("We get the position according to their reference base.") # Now, we have a list for each set of position with the same base reference. - variants_matrix_ls <- list(A = variants_matrix[grep("_A_", rownames(variants_matrix), value = TRUE),], - C = variants_matrix[grep("_C_", rownames(variants_matrix), value = TRUE),], - G = variants_matrix[grep("_G_", rownames(variants_matrix), value = TRUE),], - T = variants_matrix[grep("_T_", rownames(variants_matrix), value = TRUE),], - N = variants_matrix[grep("_N_", rownames(variants_matrix), value = TRUE),]) - variants_matrix_ls[["N"]] <- matrix(variants_matrix_ls[["N"]], nrow = 1, ncol = length(variants_matrix_ls[["N"]])) - colnames(variants_matrix_ls[["N"]]) <- colnames(variants_matrix) - rownames(variants_matrix_ls[["N"]]) <- paste0(chromosome_prefix, "_3107_N_A") + variants_matrix_ls <- list(A = variants_matrix[grep("_A_", rownames(variants_matrix), value = TRUE), , drop = FALSE], + C = variants_matrix[grep("_C_", rownames(variants_matrix), value = TRUE), , drop = FALSE], + G = variants_matrix[grep("_G_", rownames(variants_matrix), value = TRUE), , drop = FALSE], + T = variants_matrix[grep("_T_", rownames(variants_matrix), value = TRUE), , drop = FALSE], + N = variants_matrix[grep("_N_", rownames(variants_matrix), value = TRUE), , drop = FALSE]) + # We check if the N reference is even used. If the variants_matrix_ls[["N"]] is empty (zero rows), we do not perform the consensus determination. + n_binding <- FALSE + if(nrow(variants_matrix_ls[["N"]]) > 0){ + n_binding <- TRUE + variants_matrix_ls[["N"]] <- matrix(variants_matrix_ls[["N"]], nrow = 1, ncol = ncol(variants_matrix)) + colnames(variants_matrix_ls[["N"]]) <- colnames(variants_matrix) + rownames(variants_matrix_ls[["N"]]) <- paste0(chromosome_prefix, "_3107_N_A") + } rm(variants_matrix) gc() - - print("Now, we check the consensus value for all positions with the same reference base.") + + if(verbose) print("Now, we check the consensus value for all positions with the same reference base.") # Then we can rbind these matrices again and return one large consensus matrix in the end. - print("A") + if(verbose) print("A") consensus_a <- lapply(c("C", "G", "T"), get_consensus, ref_base = "A", input_matrix = as.matrix(variants_matrix_ls[[1]]), chromosome_prefix = chromosome_prefix) consensus_a <- do.call("rbind", consensus_a) - print("C") + if(verbose) print("C") consensus_c <- lapply(c("A", "G", "T"), get_consensus, ref_base = "C", input_matrix = as.matrix(variants_matrix_ls[[2]]), chromosome_prefix = chromosome_prefix) consensus_c <- do.call("rbind", consensus_c) - print("G") + if(verbose) print("G") consensus_g <- lapply(c("A", "C", "T"), get_consensus, ref_base = "G", input_matrix = as.matrix(variants_matrix_ls[[3]]), chromosome_prefix = chromosome_prefix) consensus_g <- do.call("rbind", consensus_g) - print("T") + if(verbose) print("T") consensus_t <- lapply(c("A", "C", "G"), get_consensus, ref_base = "T", input_matrix = as.matrix(variants_matrix_ls[[4]]), chromosome_prefix = chromosome_prefix) consensus_t <- do.call("rbind", consensus_t) - print("N") - consensus_n <- lapply(c("A", "C", "G", "T"), get_consensus, ref_base = "N", input_matrix = variants_matrix_ls[[5]], chromosome_prefix = chromosome_prefix) - consensus_n <- do.call("rbind", consensus_n) - print("Binding the matrices.") - consensus <- rbind(consensus_a, consensus_c, consensus_g, consensus_t, consensus_n) + if(n_binding){ + if(verbose) print("N") + consensus_n <- lapply(c("A", "C", "G", "T"), get_consensus, ref_base = "N", input_matrix = variants_matrix_ls[[5]], chromosome_prefix = chromosome_prefix) + consensus_n <- do.call("rbind", consensus_n) + } else{ + if(verbose) print("N reference not present.") + } + if(verbose) print("Binding the matrices.") + consensus <- rbind(consensus_a, consensus_c, consensus_g, consensus_t) + if(n_binding) consensus <- rbind(consensus, consensus_n) return(consensus) } diff --git a/R/CalculateCorrelationPValue.R b/R/CalculateCorrelationPValue.R index 4f63056..0a0e947 100644 --- a/R/CalculateCorrelationPValue.R +++ b/R/CalculateCorrelationPValue.R @@ -2,7 +2,7 @@ #' #'@description #'We perform the correlation of SNVs and calculate the P values. -#'@import stats +#'@importFrom stats cor.test #'@param variant_values The fraction values you are analysing. A vector. #'@param other_mutation All other variants you have. A vector of variant names. #'@param all_variants_list List of fraction values for all the variants you want to compare your variant with. @@ -27,7 +27,7 @@ CalculateCorrelationPValue <- function(variant_values, other_mutation, all_varia result <- c(NA,NA,NA,NA,NA,NA) return(result) } else if(length(variant_values) > 2){ - result <- cor.test(variant_values, other_variant_values) + result <- stats::cor.test(variant_values, other_variant_values) cells_som_alt <- sum(variant_values == 1) cells_som_ref <- sum(variant_values == 0) cells_MT_alt <- sum(other_variant_values == 1) diff --git a/R/CalculateCoverage.R b/R/CalculateCoverage.R index 9ef7f5a..a82c116 100644 --- a/R/CalculateCoverage.R +++ b/R/CalculateCoverage.R @@ -1,13 +1,14 @@ #'CalculateCoverage #'@description #'We calculate the coverage information per variant from the MAEGATK results. -#'@import MatrixGenerics SummarizedExperiment +# #'@import MatrixGenerics +#'@importFrom SummarizedExperiment rowRanges assays #'@param SE SummarizedExperiment object. #'@param chromosome_prefix List of matrices for the alternative reads. #'@export CalculateCoverage <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - coverage <- assays(SE)[["coverage"]] + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + coverage <- SummarizedExperiment::assays(SE)[["coverage"]] rownames(coverage) <- paste0(chromosome_prefix, "_", 1:nrow(coverage), "_", ref_allele, "_A") coverage_A <- coverage[ref_allele != "A",] diff --git a/R/CalculateFisherTestPValue.R b/R/CalculateFisherTestPValue.R index 3c7652e..c59726a 100644 --- a/R/CalculateFisherTestPValue.R +++ b/R/CalculateFisherTestPValue.R @@ -2,7 +2,7 @@ #' #'@description #'We perform the Fisher Test of SNVs and calculate the P values. -#'@import stats +#'@importFrom stats fisher.test #'@param variant_values The fraction values you are analysing. A vector. #'@param other_mutation All other variants you have. A vector of variant names. #'@param all_variants_list List of fraction values for all the variants you want to compare your variant with. @@ -39,7 +39,7 @@ CalculateFisherTestPValue <- function(variant_values, other_mutation, all_varian count_matrix[2,1] <- sum(variant_values == 1 & other_variant_values == 0) count_matrix[1,2] <- sum(variant_values == 0 & other_variant_values == 1) count_matrix[2,2] <- sum(variant_values == 0 & other_variant_values == 0) - result <- fisher.test(x = count_matrix) + result <- stats::fisher.test(x = count_matrix) result <- c(result$p.value, result$estimate, count_matrix[1,1], count_matrix[2,1], count_matrix[1,2], count_matrix[2,2]) } else{ #print("We do not have more than 2 cells for the somatic variant.") diff --git a/R/CalculateQuality.R b/R/CalculateQuality.R index 5467d23..b3713f5 100644 --- a/R/CalculateQuality.R +++ b/R/CalculateQuality.R @@ -1,17 +1,21 @@ #'CalculateQuality #'@description #'We calculate the quality per variant. -#'@import MatrixGenerics SummarizedExperiment +# #'@import MatrixGenerics +#'@importFrom SummarizedExperiment assays +#'@importFrom Matrix rowSums +#'@importFrom utils tail #'@param SE SummarizedExperiment object. +#'@param variants The variants you want to get the quality for. #'@param chromosome_prefix List of matrices for the alternative reads. #'@export -CalculateQuality <- function(SE, variants = rownames(reads_alt), chromosome_prefix = "chrM"){ +CalculateQuality <- function(SE, variants, chromosome_prefix = "chrM"){ variants <- gsub(paste0(chromosome_prefix, "_"), "", variants) qualities <- lapply(c("A", "T", "C", "G"), function(x){ - fwrev <- cbind(assays(SE)[[paste0(x, "_counts_fw")]], assays(SE)[[paste0(x, "_counts_rev")]]) - qualities_fwrev <- cbind(assays(SE)[[paste0(x, "_qual_fw")]], assays(SE)[[paste0(x, "_qual_rev")]]) + fwrev <- cbind(SummarizedExperiment::assays(SE)[[paste0(x, "_counts_fw")]], SummarizedExperiment::assays(SE)[[paste0(x, "_counts_rev")]]) + qualities_fwrev <- cbind(SummarizedExperiment::assays(SE)[[paste0(x, "_qual_fw")]], SummarizedExperiment::assays(SE)[[paste0(x, "_qual_rev")]]) variants_use <- strsplit(variants, "") - variants_use <- sapply(variants_use, tail, n = 1) + variants_use <- sapply(variants_use, utils::tail, n = 1) variants_use <- variants_use == x variants_use_names <- variants[variants_use] variants_use <- as.numeric(gsub("_.*", "", variants_use_names)) @@ -22,7 +26,7 @@ CalculateQuality <- function(SE, variants = rownames(reads_alt), chromosome_pref rownames(qualities_fwrev) <- variants_use_names fwrev <- fwrev > 0 qualities_fwrev <- qualities_fwrev * fwrev - qualities <- apply(qualities_fwrev, 1, sum) / rowSums(fwrev > 0) + qualities <- apply(qualities_fwrev, 1, sum) / Matrix::rowSums(fwrev > 0) qualities[qualities == 0] <- NA return(qualities) }) diff --git a/R/CalculateStrandCorrelation.R b/R/CalculateStrandCorrelation.R index 1731609..575ec70 100644 --- a/R/CalculateStrandCorrelation.R +++ b/R/CalculateStrandCorrelation.R @@ -1,79 +1,85 @@ #'CalculateStrandCorrelation #'@description #'We calculate the correlation between the amount of forward and reverse reads per variant. -#'@import MatrixGenerics SummarizedExperiment data.table +#'@importFrom SummarizedExperiment rowRanges assays +#'@importFrom data.table data.table +#'@importFrom dplyr sym #'@param SE SummarizedExperiment object. #'@param chromosome_prefix List of matrices for the alternative reads. #'@export CalculateStrandCorrelation <- function(SE, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) variants_A <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_A") variants_A <- variants_A[ref_allele != "A"] - reads_A_fw <- assays(SE)[["A_counts_fw"]] - reads_A_rev <- assays(SE)[["A_counts_rev"]] + reads_A_fw <- SummarizedExperiment::assays(SE)[["A_counts_fw"]] + reads_A_rev <- SummarizedExperiment::assays(SE)[["A_counts_rev"]] rownames(reads_A_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A_fw), "_", ref_allele, "_A") rownames(reads_A_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_A_rev), "_", ref_allele, "_A") reads_A_fw <- reads_A_fw[ref_allele != "A",] reads_A_rev <- reads_A_rev[ref_allele != "A",] - dt <- merge(data.table(summary(reads_A_fw)), - data.table(summary(reads_A_rev)), + dt <- merge(data.table::data.table(summary(reads_A_fw)), + data.table::data.table(summary(reads_A_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_A[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_A <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_A[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_A <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_C <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_C") variants_C <- variants_C[ref_allele != "C"] - reads_C_fw <- assays(SE)[["C_counts_fw"]] - reads_C_rev <- assays(SE)[["C_counts_rev"]] + reads_C_fw <- SummarizedExperiment::assays(SE)[["C_counts_fw"]] + reads_C_rev <- SummarizedExperiment::assays(SE)[["C_counts_rev"]] rownames(reads_C_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C_fw), "_", ref_allele, "_C") rownames(reads_C_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_C_rev), "_", ref_allele, "_C") reads_C_fw <- reads_C_fw[ref_allele != "C",] reads_C_rev <- reads_C_rev[ref_allele != "C",] - dt <- merge(data.table(summary(reads_C_fw)), - data.table(summary(reads_C_rev)), +# dt <- merge(data.table::data.table(summary(reads_C_fw)), +# data.table::data.table(summary(reads_C_rev)), +# by.x = c("i", "j"), by.y = c("i", "j"), +# all = TRUE)[x.x > 0 | x.y > 0] + dt <- merge(data.table::data.table(summary(reads_C_fw)), + data.table::data.table(summary(reads_C_rev)), by.x = c("i", "j"), by.y = c("i", "j"), - all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_C[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_C <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + all = TRUE)[!!dplyr::sym("x.x") > 0 | !!dplyr::sym("x.y") > 0] + dt <- data.table::data.table(variant = variants_C[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_C <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_G <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_G") variants_G <- variants_G[ref_allele != "G"] - reads_G_fw <- assays(SE)[["G_counts_fw"]] - reads_G_rev <- assays(SE)[["G_counts_rev"]] + reads_G_fw <- SummarizedExperiment::assays(SE)[["G_counts_fw"]] + reads_G_rev <- SummarizedExperiment::assays(SE)[["G_counts_rev"]] rownames(reads_G_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G_fw), "_", ref_allele, "_G") rownames(reads_G_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_G_rev), "_", ref_allele, "_G") reads_G_fw <- reads_G_fw[ref_allele != "G",] reads_G_rev <- reads_G_rev[ref_allele != "G",] - dt <- merge(data.table(summary(reads_G_fw)), - data.table(summary(reads_G_rev)), + dt <- merge(data.table::data.table(summary(reads_G_fw)), + data.table::data.table(summary(reads_G_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_G[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_G <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_G[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_G <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] variants_T <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_T") variants_T <- variants_T[ref_allele != "T"] - reads_T_fw <- assays(SE)[["T_counts_fw"]] - reads_T_rev <- assays(SE)[["T_counts_rev"]] + reads_T_fw <- SummarizedExperiment::assays(SE)[["T_counts_fw"]] + reads_T_rev <- SummarizedExperiment::assays(SE)[["T_counts_rev"]] rownames(reads_T_fw) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T_fw), "_", ref_allele, "_T") rownames(reads_T_rev) <- paste0(chromosome_prefix, "_", 1:nrow(reads_T_rev), "_", ref_allele, "_T") reads_T_fw <- reads_T_fw[ref_allele != "T",] reads_T_rev <- reads_T_rev[ref_allele != "T",] - dt <- merge(data.table(summary(reads_T_fw)), - data.table(summary(reads_T_rev)), + dt <- merge(data.table::data.table(summary(reads_T_fw)), + data.table::data.table(summary(reads_T_rev)), by.x = c("i", "j"), by.y = c("i", "j"), all = TRUE)[x.x >0 | x.y >0] - dt <- data.table(variant = variants_T[dt[[1]]], - cell_id = dt[[2]], - fw = dt[[3]], rev = dt[[4]]) - cor_result_T <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] + dt <- data.table::data.table(variant = variants_T[dt[[1]]], + cell_id = dt[[2]], + fw = dt[[3]], rev = dt[[4]]) + cor_result_T <- dt[, .(cor = suppressWarnings(stats::cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)] cor_results <- rbind(cor_result_A, cor_result_C, cor_result_G, cor_result_T) diff --git a/R/CombineSEobjects.R b/R/CombineSEobjects.R index a5de795..bc8900d 100644 --- a/R/CombineSEobjects.R +++ b/R/CombineSEobjects.R @@ -1,28 +1,29 @@ #'CombineSEobjects #'@description #'We combine two SummarizedExperiment objects. -#'@import SummarizedExperiment BiocGenerics +# #'@import BiocGenerics +#'@importFrom SummarizedExperiment assays colData rowData SummarizedExperiment #'@param se_somatic SummarizedExperiment object for the somatic variants. #'@param se_MT SummarizedExperiment object for the MT variants. #'@param suffixes The suffixes you want to add to the meta data.frame. #'@export CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")){ # We check if the assays are equally named. - assay_names_somatic <- names(assays(se_somatic)) - assay_names_MT <- names(assays(se_MT)) + assay_names_somatic <- names(SummarizedExperiment::assays(se_somatic)) + assay_names_MT <- names(SummarizedExperiment::assays(se_MT)) if(!all(assay_names_somatic == assay_names_MT)){ stop("Your assays are not equally named or ordered.") } features <- combine_NAMES(names(se_somatic), names(se_MT)) cells <- combine_NAMES(colnames(se_somatic), colnames(se_MT)) - meta_data_somatic <- colData(se_somatic) - meta_data_MT <- colData(se_MT) + meta_data_somatic <- SummarizedExperiment::colData(se_somatic) + meta_data_MT <- SummarizedExperiment::colData(se_MT) meta_data <- merge(meta_data_somatic, meta_data_MT, by = "Cell", all = TRUE, suffixes = suffixes) meta_data <- meta_data[match(cells, meta_data$Cell),] - meta_row_somatic <- rowData(se_somatic) - meta_row_MT <- rowData(se_MT) + meta_row_somatic <- SummarizedExperiment::rowData(se_somatic) + meta_row_MT <- SummarizedExperiment::rowData(se_MT) if(ncol(meta_row_somatic) > 0 & ncol(meta_row_MT) > 0){ meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] @@ -31,7 +32,7 @@ CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")) meta_row_somatic <- matrix(NA, nrow = nrow(meta_row_somatic), ncol = ncol(meta_row_MT)) rownames(meta_row_somatic) <- rownames(se_somatic) colnames(meta_row_somatic) <- colnames(meta_row_MT) - meta_row_somatic <- DataFrame(meta_row_somatic) + meta_row_somatic <- S4Vectors::DataFrame(meta_row_somatic) meta_row_somatic$VariantName <- rownames(meta_row_somatic) meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] @@ -39,31 +40,20 @@ CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT")) meta_row_MT <- matrix(NA, nrow = nrow(meta_row_MT), ncol = ncol(meta_row_somatic)) rownames(meta_row_MT) <- rownames(se_MT) colnames(meta_row_MT) <- colnames(meta_row_somatic) - meta_row_MT <- DataFrame(meta_row_MT) + meta_row_MT <- S4Vectors::DataFrame(meta_row_MT) meta_row_MT$VariantName <- rownames(meta_row_MT) meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes) meta_row <- meta_row[match(features, meta_row$VariantName),] } - #assays_somatic <- assays(se_somatic) - #assays_somatic <- lapply(assays_somatic, as.matrix) - #assays_MT <- assays(se_MT) - #assays_MT <- lapply(assays_MT, as.matrix) - #assays_combined <- S4Vectors::mendoapply(BiocGenerics::combine, assays_somatic, assays_MT) - #assays_combined[[1]] <- as(assays_combined[[1]], "dgCMatrix") - #assays_combined[[2]] <- as(assays_combined[[2]], "dgCMatrix") - #assays_combined[[3]] <- as(assays_combined[[3]], "dgCMatrix") - #assays_combined[["consensus"]]@x[is.na(assays_combined[["consensus"]]@x)] <- 0 - #assays_combined[["fraction"]]@x[is.na(assays_combined[["fraction"]]@x)] <- 0 - #assays_combined[["coverage"]]@x[is.na(assays_combined[["coverage"]]@x)] <- 0 assays_combined <- lapply(assay_names_somatic, function(x){ - result <- combine_SparseMatrix(assays(se_somatic)[[x]], assays(se_MT)[[x]]) + result <- combine_SparseMatrix(SummarizedExperiment::assays(se_somatic)[[x]], SummarizedExperiment::assays(se_MT)[[x]]) }) names(assays_combined) <- assay_names_somatic - se_combined <- SummarizedExperiment(assays = assays_combined, - colData = meta_data, rowData = meta_row) + + se_combined <- SummarizedExperiment::SummarizedExperiment(assays = assays_combined, colData = meta_data, rowData = meta_row) return(se_combined) } diff --git a/R/Filtering.R b/R/Filtering.R index 24b1c09..c874611 100644 --- a/R/Filtering.R +++ b/R/Filtering.R @@ -13,7 +13,9 @@ #' \item all variants that are always NoCall, #' \item set variants with a VAF below a threshold to NoCall or Reference. #' } -#'@import fastmatch Matrix SummarizedExperiment +#'@importFrom Matrix summary +#'@importFrom SummarizedExperiment assays +#'@importFrom utils read.table #'@param se SummarizedExperiment object. #'@param blacklisted_barcodes_path Barcodes you want to remove. Path to a file with one column without header. #'@param fraction_threshold Variants with an VAF below this threshold are set to 0. Numeric. Default = NULL. @@ -21,6 +23,7 @@ #'@param min_cells_per_variant In how many cells should a variant be present to be included? Numeric. Default = 2. #'@param min_variants_per_cell How many variants should be covered in a cell have to be included? Default = 1. #'@param reject_value Should cells that fall below a threshold (fraction_threshold or alts_threshold) be treated as Reference or NoCall? Default = NoCall. +#'@param verbose Should the function be verbose? Default = TRUE #'@examples #' \dontrun{ #' # Removing all variants that are not detected in at least 2 cells. @@ -28,7 +31,7 @@ #' se_geno <- Filtering(se_geno, min_cells_per_variant = 2, fraction_threshold = 0.05) #' } #'@export -Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, reject_value = "NoCall"){ +Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, reject_value = "NoCall", verbose = TRUE){ # Checking if the reject_value variable is correct. if(!reject_value %in% c("Reference", "NoCall")){ stop(paste0("Your reject_value is ", reject_value, ".\nIt should be Reference or NoCall.")) @@ -39,8 +42,8 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = if(!is.null(blacklisted_barcodes_path)){ - print("We remove the unwanted cell barcodes.") - blacklisted_barcodes <- read.table(blacklisted_barcodes_path, header = FALSE) + if(verbose) print("We remove the unwanted cell barcodes.") + blacklisted_barcodes <- utils::read.table(blacklisted_barcodes_path, header = FALSE) blacklisted_barcodes <- blacklisted_barcodes[,1] barcodes_keep <- colnames(se) barcodes_keep <- barcodes_keep[!barcodes_keep %in% blacklisted_barcodes] @@ -50,30 +53,33 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = # If the fraction_threshold is 0, we skip the thresholding. 0 and NULL would be the same. # We might want to use a fraction_threshold of 0 to use the same variable to create file paths later. + if(!is.null(fraction_threshold)){ if(fraction_threshold == 0){ - fraction_threshold <- NULL + fraction_threshold <- NULL + } } - + + if(!is.null(fraction_threshold)){ if(any(fraction_threshold >= 1, fraction_threshold <= 0)){ stop("Your fraction threshold is not 0 < x < 1.") } - print(paste0("We assume that cells with a fraction smaller than our threshold are actually ", reject_value, ".")) - print(paste0("We set consensus values to ", reject_value_numeric, " (", reject_value, ") and fraction values to 0.")) - print(paste0("We do not set fractions between ", fraction_threshold, " and 1 to 1.")) - print("This way, we retain the heterozygous information.") + if(verbose) print(paste0("We assume that cells with a fraction smaller than our threshold are actually ", reject_value, ".")) + if(verbose) print(paste0("We set consensus values to ", reject_value_numeric, " (", reject_value, ") and fraction values to 0.")) + if(verbose) print(paste0("We do not set fractions between ", fraction_threshold, " and 1 to 1.")) + if(verbose) print("This way, we retain the heterozygous information.") # Filtering using sparse matrices. - consensus_matrix <- assays(se)$consensus - fraction_matrix <- assays(se)$fraction - position_matrix <- summary(fraction_matrix) + consensus_matrix <- SummarizedExperiment::assays(se)$consensus + fraction_matrix <- SummarizedExperiment::assays(se)$fraction + position_matrix <- Matrix::summary(fraction_matrix) position_matrix <- subset(position_matrix, x > 0 & x < fraction_threshold) # If no elements fall between 0 and the fraction_threshold, we do not have to change the matrices. if(nrow(position_matrix) > 0){ ij <- as.matrix(position_matrix[, 1:2]) consensus_matrix[ij] <- reject_value_numeric fraction_matrix[ij] <- 0 - assays(se)$consensus <- consensus_matrix - assays(se)$fraction <- fraction_matrix + SummarizedExperiment::assays(se)$consensus <- consensus_matrix + SummarizedExperiment::assays(se)$fraction <- fraction_matrix } } @@ -82,16 +88,16 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = if(any(!is.numeric(alts_threshold), is.infinite(alts_threshold))){ stop("Your alts_threshold should be a numeric value.") } - print(paste0("We assume that cells with a number of alternative reads smaller than our threshold are actually ", reject_value, ".")) - print(paste0("We set consensus values to ", reject_value_numeric, " (", reject_value, "), fraction values to 0.")) - if(reject_value == "NoCall") print("We set Alts, Refs and Coverage to 0.") - if(reject_value == "Reference") print("We set Alts to 0 and adjust the Coverage.") + if(verbose) print(paste0("We assume that cells with a number of alternative reads smaller than our threshold are actually ", reject_value, ".")) + if(verbose) print(paste0("We set consensus values to ", reject_value_numeric, " (", reject_value, "), fraction values to 0.")) + if(reject_value == "NoCall") if(verbose) print("We set Alts, Refs and Coverage to 0.") + if(reject_value == "Reference") if(verbose) print("We set Alts to 0 and adjust the Coverage.") # Filtering using sparse matrices. - consensus_matrix <- assays(se)$consensus - fraction_matrix <- assays(se)$fraction - coverage_matrix <- assays(se)$coverage - alts_matrix <- assays(se)$alts - refs_matrix <- assays(se)$refs + consensus_matrix <- SummarizedExperiment::assays(se)$consensus + fraction_matrix <- SummarizedExperiment::assays(se)$fraction + coverage_matrix <- SummarizedExperiment::assays(se)$coverage + alts_matrix <- SummarizedExperiment::assays(se)$alts + refs_matrix <- SummarizedExperiment::assays(se)$refs position_matrix <- summary(alts_matrix) position_matrix <- subset(position_matrix, x < alts_threshold) # If no elements fall between 0 and the alts_threshold, we do not have to change the matrices. @@ -109,28 +115,28 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = refs_matrix[ij] <- 0 coverage_matrix[ij] <- 0 } - assays(se)$consensus <- consensus_matrix - assays(se)$fraction <- fraction_matrix - assays(se)$coverage <- coverage_matrix - assays(se)$alts <- alts_matrix - assays(se)$refs <- refs_matrix + SummarizedExperiment::assays(se)$consensus <- consensus_matrix + SummarizedExperiment::assays(se)$fraction <- fraction_matrix + SummarizedExperiment::assays(se)$coverage <- coverage_matrix + SummarizedExperiment::assays(se)$alts <- alts_matrix + SummarizedExperiment::assays(se)$refs <- refs_matrix } } - print("We remove all the variants that are always NoCall.") - consensus_test <- assays(se)$consensus > 0 + if(verbose) print("We remove all the variants that are always NoCall.") + consensus_test <- SummarizedExperiment::assays(se)$consensus > 0 keep_variants <- rowSums(consensus_test) > 0 se <- se[keep_variants,] - print(paste0("We remove variants, that are not at least detected in ", min_cells_per_variant, " cells.")) - keep_variants <- rowSums(assays(se)$consensus >= 1) + if(verbose) print(paste0("We remove variants, that are not at least detected in ", min_cells_per_variant, " cells.")) + keep_variants <- rowSums(SummarizedExperiment::assays(se)$consensus >= 1) keep_variants <- keep_variants >= min_cells_per_variant se <- se[keep_variants,] - print(paste0("We remove all cells that are not >= 1 (Ref) for at least ", min_variants_per_cell, " variant.")) - consensus_test <- assays(se)$consensus >= 1 + if(verbose) print(paste0("We remove all cells that are not >= 1 (Ref) for at least ", min_variants_per_cell, " variant.")) + consensus_test <- SummarizedExperiment::assays(se)$consensus >= 1 keep_cells <- colSums(consensus_test) > min_variants_per_cell se <- se[,keep_cells] return(se) diff --git a/R/GetCellInfoPerVariant.R b/R/GetCellInfoPerVariant.R index d3a423b..65362ee 100644 --- a/R/GetCellInfoPerVariant.R +++ b/R/GetCellInfoPerVariant.R @@ -1,20 +1,23 @@ #'We get the variant information per cell. -#'@import dplyr SummarizedExperiment tibble tidyverse +# #'@import dplyr SummarizedExperiment tibble tidyverse +#'@importFrom SummarizedExperiment assays +#'@importFrom dplyr left_join %>% +#'@importFrom tibble tibble as_tibble #'@param se SummarizedExperiment object. #'@param voi_ch Variants of interest. +#'@param verbose Should the function be verbose? Default = FALSE #'@export -GetCellInfoPerVariant <- function(se, voi_ch){ - print("Generate matrices with coverage, allele frequency and reference / variant reads") - cov_voi_mat <- assays(se)[["coverage"]][voi_ch,] - af_voi_mat <- assays(se)[["fraction"]][voi_ch,] +GetCellInfoPerVariant <- function(se, voi_ch, verbose = FALSE){ + if(verbose) print("Generate matrices with coverage, allele frequency and reference / variant reads") + cov_voi_mat <- SummarizedExperiment::assays(se)[["coverage"]][voi_ch,] + af_voi_mat <- SummarizedExperiment::assays(se)[["fraction"]][voi_ch,] - print("Add coverage and allele frequency info from variants of interest to cells_tib.") - cells_tib <- tibble(cell = colnames(se), - Mean_Cov = se$depth) + if(verbose) print("Add coverage and allele frequency info from variants of interest to cells_tib.") + cells_tib <- tibble::tibble(cell = colnames(se), Mean_Cov = se$depth) for(voi in voi_ch){ cells_tib <- cells_tib %>% - left_join(as_tibble(assays(se)[["coverage"]][voi,], rownames = "cell"), by = "cell") %>% - left_join(as_tibble(assays(se)[["fraction"]][voi,], rownames = "cell"), by = "cell") + dplyr::left_join(tibble::as_tibble(SummarizedExperiment::assays(se)[["coverage"]][voi,], rownames = "cell"), by = "cell") %>% + dplyr::left_join(tibble::as_tibble(SummarizedExperiment::assays(se)[["fraction"]][voi,], rownames = "cell"), by = "cell") colnames(cells_tib) <- gsub("value.x", paste0("cov_", voi), colnames(cells_tib)) colnames(cells_tib) <- gsub("value.y", paste0("af_", voi), colnames(cells_tib)) } diff --git a/R/GetVariantInfo.R b/R/GetVariantInfo.R index 5b2a02f..373c02d 100644 --- a/R/GetVariantInfo.R +++ b/R/GetVariantInfo.R @@ -2,7 +2,7 @@ #'@description #'We get the genotyping information for a set of variants. #'The function returns a matrix with the values from the specified assay. -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays #'@param SE SummarizedExperiment object. #'@param information The assay with the desired information. Default: consensus #'@param variants A vector of variants. @@ -24,11 +24,11 @@ GetVariantInfo <- function(SE, information = "consensus", variants = NULL, cells stop(paste0("Only ", variants_check, " of ", length(variants), " are in the SE object.")) } # We check if the requested assay is actually present. - assay_check <- information %in% names(assays(SE)) + assay_check <- information %in% names(SummarizedExperiment::assays(SE)) if(!assay_check){ stop("The assay you wants is not present in the object.") } - res <- assays(SE)[[information]][variants, , drop = FALSE] + res <- SummarizedExperiment::assays(SE)[[information]][variants, , drop = FALSE] # We subset the result to only include the cells of interest. # We check if the cells vector is not NULL. if(!is.null(cells)){ diff --git a/R/HeatmapVOI.R b/R/HeatmapVOI.R index 0c79a1a..72aa3c4 100644 --- a/R/HeatmapVOI.R +++ b/R/HeatmapVOI.R @@ -1,14 +1,20 @@ #'HeatmapVoi #'@description #'We plot a heatmap of a set of Variants Of Interest using the Variant Allele Frequency values of a SummarizedExperiment object. -#'@import ComplexHeatmap SummarizedExperiment grid circlize scales +#'@importFrom ComplexHeatmap columnAnnotation Heatmap +#'@importFrom SummarizedExperiment assays colData +#'@importFrom grid gpar unit +#'@importFrom circlize colorRamp2 +#'@importFrom scales hue_pal #'@param SE SummarizedExperiment object. #'@param voi Variants Of Interest. -#'@param annotation_trait Cell Annotation at the bottom of the heat map. +#'@param annotation_trait Cell Annotation at the bottom of the heat map. +#'@param column_title The title of the heat map. Default = NULL +#'@param remove_empty_cells Should cells that have a fraction of 0 for all variants be removed? Default = FALSE #'@export HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, remove_empty_cells = FALSE){ - fraction <- assays(SE)[["fraction"]][voi,] + fraction <- SummarizedExperiment::assays(SE)[["fraction"]][voi,] fraction[is.na(fraction)] <- 0 if(length(voi) == 1){ fraction <- t(as.matrix(fraction)) @@ -22,9 +28,9 @@ HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, re fraction <- fraction[,cell_check, drop = FALSE] } if(!is.null(annotation_trait)){ - colours_use <- scales::hue_pal(length(unique(colData(SE)[,annotation_trait]))) - names(colours_use) <- unique(colData(SE)[,annotation_trait]) - ha <- ComplexHeatmap::columnAnnotation(annotation_trait = colData(SE)[,annotation_trait], + colours_use <- scales::hue_pal(length(unique(SummarizedExperiment::colData(SE)[,annotation_trait]))) + names(colours_use) <- unique(SummarizedExperiment::colData(SE)[,annotation_trait]) + ha <- ComplexHeatmap::columnAnnotation(annotation_trait = SummarizedExperiment::colData(SE)[,annotation_trait], col = list(annotation_trait = colours_use)) } else if(is.null(annotation_trait)){ ha <- NULL @@ -35,16 +41,16 @@ HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, re column_title <- "Cells" } - heatmap_voi <- Heatmap(fraction, - column_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), - row_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), - row_names_gp = grid::gpar(fontsize = 20, fontface = "bold"), - col = circlize::colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), - c("#FCFCFC","#FFEDB0","#FFDF5F","#FEC510","#FA8E24","#F14C2B","#DA2828","#BE2222","#A31D1D")), - show_row_names = T, show_column_names = F, cluster_columns = T, clustering_method_columns = "complete", cluster_rows = F, name = "VAF", - heatmap_legend_param = list(border = "#000000", grid_height = unit(10, "mm")), - bottom_annotation = ha, border = T, use_raster = T, - column_title = column_title, - row_title = "Variants") + heatmap_voi <- ComplexHeatmap::Heatmap(fraction, + column_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), + row_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), + row_names_gp = grid::gpar(fontsize = 20, fontface = "bold"), + col = circlize::colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9), + c("#FCFCFC","#FFEDB0","#FFDF5F","#FEC510","#FA8E24","#F14C2B","#DA2828","#BE2222","#A31D1D")), + show_row_names = T, show_column_names = F, cluster_columns = T, clustering_method_columns = "complete", cluster_rows = F, name = "VAF", + heatmap_legend_param = list(border = "#000000", grid_height = grid::unit(10, "mm")), + bottom_annotation = ha, border = T, use_raster = T, + column_title = column_title, + row_title = "Variants") return(heatmap_voi) } diff --git a/R/LoadingMAEGATK_typewise.R b/R/LoadingMAEGATK_typewise.R index 7071597..b2891f8 100755 --- a/R/LoadingMAEGATK_typewise.R +++ b/R/LoadingMAEGATK_typewise.R @@ -3,92 +3,105 @@ #'We load the MAEGATK output and transform it to be compatible with the VarTrix output. #'The input file is a specifically formated csv file with all the necessary information to run the analysis. #'Note that the source column in the input file needs to be one of the following: vartrix, mgaetk, mgatk. -#'This is hard coded and case insensitive. -#'@import Matrix SummarizedExperiment +#'If you want to only load a single sample without the use of an input file, you have to set the following variables. +#' \enumerate{ +#' \item samples_path +#' \item barcodes_path +#' \item patient +#' \item samples_file = NULL +#' } +#'@importFrom utils read.csv read.table +#'@importFrom SummarizedExperiment SummarizedExperiment #'@param samples_path Path to the input folder. #'@param samples_file Path to the csv file with the samples to be loaded. #'@param type_use The type of input. Has to be one of: scRNAseq_MT, Amplicon_MT. Only used if samples_path is not NULL. #'@param patient The patient you want to load. #'@param chromosome_prefix The prefix you want use. Default: "chrM" +#'@param min_cells The minimum number of cells with coverage for a variant. Variants with coverage in less than this amount of cells are removed. Default = 2 +#'@param barcodes_path Path to the barcodes file tsv. Default = NULL +#'@param verbose Should the function be verbose? Default = TRUE #'@export LoadingMAEGATK_typewise <- function(samples_file, samples_path = NULL, patient, type_use = "scRNAseq_MT", chromosome_prefix = "chrM", - min_cells = 2, barcodes_path = NULL){ + min_cells = 2, barcodes_path = NULL, verbose = TRUE){ if(all(!is.null(samples_path), !is.null(barcodes_path))){ - print(paste0("Loading the data for patient ", patient, ".")) - samples <- list.files(samples_path) - samples <- grep(patient, samples, value = TRUE) - samples_file <- data.frame(patient = patient, sample = samples, input_folder = samples_path, cells = barcodes_path) + if(verbose) print(paste0("Loading the data for patient ", patient, ".")) + samples <- patient + samples_file <- data.frame(patient = patient, sample = samples, input_path = samples_path, cells = barcodes_path) } else{ - print(paste0("Loading the data for patient ", patient, ".")) - print("We read in the samples file.") - samples_file <- read.csv(samples_file) + if(verbose) print(paste0("Loading the data for patient ", patient, ".")) + if(verbose) print("We read in the samples file.") + samples_file <- utils::read.csv(samples_file) - print("We subset to the patient of interest.") + if(verbose) print("We subset to the patient of interest.") samples_file <- samples_file[grep("maegatk|mgatk", samples_file$source, ignore.case = TRUE),] samples_file <- samples_file[grep(patient, samples_file$patient),] samples_file <- samples_file[grep(type_use, samples_file$type),] - print("We get the different samples.") + if(verbose) print("We get the different samples.") samples <- samples_file$sample } - print("We read in the cell barcodes output by CellRanger as a list.") - barcodes <- lapply(samples_file$cells, read.table) + if(verbose) print("We read in the cell barcodes output by CellRanger as a list.") + barcodes <- lapply(samples_file$cells, utils::read.table) names(barcodes) <- samples - print("We load the MAEGATK output files.") + if(verbose) print("We load the MAEGATK output files.") se_ls <- list() for(i in 1:nrow(samples_file)){ - print(paste0("Loading sample ", i, " of ", nrow(samples_file))) - input_folder_use <- samples_file$input_folder[i] - sample_use <- samples_file$sample[i] + if(verbose) print(paste0("Loading sample ", i, " of ", nrow(samples_file))) + input_file_use <- samples_file$input_path[i] + sample_use <- samples_file$sample[i] + + # We check if the file exists. + if(!file.exists(input_file_use)){ + stop(paste0("Error: the file ", input_file_use, " does not exist.")) + } # We get the final output file for either mgatk or maegatk. - final_output_file <- list.files(paste0(input_folder_use, sample_use, "/final/"), full.names = TRUE) - final_output_file <- grep(paste0("maegtk.rds|maegatk.rds|mgatk.rds|", sample_use, ".rds"), final_output_file, value = TRUE) - se_ls[[sample_use]] <- load_object(final_output_file) + se_ls[[sample_use]] <- load_object(input_file_use) colnames(se_ls[[sample_use]]) <- paste0(sample_use, "_", colnames(se_ls[[sample_use]])) - barcodes_use <- paste0(sample_use, "_", barcodes[[sample_use]][,1]) - barcodes_use <- barcodes_use[barcodes_use %in% colnames(se_ls[[sample_use]])] - se_ls[[sample_use]] <- se_ls[[sample_use]][,barcodes_use] + barcodes_use <- paste0(sample_use, "_", barcodes[[sample_use]][,1]) + barcodes_use <- barcodes_use[barcodes_use %in% colnames(se_ls[[sample_use]])] + se_ls[[sample_use]] <- se_ls[[sample_use]][, barcodes_use] } - print("We merge the samples.") + if(verbose) print("We merge the samples.") se_merged <- do.call("cbind", se_ls) rm(se_ls) gc() - print("We get the allele frequency.") - fraction <- computeAFMutMatrix(se_merged, chromosome_prefix = chromosome_prefix) + if(verbose) print("We get the allele frequency.") + fraction <- computeAFMutMatrix(SE = se_merged, chromosome_prefix = chromosome_prefix) - print("We get the coverage information.") + if(verbose) print("We get the coverage information.") coverage <- CalculateCoverage(SE = se_merged, chromosome_prefix = chromosome_prefix) if(!all(rownames(fraction) == rownames(coverage))){ coverage <- coverage[match(rownames(fraction), rownames(coverage)),] } - print("We get the number of alternative reads per variant.") + if(verbose) print("We get the number of alternative reads per variant.") reads_alt <- CalculateAltReads(SE = se_merged, chromosome_prefix = chromosome_prefix) - print("We get the quality information.") + if(verbose) print("We get the quality information.") variant_quality <- CalculateQuality(SE = se_merged, variants = rownames(reads_alt), chromosome_prefix = chromosome_prefix) - print("We get the number of reference reads.") + if(verbose) print("We get the number of reference reads.") reads_ref <- coverage - reads_alt - print("Calculating the strand concordance.") + + if(verbose) print("Calculating the strand concordance.") concordance <- CalculateStrandCorrelation(SE = se_merged, chromosome_prefix = chromosome_prefix) - print("We calculate the consensus information.") + if(verbose) print("We calculate the consensus information.") consensus <- CalculateConsensus(SE = se_merged, chromosome_prefix = chromosome_prefix) # We order the consensus matrix like the coverage matrix. if(!all(rownames(fraction) == rownames(consensus))){ @@ -96,111 +109,55 @@ LoadingMAEGATK_typewise <- function(samples_file, samples_path = NULL, patient, } - print("We perform some filtering to reduce the memory needed.") - print(paste0("We remove variants, which are not covered in at least ", min_cells, " cells .")) - keep_variants <- rowSums(consensus >= 1) - keep_variants <- keep_variants >= min_cells - # If we only have one cell or one variant, we loose the matrix. - cell_ids <- colnames(consensus) - variant_names <- names(keep_variants[keep_variants]) - # consensus <- consensus[keep_variants,] - # coverage <- coverage[keep_variants,] - # fraction <- fraction[keep_variants,] - # concordance <- concordance[keep_variants] - # reads_alt <- reads_alt[keep_variants,] - # reads_ref <- reads_ref[keep_variants,] - consensus <- consensus[keep_variants,] - consensus <- suppressWarnings(matrix(consensus, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(consensus) <- cell_ids - rownames(consensus) <- variant_names - consensus <- as(consensus, "dgCMatrix") - coverage <- coverage[keep_variants,] - coverage <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(coverage) <- cell_ids - rownames(coverage) <- variant_names - coverage <- as(coverage, "dgCMatrix") - fraction <- fraction[keep_variants,] - fraction <- suppressWarnings(matrix(fraction, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(fraction) <- cell_ids - rownames(fraction) <- variant_names - fraction <- as(fraction, "dgCMatrix") - concordance <- concordance[keep_variants] + if(verbose) print("We perform some filtering to reduce the memory needed.") + if(verbose) print(paste0("We remove variants, which are not covered in at least ", min_cells, " cells .")) + keep_variants <- rowSums(consensus >= 1) + keep_variants <- keep_variants >= min_cells + consensus <- consensus[keep_variants, , drop = FALSE] + coverage <- coverage[keep_variants, , drop = FALSE] + fraction <- fraction[keep_variants, , drop = FALSE] + concordance <- concordance[keep_variants] variant_quality <- variant_quality[keep_variants] - reads_alt <- reads_alt[keep_variants,] - reads_alt <- suppressWarnings(matrix(reads_alt, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_alt) <- cell_ids - rownames(reads_alt) <- variant_names - reads_alt <- as(reads_alt, "dgCMatrix") - reads_ref <- reads_ref[keep_variants,] - reads_ref <- suppressWarnings(matrix(reads_ref, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_ref) <- cell_ids - rownames(reads_ref) <- variant_names - reads_ref <- as(reads_ref, "dgCMatrix") - - - print("We remove cells that are always NoCall.") + reads_alt <- reads_alt[keep_variants, , drop = FALSE] + reads_ref <- reads_ref[keep_variants, , drop = FALSE] + + + if(verbose) print("We remove cells that are always NoCall.") consensus_test <- consensus > 0 keep_cells <- colSums(consensus_test) > 0 - # If we only have one cell or one variant, we loose the matrix. - cell_ids <- colnames(consensus) - variant_names <- names(keep_variants[keep_variants]) - # consensus <- consensus[,keep_cells] - # coverage <- coverage[,keep_cells] - # fraction <- fraction[,keep_cells] - # reads_alt <- reads_alt[,keep_cells] - # reads_ref <- reads_ref[,keep_cells] - consensus <- consensus[,keep_cells] - consensus <- suppressWarnings(matrix(consensus, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(consensus) <- cell_ids - rownames(consensus) <- variant_names - consensus <- as(consensus, "dgCMatrix") - coverage <- coverage[,keep_cells] - coverage <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(coverage) <- cell_ids - rownames(coverage) <- variant_names - coverage <- as(coverage, "dgCMatrix") - fraction <- fraction[,keep_cells] - fraction <- suppressWarnings(matrix(fraction, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(fraction) <- cell_ids - rownames(fraction) <- variant_names - fraction <- as(fraction, "dgCMatrix") - reads_alt <- reads_alt[,keep_cells] - reads_alt <- suppressWarnings(matrix(reads_alt, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_alt) <- cell_ids - rownames(reads_alt) <- variant_names - reads_alt <- as(reads_alt, "dgCMatrix") - reads_ref <- reads_ref[,keep_cells] - reads_ref <- suppressWarnings(matrix(reads_ref, nrow = length(variant_names), ncol = length(cell_ids))) - colnames(reads_ref) <- cell_ids - rownames(reads_ref) <- variant_names - reads_ref <- as(reads_ref, "dgCMatrix") + consensus <- consensus[, keep_cells, drop = FALSE] + coverage <- coverage[, keep_cells, drop = FALSE] + fraction <- fraction[, keep_cells, drop = FALSE] + reads_alt <- reads_alt[, keep_cells, drop = FALSE] + reads_ref <- reads_ref[, keep_cells, drop = FALSE] # We check if the matrices are empty (0 cells, 0 variants). Then we simply return NULL. dim_test <- dim(coverage) if(any(dim_test == 0)){ - print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) - print("Returning NULL.") + if(verbose) print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) + if(verbose) print("Returning NULL.") return(NULL) } else{ - print("We add the information to the merged matrices.") - coverage_depth_per_cell <- rownames(coverage) - coverage_depth_per_cell <- gsub("_._.$", "", coverage_depth_per_cell) - coverage_depth_per_cell <- !duplicated(coverage_depth_per_cell) - coverage_depth_per_cell <- coverage[coverage_depth_per_cell,] - cell_ids <- colnames(coverage_depth_per_cell) - variant_names <- rownames(coverage_depth_per_cell) - coverage_depth_per_cell <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) + if(verbose) print("We add the information to the merged matrices.") + coverage_depth_per_cell <- rownames(coverage) + coverage_depth_per_cell <- gsub("_._.$", "", coverage_depth_per_cell) + coverage_depth_per_cell <- !duplicated(coverage_depth_per_cell) + coverage_depth_per_cell <- coverage[coverage_depth_per_cell,] + cell_ids <- colnames(coverage_depth_per_cell) + variant_names <- rownames(coverage_depth_per_cell) + coverage_depth_per_cell <- suppressWarnings(matrix(coverage, nrow = length(variant_names), ncol = length(cell_ids))) colnames(coverage_depth_per_cell) <- cell_ids rownames(coverage_depth_per_cell) <- variant_names - coverage_depth_per_variant <- rowMeans(coverage) - coverage_depth_per_cell <- colMeans(coverage_depth_per_cell) - meta_data_col <- data.frame(Cell = colnames(consensus), AverageCoverage = coverage_depth_per_cell) - rownames(meta_data_col) <- meta_data_col$Cell - meta_data_row <- data.frame(VariantName = rownames(consensus), Concordance = concordance, VariantQuality = variant_quality, Depth = coverage_depth_per_variant) - rownames(meta_data_row) <- meta_data_row$VariantName - se_output <- SummarizedExperiment(assays = list(consensus = consensus, fraction = fraction, coverage = coverage, alts = reads_alt, refs = reads_ref), - colData = meta_data_col, rowData = meta_data_row) + coverage_depth_per_variant <- rowMeans(coverage) + coverage_depth_per_cell <- colMeans(coverage_depth_per_cell) + meta_data_col <- data.frame(Cell = colnames(consensus), AverageCoverage = coverage_depth_per_cell) + rownames(meta_data_col) <- meta_data_col$Cell + meta_data_row <- data.frame(VariantName = rownames(consensus), Concordance = concordance, VariantQuality = variant_quality, Depth = coverage_depth_per_variant) + rownames(meta_data_row) <- meta_data_row$VariantName + + se_output <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = consensus, fraction = fraction, coverage = coverage, alts = reads_alt, refs = reads_ref), + colData = meta_data_col, rowData = meta_data_row) return(se_output) } } diff --git a/R/LoadingVCF_typewise.R b/R/LoadingVCF_typewise.R new file mode 100644 index 0000000..dbab895 --- /dev/null +++ b/R/LoadingVCF_typewise.R @@ -0,0 +1,248 @@ +#'LoadingVCF_typewise +#'@description +#' We load a cellwise pileup result from a VCF file. +#' If you want to only load a single sample without the use of an input file, you have to set the following variables. +#' \enumerate{ +#' \item samples_path +#' \item barcodes_path +#' \item patient +#' \item samples_file = NULL +#' } +#' +#' It has happened that reads with an N allele were aligned. This can cause problems since these variants are typically not in variants lists. +#' We can remove all of these variants by setting remove_N_alternative to TRUE (the default). +#' Set this option to FALSE, if you really want to retain these variants. +#'@importFrom GenomeInfoDb seqnames +#'@importFrom BiocGenerics start +#'@importFrom utils read.table read.csv +#'@importFrom VariantAnnotation readVcf info readGeno ref alt +#'@importFrom SummarizedExperiment SummarizedExperiment +#'@importFrom Matrix rowSums colSums rowMeans colMeans +#'@param samples_path Path to the input folder. Must include a barcodes file. +#'@param samples_file Path to the csv file with the samples to be loaded. +#'@param vcf_path Path to the VCF file with the variants. +#'@param patient The patient you want to load. +#'@param type_use The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. +#'@param min_reads The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL. +#'@param min_cells The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2. +#'@param barcodes_path Path to the cell barcodes tsv. Default = NULL +#'@param remove_N_alternative Remove all variants that have N as an alternative, see Description. Default = TRUE +#'@param verbose Should the function be verbose? Default = TRUE +#'@export +LoadingVCF_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, vcf_path, patient, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, remove_N_alternative = TRUE, verbose = TRUE){ + if(all(!is.null(samples_path), !is.null(barcodes_path))){ + if(verbose) print(paste0("Loading the data for sample ", patient, ".")) + samples_file <- data.frame(patient = patient, sample = patient, input_path = samples_path, cells = barcodes_path) + samples <- samples_file$sample + } else{ + if(verbose) print(paste0("Loading the data for patient ", patient, ".")) + if(verbose) print("We read in the samples file.") + samples_file <- utils::read.csv(samples_file, stringsAsFactors = FALSE) + + + if(verbose) print("We subset to the patient of interest.") + samples_file <- samples_file[grep("vcf", samples_file$source, ignore.case = TRUE),] + samples_file <- samples_file[samples_file$patient == patient,] + samples_file <- samples_file[samples_file$type == type_use,] + + + if(verbose) print("We get the different samples.") + samples <- samples_file$sample + } + + + if(verbose) print("We read in the cell barcodes output by CellRanger as a list.") + barcodes <- lapply(samples_file$cells, utils::read.table) + names(barcodes) <- samples + + + if(verbose) print("We read in the vcf file.") + vcf <- VariantAnnotation::readVcf(vcf_path) + vcf_info <- VariantAnnotation::info(vcf) + + + if(verbose) print("We load the VCF file.") + reads_matrix_total <- c() # The total number of reads + coverage_matrix_total <- c() # The alternative reads + ref_matrix_total <- c() # The reference reads + consensus_matrix_total <- c() + for(i in 1:length(samples)){ + if(verbose) print(paste0("Loading sample ", i, " of ", nrow(samples_file))) + input_folder_use <- samples_file$input_path[i] + sample_use <- samples_file$sample[i] + + # The cell barcodes and variants. + cellbarcodes_use <- barcodes[[sample_use]] + + # We load the VCF file. + # vcf_data <- paste0(input_folder_use, sample_use, "/cellSNP.cells.sorted.vcf.gz") + vcf_data <- paste0(input_folder_use) + depth_to_add <- VariantAnnotation::readGeno(vcf_data, "DP") + depth_to_add[is.na(depth_to_add)] <- 0 + rownames(depth_to_add) <- make.names(rownames(depth_to_add)) + colnames(depth_to_add) <- paste0(sample_use, "_", colnames(depth_to_add)) + depth_to_add <- methods::as(depth_to_add, "sparseMatrix") + reads_matrix_total <- cbind(reads_matrix_total, depth_to_add) + + alts_to_add <- VariantAnnotation::readGeno(vcf_data, "AD") + alts_to_add[is.na(alts_to_add)] <- 0 + rownames(alts_to_add) <- make.names(rownames(alts_to_add)) + colnames(alts_to_add) <- paste0(sample_use, "_", colnames(alts_to_add)) + alts_to_add <- methods::as(alts_to_add, "sparseMatrix") + coverage_matrix_total <- cbind(coverage_matrix_total, alts_to_add) + + consensus_to_add <- VariantAnnotation::readGeno(vcf_data, "GT") + consensus_to_add <- matrix(sapply(consensus_to_add, char_to_numeric), nrow = nrow(consensus_to_add), dimnames = list(make.names(rownames(consensus_to_add)), paste0(sample_use, "_", colnames(consensus_to_add)))) + consensus_to_add <- methods::as(consensus_to_add, "sparseMatrix") + consensus_matrix_total <- cbind(consensus_matrix_total, consensus_to_add) + } + ref_matrix_total <- reads_matrix_total - coverage_matrix_total + rm(consensus_to_add, alts_to_add, depth_to_add) + + + # We can get the N allele as an alternative allele. This happened in a visium data set. + # We remove all variants with the N allele as alternative. + if(remove_N_alternative){ + ref_matrix_total_n <- substr(rownames(ref_matrix_total), start = nchar(rownames(ref_matrix_total)), stop = nchar(rownames(ref_matrix_total))) + ref_matrix_total_n <- ref_matrix_total_n != "N" + ref_matrix_total <- ref_matrix_total[ref_matrix_total_n,] + reads_matrix_total <- reads_matrix_total[ref_matrix_total_n,] + coverage_matrix_total <- coverage_matrix_total[ref_matrix_total_n,] + consensus_matrix_total <- consensus_matrix_total[ref_matrix_total_n,] + rm(ref_matrix_total_n) + } else{ + print("We keep all variants with an N as alternative allele. Please ensure that these variants are in your variant VCF file.") + } + + + if(verbose) print("We generate more accessible names.") + if(all(c("GENE", "AA", "CDS") %in% colnames(vcf_info))){ + new_names <- paste0(vcf_info$GENE, "_", vcf_info$AA, "_", vcf_info$CDS) + names(new_names) <- make.names(paste0(as.character(rep(GenomeInfoDb::seqnames(vcf)@values, GenomeInfoDb::seqnames(vcf)@lengths)), ".", BiocGenerics::start(vcf), "_", as.character(VariantAnnotation::ref(vcf)), ".", as.character(unlist(VariantAnnotation::alt(vcf))))) + new_names <- new_names[rownames(ref_matrix_total)] + } else{ + new_names <- rownames(vcf_info) + new_names <- gsub(":|\\/|\\?", "_", new_names) + names(new_names) <- make.names(paste0(as.character(rep(GenomeInfoDb::seqnames(vcf)@values, GenomeInfoDb::seqnames(vcf)@lengths)), ".", BiocGenerics::start(vcf), "_", as.character(VariantAnnotation::ref(vcf)), ".", as.character(unlist(VariantAnnotation::alt(vcf))))) + # new_names <- new_names[names(new_names) %in% rownames(ref_matrix_total)] + new_names <- new_names[rownames(ref_matrix_total)] + } + + + if(!is.null(min_reads)){ + if(verbose) print(paste0("We set read values below the threshold of ", min_reads, " to 0.")) + if(verbose) print("We then generate the consensus matrix again.") + ref_matrix_total@x[ref_matrix_total@x < min_reads] <- 0 + coverage_matrix_total@x[coverage_matrix_total@x < min_reads] <- 0 + + reference_construction <- ref_matrix_total + reference_construction@x[reference_construction@x > 0] <- 1 + + coverage_construction <- coverage_matrix_total + coverage_construction@x[coverage_construction@x > 0] <- 2 + + consensus_matrix_total <- reference_construction + coverage_construction + rm(reference_construction, coverage_construction) + } + + + # We check if number of rows of the matrices are the same as the length of the new names. + if(all(nrow(coverage_matrix_total) != length(new_names))){ + input_rows <- nrow(coverage_matrix_total) + new_names_length <- length(new_names) + stop(paste0("Error: you have ", input_rows, " variants in you matrix and ", new_names_length, " actual variant names.")) + } + + rownames(coverage_matrix_total) <- new_names + rownames(ref_matrix_total) <- new_names + rownames(consensus_matrix_total) <- new_names + + + if(verbose) print(paste0("We remove variants, that are not detected in at least ", min_cells, " cells.")) + keep_variants <- Matrix::rowSums(consensus_matrix_total >= 1) + keep_variants <- keep_variants >= min_cells + # If we only have one cell or one variant, we loose the matrix. + cell_ids <- colnames(consensus_matrix_total) + variant_names <- names(keep_variants[keep_variants]) + consensus_matrix_total <- consensus_matrix_total[keep_variants, ] + consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(consensus_matrix_total) <- cell_ids + rownames(consensus_matrix_total) <- variant_names + consensus_matrix_total <- methods::as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[keep_variants, ] + coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(coverage_matrix_total) <- cell_ids + rownames(coverage_matrix_total) <- variant_names + coverage_matrix_total <- methods::as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[keep_variants, ] + ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(ref_matrix_total) <- cell_ids + rownames(ref_matrix_total) <- variant_names + ref_matrix_total <- methods::as(ref_matrix_total, "dgCMatrix") + + + if(verbose) print("We remove cells that are always NoCall.") + consensus_test <- consensus_matrix_total > 0 + keep_cells <- Matrix::colSums(consensus_test) > 0 + # If we only have one cell or one variant, we loose the matrix. + cell_ids <- names(keep_cells[keep_cells]) + variant_names <- rownames(consensus_matrix_total) + consensus_matrix_total <- consensus_matrix_total[, keep_cells] + consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(consensus_matrix_total) <- cell_ids + rownames(consensus_matrix_total) <- variant_names + consensus_matrix_total <- methods::as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[, keep_cells] + coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(coverage_matrix_total) <- cell_ids + rownames(coverage_matrix_total) <- variant_names + coverage_matrix_total <- methods::as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[, keep_cells] + ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + colnames(ref_matrix_total) <- cell_ids + rownames(ref_matrix_total) <- variant_names + ref_matrix_total <- methods::as(ref_matrix_total, "dgCMatrix") + + + if(verbose) print(paste0(type_use, " Variants: ", nrow(consensus_matrix_total))) + if(verbose) print(paste0(type_use, " Cells: ", ncol(consensus_matrix_total))) + + rm(consensus_test, keep_variants, keep_cells) + gc() + + + if(verbose) print("We transform the sparse matrices to matrices, so we can calculate the fraction.") + coverage_matrix_total <- as.matrix(coverage_matrix_total) + ref_matrix_total <- as.matrix(ref_matrix_total) + consensus_matrix_total <- as.matrix(consensus_matrix_total) + reads_total <- coverage_matrix_total + ref_matrix_total + fraction_total <- coverage_matrix_total / reads_total + fraction_total[is.na(fraction_total)] <- 0 + gc() + + + # We check if the matrices are empty (0 cells, 0 variants). Then we simply return NULL. + dim_test <- dim(reads_total) + if(any(dim_test == 0)){ + if(verbose) print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) + if(verbose) print("Returning NULL.") + return(NULL) + } else { + if(verbose) print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.") + # We want an assay for the Consensus information and for the fraction. + # As meta data we add a data frame showing the cell id, the associated patient and the sample. + coverage_depth_per_cell <- Matrix::colMeans(reads_total) + coverage_depth_per_variant <- Matrix::rowMeans(reads_total) + meta_data <- data.frame(Cell = colnames(consensus_matrix_total), Type = type_use, AverageCoverage = coverage_depth_per_cell) + rownames(meta_data) <- meta_data$Cell + meta_row <- data.frame(VariantName = rownames(consensus_matrix_total), Depth = coverage_depth_per_variant) + rownames(meta_row) <- meta_row$VariantName + #se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = methods::as(consensus_matrix_total, "dgCMatrix"), fraction = methods::as(fraction_total, "dgCMatrix"), coverage = methods::as(reads_total, "dgCMatrix")), + # colData = meta_data) + se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = methods::as(consensus_matrix_total, "CsparseMatrix"), fraction = methods::as(fraction_total, "CsparseMatrix"), coverage = methods::as(reads_total, "CsparseMatrix"), + alts = methods::as(coverage_matrix_total, "CsparseMatrix"), refs = methods::as(ref_matrix_total, "CsparseMatrix")), + colData = meta_data, rowData = meta_row) + return(se_merged) + } +} + diff --git a/R/LoadingVarTrix_typewise.R b/R/LoadingVarTrix_typewise.R index 8b4da86..dd531ac 100644 --- a/R/LoadingVarTrix_typewise.R +++ b/R/LoadingVarTrix_typewise.R @@ -7,8 +7,10 @@ #'The input file is a specifically formated csv file with all the necessary information to run the analysis. #'Note that the source column in the input file needs to be one of the following: vartrix, mgaetk, mgatk. #'This is hard coded and case insensitive. -#' -#'@import Matrix SummarizedExperiment VariantAnnotation +#'@importFrom utils read.csv read.table +#'@importFrom VariantAnnotation readVcf info +#'@importFrom SummarizedExperiment SummarizedExperiment +#'@importFrom Matrix readMM #'@param samples_path Path to the input folder. Must include a barcodes file. #'@param samples_file Path to the csv file with the samples to be loaded. #'@param vcf_path Path to the VCF file with the variants. @@ -17,61 +19,53 @@ #'@param type_use The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT. #'@param min_reads The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL. #'@param min_cells The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2. +#'@param barcodes_path The path to the cell barcodes tsv. Default = NULL +#'@param verbose Should the function be verbose? Default = TRUE #'@export -LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, sample = NULL, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2){ - if(all(!is.null(samples_path), !is.null(barcodes_path), !is.null(sample), !is.null(snp_path))){ - print(paste0("Loading the data for sample ", sample, ".")) - #samples <- list.files(samples_path) - #samples <- grep(patient, samples, value = TRUE) - - #barcodes_files <- list.files(path = samples_path, pattern = "barcodes") - #barcodes_files <- unlist(lapply(paste0(samples_path, samples, "/"), list.files, pattern = "barcodes", full.names = TRUE)) - - - #samples_file <- data.frame(patient = patient, sample = samples, input_folder = samples_path, cells = barcodes_files) - samples_file <- data.frame(patient = patient, sample = sample, input_folder = samples_path, cells = barcodes_path) +LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_path = NULL, snp_path = NULL, vcf_path, patient, type_use = "scRNAseq_Somatic", min_reads = NULL, min_cells = 2, verbose = TRUE){ + if(all(!is.null(samples_path), !is.null(barcodes_path), !is.null(snp_path))){ + if(verbose) print(paste0("Loading the data for sample ", patient, ".")) + samples_file <- data.frame(patient = patient, sample = patient, input_path = samples_path, cells = barcodes_path) samples <- samples_file$sample } else{ - print(paste0("Loading the data for patient ", patient, ".")) - print("We read in the samples file.") - samples_file <- read.csv(samples_file, stringsAsFactors = FALSE) - - - print("We subset to the patient of interest.") + if(verbose) print(paste0("Loading the data for patient ", patient, ".")) + if(verbose) print("We read in the samples file.") + samples_file <- utils::read.csv(samples_file, stringsAsFactors = FALSE) + + if(verbose) print("We subset to the patient of interest.") samples_file <- samples_file[grep("vartrix", samples_file$source, ignore.case = TRUE),] samples_file <- samples_file[samples_file$patient == patient,] samples_file <- samples_file[samples_file$type == type_use,] - - - print("We get the different samples.") + + if(verbose) print("We get the different samples.") samples <- samples_file$sample } - print("We load the SNV files.") + if(verbose) print("We load the SNV files.") if(!is.null(snp_path)){ path_snps <- snp_path } else{ - path_snps <- paste0(samples_file$input_folder, "/SNV.loci.txt") + path_snps <- paste0(samples_file$input_path, "/SNV.loci.txt") } - print("We read the variants.") - snps_list <- lapply(path_snps, read.table, header = FALSE) + if(verbose) print("We read the variants.") + snps_list <- lapply(path_snps, utils::read.table, header = FALSE) names(snps_list) <- samples - print("We read in the cell barcodes output by CellRanger as a list.") - barcodes <- lapply(samples_file$cells, read.table) + if(verbose) print("We read in the cell barcodes output by CellRanger as a list.") + barcodes <- lapply(samples_file$cells, utils::read.table) names(barcodes) <- samples - print("We read in the vcf file.") - vcf <- readVcf(vcf_path) - vcf_info <- info(vcf) + if(verbose) print("We read in the vcf file.") + vcf <- VariantAnnotation::readVcf(vcf_path) + vcf_info <- VariantAnnotation::info(vcf) - print("We generate more accessible names.") + if(verbose) print("We generate more accessible names.") if(all(c("GENE", "AA", "CDS") %in% colnames(vcf_info))){ new_names <- paste0(vcf_info$GENE, "_", vcf_info$AA, "_", vcf_info$CDS) } else{ @@ -79,14 +73,14 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ new_names <- gsub(":|\\/|\\?", "_", new_names) } - print("We read in the different sparse genotype matrices as a list.") - print("We have a slot per type of input data.") + if(verbose) print("We read in the different sparse genotype matrices as a list.") + if(verbose) print("We have a slot per type of input data.") coverage_matrices <- list() ref_matrices <- list() consensus_matrices <- list() for(i in 1:nrow(samples_file)){ - print(paste0("Loading sample ", i, " of ", nrow(samples_file))) - input_folder_use <- samples_file$input_folder[i] + if(verbose) print(paste0("Loading sample ", i, " of ", nrow(samples_file))) + input_folder_use <- samples_file$input_path[i] sample_use <- samples_file$sample[i] # The cell barcodes and variants. @@ -109,20 +103,20 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ } - print("We generate a large data.frame of all the snv matrices.") + if(verbose) print("We generate a large data.frame of all the snv matrices.") coverage_matrix_total <- do.call("cbind", coverage_matrices) ref_matrix_total <- do.call("cbind", ref_matrices) consensus_matrix_total <- do.call("cbind", consensus_matrices) - print("We remove the matrix lists.") + if(verbose) print("We remove the matrix lists.") rm(coverage_matrices, ref_matrices, consensus_matrices) gc() if(!is.null(min_reads)){ - print(paste0("We set read values below the threshold of ", min_reads, " to 0.")) - print("We then generate the consensus matrix again.") + if(verbose) print(paste0("We set read values below the threshold of ", min_reads, " to 0.")) + if(verbose) print("We then generate the consensus matrix again.") ref_matrix_total@x[ref_matrix_total@x < min_reads] <- 0 coverage_matrix_total@x[coverage_matrix_total@x < min_reads] <- 0 @@ -149,146 +143,77 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ rownames(consensus_matrix_total) <- new_names - print(paste0("We remove variants, that are not detected in at least ", min_cells, " cells.")) + if(verbose) print(paste0("We remove variants, that are not detected in at least ", min_cells, " cells.")) keep_variants <- rowSums(consensus_matrix_total >= 1) keep_variants <- keep_variants >= min_cells # If we only have one cell or one variant, we loose the matrix. - cell_ids <- colnames(consensus_matrix_total) - variant_names <- names(keep_variants[keep_variants]) - consensus_matrix_total <- consensus_matrix_total[keep_variants, ] - consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(consensus_matrix_total) <- cell_ids - rownames(consensus_matrix_total) <- variant_names - consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") - coverage_matrix_total <- coverage_matrix_total[keep_variants, ] - coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(coverage_matrix_total) <- cell_ids - rownames(coverage_matrix_total) <- variant_names - coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") - ref_matrix_total <- ref_matrix_total[keep_variants, ] - ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(ref_matrix_total) <- cell_ids - rownames(ref_matrix_total) <- variant_names - ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") - -# if(sum(keep_variants) > 1){ -# consensus_matrix_total <- consensus_matrix_total[keep_variants,] -# coverage_matrix_total <- coverage_matrix_total[keep_variants,] -# ref_matrix_total <- ref_matrix_total[keep_variants,] -# } else if(sum(keep_variants) == 1){ -# cell_ids <- colnames(consensus_matrix_total) -# variant_names <- names(keep_variants[keep_variants]) -# consensus_matrix_total <- consensus_matrix_total[keep_variants,] -# consensus_matrix_total <- matrix(consensus_matrix_total, nrow = 1, ncol = length(consensus_matrix_total)) -# rownames(consensus_matrix_total) <- variant_names -# colnames(consensus_matrix_total) <- cell_ids -# consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") -# coverage_matrix_total <- coverage_matrix_total[keep_variants,] -# coverage_matrix_total <- matrix(coverage_matrix_total, nrow = 1, ncol = length(coverage_matrix_total)) -# rownames(coverage_matrix_total) <- variant_names -# colnames(coverage_matrix_total) <- cell_ids -# coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") -# ref_matrix_total <- ref_matrix_total[keep_variants,] -# ref_matrix_total <- matrix(ref_matrix_total, nrow = 1, ncol = length(ref_matrix_total)) -# rownames(ref_matrix_total) <- variant_names -# colnames(ref_matrix_total) <- cell_ids -# ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") -# rm(cell_ids, variant_names) -# } - - - print("We remove cells that are always NoCall.") + #cell_ids <- colnames(consensus_matrix_total) + #variant_names <- names(keep_variants[keep_variants]) + consensus_matrix_total <- consensus_matrix_total[keep_variants, , drop = FALSE] + #consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(consensus_matrix_total) <- cell_ids + #rownames(consensus_matrix_total) <- variant_names + #consensus_matrix_total <- methods::as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[keep_variants, , drop = FALSE] + #coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(coverage_matrix_total) <- cell_ids + #rownames(coverage_matrix_total) <- variant_names + #coverage_matrix_total <- methods::as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[keep_variants, , drop = FALSE] + #ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(ref_matrix_total) <- cell_ids + #rownames(ref_matrix_total) <- variant_names + #ref_matrix_total <- methods::as(ref_matrix_total, "dgCMatrix") + + + if(verbose) print("We remove cells that are always NoCall.") consensus_test <- consensus_matrix_total > 0 keep_cells <- colSums(consensus_test) > 0 # If we only have one cell or one variant, we loose the matrix. - cell_ids <- names(keep_cells[keep_cells]) - variant_names <- rownames(consensus_matrix_total) - consensus_matrix_total <- consensus_matrix_total[, keep_cells] - consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(consensus_matrix_total) <- cell_ids - rownames(consensus_matrix_total) <- variant_names - consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") - coverage_matrix_total <- coverage_matrix_total[, keep_cells] - coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(coverage_matrix_total) <- cell_ids - rownames(coverage_matrix_total) <- variant_names - coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") - ref_matrix_total <- ref_matrix_total[, keep_cells] - ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) - colnames(ref_matrix_total) <- cell_ids - rownames(ref_matrix_total) <- variant_names - ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") - -# if(sum(keep_cells) > 1){ -# consensus_matrix_total <- consensus_matrix_total[, keep_cells] -# coverage_matrix_total <- coverage_matrix_total[, keep_cells] -# ref_matrix_total <- ref_matrix_total[, keep_cells] -# } else if(sum(keep_cells) == 1){ -# cell_ids <- names(keep_cells[keep_cells]) -# variant_names <- rownames(consensus_matrix_total) -# consensus_matrix_total <- consensus_matrix_total[,keep_cells] -# consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(consensus_matrix_total) <- variant_names -# colnames(consensus_matrix_total) <- cell_ids -# consensus_matrix_total <- as(consensus_matrix_total, "dgCMatrix") -# coverage_matrix_total <- coverage_matrix_total[,keep_cells] -# coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(coverage_matrix_total) <- variant_names -# colnames(coverage_matrix_total) <- cell_ids -# coverage_matrix_total <- as(coverage_matrix_total, "dgCMatrix") -# ref_matrix_total <- ref_matrix_total[, keep_cells] -# ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = 1) -# rownames(ref_matrix_total) <- variant_names -# colnames(ref_matrix_total) <- cell_ids -# ref_matrix_total <- as(ref_matrix_total, "dgCMatrix") -# rm(cell_ids, variant_names) -# } - - print(paste0(type_use, " Variants: ", nrow(consensus_matrix_total))) - print(paste0(type_use, " Cells: ", ncol(consensus_matrix_total))) + #cell_ids <- names(keep_cells[keep_cells]) + #variant_names <- rownames(consensus_matrix_total) + consensus_matrix_total <- consensus_matrix_total[, keep_cells, drop = FALSE] + #consensus_matrix_total <- matrix(consensus_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(consensus_matrix_total) <- cell_ids + #rownames(consensus_matrix_total) <- variant_names + #consensus_matrix_total <- methods::as(consensus_matrix_total, "dgCMatrix") + coverage_matrix_total <- coverage_matrix_total[, keep_cells, drop = FALSE] + #coverage_matrix_total <- matrix(coverage_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(coverage_matrix_total) <- cell_ids + #rownames(coverage_matrix_total) <- variant_names + #coverage_matrix_total <- methods::as(coverage_matrix_total, "dgCMatrix") + ref_matrix_total <- ref_matrix_total[, keep_cells, drop = FALSE] + #ref_matrix_total <- matrix(ref_matrix_total, nrow = length(variant_names), ncol = length(cell_ids)) + #colnames(ref_matrix_total) <- cell_ids + #rownames(ref_matrix_total) <- variant_names + #ref_matrix_total <- methods::as(ref_matrix_total, "dgCMatrix") + + + if(verbose) print(paste0(type_use, " Variants: ", nrow(consensus_matrix_total))) + if(verbose) print(paste0(type_use, " Cells: ", ncol(consensus_matrix_total))) rm(consensus_test, keep_variants, keep_cells) gc() - print("We transform the sparse matrices to matrices, so we can calculate the fraction.") - # For test purposes - #coverage_matrix_total_ori <- coverage_matrix_total - #ref_matrix_total_ori <- ref_matrix_total - #consensus_matrix_total_ori <- consensus_matrix_total - #coverage_matrix_total <- coverage_matrix_total_ori - #ref_matrix_total <- ref_matrix_total_ori - #consensus_matrix_total <- consensus_matrix_total_ori - - #coverage_matrix_total <- coverage_matrix_total[1:5000,1:5000] - #ref_matrix_total <- ref_matrix_total[1:5000,1:5000] - #consensus_matrix_total <- coverage_matrix_total[1:5000,1:5000] - #colnames(coverage_matrix_total) <- make.names(colnames(coverage_matrix_total)) - #rownames(coverage_matrix_total) <- make.names(rownames(coverage_matrix_total)) - #colnames(ref_matrix_total) <- make.names(colnames(ref_matrix_total)) - #rownames(ref_matrix_total) <- make.names(rownames(ref_matrix_total)) - #colnames(consensus_matrix_total) <- make.names(colnames(consensus_matrix_total)) - #rownames(consensus_matrix_total) <- make.names(rownames(consensus_matrix_total)) - coverage_matrix_total <- as.matrix(coverage_matrix_total) - ref_matrix_total <- as.matrix(ref_matrix_total) - consensus_matrix_total <- as.matrix(consensus_matrix_total) - reads_total <- coverage_matrix_total + ref_matrix_total - fraction_total <- coverage_matrix_total / reads_total + if(verbose) print("We transform the sparse matrices to matrices, so we can calculate the fraction.") + coverage_matrix_total <- as.matrix(coverage_matrix_total) + ref_matrix_total <- as.matrix(ref_matrix_total) + consensus_matrix_total <- as.matrix(consensus_matrix_total) + reads_total <- coverage_matrix_total + ref_matrix_total + fraction_total <- coverage_matrix_total / reads_total fraction_total[is.na(fraction_total)] <- 0 - #fraction_total <- sdiv(X = coverage_matrix_total, Y = reads_total, - # names = dimnames(coverage_matrix_total)) - #rm(coverage_matrix_total, ref_matrix_total) gc() # We check if the matrices are empty (0 cells, 0 variants). Then we simply return NULL. dim_test <- dim(reads_total) if(any(dim_test == 0)){ - print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) - print("Returning NULL.") + if(verbose) print(paste0("The filtering left ", dim_test[1], " variants and ", dim_test[2], "cells.")) + if(verbose) print("Returning NULL.") return(NULL) } else { - print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.") + if(verbose) print("We generate a SummarizedExperiment object containing the fraction and the consensus matrices.") # We want an assay for the Consensus information and for the fraction. # As meta data we add a data frame showing the cell id, the associated patient and the sample. coverage_depth_per_cell <- colMeans(reads_total) @@ -297,11 +222,9 @@ LoadingVarTrix_typewise <- function(samples_file, samples_path = NULL, barcodes_ rownames(meta_data) <- meta_data$Cell meta_row <- data.frame(VariantName = rownames(consensus_matrix_total), Depth = coverage_depth_per_variant) rownames(meta_row) <- meta_row$VariantName - #se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "dgCMatrix"), fraction = as(fraction_total, "dgCMatrix"), coverage = as(reads_total, "dgCMatrix")), - # colData = meta_data) - se_merged <- SummarizedExperiment(assays = list(consensus = as(consensus_matrix_total, "CsparseMatrix"), fraction = as(fraction_total, "CsparseMatrix"), coverage = as(reads_total, "CsparseMatrix"), - alts = as(coverage_matrix_total, "CsparseMatrix"), refs = as(ref_matrix_total, "CsparseMatrix")), - colData = meta_data, rowData = meta_row) + se_merged <- SummarizedExperiment::SummarizedExperiment(assays = list(consensus = methods::as(consensus_matrix_total, "CsparseMatrix"), fraction = methods::as(fraction_total, "CsparseMatrix"), coverage = methods::as(reads_total, "CsparseMatrix"), + alts = methods::as(coverage_matrix_total, "CsparseMatrix"), refs = methods::as(ref_matrix_total, "CsparseMatrix")), + colData = meta_data, rowData = meta_row) return(se_merged) } } diff --git a/R/Merging_SE_list.R b/R/Merging_SE_list.R index 2591dc4..94d78c9 100644 --- a/R/Merging_SE_list.R +++ b/R/Merging_SE_list.R @@ -1,7 +1,6 @@ #'Merging list of SummarizedExperiment objects. #'@description #'This function is a wrapper for do.all("cbind", se). -#' #'@import BiocGenerics #'@param se SummarizedExperiment object #'@export diff --git a/R/RowWiseSplit.R b/R/RowWiseSplit.R index bac1734..8114d6a 100644 --- a/R/RowWiseSplit.R +++ b/R/RowWiseSplit.R @@ -3,14 +3,15 @@ #'Performing the correlation or Fisher test association for a SummarizedExperiment object requires extreme amounts of memory. #'To reduce the amount of memory necessary, we instead get the individual rows from the consensus assay. #'We can then remove the NoCalls (no reads) from the individual vectors, further reducing the amount of memory needed. -#'@import Matrix SummarizedExperiment parallel +#'@importFrom parallel mclapply +#'@importFrom SummarizedExperiment assays #'@param se SummarizedExperiment object. #'@param n_cores Number of cores to use. #'@param remove_nocalls Do you want to remove NoCall cells? #'@export RowWiseSplit <- function(se, n_cores = 1, remove_nocalls = TRUE){ - consensus <- assays(se)$consensus - consensus_list <- mclapply(rownames(se), SeparatingMatrixToList, total_matrix = consensus, remove_nocalls = remove_nocalls, mc.cores = n_cores) + consensus <- SummarizedExperiment::assays(se)$consensus + consensus_list <- parallel::mclapply(rownames(se), SeparatingMatrixToList, total_matrix = consensus, remove_nocalls = remove_nocalls, mc.cores = n_cores) names(consensus_list) <- rownames(se) return(consensus_list) } diff --git a/R/SeparatingMatrixToList.R b/R/SeparatingMatrixToList.R index 00970a6..3bb7ff1 100644 --- a/R/SeparatingMatrixToList.R +++ b/R/SeparatingMatrixToList.R @@ -4,13 +4,14 @@ #'Each variant is an entry in the list. #'NoCalls (cells with no reads covering a variant) can be removed. #'This function gets called by RowWiseSplit in return. +#'@importFrom stats na.omit #'@param row_use The row the separate. #'@param total_matrix The matrix to be split. #'@param remove_nocalls Do you want to remove NoCall cells? #'@export SeparatingMatrixToList <- function(row_use, total_matrix, remove_nocalls = TRUE){ selected_row <- total_matrix[row_use,] - selected_row <- na.omit(selected_row) + selected_row <- stats::na.omit(selected_row) if(remove_nocalls == TRUE){ # We remove the NoCall cells. diff --git a/R/SetVariantInfo.R b/R/SetVariantInfo.R index 584ea06..53aefa7 100644 --- a/R/SetVariantInfo.R +++ b/R/SetVariantInfo.R @@ -2,7 +2,9 @@ #'@description #'We add the genotyping information for a set of variants to a Seurat object. #'The function returns a matrix with the values from the specified assay. -#'@import SummarizedExperiment Seurat +#'@importFrom SummarizedExperiment assays +#'@importFrom Matrix t +#'@importFrom Seurat AddMetaData #'@param SE SummarizedExperiment object. #'@param seurat_object The Seurat object. #'@param information The assay with the desired information. Default: consensus @@ -28,7 +30,7 @@ SetVariantInfo <- function(SE, seurat_object, information = "consensus", variant if(!assay_check){ stop("The assay you wants is not present in the object.") } - res <- t(assays(SE)[[information]][variants, , drop = FALSE]) + res <- t(SummarizedExperiment::assays(SE)[[information]][variants, , drop = FALSE]) # We check if all the cells are actually in the Seurat object. # If not, we only add the information for the ones present. # We execute an error function if there are zero cells present. diff --git a/R/VariantBurden.R b/R/VariantBurden.R index 208dab1..5fc840b 100644 --- a/R/VariantBurden.R +++ b/R/VariantBurden.R @@ -2,11 +2,12 @@ #'@description #'Calculate the variant burden per cell. #'We simply sum up the MAF values per cell. -#'@import Matrix SummarizedExperiment +#'@importFrom SummarizedExperiment assays colData +#'@importFrom Matrix colSums #'@param se SummarizedExperiment object #'@export VariantBurden <- function(se){ - burden <- colSums(assays(se)[["fraction"]]) - colData(se)[,"Burden"] <- burden + burden <- Matrix::colSums(SummarizedExperiment::assays(se)[["fraction"]]) + SummarizedExperiment::colData(se)[,"Burden"] <- burden return(se) } diff --git a/R/VariantCloneSizeThresholding.R b/R/VariantCloneSizeThresholding.R index f1a4c24..3c81e52 100644 --- a/R/VariantCloneSizeThresholding.R +++ b/R/VariantCloneSizeThresholding.R @@ -2,41 +2,32 @@ #'@description #'We get variants of interest using a clone size thresholding. #'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import dplyr Matrix SummarizedExperiment tidyverse +#'@importFrom SummarizedExperiment assays #'@param se SummarizedExperiment object. #'@param min_coverage Minimum coverage a variant needs to have. #'@param fraction_negative_cells The fraction of negative cells needed. #'@param min_clone_size minimum number of cells. #'@param vaf_threshold Variant Allele Threshold. Cells above this threshold are considered mutated. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantCloneSizeThresholding <- function(se, min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, vaf_threshold = 0.5){ - # This function is adapted from the Peter van Galen. - print("Get the mean allele frequency and coverage.") - mean_af <- rowMeans(assays(se)[["fraction"]], na.rm = TRUE) - mean_cov <- rowMeans(assays(se)[["coverage"]], na.rm = TRUE) +VariantCloneSizeThresholding <- function(se, min_coverage = 2, fraction_negative_cells = 0.9, min_clone_size = 10, vaf_threshold = 0.5, verbose = TRUE){ + if(verbose) print("Get the mean allele frequency and coverage.") + mean_af <- rowMeans(SummarizedExperiment::assays(se)[["fraction"]], na.rm = TRUE) + mean_cov <- rowMeans(SummarizedExperiment::assays(se)[["coverage"]], na.rm = TRUE) - print("Collect all information in a tibble") - #vars_tib <- as_tibble(do.call(cbind, c(list(mean_af), list(mean_cov))), rownames = "var") + if(verbose) print("Collect all information in a tibble") vars <- do.call(cbind, c(list(mean_af), list(mean_cov))) - #colnames(vars_tib)[2] <- "mean_af" - #colnames(vars_tib)[3] <- "mean_cov" colnames(vars) <- c("mean_af", "mean_cov") - print("We add the number of cells that exceed the VAF thresholds.") - #vars_tib <- vars_tib %>% - # mutate(n0 = apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE))) %>% - # mutate(VAF_threshold = apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE))) - n0 <- apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) - VAF_threshold <- apply(assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) + if(verbose) print("We add the number of cells that exceed the VAF thresholds.") + n0 <- apply(SummarizedExperiment::assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) + VAF_threshold <- apply(SummarizedExperiment::assays(se)[["fraction"]], 1, function(x) sum(x > vaf_threshold, na.rm = TRUE)) vars <- cbind(vars, n0, VAF_threshold) - print("Thresholding using the clone size approach.") - #voi_ch <- subset(vars_tib, mean_cov > min_coverage & - # n0 > ceiling(fraction_negative_cells * ncol(se)) & - # VAF_threshold > min_clone_size)$var - voi_ch <- subset(vars, mean_cov > min_coverage & - n0 > ceiling(fraction_negative_cells * ncol(se)) & - VAF_threshold > min_clone_size) + if(verbose) print("Thresholding using the clone size approach.") + voi_ch <- subset(vars, vars$mean_cov > min_coverage & + vars$n0 > ceiling(fraction_negative_cells * ncol(se)) & + vars$VAF_threshold > min_clone_size) voi_ch <- rownames(voi_ch) return(voi_ch) } diff --git a/R/VariantCorrelationHeatmap.R b/R/VariantCorrelationHeatmap.R index 7892988..f278d52 100644 --- a/R/VariantCorrelationHeatmap.R +++ b/R/VariantCorrelationHeatmap.R @@ -1,7 +1,13 @@ #'VariantCorrelationHeatmap #'@description #'We generate a heatmap showing the correlation of somatic variants with the MT variants. -#'@import circlize ComplexHeatmap ggplot2 Matrix parallel rcompanion tidyr grid +#'Packages I want to remove. I cannot see where they are used. +#'ggplot2 parallel rcompanion tidyr +#'@importFrom circlize colorRamp2 +#'@importFrom ComplexHeatmap columnAnnotation rowAnnotation Heatmap draw +#'@importFrom grid gpar +#'@importFrom stats na.omit +#'@importFrom grDevices png dev.off #'@param correlation_results Data.frame with the correlation results. #'@param output_path Path to the output folder. #'@param patient The patient for this heatmap. @@ -10,22 +16,23 @@ #'@param width_use Width of the heatmap in px. #'@param height_use Height of the heatmap in px. #'@param padding_use Space around the heatmap in mm. If this is to low, the variant names might be cut off. +#'@param verbose Should the function be verbose? Default = TRUE #'@export VariantCorrelationHeatmap <- function(correlation_results, output_path = NULL, patient, min_alt_cells = 5, min_correlation = 0.5, - width_use = 2000, height_use = 2000, padding_use = c(165,165,2,2)){ + width_use = 2000, height_use = 2000, padding_use = c(165,165,2,2), verbose = TRUE){ correlation_results$P_adj_logged <- -log10(correlation_results$P_adj) - correlation_results <- subset(correlation_results, P_adj_logged > -log10(0.05)) - correlation_results <- subset(correlation_results, Cells_1_Alt >= min_alt_cells & Cells_2_Alt >= min_alt_cells) - correlation_results <- subset(correlation_results, Corr > min_correlation) + correlation_results <- subset(correlation_results, correlation_results$P_adj_logged > -log10(0.05)) + correlation_results <- subset(correlation_results, correlation_results$Cells_1_Alt >= min_alt_cells & correlation_results$Cells_2_Alt >= min_alt_cells) + correlation_results <- subset(correlation_results, correlation_results$Corr > min_correlation) - print("We get the unique variants.") + if(verbose) print("We get the unique variants.") somatic_uniques <- unique(correlation_results$Variant1) mt_uniques <- unique(correlation_results$Variant2) - print("Getting the maximum P value.") - pvalue_max <- as.numeric(na.omit(correlation_results$P_adj_logged)) + if(verbose) print("Getting the maximum P value.") + pvalue_max <- as.numeric(stats::na.omit(correlation_results$P_adj_logged)) if(length(pvalue_max) > 1){ pvalue_max <- pvalue_max[pvalue_max != Inf] if(length(pvalue_max) >= 1){ @@ -38,51 +45,51 @@ VariantCorrelationHeatmap <- function(correlation_results, output_path = NULL, p pvalue_max <- max(pvalue_max, 100) } correlation_results$P_adj_logged[correlation_results$P_adj_logged == Inf] <- pvalue_max - col_fun <- colorRamp2(c(0,pvalue_max), c("white", "red")) + col_fun <- circlize::colorRamp2(c(0,pvalue_max), c("white", "red")) - print("We set insignificant P values to NA.") + if(verbose) print("We set insignificant P values to NA.") correlation_results$P_adj_logged <- ifelse(correlation_results$P_adj_logged > -log10(0.05), correlation_results$P_adj_logged, NA) - print("We generate a matrix with the adjusted P values.") + if(verbose) print("We generate a matrix with the adjusted P values.") p_values <- matrix(NA, nrow = length(somatic_uniques), ncol = length(mt_uniques)) rownames(p_values) <- somatic_uniques colnames(p_values) <- mt_uniques for(i in 1:length(somatic_uniques)){ - correlation_results_subset <- subset(correlation_results, Variant1 == somatic_uniques[i]) + correlation_results_subset <- subset(correlation_results, correlation_results$Variant1 == somatic_uniques[i]) p_values_use <- correlation_results_subset$P_adj_logged names(p_values_use) <- correlation_results_subset$Variant2 p_values[somatic_uniques[i],names(p_values_use)] <- p_values_use } - print("Setting the column and row annotations for the heat map.") + if(verbose) print("Setting the column and row annotations for the heat map.") annotation_top <- ComplexHeatmap::columnAnnotation(Mutations = mt_uniques, show_legend = FALSE, show_annotation_name = FALSE) annotation_left <- ComplexHeatmap::rowAnnotation(Mutations = somatic_uniques, show_legend = FALSE, show_annotation_name = FALSE) - print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") + if(verbose) print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") if(all(dim(p_values) > 0)){ - print("Generating the actual heat map.") - p1 <- Heatmap(p_values, name = "-log10(P)", - column_title = paste0("Patient ", patient, "\nLogged adj. P values between the mutations"), - row_title = "", show_row_names = TRUE, show_column_names = TRUE, - col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, - column_title_gp = grid::gpar(fontsize = 40), row_title_gp = grid::gpar(fontsize = 40), - column_names_gp = grid::gpar(fontsize = 40), row_names_gp = grid::gpar(fontsize = 40), - column_names_rot = 45, - row_names_side = "left", - heatmap_legend_param = list(labels_gp = gpar(fontsize = 40), title_gp = gpar(fontsize = 40, fontface = "bold")), - cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) + if(verbose) print("Generating the actual heat map.") + p1 <- ComplexHeatmap::Heatmap(p_values, name = "-log10(P)", + column_title = paste0("Patient ", patient, "\nLogged adj. P values between the mutations"), + row_title = "", show_row_names = TRUE, show_column_names = TRUE, + col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, + column_title_gp = grid::gpar(fontsize = 40), row_title_gp = grid::gpar(fontsize = 40), + column_names_gp = grid::gpar(fontsize = 40), row_names_gp = grid::gpar(fontsize = 40), + column_names_rot = 45, + row_names_side = "left", + heatmap_legend_param = list(labels_gp = grid::gpar(fontsize = 40), title_gp = grid::gpar(fontsize = 40, fontface = "bold")), + cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) if(!is.null(output_path)){ - print("Saving the png.") - png(paste0(output_path, "Correlation_Pvalue_", patient, ".png"), width = width_use, height = height_use, units = "px", type = "cairo", antialias = "none") - draw(p1, padding = unit(padding_use, "mm")) - dev.off() + if(verbose) print("Saving the png.") + grDevices::png(paste0(output_path, "Correlation_Pvalue_", patient, ".png"), width = width_use, height = height_use, units = "px", type = "cairo", antialias = "none") + ComplexHeatmap::draw(p1, padding = unit(padding_use, "mm")) + grDevices::dev.off() } else{ return(p1) } diff --git a/R/VariantFisherTestHeatmap.R b/R/VariantFisherTestHeatmap.R index 3efe177..131e373 100644 --- a/R/VariantFisherTestHeatmap.R +++ b/R/VariantFisherTestHeatmap.R @@ -1,26 +1,31 @@ #'VariantFisherTestHeatmap #'@description #'We generate a heatmap showing the Fisher test of somatic variants with the MT variants. -#'@import circlize ComplexHeatmap ggplot2 Matrix parallel rcompanion tidyr grid +#'Packages I want to remove. +#'@importFrom ComplexHeatmap columnAnnotation rowAnnotation Heatmap +#'@importFrom circlize colorRamp2 +#'@importFrom grid gpar +#'@importFrom stats na.omit #'@param fisher_results Data.frame with the correlation results. #'@param patient The patient for this heatmap. #'@param min_alt_cells Minimum number of mutated cells needed, otherwise an association will not be plotted. #'@param min_oddsratio Minimum correlation needed. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, min_oddsratio = 1){ +VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, min_oddsratio = 1, verbose = TRUE){ fisher_results$P_adj_logged <- -log10(fisher_results$P_adj) - fisher_results <- subset(fisher_results, P_adj_logged > -log10(0.05)) - fisher_results <- subset(fisher_results, Cells_Alt_1_2 >= min_alt_cells) - fisher_results <- subset(fisher_results, OddsRatio > min_oddsratio) - - - print("We get the unique variants.") + fisher_results <- subset(fisher_results, fisher_results$P_adj_logged > -log10(0.05)) + fisher_results <- subset(fisher_results, fisher_results$Cells_Alt_1_2 >= min_alt_cells) + fisher_results <- subset(fisher_results, fisher_results$OddsRatio > min_oddsratio) + + + if(verbose) print("We get the unique variants.") somatic_uniques <- unique(fisher_results$Variant1) mt_uniques <- unique(fisher_results$Variant2) - - - print("Getting the maximum P value.") - pvalue_max <- as.numeric(na.omit(fisher_results$P_adj_logged)) + + + if(verbose) print("Getting the maximum P value.") + pvalue_max <- as.numeric(stats::na.omit(fisher_results$P_adj_logged)) if(length(pvalue_max) > 1){ pvalue_max <- pvalue_max[pvalue_max != Inf] if(length(pvalue_max) >= 1){ @@ -33,41 +38,41 @@ VariantFisherTestHeatmap <- function(fisher_results, patient, min_alt_cells = 5, pvalue_max <- max(pvalue_max, 100) } fisher_results$P_adj_logged[fisher_results$P_adj_logged == Inf] <- pvalue_max - col_fun <- colorRamp2(c(0,pvalue_max), c("white", "red")) + col_fun <- circlize::colorRamp2(c(0,pvalue_max), c("white", "red")) - print("We set insignificant P values to NA.") + if(verbose) print("We set insignificant P values to NA.") fisher_results$P_adj_logged <- ifelse(fisher_results$P_adj_logged > -log10(0.05), fisher_results$P_adj_logged, NA) - print("We generate a matrix with the adjusted P values.") + if(verbose) print("We generate a matrix with the adjusted P values.") p_values <- matrix(NA, nrow = length(somatic_uniques), ncol = length(mt_uniques)) rownames(p_values) <- somatic_uniques colnames(p_values) <- mt_uniques for(i in 1:length(somatic_uniques)){ - fisher_results_subset <- subset(fisher_results, Variant1 == somatic_uniques[i]) + fisher_results_subset <- subset(fisher_results, fisher_results$Variant1 == somatic_uniques[i]) p_values_use <- fisher_results_subset$P_adj_logged names(p_values_use) <- fisher_results_subset$Variant2 - p_values[somatic_uniques[i],names(p_values_use)] <- p_values_use + p_values[somatic_uniques[i], names(p_values_use)] <- p_values_use } - print("Setting the column and row annotations for the heat map.") + if(verbose) print("Setting the column and row annotations for the heat map.") annotation_top <- ComplexHeatmap::columnAnnotation(Mutations = mt_uniques, show_legend = FALSE, show_annotation_name = FALSE) annotation_left <- ComplexHeatmap::rowAnnotation(Mutations = somatic_uniques, show_legend = FALSE, show_annotation_name = FALSE) - print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") + if(verbose) print("Since we can have no results left after the subsetting, we check if the P value matrix has values.") if(all(dim(p_values) > 0)){ - print("Generating the actual heat map.") - p <- Heatmap(p_values, name = "-log10(P)", - column_title = paste0("Patient ", patient, "\nLogged adj. P values between the variants"), - row_title = "", show_row_names = TRUE, show_column_names = TRUE, - col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, - column_names_rot = 45, row_names_side = "left", - column_names_gp = grid::gpar(hjust = 1), - cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) + if(verbose) print("Generating the actual heat map.") + p <- ComplexHeatmap::Heatmap(p_values, name = "-log10(P)", + column_title = paste0("Patient ", patient, "\nLogged adj. P values between the variants"), + row_title = "", show_row_names = TRUE, show_column_names = TRUE, + col = col_fun, left_annotation = annotation_left, top_annotation = annotation_top, + column_names_rot = 45, row_names_side = "left", + column_names_gp = grid::gpar(hjust = 1), + cluster_columns = FALSE, cluster_rows = FALSE, use_raster = FALSE, show_row_dend = FALSE, show_column_dend = FALSE) } return(p) } diff --git a/R/VariantQuantileThresholding.R b/R/VariantQuantileThresholding.R index 16b1dfd..a0f7314 100755 --- a/R/VariantQuantileThresholding.R +++ b/R/VariantQuantileThresholding.R @@ -4,7 +4,8 @@ #'If you use top_cells and top_VAF, you have to only supply one quantil value (quantiles = 0.9, thresholds = 0). #'This function is adapted from the Peter van Galen. #'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import dplyr SummarizedExperiment +#'@importFrom SummarizedExperiment assays colData rowData +#'@importFrom stats quantile #'@param SE SummarizedExperiment object. #'@param min_coverage Minimum coverage needed. #'@param quantiles The lower and upper quantile you want to use. @@ -17,70 +18,78 @@ #'@param group1 The first group of interest. #'@param group2 The second group of interest. #'@param group_factor How much higher has the mean allele frequency to be in group 1 when compared to group 2? +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = 30, mean_allele_frequency = 0, - group_of_interest = NULL, group1 = NULL, group2 = NULL, group_factor = NULL){ - print("Get the mean allele frequency and coverage.") - mean_af <- rowMeans(assays(SE)[["fraction"]], na.rm = TRUE) - mean_cov <- rowMeans(assays(SE)[["coverage"]], na.rm = TRUE) +VariantQuantileThresholding <- function(SE, min_coverage = 2, quantiles = c(0.1, 0.9), thresholds = c(0.1, 0.9), top_cells = NULL, top_VAF = NULL, min_quality = NULL, mean_allele_frequency = 0, + group_of_interest = NULL, group1 = NULL, group2 = NULL, group_factor = NULL, verbose = TRUE){ + if(verbose) print("Get the mean allele frequency and coverage.") + mean_af <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]], na.rm = TRUE) + mean_cov <- rowMeans(SummarizedExperiment::assays(SE)[["coverage"]], na.rm = TRUE) + if(all(is.null(min_quality), is.numeric(min_quality))) stop("Error: Your minimum quality is not either NULL or a numeric.") + if(all(is.null(mean_allele_frequency), is.numeric(mean_allele_frequency))) stop("Error: Your mean allele frequency is not either NULL or a numeric.") + if(all(!is.null(group_of_interest), !is.null(group1), !is.null(group2))){ - if(!group_of_interest %in% colnames(colData(SE))) stop("Error: Your group_of_interest is not in the colData.") - if(!group1 %in% colData(SE)[,group_of_interest]) stop("Error: Your group1 is not in the group_of_interest.") - if(!group2 %in% colData(SE)[,group_of_interest]) stop("Error: Your group2 is not in the group_of_interest.") - cells_group1 <- colData(SE)[,group_of_interest, drop = FALSE] + if(!group_of_interest %in% colnames(SummarizedExperiment::colData(SE))) stop("Error: Your group_of_interest is not in the colData.") + if(!group1 %in% SummarizedExperiment::colData(SE)[, group_of_interest]) stop("Error: Your group1 is not in the group_of_interest.") + if(!group2 %in% SummarizedExperiment::colData(SE)[, group_of_interest]) stop("Error: Your group2 is not in the group_of_interest.") + cells_group1 <- SummarizedExperiment::colData(SE)[, group_of_interest, drop = FALSE] cells_group1 <- cells_group1[cells_group1[, group_of_interest] == group1, , drop = FALSE] - cells_group2 <- colData(SE)[,group_of_interest, drop = FALSE] + cells_group2 <- SummarizedExperiment::colData(SE)[, group_of_interest, drop = FALSE] cells_group2 <- cells_group2[cells_group2[, group_of_interest] == group2, , drop = FALSE] - mean_af_group1 <- rowMeans(assays(SE)[["fraction"]][,rownames(cells_group1)], na.rm = TRUE) - mean_af_group2 <- rowMeans(assays(SE)[["fraction"]][,rownames(cells_group2)], na.rm = TRUE) + mean_af_group1 <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]][, rownames(cells_group1)], na.rm = TRUE) + mean_af_group2 <- rowMeans(SummarizedExperiment::assays(SE)[["fraction"]][, rownames(cells_group2)], na.rm = TRUE) mean_af_group_check <- mean_af_group1 > (group_factor * mean_af_group2) - print("Get the quantiles of the VAFs of each variant.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) - # vars <- do.call(cbind, c(list(mean_af), list(mean_cov), list(rowData(SE)$VariantQuality), quantiles)) - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + if(verbose) print("Get the quantiles of the VAFs of each variant.") + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, stats::quantile, x, na.rm = TRUE)) + if(!is.null(min_quality)){ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, vars$VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + } vars <- vars[mean_af_group_check,] - print("Thresholding using the quantile approach.") - if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") + if(verbose) print("Thresholding using the quantile approach.") + if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") if(length(thresholds) != 2) stop("Your thresholds are not of length 2.") - #voi_ch <- subset(vars, vars[,1] > mean_allele_frequency & vars[,2] > min_coverage & vars[,4] < thresholds[1] & vars[,5] > thresholds[2]) - voi_ch <- subset(vars, Mean_AF > mean_allele_frequency & Mean_Cov > min_coverage & Quantile1 < thresholds[1] & Quantile2 > thresholds[2]) - if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) - } + voi_ch <- subset(vars, vars$Mean_AF > mean_allele_frequency & vars$Mean_Cov > min_coverage & vars$Quantile1 < thresholds[1] & vars$Quantile2 > thresholds[2]) + + } else if(any(is.null(top_cells), is.null(top_VAF))){ - print("Get the quantiles of the VAFs of each variant.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) + if(verbose) print("Get the quantiles of the VAFs of each variant.") + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) - # vars <- do.call(cbind, c(list(mean_af), list(mean_cov), list(rowData(SE)$VariantQuality), quantiles)) - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + if(!is.null(min_quality)){ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, VariantQuality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, vars$VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile1 = quantiles[[1]], Quantile2 = quantiles[[2]]) + } - print("Thresholding using the quantile approach.") + if(verbose) print("Thresholding using the quantile approach.") if(length(quantiles) != 2) stop("Your quantiles are not of length 2.") if(length(thresholds) != 2) stop("Your thresholds are not of length 2.") - #voi_ch <- subset(vars, vars[,1] > mean_allele_frequency & vars[,2] > min_coverage & vars[,4] < thresholds[1] & vars[,5] > thresholds[2]) - voi_ch <- subset(vars, Mean_AF > mean_allele_frequency & Mean_Cov > min_coverage & Quantile1 < thresholds[1] & Quantile2 > thresholds[2]) - if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) - } + voi_ch <- subset(vars, vars$Mean_AF > mean_allele_frequency & vars$Mean_Cov > min_coverage & vars$Quantile1 < thresholds[1] & vars$Quantile2 > thresholds[2]) } else{ - print("Get the quantile of the VAF of each variant.") + if(verbose) print("Get the quantile of the VAF of each variant.") if(length(quantiles) > 1) stop("You are providing more than 1 quantile. You should only provide 1.") if(length(thresholds) > 1) stop("You are providing more than 1 threshold. You should only provide 1.") - quantiles <- lapply(quantiles, function(x) apply(assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) + quantiles <- lapply(quantiles, function(x) apply(SummarizedExperiment::assays(SE)[["fraction"]], 1, quantile, x, na.rm = TRUE)) quantiles <- quantiles[[1]] - top_cells_values <- assays(SE)[["fraction"]] + top_cells_values <- SummarizedExperiment::assays(SE)[["fraction"]] top_cells_values <- top_cells_values > top_VAF top_cells_values <- rowSums(top_cells_values) - vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = rowData(SE)$VariantQuality, Quantile = quantiles, TopCells = top_cells_values) - voi_ch <- subset(vars, Mean_Cov > min_coverage & Quantile <= thresholds[1] & TopCells >= top_cells) if(!is.null(min_quality)){ - voi_ch <- voi_ch[!is.na(voi_ch$Quality),] - voi_ch <- subset(voi_ch, Quality > min_quality) + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quality = SummarizedExperiment::rowData(SE)$VariantQuality, Quantile = quantiles, TopCells = top_cells_values) + vars <- vars[is.na(vars$VariantQuality), ] + vars <- subset(vars, vars$VariantQuality > min_quality) + } else{ + vars <- data.frame(Mean_AF = mean_af, Mean_Cov = mean_cov, Quantile = quantiles, TopCells = top_cells_values) } + voi_ch <- subset(vars, vars$Mean_Cov > min_coverage & vars$Quantile <= thresholds[1] & vars$TopCells >= top_cells) } voi_ch <- rownames(voi_ch) return(voi_ch) diff --git a/R/VariantWiseCorrelation.R b/R/VariantWiseCorrelation.R index 658a6a0..cb8431d 100644 --- a/R/VariantWiseCorrelation.R +++ b/R/VariantWiseCorrelation.R @@ -2,12 +2,14 @@ #'@description #'We correlate the variants with each other using the Pearson correlation. #'This function calls CalculateCorrelationPValue to perform the actual correlation. -#'@import Matrix parallel SummarizedExperiment +#'@importFrom parallel mclapply +#'@importFrom stats p.adjust #'@param variants_list List of fraction values. #'@param n_cores Number of cores you want to use. Numeric. #'@param p_value_adjustment Method for P value adjustment. See p.adjust for details. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr"){ +VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr", verbose = TRUE){ # We correlate the somatic variants with each other and the MT variants. # Since we have tens of thousands of MT variants, we do not correlate them with each other. variants <- names(variants_list) @@ -18,24 +20,25 @@ VariantWiseCorrelation <- function(variants_list, n_cores = 1, p_value_adjustmen number_of_variants <- length(variants) for(i in 1:length(variants)){ variant_use <- variants[i] - print(paste0("Correlating Variant: ", variant_use, ", ", i, " out of ", number_of_variants)) + if(verbose) print(paste0("Correlating Variant: ", variant_use, ", ", i, " out of ", number_of_variants)) variants_values_use <- variants_list[[variant_use]] variants_list_use <- variants_list[names(variants_list) != variant_use] all_variants <- names(variants_list_use) - results <- mclapply(X = all_variants, CalculateCorrelationPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) + results <- parallel::mclapply(X = all_variants, CalculateCorrelationPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) results <- do.call("rbind", results) results <- data.frame(Variant1 = variant_use, Variant2 = all_variants, P = results[,1], Corr = results[,2], Cells_1_Alt = results[,3], Cells_1_Ref = results[,4], Cells_2_Alt = results[,5], Cells_2_Ref = results[,6]) results_total <- rbind(results_total, results) } - print("We remove the NA P values.") + if(verbose) print("We remove the NA P values.") results_total <- results_total[!is.na(results_total$P),] - print("We remove the negative corrlated SNPs.") + if(verbose) print("We remove the negative corrlated SNPs.") results_total <- subset(results_total, Corr > 0) - print(paste0("Adjusting P values using ", p_value_adjustment, ".")) - results_total$P_adj <- p.adjust(results_total$P, method = p_value_adjustment) + if(verbose) print(paste0("Adjusting P values using ", p_value_adjustment, ".")) + results_total$P_adj <- stats::p.adjust(results_total$P, method = p_value_adjustment) + rownames(results_total) <- NULL return(results_total) } diff --git a/R/VariantWiseFisherTest.R b/R/VariantWiseFisherTest.R index bcfe6cd..beb7f88 100644 --- a/R/VariantWiseFisherTest.R +++ b/R/VariantWiseFisherTest.R @@ -2,12 +2,14 @@ #'@description #'We perform the Fisher test to determine which variants are associated. #'This function calls CalculateFisherTestPValue to perform the actual testing. -#'@import Matrix parallel SummarizedExperiment +#'@importFrom parallel mclapply +#'@importFrom stats p.adjust #'@param variants_list List of fraction values. #'@param n_cores Number of cores you want to use. Numeric. #'@param p_value_adjustment Method for P value adjustment. See p.adjust for details. +#'@param verbose Should the function be verbose? Default = TRUE #'@export -VariantWiseFisherTest <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr"){ +VariantWiseFisherTest <- function(variants_list, n_cores = 1, p_value_adjustment = "fdr", verbose = TRUE){ # We correlate the somatic variants with each other and the MT variants. # Since we have tens of thousands of MT variants, we do not correlate them with each other. variants <- names(variants_list) @@ -18,24 +20,25 @@ VariantWiseFisherTest <- function(variants_list, n_cores = 1, p_value_adjustment number_of_variants <- length(variants) for(i in 1:number_of_variants){ variant_use <- variants[i] - print(paste0("Testing Variant: ", variant_use, ", ", i, " out of ", number_of_variants)) + if(verbose) print(paste0("Testing Variant: ", variant_use, ", ", i, " out of ", number_of_variants)) variants_values_use <- variants_list[[variant_use]] variants_list_use <- variants_list[names(variants_list) != variant_use] all_variants <- names(variants_list_use) - results <- mclapply(X = all_variants, CalculateFisherTestPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) + results <- parallel::mclapply(X = all_variants, CalculateFisherTestPValue, variant_values = variants_values_use, all_variants_list = variants_list_use, mc.cores = n_cores) results <- do.call("rbind", results) results <- data.frame(Variant1 = variant_use, Variant2 = all_variants, P = results[,1], OddsRatio = results[,2], Cells_Alt_1_2 = results[,3], Cells_Alt_1_Ref_2 = results[,4], Cells_Alt_2_Ref_1 = results[,5], Cells_Ref_1_2 = results[,6]) results_total <- rbind(results_total, results) } - print("We remove the NA P values.") + if(verbose) print("We remove the NA P values.") results_total <- results_total[!is.na(results_total$P),] - print("We remove the SNPs with a odds ratio lower than 1.") + if(verbose) print("We remove the SNPs with a odds ratio lower than 1.") results_total <- subset(results_total, OddsRatio > 1) - print(paste0("Adjusting P values using ", p_value_adjustment, ".")) - results_total$P_adj <- p.adjust(results_total$P, method = p_value_adjustment) + if(verbose) print(paste0("Adjusting P values using ", p_value_adjustment, ".")) + results_total$P_adj <- stats::p.adjust(results_total$P, method = p_value_adjustment) + rownames(results_total) <- NULL return(results_total) } diff --git a/R/char_to_numeric.R b/R/char_to_numeric.R new file mode 100644 index 0000000..75d4406 --- /dev/null +++ b/R/char_to_numeric.R @@ -0,0 +1,12 @@ +#'char_to_numeric +#'@description +#'A function to convert the heterozygous/homozygous information from the VCF to the consensus information from VarTrix. +#'It is only used in LoadingVCF_typewise.R. +#'@param char_value What is the genotype encoding you want to convert? +#'@export +char_to_numeric <- function(char_value) { + if(char_value == "1/1") return(2) + if(char_value %in% c("1/0", "0/1")) return(2) + if(char_value == "0/0") return(1) + return(0) +} diff --git a/R/combine_NAMES.R b/R/combine_NAMES.R index 46d8450..6f97c1c 100644 --- a/R/combine_NAMES.R +++ b/R/combine_NAMES.R @@ -6,5 +6,6 @@ #'@export combine_NAMES <- function(x, y) { shared_names <- intersect(x, y) - c(x, setdiff(y, shared_names)) + combined_names <- c(x, setdiff(y, shared_names)) + return(combined_names) } diff --git a/R/combine_SparseMatrix.R b/R/combine_SparseMatrix.R index f3ff079..e5ed3ec 100644 --- a/R/combine_SparseMatrix.R +++ b/R/combine_SparseMatrix.R @@ -1,7 +1,7 @@ #'combine_sparseMatrix #'@description #'We combine two sparse matrices -#'@import SummarizedExperiment BiocGenerics Matrix +#'@importFrom Matrix sparseMatrix #'@param matrix_1 Your first sparse matrix. #'@param matrix_2 Your second matrix. #'@export @@ -41,8 +41,8 @@ combine_SparseMatrix <- function(matrix_1, matrix_2){ positions_2[,"j"] <- new_cols[positions_2[,"j"]] positions_combined <- rbind(positions_1, positions_2) - result <- sparseMatrix(i = positions_combined[,"i"], j = positions_combined[,"j"], x = positions_combined[,"x"], - dimnames = list(variants_unique, cells_unique), dims = c(length(variants_unique), length(cells_unique))) + result <- Matrix::sparseMatrix(i = positions_combined[,"i"], j = positions_combined[,"j"], x = positions_combined[,"x"], + dimnames = list(variants_unique, cells_unique), dims = c(length(variants_unique), length(cells_unique))) } diff --git a/R/computeAFMutMatrix.R b/R/computeAFMutMatrix.R index a4cf860..ac01b42 100644 --- a/R/computeAFMutMatrix.R +++ b/R/computeAFMutMatrix.R @@ -6,49 +6,22 @@ #'See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected #'and https://github.com/caleblareau/mgatk/issues/1 #'We simply set these values to 1, since that is the actual information we have in this case. -#'This issue can be solved on the MAEGATK/GATK side. -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays rowRanges #'@param SE SummarizedExperiment object. +#'@param chromosome_prefix The prefix of the chromosome. #'@export computeAFMutMatrix <- function(SE, chromosome_prefix = "chrM"){ - cov <- assays(SE)[["coverage"]] + 0.000001 - ref_allele <- as.character(rowRanges(SE)$refAllele) + cov <- SummarizedExperiment::assays(SE)[["coverage"]] + 0.000001 + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) - getMutMatrix <- function(letter){ - names_rows <- paste0(chromosome_prefix, "_", 1:nrow(cov), "_", toupper(ref_allele), "_", letter) - names_rows <- names_rows[toupper(ref_allele) != letter] - mat_fow <- assays(SE)[[paste0(letter, "_counts_fw")]] - mat_rev <- assays(SE)[[paste0(letter, "_counts_rev")]] - mat <- mat_fow + mat_rev - mat <- mat[toupper(ref_allele) != letter,] - cov_use <- cov[toupper(ref_allele) != letter,] - mat <- mat / cov_use - gc() - # We can get AF values greater than 1, which is due to uninformative reads. - # See: https://gatk.broadinstitute.org/hc/en-us/articles/360035532252-Allele-Depth-AD-is-lower-than-expected - # and https://github.com/caleblareau/mgatk/issues/1 - # We simply set these values to 1, since that is the actual information we have in this case. - # This issue can be solved on the MAEGATK/GATK side. - mat[mat > 1] <- 1 - rownames(mat) <- names_rows - #mat <- as(mat, "dgCMatrix") - mat <- as(mat, "CsparseMatrix") - return(mat) - } - - A_matrix <- getMutMatrix("A") - #A_matrix <- as.matrix(A_matrix) + A_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "A", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - C_matrix <- getMutMatrix("C") - #C_matrix <- as.matrix(C_matrix) + C_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "C", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - G_matrix <- getMutMatrix("G") - #G_matrix <- as.matrix(G_matrix) + G_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "G", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() - T_matrix <- getMutMatrix("T") - #T_matrix <- as.matrix(T_matrix) + T_matrix <- getMutMatrix(SE = SE, cov = cov, letter = "T", ref_allele = ref_allele, chromosome_prefix = chromosome_prefix) gc() result <- rbind(A_matrix, C_matrix, G_matrix, T_matrix) -# result <- as.matrix(result) return(result) } diff --git a/R/getAltMatrix.R b/R/getAltMatrix.R index 0979eee..782d00e 100644 --- a/R/getAltMatrix.R +++ b/R/getAltMatrix.R @@ -2,15 +2,14 @@ #'@description #'We get the alt values from the MAEGATK results. #'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays rowRanges #'@param SE_object SummarizedExperiment object. #'@param letter The base you want to use. Character. -#'@param ref_allele The reference alleles. #'@param chromosome_prefix The chromosome prefix used. #'@export getAltMatrix <- function(SE_object, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE_object)$refAllele) - mat <- (assays(SE_object)[[paste0(letter, "_counts_fw")]] + assays(SE_object)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE_object)$refAllele) + mat <- (SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", as.character(1:dim(mat)[1]), "_", toupper(ref_allele), ">", letter) mat <- mat[toupper(ref_allele) != letter,] return(mat) diff --git a/R/getMutMatrix.R b/R/getMutMatrix.R new file mode 100644 index 0000000..ea0481c --- /dev/null +++ b/R/getMutMatrix.R @@ -0,0 +1,27 @@ +#'getMutMatrix +#'@description +#'This function gets the allele frequency for a specific allele. It is used in computeAFMutMatrix. +#'Source: https://github.com/petervangalen/MAESTER-2021 +#'@importFrom SummarizedExperiment assays +#'@importFrom methods as +#'@param SE SummarizedExperiment object. +#'@param cov The coverage matrix from MAEGATK/MGATK. +#'@param letter The base we are interested in. +#'@param ref_allele Vector of reference alleles. +#'@param chromosome_prefix The chromosome prefix used. +#'@export +getMutMatrix <- function(SE, cov, letter, ref_allele, chromosome_prefix){ + names_rows <- paste0(chromosome_prefix, "_", 1:nrow(cov), "_", toupper(ref_allele), "_", letter) + names_rows <- names_rows[toupper(ref_allele) != letter] + mat_fow <- SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_fw")]] + mat_rev <- SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_rev")]] + mat <- mat_fow + mat_rev + mat <- mat[toupper(ref_allele) != letter,] + cov_use <- cov[toupper(ref_allele) != letter,] + mat <- mat / cov_use + gc() + mat[mat > 1] <- 1 + rownames(mat) <- names_rows + mat <- methods::as(mat, "CsparseMatrix") + return(mat) +} diff --git a/R/getReadMatrix.R b/R/getReadMatrix.R index 6b5e693..deec5e9 100644 --- a/R/getReadMatrix.R +++ b/R/getReadMatrix.R @@ -1,12 +1,12 @@ #'Get the counts for a specific base over all positions. -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays rowRanges #'@param SE SummarizedExperiment object. #'@param letter The base for which we want the counts. #'@param chromosome_prefix The chromosome name used as a prefix. #'@export getReadMatrix <- function(SE, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE)$refAllele) - mat <- (assays(SE)[[paste0(letter, "_counts_fw")]] + assays(SE)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele) + mat <- (SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", 1:nrow(mat), "_", toupper(ref_allele), "_", letter) return(mat) } diff --git a/R/getRefMatrix.R b/R/getRefMatrix.R index e0374d4..15b1da7 100644 --- a/R/getRefMatrix.R +++ b/R/getRefMatrix.R @@ -2,15 +2,14 @@ #'@description #'We get the reference values from the MAEGATK result. #'Source: https://github.com/petervangalen/MAESTER-2021 -#'@import SummarizedExperiment +#'@importFrom SummarizedExperiment assays rowRanges #'@param SE_object SummarizedExperiment object. #'@param letter The base you are analysing. You get a matrix that shows which cells have how many reference reads for this letter. -#'@param ref_allele The reference alleles. #'@param chromosome_prefix The chromosome prefix used. #'@export getRefMatrix <- function(SE_object, letter, chromosome_prefix = "chrM"){ - ref_allele <- as.character(rowRanges(SE_object)$refAllele) - mat <- (assays(SE_object)[[paste0(letter, "_counts_fw")]] + assays(SE_object)[[paste0(letter, "_counts_rev")]]) + ref_allele <- as.character(SummarizedExperiment::rowRanges(SE_object)$refAllele) + mat <- (SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_fw")]] + SummarizedExperiment::assays(SE_object)[[paste0(letter, "_counts_rev")]]) rownames(mat) <- paste0(chromosome_prefix, "_", as.character(1:dim(mat)[1]), "_", toupper(ref_allele), ">", letter) mat <- mat[toupper(ref_allele) %in% letter,] return(mat) diff --git a/R/get_consensus.R b/R/get_consensus.R index 1d5ebd2..2c18557 100644 --- a/R/get_consensus.R +++ b/R/get_consensus.R @@ -1,8 +1,9 @@ #'get_consensus #'@description #'We get the consensus information for a specific matrix. -#'@import dplyr MatrixGenerics SummarizedExperiment -#'@param letter The alternative base. +#'I want to remove some packages if they are not needed. See below which package apperantly wasn't needed. +#'@importFrom methods as +#'@param alt_base The alternative base. #'@param ref_base The reference base. #'@param input_matrix Input matrix with the present reads numerically encoded. #'@param chromosome_prefix The chromosome name used as a prefix. @@ -20,8 +21,6 @@ get_consensus <- function(alt_base, ref_base, input_matrix, chromosome_prefix = # Both is not accurate in this context. Therefore, we set these cases to 0 (NoCall). other_homo_values <- base_numeric[!base_numeric %in% base_numeric[c(alt_base, ref_base)]] - - #output_matrix <- input_matrix output_matrix <- matrix(0, nrow = nrow(input_matrix), ncol = ncol(input_matrix)) rownames(output_matrix) <- rownames(input_matrix) colnames(output_matrix) <- colnames(input_matrix) @@ -43,7 +42,7 @@ get_consensus <- function(alt_base, ref_base, input_matrix, chromosome_prefix = output_matrix[input_matrix %in% other_homo_values] <- 0 rownames(output_matrix) <- paste0(chromosome_prefix, "_", gsub("[^[:digit:]., ]", "", rownames(output_matrix)), "_", ref_base, "_", alt_base) - #output_matrix <- as(output_matrix, "dgCMatrix") - output_matrix <- as(output_matrix, "CsparseMatrix") + #output_matrix <- methods::as(output_matrix, "dgCMatrix") + output_matrix <- methods::as(output_matrix, "CsparseMatrix") return(output_matrix) } diff --git a/R/ggsci_pal.R b/R/ggsci_pal.R index 254506d..b632a2c 100644 --- a/R/ggsci_pal.R +++ b/R/ggsci_pal.R @@ -1,8 +1,10 @@ #'ggsci_pal #'@description #'Function to return colours from a ggsci palette. -#'@import assertthat ggsci glue +#'@import ggsci +#'@importFrom glue glue #'@param option Your colour palette of choice. +#'@param ... Further options passed to the palette function. #'@details #'The function returns a colour palette from ggsci. #'Options are: @@ -14,8 +16,7 @@ #'ucscgb: 26 #'@export ggsci_pal <- function(option, ...){ - func_name = glue("pal_{option}") - func_call = glue('{func_name}(...)') - assertthat::assert_that(func_name %in% ls("package:ggsci")) + func_name = glue::glue("pal_{option}") + func_call = glue::glue('{func_name}(...)') return(eval(parse(text=func_call))) } diff --git a/R/load_object.R b/R/load_object.R index 3f840d0..82d7286 100644 --- a/R/load_object.R +++ b/R/load_object.R @@ -6,8 +6,13 @@ #'@param file_name The path to the file. #'@export load_object <- function(file_name){ - con <- archive::file_read(file = file_name) - res <- readRDS(file = con) - close(con) - return(res) + if(!file.exists(file_name)) stop(paste0("File '",file_name,"' not found.")) + + if(requireNamespace("archive", quietly = TRUE)){ + con <- archive::file_read(file = file_name) + res <- readRDS(file = con) + close(con) + return(res) + } + res <- readRDS(file = file_name) } diff --git a/R/save_object.R b/R/save_object.R index 00968c7..7618f1f 100644 --- a/R/save_object.R +++ b/R/save_object.R @@ -7,18 +7,25 @@ #'@param file_name The path were the file shall be save. #'@param file_format The format of the save file. Has to be one of: zstd, lz4, gzip, bzip2, xz, nocomp. #'@export -save_object <- function(object, file_name, file_format = NULL){ +save_object <- function(object, file_name, file_format = "zstd"){ stopifnot(file_format %in% c("zstd", "lz4", "gzip", "bzip2", "xz", "nocomp")) - if(file_format %in% "nocomp"){ - saveRDS(object = object, file = file_name, compress = FALSE) - return(invisible(NULL)) - } + stopifnot(length(file_format) == 1) if(file_format %in% c("zstd", "lz4")){ - con <- archive::file_write(file = file_name, filter = file_format) - open(con) - saveRDS(object = object, file = con) - close(con) - }else{ - saveRDS(object = object, file = file_name, compress = file_format) + if(requireNamespace("archive", quietly = TRUE)){ + con <- archive::file_write(file = file_name, filter = file_format) + open(con) + saveRDS(object = object, file = con) + close(con) + }else{ + warning("Package 'archive' needs to be installed to compress files in formats 'zstd' and 'lz4'.\n Saving object with default 'saveRDS()' function instead.") + saveRDS(object = object, file = file_name, compress = TRUE) + } + return(invisible(NULL)) } + saveRDS( + object = object, + file = file_name, + compress = ifelse(file_format %in% "nocomp", FALSE, file_format) + ) } + diff --git a/R/sdiv.R b/R/sdiv.R index 8a8cbde..fcefdee 100644 --- a/R/sdiv.R +++ b/R/sdiv.R @@ -1,5 +1,5 @@ #'Division of sparse matrix. -#'@import Matrix +#'@importFrom Matrix sparseMatrix #'@param X First sparse matrix. #'@param Y Second sparse matrix. #'@param names The dimension names (dimnames(X)). @@ -8,6 +8,6 @@ sdiv <- function(X, Y, names = dimnames(X)) { sX <- summary(X) sY <- summary(Y) sRes <- merge(sX, sY, by = c("i", "j")) - sparseMatrix(i = sRes[,1], j = sRes[,2], x = sRes[,3] / sRes[,4], - dimnames = names) + result <- Matrix::sparseMatrix(i = sRes[,1], j = sRes[,2], x = sRes[,3] / sRes[,4], dimnames = names) + return() } diff --git a/README.md b/README.md index 29b2305..f02514c 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ The mutation data was obtained from the Sanger Institute Catalogue Of Somatic Mu ``` -# Current Features v0.2.23 +# Current Features v0.2.32 - Loading data from VarTrix and MAEGATK. - Transforming the data to be compatible for joint analysis. diff --git a/docs/404.html b/docs/404.html index 78aa6b2..b487215 100644 --- a/docs/404.html +++ b/docs/404.html @@ -24,7 +24,7 @@ sigurd - 0.2.15 + 0.2.30 + + + + + +
+
+
+ +
+

We get the genotyping information for a set of variants. +The function returns a matrix with the values from the specified assay.

+
+ +
+

Usage

+
GetVariantInfo(SE, information = "consensus", variants = NULL, cells = NULL)
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
information
+

The assay with the desired information. Default: consensus

+ + +
variants
+

A vector of variants.

+ + +
cells
+

A vector of cell IDs. On default all cells are returned. Default: NULL

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/HeatmapVoi.html b/docs/reference/HeatmapVoi.html index cf703c0..45f63b7 100644 --- a/docs/reference/HeatmapVoi.html +++ b/docs/reference/HeatmapVoi.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 + + + + + +
+
+
+ +
+

We load a cellwise pileup result from a VCF file. +If you want to only load a single sample without the use of an input file, you have to set the following variables.

  1. samples_path

  2. +
  3. barcodes_path

  4. +
  5. patient

  6. +
  7. samples_file = NULL

  8. +
+ +
+

Usage

+
LoadingVCF_typewise(
+  samples_file,
+  samples_path = NULL,
+  barcodes_path = NULL,
+  vcf_path,
+  patient,
+  type_use = "scRNAseq_Somatic",
+  min_reads = NULL,
+  min_cells = 2,
+  verbose = TRUE
+)
+
+ +
+

Arguments

+
samples_file
+

Path to the csv file with the samples to be loaded.

+ + +
samples_path
+

Path to the input folder. Must include a barcodes file.

+ + +
barcodes_path
+

Path to the cell barcodes tsv. Default = NULL

+ + +
vcf_path
+

Path to the VCF file with the variants.

+ + +
patient
+

The patient you want to load.

+ + +
type_use
+

The type of input. Has to be one of: scRNAseq_Somatic, Amplicon_Somatic, scRNAseq_MT, Amplicon_MT.

+ + +
min_reads
+

The minimum number of reads we want. Otherwise we treat this as a NoCall. Default = NULL.

+ + +
min_cells
+

The minimum number of cells for a variant. Otherwise, we will remove a variant. Default = 2.

+ + +
verbose
+

Should the function be verbose? Default = TRUE

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/LoadingVarTrix_ori.html b/docs/reference/LoadingVarTrix_ori.html index 2f277bb..c1d0d08 100644 --- a/docs/reference/LoadingVarTrix_ori.html +++ b/docs/reference/LoadingVarTrix_ori.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.29 + + + + + +
+
+
+ +
+

We add the genotyping information for a set of variants to a Seurat object. +The function returns a matrix with the values from the specified assay.

+
+ +
+

Usage

+
SetVariantInfo(SE, seurat_object, information = "consensus", variants = NULL)
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
seurat_object
+

The Seurat object.

+ + +
information
+

The assay with the desired information. Default: consensus

+ + +
variants
+

A vector of variants.

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/VariantBurden.html b/docs/reference/VariantBurden.html index a03b092..85f8c6c 100644 --- a/docs/reference/VariantBurden.html +++ b/docs/reference/VariantBurden.html @@ -12,7 +12,7 @@ sigurd - 0.2.15 + 0.2.30 + + + + + +
+
+
+ +
+

A function to convert the heterozygous/homozygous information from the VCF to the consensus information from VarTrix. +It is only used in LoadingVCF_typewise.R.

+
+ +
+

Usage

+
char_to_numeric(char_value)
+
+ +
+

Arguments

+
char_value
+

What is the genotype encoding you want to convert?

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/combine_NAMES.html b/docs/reference/combine_NAMES.html index f5b69b6..6b7c402 100644 --- a/docs/reference/combine_NAMES.html +++ b/docs/reference/combine_NAMES.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30 + + + + + +
+
+
+ +
+

This function gets the allele frequency for a specific allele. It is used in computeAFMutMatrix. +Source: https://github.com/petervangalen/MAESTER-2021

+
+ +
+

Usage

+
getMutMatrix(SE, cov, letter, ref_allele, chromosome_prefix)
+
+ +
+

Arguments

+
SE
+

SummarizedExperiment object.

+ + +
cov
+

The coverage matrix from MAEGATK/MGATK.

+ + +
letter
+

The base we are interested in.

+ + +
ref_allele
+

Vector of reference alleles.

+ + +
chromosome_prefix
+

The chromosome prefix used.

+ +
+ +
+ + +
+ + + + + + + diff --git a/docs/reference/getReadMatrix.html b/docs/reference/getReadMatrix.html index ad4ca69..35f1e5b 100644 --- a/docs/reference/getReadMatrix.html +++ b/docs/reference/getReadMatrix.html @@ -10,7 +10,7 @@ sigurd - 0.2.15 + 0.2.30