Skip to content

Commit

Permalink
Merge pull request #23 from CostaLab/devel
Browse files Browse the repository at this point in the history
Merging the recent changes into the master branch.
  • Loading branch information
grasshoffm authored Aug 29, 2023
2 parents 1987d2e + 5b9f8d0 commit 9ebfab3
Show file tree
Hide file tree
Showing 73 changed files with 787 additions and 167 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: sigurd
Type: Package
Title: Single cell Genotyping Using RNA Data
Version: 0.2.13
Version: 0.2.23
Authors@R: c(
person(given = "Martin",
family = "Grasshoff",
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,19 @@ export(CalculateConsensus)
export(CalculateCorrelationPValue)
export(CalculateCoverage)
export(CalculateFisherTestPValue)
export(CalculateQuality)
export(CalculateStrandCorrelation)
export(CombineSEobjects)
export(Filtering)
export(GetCellInfoPerVariant)
export(GetVariantInfo)
export(HeatmapVoi)
export(LoadingMAEGATK_typewise)
export(LoadingVarTrix_typewise)
export(Merging_SE_list)
export(RowWiseSplit)
export(SeparatingMatrixToList)
export(SetVariantInfo)
export(VariantBurden)
export(VariantCloneSizeThresholding)
export(VariantCorrelationHeatmap)
Expand Down
38 changes: 35 additions & 3 deletions R/AmpliconSupplementing.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,42 @@ AmpliconSupplementing <- function(scRNAseq, amplicon){
print("We get the new meta data.")
new_meta_data <- merge(colData(scRNAseq), colData(amplicon), by = "Cell", all.x = TRUE, all.y = TRUE,
suffixes = c("scRNAseq", "Amplicon"))
rownames(new_meta_data) <- new_meta_data$Cell
# We add an AverageCoverage column to the new meta data.
new_meta_data$AverageCoverage <- new_meta_data$AverageCoveragescRNAseq
amplicon_value <- new_meta_data$AverageCoverageAmplicon
names(amplicon_value) <- colnames(amplicon)
amplicon_value <- na.omit(amplicon_value)
new_meta_data[names(amplicon_value), "AverageCoverage"] <- amplicon_value

new_row_data <- merge(rowData(scRNAseq), rowData(amplicon), by = "VariantName", all.x = TRUE, all.y = TRUE,
suffixes = c("scRNAseq", "Amplicon"))
rownames(new_row_data) <- new_row_data$VariantName
# We add a VariantQuality column to the row data, showing the scRNAseq quality with the supplemented amplicon quality.
# We do the same for the concordance and the depth.
new_row_data$VariantQuality <- new_row_data$VariantQualityscRNAseq
amplicon_value <- rowData(amplicon)$VariantQuality
names(amplicon_value) <- rownames(amplicon)
amplicon_value <- na.omit(amplicon_value)
new_row_data[names(amplicon_value), "VariantQuality"] <- amplicon_value

new_row_data$Concordance <- new_row_data$ConcordancescRNAseq
amplicon_value <- rowData(amplicon)$Concordance
names(amplicon_value) <- rownames(amplicon)
amplicon_value <- na.omit(amplicon_value)
new_row_data[names(amplicon_value), "Concordance"] <- amplicon_value

new_row_data$Depth <- new_row_data$DepthscRNAseq
amplicon_value <- rowData(amplicon)$Depth
names(amplicon_value) <- rownames(amplicon)
amplicon_value <- na.omit(amplicon_value)
new_row_data[names(amplicon_value), "Depth"] <- amplicon_value

print("We get all cells and variants.")
all_cells <- unique(c(colnames(scRNAseq), colnames(amplicon)))
all_variants <- unique(c(rownames(scRNAseq), rownames(amplicon)))
all_cells <- unique(c(colnames(scRNAseq), colnames(amplicon)))
all_variants <- unique(c(rownames(scRNAseq), rownames(amplicon)))
new_meta_data <- new_meta_data[all_cells,]
new_row_data <- new_row_data[all_variants,]

print("We generate our output matrices.")
consensus <- matrix(0, ncol = length(all_cells), nrow = length(all_variants))
Expand Down Expand Up @@ -47,6 +79,6 @@ AmpliconSupplementing <- function(scRNAseq, amplicon){
#assays(scRNAseq)[["coverage"]][rownames(amplicon), colnames(amplicon)] <- as.matrix(assays(amplicon)$coverage)

se <- SummarizedExperiment(assays = list(consensus = as(consensus, "dgCMatrix"), fraction = as(fraction, "dgCMatrix"), coverage = as(reads, "dgCMatrix"), alts = as(alts, "dgCMatrix"), refs = as(refs, "dgCMatrix")),
colData = new_meta_data)
colData = new_meta_data, rowData = new_row_data)
return(se)
}
32 changes: 32 additions & 0 deletions R/CalculateQuality.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#'CalculateQuality
#'@description
#'We calculate the quality per variant.
#'@import MatrixGenerics SummarizedExperiment
#'@param SE SummarizedExperiment object.
#'@param chromosome_prefix List of matrices for the alternative reads.
#'@export
CalculateQuality <- function(SE, variants = rownames(reads_alt), chromosome_prefix = "chrM"){
variants <- gsub(paste0(chromosome_prefix, "_"), "", variants)
qualities <- lapply(c("A", "T", "C", "G"), function(x){
fwrev <- cbind(assays(SE)[[paste0(x, "_counts_fw")]], assays(SE)[[paste0(x, "_counts_rev")]])
qualities_fwrev <- cbind(assays(SE)[[paste0(x, "_qual_fw")]], assays(SE)[[paste0(x, "_qual_rev")]])
variants_use <- strsplit(variants, "")
variants_use <- sapply(variants_use, tail, n = 1)
variants_use <- variants_use == x
variants_use_names <- variants[variants_use]
variants_use <- as.numeric(gsub("_.*", "", variants_use_names))
variants_use_names <- paste0(chromosome_prefix, "_", variants_use_names)
fwrev <- fwrev[variants_use,]
rownames(fwrev) <- variants_use_names
qualities_fwrev <- qualities_fwrev[variants_use,]
rownames(qualities_fwrev) <- variants_use_names
fwrev <- fwrev > 0
qualities_fwrev <- qualities_fwrev * fwrev
qualities <- apply(qualities_fwrev, 1, sum) / rowSums(fwrev > 0)
qualities[qualities == 0] <- NA
return(qualities)
})
qualities <- unlist(qualities)
qualities <- qualities[paste0(chromosome_prefix, "_", variants)]
return(qualities)
}
8 changes: 4 additions & 4 deletions R/CalculateStrandCorrelation.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ CalculateStrandCorrelation <- function(SE, chromosome_prefix = "chrM"){
dt <- data.table(variant = variants_A[dt[[1]]],
cell_id = dt[[2]],
fw = dt[[3]], rev = dt[[4]])
cor_result_A <- dt[, .(cor = cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete")), by = list(variant)]
cor_result_A <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)]

variants_C <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_C")
variants_C <- variants_C[ref_allele != "C"]
Expand All @@ -39,7 +39,7 @@ CalculateStrandCorrelation <- function(SE, chromosome_prefix = "chrM"){
dt <- data.table(variant = variants_C[dt[[1]]],
cell_id = dt[[2]],
fw = dt[[3]], rev = dt[[4]])
cor_result_C <- dt[, .(cor = cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete")), by = list(variant)]
cor_result_C <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)]

variants_G <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_G")
variants_G <- variants_G[ref_allele != "G"]
Expand All @@ -56,7 +56,7 @@ CalculateStrandCorrelation <- function(SE, chromosome_prefix = "chrM"){
dt <- data.table(variant = variants_G[dt[[1]]],
cell_id = dt[[2]],
fw = dt[[3]], rev = dt[[4]])
cor_result_G <- dt[, .(cor = cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete")), by = list(variant)]
cor_result_G <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)]

variants_T <- paste0(chromosome_prefix, "_", 1:length(ref_allele), "_", ref_allele, "_T")
variants_T <- variants_T[ref_allele != "T"]
Expand All @@ -73,7 +73,7 @@ CalculateStrandCorrelation <- function(SE, chromosome_prefix = "chrM"){
dt <- data.table(variant = variants_T[dt[[1]]],
cell_id = dt[[2]],
fw = dt[[3]], rev = dt[[4]])
cor_result_T <- dt[, .(cor = cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete")), by = list(variant)]
cor_result_T <- dt[, .(cor = suppressWarnings(cor(c(fw), c(rev), method = "pearson", use = "pairwise.complete"))), by = list(variant)]

cor_results <- rbind(cor_result_A, cor_result_C, cor_result_G, cor_result_T)

Expand Down
26 changes: 25 additions & 1 deletion R/CombineSEobjects.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,30 @@ CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT"))
meta_data <- merge(meta_data_somatic, meta_data_MT, by = "Cell", all = TRUE, suffixes = suffixes)
meta_data <- meta_data[match(cells, meta_data$Cell),]

meta_row_somatic <- rowData(se_somatic)
meta_row_MT <- rowData(se_MT)
if(ncol(meta_row_somatic) > 0 & ncol(meta_row_MT) > 0){
meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes)
meta_row <- meta_row[match(features, meta_row$VariantName),]
rownames(meta_row) <- meta_row$VariantName
} else if(ncol(meta_row_somatic) == 0 & ncol(meta_row_MT) > 0){
meta_row_somatic <- matrix(NA, nrow = nrow(meta_row_somatic), ncol = ncol(meta_row_MT))
rownames(meta_row_somatic) <- rownames(se_somatic)
colnames(meta_row_somatic) <- colnames(meta_row_MT)
meta_row_somatic <- DataFrame(meta_row_somatic)
meta_row_somatic$VariantName <- rownames(meta_row_somatic)
meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes)
meta_row <- meta_row[match(features, meta_row$VariantName),]
} else if(ncol(meta_row_somatic) > 0 & ncol(meta_row_MT) > 0){
meta_row_MT <- matrix(NA, nrow = nrow(meta_row_MT), ncol = ncol(meta_row_somatic))
rownames(meta_row_MT) <- rownames(se_MT)
colnames(meta_row_MT) <- colnames(meta_row_somatic)
meta_row_MT <- DataFrame(meta_row_MT)
meta_row_MT$VariantName <- rownames(meta_row_MT)
meta_row <- merge(meta_row_somatic, meta_row_MT, by = "VariantName", all = TRUE, suffixes = suffixes)
meta_row <- meta_row[match(features, meta_row$VariantName),]
}

#assays_somatic <- assays(se_somatic)
#assays_somatic <- lapply(assays_somatic, as.matrix)
#assays_MT <- assays(se_MT)
Expand All @@ -39,7 +63,7 @@ CombineSEobjects <- function(se_somatic, se_MT, suffixes = c("_somatic", "_MT"))
names(assays_combined) <- assay_names_somatic

se_combined <- SummarizedExperiment(assays = assays_combined,
colData = meta_data)
colData = meta_data, rowData = meta_row)
return(se_combined)
}

26 changes: 8 additions & 18 deletions R/Filtering.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@
#'We want to remove:
#' \enumerate{
#' \item all cells that are blacklisted,
#' \item all cells that are not in a Seurat object,
#' \item all cells that do not have at least one variant with >1 (Reference),
#' \item all variants that are for alternative transcripts,
#' \item all variants that are always NoCall,
#' \item set variants with a VAF below a threshold to reference.
#' \item set variants with a VAF below a threshold to NoCall or Reference.
#' }
#'@import fastmatch Matrix Seurat SummarizedExperiment
#'@import fastmatch Matrix SummarizedExperiment
#'@param se SummarizedExperiment object.
#'@param blacklisted_barcodes_path Barcodes you want to remove. Path to a file with one column without header.
#'@param fraction_threshold Variants with an VAF below this threshold are set to 0. Numeric. Default = NULL.
#'@param alts_threshold Variants with a number of alt reads less than this threshold are set to 0. Numeric. Default = NULL.
#'@param min_cells_per_variant In how many cells should a variant be present to be included? Numeric. Default = 2.
#'@param path_seurat Path to a Seurat object. Cells not present in the object will be removed.
#'@param min_variants_per_cell How many variants should be covered in a cell have to be included? Default = 1.
#'@param reject_value Should cells that fall below a threshold (fraction_threshold or alts_threshold) be treated as Reference or NoCall? Default = NoCall.
#'@examples
Expand All @@ -30,7 +28,7 @@
#' se_geno <- Filtering(se_geno, min_cells_per_variant = 2, fraction_threshold = 0.05)
#' }
#'@export
Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, path_seurat = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, reject_value = "NoCall"){
Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold = NULL, alts_threshold = NULL, min_cells_per_variant = 2, min_variants_per_cell = 1, reject_value = "NoCall"){
# Checking if the reject_value variable is correct.
if(!reject_value %in% c("Reference", "NoCall")){
stop(paste0("Your reject_value is ", reject_value, ".\nIt should be Reference or NoCall."))
Expand All @@ -49,21 +47,13 @@ Filtering <- function(se, blacklisted_barcodes_path = NULL, fraction_threshold =
se <- se[, barcodes_keep]
}

if(!is.null(path_seurat)){
print("We load the Seurat object.")
scrna <- load_object(path_seurat)

print("We remove all cells that are not in the Seurat object.")
seu_cells <- colnames(se)
seu_cells <- seu_cells[seu_cells %in% colnames(scrna)]
se <- se[,seu_cells]
# If the fraction_threshold is 0, we skip the thresholding. 0 and NULL would be the same.
# We might want to use a fraction_threshold of 0 to use the same variable to create file paths later.
if(fraction_threshold == 0){
fraction_threshold <- NULL
}

#print("We remove all variants for alternative transcripts.")
#keep_variants <- !grepl("_ENST", rownames(se))
#se <- se[keep_variants,]



if(!is.null(fraction_threshold)){
if(any(fraction_threshold >= 1, fraction_threshold <= 0)){
stop("Your fraction threshold is not 0 < x < 1.")
Expand Down
44 changes: 44 additions & 0 deletions R/GetVariantInfo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#'GetVariantInfo
#'@description
#'We get the genotyping information for a set of variants.
#'The function returns a matrix with the values from the specified assay.
#'@import SummarizedExperiment
#'@param SE SummarizedExperiment object.
#'@param information The assay with the desired information. Default: consensus
#'@param variants A vector of variants.
#'@param cells A vector of cell IDs. On default all cells are returned. Default: NULL
#'@export
GetVariantInfo <- function(SE, information = "consensus", variants = NULL, cells = NULL){
# We check if a vector of variants has been supplied.
if(is.null(variants)){
stop("Error: You forgot to supply a vector of variants.")
}
# We check if the variants are actually in a vector.
if(!is.vector(variants)){
stop("Error: Your variants are not in a vector.")
}
# We check if all variants are in the SE object.
variants_check <- variants %in% rownames(SE)
if(!all(variants_check)){
variants_check <- sum(variants_check)
stop(paste0("Only ", variants_check, " of ", length(variants), " are in the SE object."))
}
# We check if the requested assay is actually present.
assay_check <- information %in% names(assays(SE))
if(!assay_check){
stop("The assay you wants is not present in the object.")
}
res <- assays(SE)[[information]][variants, , drop = FALSE]
# We subset the result to only include the cells of interest.
# We check if the cells vector is not NULL.
if(!is.null(cells)){
# We check if all cell IDs are in the SE object.
cell_check <- cells %in% colnames(SE)
if(!all(cell_check)){
stop(paste0("Error: Only ", sum(cell_check), " cells are present in the object."))
} else{
res <- res[, cells, drop = FALSE]
}
}
return(res)
}
33 changes: 13 additions & 20 deletions R/HeatmapVOI.R
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
#'HeatmapVoi
#'@description
#'We plot a heatmap of a set of Variants Of Interest using the Variant Allele Frequency values of a SummarizedExperiment object.
#'@import ComplexHeatmap SummarizedExperiment circlize ggsci Seurat scales
#'@import ComplexHeatmap SummarizedExperiment grid circlize scales
#'@param SE SummarizedExperiment object.
#'@param voi Variants Of Interest.
#'@param annotation_trait Cell Annotation at the bottom of the heat map.
#'@export
HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL){
#colours_list <- c("#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99",
# "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a",
# "#ffff99", "deeppink", "green", "blue", "gold", "indianred3",
# "firebrick4", "#FF6F00FF", "#C71000FF", "#008EA0FF", "#8A4198FF",
# "#5A9599FF", "#FF6348FF", "#84D7E1FF", "#FF95A8FF", "#3D3B25FF",
# "#ADE2D0FF", "#1A5354FF", "#3F4041FF", "chocolate4", "cornsilk3",
# "cyan", "darkgreen", "gray0", "dodgerblue4", "forestgreen",
# "darkviolet", "indianred3", "midnightblue", "mintcream",
# "mediumaquamarine", "slateblue4"
#)
HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL, remove_empty_cells = FALSE){

fraction <- assays(SE)[["fraction"]][voi,]
fraction[is.na(fraction)] <- 0
Expand All @@ -26,10 +16,13 @@ HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL){
} else if(length(voi) > 1){
fraction <- as.matrix(fraction)
}
# We remove cells that are negative for all variants.
if(remove_empty_cells){
cell_check <- colSums(fraction > 0) > 0
fraction <- fraction[,cell_check, drop = FALSE]
}
if(!is.null(annotation_trait)){
colours_use <- hue_pal(length(unique(colData(SE)[,annotation_trait])))
#colours_use <- colours_list[1:length(unique(colData(SE)[,annotation_trait]))]
#colours_use <- DiscretePalette(length(unique(colData(SE)[,annotation_trait])))
colours_use <- scales::hue_pal(length(unique(colData(SE)[,annotation_trait])))
names(colours_use) <- unique(colData(SE)[,annotation_trait])
ha <- ComplexHeatmap::columnAnnotation(annotation_trait = colData(SE)[,annotation_trait],
col = list(annotation_trait = colours_use))
Expand All @@ -43,12 +36,12 @@ HeatmapVoi <- function(SE, voi, annotation_trait = NULL, column_title = NULL){
}

heatmap_voi <- Heatmap(fraction,
column_title_gp = gpar(fontsize = 20, fontface = "bold"),
row_title_gp = gpar(fontsize = 20, fontface = "bold"),
column_title_gp = grid::gpar(fontsize = 20, fontface = "bold"),
row_title_gp = grid::gpar(fontsize = 20, fontface = "bold"),
row_names_gp = grid::gpar(fontsize = 20, fontface = "bold"),
col = colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9),
c("#FCFCFC","#FFEDB0","#FFDF5F","#FEC510","#FA8E24","#F14C2B","#DA2828","#BE2222","#A31D1D")),
show_row_names = T, show_column_names = F, cluster_columns = T, cluster_rows = F, name = "VAF",
col = circlize::colorRamp2(seq(0, round(max(fraction, na.rm = TRUE)), length.out = 9),
c("#FCFCFC","#FFEDB0","#FFDF5F","#FEC510","#FA8E24","#F14C2B","#DA2828","#BE2222","#A31D1D")),
show_row_names = T, show_column_names = F, cluster_columns = T, clustering_method_columns = "complete", cluster_rows = F, name = "VAF",
heatmap_legend_param = list(border = "#000000", grid_height = unit(10, "mm")),
bottom_annotation = ha, border = T, use_raster = T,
column_title = column_title,
Expand Down
Loading

0 comments on commit 9ebfab3

Please sign in to comment.