diff --git a/DESCRIPTION b/DESCRIPTION index 53fb36d2..cf5a029c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: EWCE Type: Package Title: Expression Weighted Celltype Enrichment -Version: 1.10.0 +Version: 1.10.1 Authors@R: c(person(given = "Alan", family = "Murphy", diff --git a/R/ewce_plot.r b/R/ewce_plot.r index 24377464..46267b3c 100755 --- a/R/ewce_plot.r +++ b/R/ewce_plot.r @@ -23,7 +23,8 @@ #' @inheritParams check_percent_hits #' #' @returns A named list containing versions of the \link[ggplot2]{ggplot} -#' with and without the dendrogram. +#' with and without the dendrogram. Note that cell type order on the x-axis is +#' based on hierarchical clustering for both plots if make_dendro = TRUE. #' #' @export #' @import ggplot2 @@ -42,138 +43,146 @@ ewce_plot <- function(total_res, heights = c(.3, 1), make_dendro = FALSE, verbose = TRUE) { - # templateR:::args2vars(ewce_plot) - - requireNamespace("ggplot2") - requireNamespace("patchwork") - - check_mtc_method(mtc_method = mtc_method) - multiList <- TRUE - if (is.null(total_res$list)) multiList <- FALSE - #### If using dendrogram #### - if(isTRUE(make_dendro)){ - #### Check if ctd is provided #### - if(is.null(ctd)){ - messager( - "Warning: Can only add the dendrogram when ctd is provided.", - "Setting make_dendro=FALSE.", - v=verbose) - make_dendro <- FALSE - } else { - # Find the relevant level of the CTD annotation - if (length(ctd[[annotLevel]]$plotting) > 0) { - annotLevel <- - which(unlist(lapply(ctd, - FUN = cells_in_ctd, - cells = as.character( - total_res$CellType - ) - )) == 1) - err_msg2 <- paste0( - "All of the cells within total_res should come", - " from a single annotation layer of the CTD" - ) - if (length(annotLevel) == 0) { - stop(err_msg2) - } - } - #### Set order of cells #### - if (length(ctd[[annotLevel]]$plotting) > 0) { - total_res$CellType <- - factor(x = fix_celltype_names(total_res$CellType), - levels = fix_celltype_names( - ctd[[annotLevel]]$plotting$cell_ordering - 
), - ordered = TRUE - ) - } - } - } - #### Multiple testing correction across all rows #### - if (!"q" %in% colnames(total_res)) { - total_res$q <- stats::p.adjust(total_res$p, - method = mtc_method - ) - } - #### Mark significant rows with asterixes #### - ast_q <- rep("", dim(total_res)[1]) - ast_q[total_res$q < q_threshold] <- "*" - total_res$ast_q <- ast_q - #### Plot #### - total_res$sd_from_mean[total_res$sd_from_mean < 0] <- 0 - graph_theme <- ggplot2::theme_bw(base_size = 12, - base_family = "Helvetica") + - ggplot2::theme( - text = ggplot2::element_text(size = 14), - axis.title.y = ggplot2::element_text(vjust = 0.6), - strip.background = ggplot2::element_rect(fill = "white"), - strip.text = ggplot2::element_text(color = "black") - ) - - upperLim <- max(abs(total_res$sd_from_mean), na.rm = TRUE) - total_res$y_ast <- total_res$sd_from_mean * 1.05 - total_res$abs_sd <- abs(total_res$sd_from_mean) - - if ("Direction" %in% colnames(total_res)) { - the_plot <- ggplot2::ggplot(total_res) + - ggplot2::geom_bar( - ggplot2::aes_string(x = "CellType", y = "abs_sd", - fill = "Direction" - ), - position = "dodge", stat = "identity" - ) + - graph_theme + # templateR:::args2vars(ewce_plot) + + requireNamespace("ggplot2") + requireNamespace("patchwork") + + check_mtc_method(mtc_method = mtc_method) + multiList <- TRUE + if (is.null(total_res$list)) multiList <- FALSE + #### If using dendrogram #### + if(isTRUE(make_dendro)){ + #### Check if ctd is provided #### + if(is.null(ctd)){ + messager( + "Warning: Can only add the dendrogram when ctd is provided.", + "Setting make_dendro=FALSE.", + v=verbose) + make_dendro <- FALSE } else { - the_plot <- ggplot2::ggplot(total_res) + - ggplot2::geom_bar( - ggplot2::aes_string(x = "CellType", y = "abs_sd", - fill = "abs_sd"), - stat = "identity" - ) + - ggplot2::scale_fill_gradient(low = "blue", high = "red") + - graph_theme + - ggplot2::theme(legend.position = "none") - } - - # Setup the main plot - the_plot <- the_plot + - 
ggplot2::theme( - plot.margin = ggplot2::unit(c(.5, 0, 0, 0), "mm"), - axis.text.x = ggplot2::element_text(angle = 55, hjust = 1) - ) + - ggplot2::theme(panel.border = ggplot2::element_rect( - colour = "black", - fill = NA, linewidth = 1 - )) + - ggplot2::xlab("Cell type") + - ggplot2::theme(strip.text.y = ggplot2::element_text(angle = 0)) + - ggplot2::ylab("Std.Devs. from the mean") - - the_plot <- the_plot + - ggplot2::scale_y_continuous(breaks = c(0, ceiling(upperLim * 0.66)), - expand = c(0, 1.1)) + - ggplot2::geom_text( - ggplot2::aes_string(label = "ast_q", x = "CellType", y = "y_ast"), - size = 10 + # Find the relevant level of the CTD annotation + if (length(ctd[[annotLevel]]$plotting) > 0) { + annotLevel <- + which(unlist(lapply(ctd, + FUN = cells_in_ctd, + cells = as.character( + total_res$CellType + ) + )) == 1) + err_msg2 <- paste0( + "All of the cells within total_res should come", + " from a single annotation layer of the CTD" ) - if (isTRUE(multiList)) { - the_plot <- the_plot + - ggplot2::facet_grid("list ~ .", - scales = "free", - space = "free_x") - } - #### Prepare output list #### - output <- list() - output$plain <- the_plot - if (isTRUE(make_dendro)) { + if (length(annotLevel) == 0) { + stop(err_msg2) + } + cell_ordr <- ctd[[annotLevel]]$plotting$cell_ordering + }else{ + #generate dendrogram - gives ordering ctdIN <- prep_dendro(ctdIN = ctd[[annotLevel]], expand = c(0, .66)) - output$withDendro <- patchwork::wrap_plots( - ctdIN$plotting$ggdendro_horizontal, - the_plot, - heights = heights, - ncol = 1) - } - - return(output) -} + cell_ordr <- ctdIN$plotting$cell_ordering + } + #### Set order of cells #### + total_res$CellType <- + factor(x = fix_celltype_names(total_res$CellType), + levels = fix_celltype_names( + cell_ordr + ), + ordered = TRUE + ) + } + } + #### Multiple testing correction across all rows #### + if (!"q" %in% colnames(total_res)) { + total_res$q <- stats::p.adjust(total_res$p, + method = mtc_method + ) + } + #### Mark 
significant rows with asterixes #### + ast_q <- rep("", dim(total_res)[1]) + ast_q[total_res$q < q_threshold] <- "*" + total_res$ast_q <- ast_q + #### Plot #### + total_res$sd_from_mean[total_res$sd_from_mean < 0] <- 0 + graph_theme <- ggplot2::theme_bw(base_size = 12, + base_family = "Helvetica") + + ggplot2::theme( + text = ggplot2::element_text(size = 14), + axis.title.y = ggplot2::element_text(vjust = 0.6), + strip.background = ggplot2::element_rect(fill = "white"), + strip.text = ggplot2::element_text(color = "black") + ) + + upperLim <- max(abs(total_res$sd_from_mean), na.rm = TRUE) + total_res$y_ast <- total_res$sd_from_mean * 1.05 + total_res$abs_sd <- abs(total_res$sd_from_mean) + + if ("Direction" %in% colnames(total_res)) { + the_plot <- ggplot2::ggplot(total_res) + + ggplot2::geom_bar( + ggplot2::aes_string(x = "CellType", y = "abs_sd", + fill = "Direction" + ), + position = "dodge", stat = "identity" + ) + + graph_theme + } else { + the_plot <- ggplot2::ggplot(total_res) + + ggplot2::geom_bar( + ggplot2::aes_string(x = "CellType", y = "abs_sd", + fill = "abs_sd"), + stat = "identity" + ) + + ggplot2::scale_fill_gradient(low = "blue", high = "red") + + graph_theme + + ggplot2::theme(legend.position = "none") + } + + # Setup the main plot + the_plot <- the_plot + + ggplot2::theme( + plot.margin = ggplot2::unit(c(.5, 0, 0, 0), "mm"), + axis.text.x = ggplot2::element_text(angle = 55, hjust = 1) + ) + + ggplot2::theme(panel.border = ggplot2::element_rect( + colour = "black", + fill = NA, linewidth = 1 + )) + + ggplot2::xlab("Cell type") + + ggplot2::theme(strip.text.y = ggplot2::element_text(angle = 0)) + + ggplot2::ylab("Std.Devs. 
from the mean") + + the_plot <- the_plot + + ggplot2::scale_y_continuous(breaks = c(0, ceiling(upperLim * 0.66)), + expand = c(0, 1.1)) + + ggplot2::geom_text( + ggplot2::aes_string(label = "ast_q", x = "CellType", y = "y_ast"), + size = 10 + ) + if (isTRUE(multiList)) { + the_plot <- the_plot + + ggplot2::facet_grid("list ~ .", + scales = "free", + space = "free_x") + } + #### Prepare output list #### + output <- list() + output$plain <- the_plot + if (isTRUE(make_dendro)) { + #ctdIN won't exist if plotting found earlier + if(length(ctd[[annotLevel]]$plotting) > 0){ + ctdIN <- prep_dendro(ctdIN = ctd[[annotLevel]], + expand = c(0, .66)) + } + #update plot ordering by dendrogram + output$withDendro <- patchwork::wrap_plots( + ctdIN$plotting$ggdendro_horizontal, + the_plot, + heights = heights, + ncol = 1) + } + + return(output) +} \ No newline at end of file diff --git a/man/drop_uninformative_genes.Rd b/man/drop_uninformative_genes.Rd index aa1edb9f..c1a9ff8d 100755 --- a/man/drop_uninformative_genes.Rd +++ b/man/drop_uninformative_genes.Rd @@ -152,6 +152,27 @@ Set to \code{NULL} to skip aggregation step (default).} Passed to \link[gprofiler2]{gorth}. Only used when \code{method="gprofiler"} (\emph{DEFAULT : }\code{Inf}).} \item{\code{sort_rows}}{Sort \code{gene_df} rows alphanumerically.} + \item{\code{gene_map}}{A \link[base]{data.frame} that maps the current gene names +to new gene names. 
+This function's behaviour will adapt to different situations as follows: +\itemize{ +\item{\code{gene_map=} :\cr}{ When a data.frame containing the +gene key:value columns +(specified by \code{input_col} and \code{output_col}, respectively) +is provided, this will be used to perform aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species!=output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_orthologs} to perform inter-species + gene aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species==output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_genes} to perform within-species + gene symbol standardization and aggregation/expansion.} +}} + \item{\code{input_col}}{Column name within \code{gene_map} with gene names matching +the row names of \code{X}.} + \item{\code{output_col}}{Column name within \code{gene_map} with gene names +that you wish to map the row names of \code{X} onto.} }} } \value{ diff --git a/man/ewce_plot.Rd b/man/ewce_plot.Rd index 644167bd..4211c284 100755 --- a/man/ewce_plot.Rd +++ b/man/ewce_plot.Rd @@ -46,7 +46,8 @@ Passed to \link[patchwork]{wrap_plots}.} } \value{ A named list containing versions of the \link[ggplot2]{ggplot} - with and without the dendrogram. + with and without the dendrogram. Note that cell type order on the x-axis is + based on hierarchical clustering for both plots if make_dendro = TRUE. } \description{ \code{ewce_plot} generates plots of EWCE enrichment results diff --git a/man/extract_matrix.Rd b/man/extract_matrix.Rd index 598ffc14..b7eb9a8b 100755 --- a/man/extract_matrix.Rd +++ b/man/extract_matrix.Rd @@ -154,6 +154,27 @@ Set to \code{NULL} to skip aggregation step (default).} Passed to \link[gprofiler2]{gorth}. 
Only used when \code{method="gprofiler"} (\emph{DEFAULT : }\code{Inf}).} \item{\code{sort_rows}}{Sort \code{gene_df} rows alphanumerically.} + \item{\code{gene_map}}{A \link[base]{data.frame} that maps the current gene names +to new gene names. +This function's behaviour will adapt to different situations as follows: +\itemize{ +\item{\code{gene_map=} :\cr}{ When a data.frame containing the +gene key:value columns +(specified by \code{input_col} and \code{output_col}, respectively) +is provided, this will be used to perform aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species!=output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_orthologs} to perform inter-species + gene aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species==output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_genes} to perform within-species + gene symbol standardization and aggregation/expansion.} +}} + \item{\code{input_col}}{Column name within \code{gene_map} with gene names matching +the row names of \code{X}.} + \item{\code{output_col}}{Column name within \code{gene_map} with gene names +that you wish to map the row names of \code{X} onto.} }} } \value{ diff --git a/man/filter_nonorthologs.Rd b/man/filter_nonorthologs.Rd index bea4c81b..7c0fd60b 100755 --- a/man/filter_nonorthologs.Rd +++ b/man/filter_nonorthologs.Rd @@ -140,6 +140,27 @@ it will be returned as a sparse matrix \item{\code{as_DelayedArray}}{Convert aggregated matrix to \link[DelayedArray]{DelayedArray}.} \item{\code{sort_rows}}{Sort \code{gene_df} rows alphanumerically.} + \item{\code{gene_map}}{A \link[base]{data.frame} that maps the current gene names +to new gene names. 
+This function's behaviour will adapt to different situations as follows: +\itemize{ +\item{\code{gene_map=} :\cr}{ When a data.frame containing the +gene key:value columns +(specified by \code{input_col} and \code{output_col}, respectively) +is provided, this will be used to perform aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species!=output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_orthologs} to perform inter-species + gene aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species==output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_genes} to perform within-species + gene symbol standardization and aggregation/expansion.} +}} + \item{\code{input_col}}{Column name within \code{gene_map} with gene names matching +the row names of \code{X}.} + \item{\code{output_col}}{Column name within \code{gene_map} with gene names +that you wish to map the row names of \code{X} onto.} }} } \value{ diff --git a/man/generate_celltype_data.Rd b/man/generate_celltype_data.Rd index 1d448e36..d813bef4 100755 --- a/man/generate_celltype_data.Rd +++ b/man/generate_celltype_data.Rd @@ -175,6 +175,27 @@ Set to \code{NULL} to skip aggregation step (default).} Passed to \link[gprofiler2]{gorth}. Only used when \code{method="gprofiler"} (\emph{DEFAULT : }\code{Inf}).} \item{\code{sort_rows}}{Sort \code{gene_df} rows alphanumerically.} + \item{\code{gene_map}}{A \link[base]{data.frame} that maps the current gene names +to new gene names. 
+This function's behaviour will adapt to different situations as follows: +\itemize{ +\item{\code{gene_map=} :\cr}{ When a data.frame containing the +gene key:value columns +(specified by \code{input_col} and \code{output_col}, respectively) +is provided, this will be used to perform aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species!=output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_orthologs} to perform inter-species + gene aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species==output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_genes} to perform within-species + gene symbol standardization and aggregation/expansion.} +}} + \item{\code{input_col}}{Column name within \code{gene_map} with gene names matching +the row names of \code{X}.} + \item{\code{output_col}}{Column name within \code{gene_map} with gene names +that you wish to map the row names of \code{X} onto.} }} } \value{ diff --git a/man/standardise_ctd.Rd b/man/standardise_ctd.Rd index 95080173..4aa7131b 100755 --- a/man/standardise_ctd.Rd +++ b/man/standardise_ctd.Rd @@ -164,6 +164,27 @@ Set to \code{NULL} to skip aggregation step (default).} Passed to \link[gprofiler2]{gorth}. Only used when \code{method="gprofiler"} (\emph{DEFAULT : }\code{Inf}).} \item{\code{sort_rows}}{Sort \code{gene_df} rows alphanumerically.} + \item{\code{gene_map}}{A \link[base]{data.frame} that maps the current gene names +to new gene names. 
+This function's behaviour will adapt to different situations as follows: +\itemize{ +\item{\code{gene_map=} :\cr}{ When a data.frame containing the +gene key:value columns +(specified by \code{input_col} and \code{output_col}, respectively) +is provided, this will be used to perform aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species!=output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_orthologs} to perform inter-species + gene aggregation/expansion.} +\item{\code{gene_map=NULL} and \code{input_species==output_species} :\cr}{ +A \code{gene_map} is automatically generated by + \link[orthogene]{map_genes} to perform within-species + gene symbol standardization and aggregation/expansion.} +}} + \item{\code{input_col}}{Column name within \code{gene_map} with gene names matching +the row names of \code{X}.} + \item{\code{output_col}}{Column name within \code{gene_map} with gene names +that you wish to map the row names of \code{X} onto.} }} } \value{ diff --git a/tests/testthat/test-ewce_plot.r b/tests/testthat/test-ewce_plot.r index 16d0ffee..9ff50bc0 100644 --- a/tests/testthat/test-ewce_plot.r +++ b/tests/testthat/test-ewce_plot.r @@ -1,14 +1,30 @@ test_that("ewce_plot works", { - - full_results <- EWCE::example_bootstrap_results() - ctd <- ewceData::ctd() - #### ewce_plot #### - ewce_plot_res <- ewce_plot( - total_res = full_results$results, - ctd = ctd, - make_dendro = TRUE - ) - # Fail if any but ggplot returned - testthat::expect_true(methods::is(ewce_plot_res$plain, "gg")) - testthat::expect_true(methods::is(ewce_plot_res$withDendro, "gg")) -}) + + full_results <- EWCE::example_bootstrap_results() + ctd <- ewceData::ctd() + #### ewce_plot #### + ewce_plot_res <- ewce_plot( + total_res = full_results$results, + ctd = ctd, + make_dendro = TRUE + ) + # Fail if any but ggplot returned + testthat::expect_true(methods::is(ewce_plot_res$plain, "gg")) + 
testthat::expect_true(methods::is(ewce_plot_res$withDendro, "gg")) + #remove precomputed dendrogram (plotting) from ctd and rerun so ewce_plot must rebuild it, should get same order + ctd_basic <- ctd + ctd_basic[[1]]$plotting <- NULL + #### ewce_plot #### + ewce_plot_res_basic <- ewce_plot( + total_res = full_results$results, + ctd = ctd_basic, + make_dendro = TRUE + ) + #so order of 4 plots should be the same + testthat::expect_true( + all(sapply(list(ewce_plot_res_basic$withDendro$data$CellType, + ewce_plot_res$plain$data$CellType, + ewce_plot_res_basic$plain$data$CellType), + FUN = identical, ewce_plot_res$withDendro$data$CellType)) + ) +}) \ No newline at end of file