diff --git a/DESCRIPTION b/DESCRIPTION index b67819e..7c9fb28 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,8 +31,8 @@ Imports: cowplot (>= 1.1.1), dplyr (>= 1.0.9), GeomxTools (>= 3.1.1), - ggforce (== 0.3.4), - ggplot2 (== 3.3.6), + ggforce (>= 0.3.4), + ggplot2 (>= 3.3.6), gridExtra (>= 2.3), grid (>= 4.1.3), gtable (>= 0.3.0), diff --git a/NAMESPACE b/NAMESPACE index 86b2fc3..e91555b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,7 @@ importFrom(dplyr,count) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,pull) +importFrom(dplyr,rename) importFrom(dplyr,select) importFrom(ggforce,gather_set_data) importFrom(ggforce,geom_parallel_sets) diff --git a/R/filtering.R b/R/filtering.R index db967c0..c128a1f 100644 --- a/R/filtering.R +++ b/R/filtering.R @@ -10,6 +10,19 @@ #' @details This function will run various filtering parameters for NanoStringGeoMxSet datasets #' #' @param object A NanoStringGeoMxSet dataset +#' @param loq.cutoff The number of standard deviations above the negative probe +#' geometric mean to use as a cutoff for the limit of quantification +#' @param loq.min The minimum value for the limit of quantification +#' @param segment.gene.rate.cutoff A decimal for the minimum cutoff for the +#' genes detected in a given segment over the total number of genes in the +#' probe set +#' @param study.gene.rate.cutoff A decimal for the minimum cutoff for the +#' average amount a given gene is detected in all segments +#' @param sankey.exclude.slide A toggle for including the slide name in the +#' Sankey Plot +#' @param goi A list of genes of interest to evaluate for their study-wide +#' detection rate +#' #' @importFrom scales percent #' @importFrom Biobase pData #' @importFrom Biobase fData @@ -18,12 +31,6 @@ #' @export #' @return A list containing the .... 
-# To call function, must have data = raw object, dsp.obj = QC demoData, -# loq.cutoff 2 is recommended, -# loq.min 2 is recommend, -# segment.gene.rate.cutoff = remove segments with less than x% of the gene set detected; .05-.1 recommended, -# study.gene.rate.cutoff = remove genes detected in less than x% of segments; .05-.2 recommended, -# goi = goi (genes of interest). Must be a vector of genes (i.e c("PDCD1", "CD274")), filtering <- function(object, loq.cutoff = 2, loq.min = 2, @@ -135,32 +142,46 @@ filtering <- function(object, stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" )) } - # Gather the data and plot in order: class, slide name, region, segment # gather_set_data creates x, id, y, and n fields within sankey.count.data # Establish the levels of the Sankey with or without the slide name if(sankey.exclude.slide == TRUE){ + # Create a dataframe used to make the Sankey plot sankey.count.data <- gather_set_data(count.mat, 1:3) - sankey.count.data$x <- - factor( - sankey.count.data$x, - levels = c("class", "region", "segment") - ) + + # Define the annotations to use for the Sankey x axis labels + sankey.count.data$x[sankey.count.data$x == 1] <- "class" + sankey.count.data$x[sankey.count.data$x == 2] <- "region" + sankey.count.data$x[sankey.count.data$x == 3] <- "segment" + + factor( + sankey.count.data$x, + levels = c("class", "region", "segment") + ) + # For position of Sankey 100 segment scale adjust.scale.pos = 1 } else { + # Create a dataframe used to make the Sankey plot sankey.count.data <- gather_set_data(count.mat, 1:4) - sankey.count.data$x <- - factor( - sankey.count.data$x, - levels = c("class", "slide_name", "region", "segment") - ) + + # Define the annotations to use for the Sankey x axis labels + sankey.count.data$x[sankey.count.data$x == 1] <- "slide_name" + sankey.count.data$x[sankey.count.data$x == 2] <- "class" + sankey.count.data$x[sankey.count.data$x == 3] <- "region" + sankey.count.data$x[sankey.count.data$x 
== 4] <- "segment" + + factor( + sankey.count.data$x, + levels = c("class", "slide_name", "region", "segment") + ) + # For position of Sankey 100 segment scale adjust.scale.pos = 0 } # plot Sankey sankey.plot <- ggplot(sankey.count.data, aes(x, id = id, split = y, value = n)) + - geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) + + geom_parallel_sets(aes(fill = class), alpha = 0.5, axis.width = 0.1) + geom_parallel_sets_axes(axis.width = 0.2) + geom_parallel_sets_labels(color = "gray", size = 5, angle = 0) + theme_classic(base_size = 17) + diff --git a/R/spatial_deconvolution.R b/R/spatial_deconvolution.R index b6e8fcd..b6beeb5 100644 --- a/R/spatial_deconvolution.R +++ b/R/spatial_deconvolution.R @@ -1,8 +1,8 @@ -#' @title Spatial Deconvolution -#' #' Helper functions comes from #' https://bioconductor.org/packages/release/bioc/vignettes/SpatialDecon/inst/doc/SpatialDecon_vignette_NSCLC.html -#' +#' +#' @title Spatial Deconvolution +#' #' @description spatialDeconvolution estimate cell composition across DSP #' samples from reference expression matrix #' @@ -41,18 +41,6 @@ #' @importFrom ComplexHeatmap pheatmap #' #' @export -#' @example Do not run: spatialDeconvolution(object = NanostringGeomx, -#' expr.type = "q_norm", -#' prof.mtx = profile_matrix, -#' clust.rows = TRUE, -#' clust.cols = TRUE, -#' group.by = "none", -#' plot.fontsize = 5, -#' use.custom.prof.mtx = FALSE, -#' ref.mtx = reference_matrix, -#' ref.annot = reference_annotation, -#' cell.id.col = "CellID", -#' celltype.col = "LabeledCellType") #' #' @return A list dsp.data containing the results of spatial deconvolution, #' res$beta: matrix of estimated cell abundances @@ -66,7 +54,7 @@ spatialDeconvolution <- function(object, - expr.type, + expr.type = "q_norm", prof.mtx, clust.rows = TRUE, clust.cols = TRUE, @@ -79,8 +67,8 @@ spatialDeconvolution <- function(object, min.genes = 10, ref.mtx, ref.annot, - cell.id.col, - celltype.col) { + cell.id.col = "CellID", + celltype.col = 
"LabeledCellType") { # Check for Parameter Misspecification Error(s) if (!expr.type %in% names(object@assayData)) { diff --git a/R/study_design.R b/R/study_design.R index f02f1f1..dc6404e 100644 --- a/R/study_design.R +++ b/R/study_design.R @@ -27,10 +27,14 @@ #' phenoDataFile containing data about the experiment's meta-data. #' @param slide.name.col The name of the field that contains the slide names #' @param class.col The name of the field that contains the class annotation -#' @param region.col The name of the field that contains the class annotation -#' @param segment.col The name of the field that contains the class annotation -#' -#' +#' @param region.col The name of the field that contains the region annotation +#' @param segment.col The name of the field that contains the segment name +#' @param area.col The name of the field that contains the segment area +#' @param nuclei.col The name of the field that contains the nuclei number +#' @param sankey.exclude.slide A toggle for including the slide name in the +#' Sankey plot +#' @param segment.id.length The number of characters to use from each of the +#' annotation fields class, region, and segment to create the segment ID #' #' @importFrom GeomxTools readNanoStringGeoMxSet #' @importFrom knitr kable @@ -52,8 +56,6 @@ #' @export #' @return A list containing the NanoString Object and the Sankey plot. 
- - studyDesign <- function(dcc.files, pkc.files, pheno.data.file, @@ -177,26 +179,39 @@ studyDesign <- function(dcc.files, rownames(count.mat) <- 1:nrow(count.mat) } - - # Gather the data and plot in order: class, slide name, region, segment # gather_set_data creates x, id, y, and n fields within sankey.count.data # Establish the levels of the Sankey with or without the slide name if(sankey.exclude.slide == TRUE){ + # Create a dataframe used to make the Sankey plot sankey.count.data <- gather_set_data(count.mat, 1:3) - sankey.count.data$x <- - factor( - sankey.count.data$x, - levels = c("class", "region", "segment") - ) + + # Define the annotations to use for the Sankey x axis labels + sankey.count.data$x[sankey.count.data$x == 1] <- "class" + sankey.count.data$x[sankey.count.data$x == 2] <- "region" + sankey.count.data$x[sankey.count.data$x == 3] <- "segment" + + factor( + sankey.count.data$x, + levels = c("class", "region", "segment") + ) + # For position of Sankey 100 segment scale adjust.scale.pos = 1 } else { + # Create a dataframe used to make the Sankey plot sankey.count.data <- gather_set_data(count.mat, 1:4) - sankey.count.data$x <- - factor( - sankey.count.data$x, - levels = c("class", "slide_name", "region", "segment") - ) + + # Define the annotations to use for the Sankey x axis labels + sankey.count.data$x[sankey.count.data$x == 1] <- "slide_name" + sankey.count.data$x[sankey.count.data$x == 2] <- "class" + sankey.count.data$x[sankey.count.data$x == 3] <- "region" + sankey.count.data$x[sankey.count.data$x == 4] <- "segment" + + factor( + sankey.count.data$x, + levels = c("class", "slide_name", "region", "segment") + ) + # For position of Sankey 100 segment scale adjust.scale.pos = 0 } @@ -210,7 +225,7 @@ studyDesign <- function(dcc.files, split = y, value = n )) + - geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) + + geom_parallel_sets(aes(fill = class), alpha = 0.5, axis.width = 0.1) + geom_parallel_sets_axes(axis.width = 0.2) + 
geom_parallel_sets_labels(color = "gray", size = 5, diff --git a/R/violin_plot.R b/R/violin_plot.R index 895435a..b5c4513 100644 --- a/R/violin_plot.R +++ b/R/violin_plot.R @@ -19,18 +19,13 @@ #' @importFrom gridExtra arrangeGrob #' #' @export -#' @example Do not run: violinPlot(object = NanostringGeomx, -#' expr.type = "q_norm", -#' genes = c("FOXP3","CD4"), -#' group = "CellType", -#' facet.by = "segment") #' #' @return an arranged grob of violin plots violinPlot <- function(object, - expr.type, - genes, - group, + expr.type = "q_norm", + genes = c("FOXP3","CD4"), + group = "CellType", facet.by = "none") { # Check for Parameter Misspecification Error(s) diff --git a/man/diffExpr.Rd b/man/diffExpr.Rd index 10222ca..1c489d0 100644 --- a/man/diffExpr.Rd +++ b/man/diffExpr.Rd @@ -11,11 +11,11 @@ diffExpr( group.col, regions, region.col, - slide.col = "slide name", + slide.col = "slide_name", element = "q_norm", multi.core = TRUE, n.cores = 1, - p.adjust = "BY", + p.adjust = "fdr", pairwise = TRUE, fc.lim = 1.2, pval.lim.1 = 0.05, @@ -50,7 +50,7 @@ slide)} (default is 1)} \item{p.adjust}{Method to use for pvalue adjustment. Choices are "holm", -"hochberg","hommel","bonferroni","BH","BY","fdr","none". (default is "BY")} +"hochberg","hommel","bonferroni","BH","BY","fdr","none". 
(default is "fdr")} \item{pairwise}{Boolean to calculate least-square means pairwise differences (default is TRUE)} @@ -66,7 +66,7 @@ is 1.2)} } \value{ a list containing mixed model output data frame, grid tables for -samples and summary of genelists +samples used in analysis and summary of significant genelists } \description{ diffExpr returns a DEG table with fold changes and p-values and diff --git a/man/filtering.Rd b/man/filtering.Rd index df45836..1c9a93e 100644 --- a/man/filtering.Rd +++ b/man/filtering.Rd @@ -4,10 +4,36 @@ \alias{filtering} \title{Filtering NanoStringGeoMxSet dataset} \usage{ -filtering(object, loq.cutoff, loq.min, cut.segment, goi) +filtering( + object, + loq.cutoff = 2, + loq.min = 2, + segment.gene.rate.cutoff = 0.05, + study.gene.rate.cutoff = 0.05, + sankey.exclude.slide = FALSE, + goi +) } \arguments{ \item{object}{A NanoStringGeoMxSet dataset} + +\item{loq.cutoff}{The number of standard deviations above the negative probe +geometric mean to use as a cutoff for the limit of quantification} + +\item{loq.min}{The minimum value for the limit of quantification} + +\item{segment.gene.rate.cutoff}{A decimal for the minimum cutoff for the +genes detected in a given segment over the total number of genes in the +probe set} + +\item{study.gene.rate.cutoff}{A decimal for the minimum cutoff for the +average amount a given gene is detected in all segments} + +\item{sankey.exclude.slide}{A toggle for including the slide name in the +Sankey Plot} + +\item{goi}{A list of genes of interest to evaluate for their study-wide +detection rate} } \value{ A list containing the .... 
diff --git a/man/geomxNorm.Rd b/man/geomxNorm.Rd index 36c4ebe..c3bea8d 100644 --- a/man/geomxNorm.Rd +++ b/man/geomxNorm.Rd @@ -9,7 +9,7 @@ geomxNorm(object, norm) \arguments{ \item{object}{A NanoStringGeoMxSet dataset} -\item{norm}{A vector with options of c(quant or neg)} +\item{norm}{A vector with options of c(q3 or neg)} } \value{ A list containing the ggplot grid, a boxplot, an normalized dataframe. diff --git a/man/spatialDeconvolution.Rd b/man/spatialDeconvolution.Rd index 43e8ce4..c17775f 100644 --- a/man/spatialDeconvolution.Rd +++ b/man/spatialDeconvolution.Rd @@ -2,25 +2,25 @@ % Please edit documentation in R/spatial_deconvolution.R \name{spatialDeconvolution} \alias{spatialDeconvolution} -\title{Spatial Deconvolution - -Helper functions comes from -https://bioconductor.org/packages/release/bioc/vignettes/SpatialDecon/inst/doc/SpatialDecon_vignette_NSCLC.html} +\title{Spatial Deconvolution} \usage{ spatialDeconvolution( object, - expr.type, - ref.mtx, - ref.annot, - cell.id.col, - celltype.col, - group.by = NULL, - out.directory = NULL, - matrix.name = "customDSPmtx", + expr.type = "q_norm", + prof.mtx, + clust.rows = TRUE, + clust.cols = TRUE, + group.by = "none", + plot.fontsize = 5, + use.custom.prof.mtx = FALSE, + discard.celltype = FALSE, normalize = FALSE, min.cell.num = 0, min.genes = 10, - discard.celltype = FALSE + ref.mtx, + ref.annot, + cell.id.col = "CellID", + celltype.col = "LabeledCellType" ) } \arguments{ @@ -28,29 +28,39 @@ spatialDeconvolution( \item{expr.type}{Name of slot containing normalized gene expression data} -\item{ref.mtx}{Reference expression matrix (Gene x Reference_Samples)} - -\item{ref.annot}{Reference data.frame with cell.id and celltype information} +\item{prof.mtx}{Use stored profile matrix} -\item{cell.id.col}{Column of data.frame containing cell.id.col info} +\item{clust.rows}{Cluster rows in heatmap (Default: TRUE)} -\item{celltype.col}{Column of data.frame containing celltype info} +\item{clust.cols}{Cluster 
columns in heatmap (Default: TRUE)} -\item{group.by}{Organize heatmap / barplot columns by metadata group} +\item{group.by}{Organize heatmap / barplot columns by metadata group +(Default: "none")} -\item{out.directory}{Path to desired output directory, set to NULL if matrix -should not be written} +\item{plot.fontsize}{Set size of labels on all figures (Default: 5)} -\item{matrix.name}{Name given to deconvolution signature matrix} +\item{discard.celltype}{Remove any celltype(s) that is not of interest +(Default: FALSE)} -\item{normalize}{Scale profile matrix gene expression according to gene count} +\item{normalize}{Scale profile matrix gene expression according to gene count +(Default: FALSE)} \item{min.cell.num}{Prevent deconvolution of celltype(s) if number of -corresponding cells is below this threshold} +corresponding cells is below this threshold (Default: 0)} -\item{min.genes}{Filter cells based on minimum number of genes expressed} +\item{min.genes}{Filter cells based on minimum number of genes expressed +(Default: 10)} -\item{discard.celltype}{Remove any celltype(s) that is not of interest} +\item{ref.mtx}{Custom reference expression matrix (Gene x Reference_Samples)} + +\item{ref.annot}{Custom reference data.frame with cell.id and celltype +information} + +\item{cell.id.col}{Column of data.frame containing cell.id.col info} + +\item{celltype.col}{Column of data.frame containing celltype info} + +\item{use.custom.prof.mtx}{Generate custom profile matrix (Default: FALSE)} } \value{ A list dsp.data containing the results of spatial deconvolution, @@ -68,6 +78,9 @@ spatialDeconvolution estimate cell composition across DSP samples from reference expression matrix } \details{ +Helper functions comes from +https://bioconductor.org/packages/release/bioc/vignettes/SpatialDecon/inst/doc/SpatialDecon_vignette_NSCLC.html + Uses Nanostring developed functions to compute estimated cell fractions in DSP samples. 
Allows for users to group samples based on metadata information diff --git a/man/studyDesign.Rd b/man/studyDesign.Rd index a94766f..e16541b 100644 --- a/man/studyDesign.Rd +++ b/man/studyDesign.Rd @@ -11,7 +11,15 @@ studyDesign( pheno.data.sheet = "Template", pheno.data.dcc.col.name = "Sample_ID", protocol.data.col.names = c("aoi", "roi"), - experiment.data.col.names = c("panel") + experiment.data.col.names = c("panel"), + slide.name.col = "slide name", + class.col = "class", + region.col = "region", + segment.col = "segment", + area.col = "area", + nuclei.col = "nuclei", + sankey.exclude.slide = FALSE, + segment.id.length = 4 ) } \arguments{ @@ -36,6 +44,24 @@ sequencing data.} \item{experiment.data.col.names}{Character list of column names from phenoDataFile containing data about the experiment's meta-data.} + +\item{slide.name.col}{The name of the field that contains the slide names} + +\item{class.col}{The name of the field that contains the class annotation} + +\item{region.col}{The name of the field that contains the region annotation} + +\item{segment.col}{The name of the field that contains the segment name} + +\item{area.col}{The name of the field that contains the segment area} + +\item{nuclei.col}{The name of the field that contains the nuclei number} + +\item{sankey.exclude.slide}{A toggle for including the slide name in the +Sankey plot} + +\item{segment.id.length}{The number of characters to use from each of the +annotation fields class, region, and segment to create the segment ID} } \value{ A list containing the NanoString Object and the Sankey plot. 
diff --git a/man/violinPlot.Rd b/man/violinPlot.Rd index 70542d5..1f7ae8c 100644 --- a/man/violinPlot.Rd +++ b/man/violinPlot.Rd @@ -4,7 +4,13 @@ \alias{violinPlot} \title{Violin plot of spatially profiled genes} \usage{ -violinPlot(object, expr.type, genes, group, facet.by = NULL) +violinPlot( + object, + expr.type = "q_norm", + genes = c("FOXP3", "CD4"), + group = "CellType", + facet.by = "none" +) } \arguments{ \item{object}{Name of NanostringGeoMxSet to perform analysis on} diff --git a/vignettes/Integration_Test_Kidney.Rmd b/vignettes/Integration_Test_Kidney.Rmd index 4c5e372..cee8b38 100644 --- a/vignettes/Integration_Test_Kidney.Rmd +++ b/vignettes/Integration_Test_Kidney.Rmd @@ -71,7 +71,7 @@ This runs the DSPworkflow package to completion using the Human Kidney Dataset: nuclei.col = "nuclei") # For creating fixture RDS - create.rds <- TRUE + create.rds <- FALSE if(create.rds) { study.design.human.kidney <- sdesign.list$object saveRDS(study.design.human.kidney, file = "tests/testthat/fixtures/Human_Kidney/studyDesignHumanKidney.RDS") @@ -101,7 +101,7 @@ qc.output <- qcProc(object = sdesign.list$object, print.plots = TRUE) print(qc.output$segments.qc) - create.rds <- TRUE + create.rds <- FALSE if(create.rds) { qc.human.kidney <- qc.output$object saveRDS(qc.human.kidney, file = "tests/testthat/fixtures/Human_Kidney/qcHumanKidney.RDS") @@ -118,11 +118,12 @@ qc.output <- qcProc(object = sdesign.list$object, filtering.output <- filtering(object = qc.output$object, loq.cutoff = 2, loq.min = 2, - cut.segment = .1, + segment.gene.rate.cutoff = 0.05, + study.gene.rate.cutoff = 0.05, goi = goi) print(filtering.output$`stacked.bar.plot`) - print(filtering.output$`tab`) + print(filtering.output$`segment.table`) print(filtering.output$`sankey.plot`) print(filtering.output$`genes.detected.plot`) print(filtering.output$'goi.table', row.names = FALSE)