Skip to content

Commit

Permalink
Merge pull request #71 from NIDAP-Community/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
escauley authored Apr 24, 2023
2 parents 3803ebc + 13a9ebc commit 6873033
Show file tree
Hide file tree
Showing 61 changed files with 462 additions and 3,077,837 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/gitflow-R-action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ jobs:
Activating_Parser:
uses: fnlcr-bids-sdsi/gitflow-R/.github/workflows/parser.yml@DSPWorkflow
with:
image_to_use: ghcr.io/nidap-community/dsp_github_workflow_env:r_env_v1.0
image_to_use: ghcr.io/nidap-community/dsp_github_workflow_env:DSPv0.9.3

30 changes: 19 additions & 11 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,20 @@
Package: DSPWorkflow
Title: What the Package Does (One Line, Title Case)
Version: 1.0.0.0
Authors@R:
person("First", "Last", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "YOUR-ORCID-ID"))
Description: The DSP Workflow addresses a growing need to streamline the analysis of Spatial Transcriptomics data produced from Digital Spatial Profiling Technology (NanoString). It can be run in a docker container, and for biologists, in user-friendly web-based interactive notebooks (NIDAP, Palantir Foundry).
Title: A Workflow for Analyzing Digital Spatial Profiling RNA Data
Version: 0.9.2.0
Authors@R: c(person("Rui", "He", email = "[email protected]", role = "aut"),
person("Maggie", "Cam", email = "[email protected]", role = "aut", comment = c(ORCID = "0000-0001-8190-9766")),
person("Ned", "Cauley", email = "[email protected]", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8968-6621")),
person("Jing", "Bian", email = "[email protected]", role = "aut", comment = c(ORCID = "0000-0001-7109-716X")),
person("Difei", "Wang", email = "[email protected]", role = "aut", comment = c(ORCID = "0000-0003-4088-3859")),
person("Chad", "Highfill", email = "[email protected]", role = "aut", comment = c(ORCID = "0000-0003-0046-3593")))
Description: A set of functions for analyzing RNA data from the spatial
transcriptomics approach Digital Spatial Profiling (Nanostring). The user
provides read count data and annotations, and the package outputs
normalized differential expression of genes and further visualizations and
analysis based on user input. It can be run in a docker container and in
user-friendly web-based interactive notebooks (NIDAP, Palantir Foundry).
URL: https://github.com/NIDAP-Community/DSPWorkflow
BugReports: https://github.com/NIDAP-Community/DSPWorkflow/issues
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
Expand All @@ -14,7 +24,6 @@ Suggests:
Depends:
R (>= 3.6)
Imports:
backports (>= 1.4.1),
Biobase (>= 2.54.0),
BiocGenerics (>= 0.40.0),
cowplot (>= 1.1.1),
Expand All @@ -23,21 +32,20 @@ Imports:
ggforce (>= 0.3.4),
ggplot2 (>= 3.3.6),
gridExtra (>= 2.3),
grid (>= 4.1.3),
gtable (>= 0.3.0),
knitr (>= 1.40),
NanoStringNCTools (>= 1.2.0),
patchwork (>= 1.1.2),
reshape2 (>= 1.4.4),
Rmpfr (>= 0.8-9),
Rtsne (>= 0.16),
scales (>= 1.2.1),
stats (>= 4.1.3),
SpatialDecon (>= 1.4.3),
tibble (>= 3.1.8),
tidyr (>= 1.2.1),
tidyverse (>= 1.3.2),
umap (>= 0.2.9.0),
pheatmap (>= 1.0.12),
stringr,
magrittr
magrittr (>= 2.0.3),
ComplexHeatmap (>= 2.10.0)
Config/testthat/edition: 3
19 changes: 11 additions & 8 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,15 @@
export(diffExpr)
export(dimReduct)
export(filtering)
export(geomxnorm)
export(geomxNorm)
export(heatMap)
export(qcProc)
export(spatialDeconvolution)
export(studyDesign)
export(violinPlot)
import(GeomxTools)
import(Biobase)
import(NanoStringNCTools)
import(SpatialDecon)
import(ggplot2)
import(gridExtra)
import(pheatmap)
import(stats)
importFrom(Biobase,assayDataElement)
importFrom(Biobase,exprs)
importFrom(Biobase,fData)
Expand All @@ -26,8 +22,10 @@ importFrom(BiocGenerics,annotation)
importFrom(BiocGenerics,colnames)
importFrom(BiocGenerics,rbind)
importFrom(BiocGenerics,rownames)
importFrom(ComplexHeatmap,pheatmap)
importFrom(GeomxTools,aggregateCounts)
importFrom(GeomxTools,mixedModelDE)
importFrom(GeomxTools,ngeoMean)
importFrom(GeomxTools,normalize)
importFrom(GeomxTools,readNanoStringGeoMxSet)
importFrom(GeomxTools,setBioProbeQCFlags)
Expand All @@ -38,6 +36,9 @@ importFrom(NanoStringNCTools,esBy)
importFrom(NanoStringNCTools,negativeControlSubset)
importFrom(NanoStringNCTools,sData)
importFrom(Rtsne,Rtsne)
importFrom(SpatialDecon,create_profile_matrix)
importFrom(SpatialDecon,derive_GeoMx_background)
importFrom(SpatialDecon,spatialdecon)
importFrom(cowplot,plot_grid)
importFrom(dplyr,arrange)
importFrom(dplyr,count)
Expand Down Expand Up @@ -72,27 +73,29 @@ importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_bw)
importFrom(ggplot2,theme_classic)
importFrom(graphics,boxplot)
importFrom(grid,gpar)
importFrom(grid,grid.draw)
importFrom(grid,grid.newpage)
importFrom(grid,grobHeight)
importFrom(grid,textGrob)
importFrom(gridExtra,arrangeGrob)
importFrom(gridExtra,grid.arrange)
importFrom(gridExtra,tableGrob)
importFrom(gridExtra,ttheme_default)
importFrom(gtable,gtable_add_grob)
importFrom(gtable,gtable_add_rows)
importFrom(knitr,kable)
importFrom(magrittr,"%>%")
importFrom(parallel,detectCores)
importFrom(patchwork,guide_area)
importFrom(patchwork,patchworkGrob)
importFrom(patchwork,plot_annotation)
importFrom(patchwork,plot_layout)
importFrom(patchwork,wrap_elements)
importFrom(patchwork,wrap_plots)
importFrom(pheatmap,pheatmap)
importFrom(reshape2,melt)
importFrom(scales,percent)
importFrom(stats,as.formula)
importFrom(stats,p.adjust)
importFrom(stats,prcomp)
importFrom(stats,quantile)
Expand Down
16 changes: 15 additions & 1 deletion R/differential_expression_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,11 @@
#' @importFrom grid grid.newpage textGrob gpar grobHeight grid.draw
#' @importFrom gtable gtable_add_rows gtable_add_grob
#' @importFrom tibble rownames_to_column
#' @importFrom gridExtra tableGrob
#' @importFrom gridExtra tableGrob ttheme_default
#' @importFrom BiocGenerics rownames colnames rbind
#' @importFrom magrittr %>%
#' @importFrom Biobase pData assayDataElement
#' @importFrom parallel detectCores
#' @export
#'
#' @return a list containing mixed model output data frame, grid tables for
Expand All @@ -71,6 +72,19 @@ diffExpr <- function(object,
pval.lim.1 = 0.05,
pval.lim.2 = 0.01) {

# Check the number of cores available for the current machine
available.cores <- detectCores()

if (n.cores > available.cores) {
print(paste0("The number of cores selected is greater than the number of available cores, reducing number of cores to maximum of ", available.cores))
n.cores <- available.cores
}

# Adjust the number of cores selected within the machine's range




testClass <- testRegion <- Gene <- Subset <- NULL

# convert test variables to factors after checking input
Expand Down
47 changes: 24 additions & 23 deletions R/filtering.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# loq.cutoff 2 is recommended, loq.min 2 is recommend,
# cut.segment = remove segments with less than 10% of the genes detected; .05-.1 recommended,
# goi = goi (genes of interest). Must be a vector of genes (i.e c("PDCD1", "CD274")),
filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
filtering <- function(object, loq.cutoff, loq.min, cut.segment, goi) {

if(class(object)[1] != "NanoStringGeoMxSet"){
stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
Expand All @@ -41,7 +41,8 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
stop(paste0("Error: You have the wrong data class, must be numeric" ))
}
# Define Modules
pkc.file <- pkc.file
#pkc.file <- pkc.file
pkc.file <- annotation(object)
if(class(pkc.file)[1] != "character"){
stop(paste0("Error: You have the wrong data class, must be character" ))
}
Expand All @@ -65,22 +66,22 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
pData(object)$loq <- loq

## 4.5.0 Filtering
loq_mat <- c()
loq.mat <- c()
for(module in modules) {
ind <- fData(object)$Module == module
mat_i <- t(esApply(object[ind, ], MARGIN = 1,
mat.i <- t(esApply(object[ind, ], MARGIN = 1,
FUN = function(x) {
x > loq[, module]
}))
loq_mat <- rbind(loq_mat, mat_i)
loq.mat <- rbind(loq.mat, mat.i)
}
# ensure ordering since this is stored outside of the geomxSet
loq_mat <- loq_mat[fData(object)$TargetName, ]
loq.mat <- loq.mat[fData(object)$TargetName, ]

##4.5.1S egment Gene Detection
# Save detection rate information to pheno data
pData(object)$GenesDetected <-
colSums(loq_mat, na.rm = TRUE)
colSums(loq.mat, na.rm = TRUE)
pData(object)$GeneDetectionRate <-
pData(object)$GenesDetected / nrow(object)

Expand Down Expand Up @@ -114,17 +115,17 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {

# select the annotations we want to show, use `` to surround column names with
# spaces or special symbols
count_mat <- count(pData(object), `slide name`, class, region, segment)
count.mat <- count(pData(object), `slide name`, class, region, segment)
if(class(object)[1] != "NanoStringGeoMxSet"){
stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
}
# simplify the slide names
count_mat$`slide name` <- gsub("disease", "d", gsub("normal", "n", count_mat$`slide name`))
count.mat$`slide name` <- gsub("disease", "d", gsub("normal", "n", count.mat$`slide name`))
# gather the data and plot in order: class, slide name, region, segment
test_gr <- gather_set_data(count_mat, 1:4)
test_gr$x <-factor(test_gr$x, levels = c("class", "slide name", "region", "segment"))
test.gr <- gather_set_data(count.mat, 1:4)
test.gr$x <-factor(test.gr$x, levels = c("class", "slide name", "region", "segment"))
# plot Sankey
sankey.plot<- ggplot(test_gr, aes(x, id = id, split = y, value = n)) +
sankey.plot<- ggplot(test.gr, aes(x, id = id, split = y, value = n)) +
geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) +
geom_parallel_sets_axes(axis.width = 0.2) +
geom_parallel_sets_labels(color = "white", size = 5) +
Expand All @@ -143,8 +144,8 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {

##4.5.2 Gene Detection Rate
# Calculate detection rate:
loq_mat <- loq_mat[, colnames(object)]
fData(object)$DetectedSegments <- rowSums(loq_mat, na.rm = TRUE)
loq.mat <- loq.mat[, colnames(object)]
fData(object)$DetectedSegments <- rowSums(loq.mat, na.rm = TRUE)
fData(object)$DetectionRate <-
fData(object)$DetectedSegments / nrow(pData(object))

Expand All @@ -153,20 +154,20 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
if(class(goi)[1] != "character"){
stop(paste0("Error: You have the wrong data class, must be character vector" ))
}
goi_df <- data.frame(Gene = goi,
goi.df <- data.frame(Gene = goi,
Number = fData(object)[goi, "DetectedSegments"],
DetectionRate = percent(fData(object)[goi, "DetectionRate"]))

## 4.5.3 Gene Filtering
# Plot detection rate:
plot_detect <- data.frame(Freq = c(1, 5, 10, 20, 30, 50))
plot_detect$Number <-
plot.detect <- data.frame(Freq = c(1, 5, 10, 20, 30, 50))
plot.detect$Number <-
unlist(lapply(c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5),
function(x) {sum(fData(object)$DetectionRate >= x)}))
plot_detect$Rate <- plot_detect$Number / nrow(fData(object))
rownames(plot_detect) <- plot_detect$Freq
plot.detect$Rate <- plot.detect$Number / nrow(fData(object))
rownames(plot.detect) <- plot.detect$Freq

genes.detected.plot <- ggplot(plot_detect, aes(x = as.factor(Freq), y = Rate, fill = Rate)) +
genes.detected.plot <- ggplot(plot.detect, aes(x = as.factor(Freq), y = Rate, fill = Rate)) +
geom_bar(stat = "identity") +
geom_text(aes(label = formatC(Number, format = "d", big.mark = ",")),
vjust = 1.6, color = "black", size = 4) +
Expand All @@ -182,10 +183,10 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {

# Subset to target genes detected in at least 10% of the samples.
# Also manually include the negative control probe, for downstream use
negativeProbefData <- subset(fData(object), CodeClass == "Negative")
neg_probes <- unique(negativeProbefData$TargetName)
negative.probe.fData <- subset(fData(object), CodeClass == "Negative")
neg.probes <- unique(negative.probe.fData$TargetName)
object <- object[fData(object)$DetectionRate >= 0.1 |
fData(object)$TargetName %in% neg_probes, ]
fData(object)$TargetName %in% neg.probes, ]

# retain only detected genes of interest
goi <- goi[goi %in% rownames(object)]
Expand Down
4 changes: 2 additions & 2 deletions R/heatmap.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@
#'
#' @importFrom NanoStringNCTools assayDataApply
#' @importFrom Biobase assayDataElement
#' @importFrom pheatmap pheatmap
#' @importFrom ComplexHeatmap pheatmap
#'
#' @export
#'
#' @return A list containing the plot genes data matrix, and the heatmap plot.
#' @return A list containing the plot genes data matrix, and the heatmap plot
##
heatMap <- function(
object,
Expand Down
16 changes: 7 additions & 9 deletions R/normalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#' @importFrom cowplot plot_grid
#' @importFrom Biobase exprs
#' @importFrom Biobase pData
#' @importFrom graphics boxplot
#' @importFrom stats quantile
#' @importFrom utils head
#' @importFrom GeomxTools normalize
Expand All @@ -34,40 +33,39 @@


# To call function, must have object = object; norm = c(quant or neg)
geomxnorm <- function(object, norm) {
geomxNorm <- function(object, norm) {

if(class(object)[1] != "NanoStringGeoMxSet"){
stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
}

# run reductions ====
color.variable <-
Value <- Statistic <- NegProbe <- Q3 <- Annotation <- NULL
color.variable <- Value <- Statistic <- NegProbe <- Q3 <- Annotation <- NULL

# Start Function
neg.probes<- "NegProbe-WTX"
ann.of.interest <- "region"

Stat.data <- base::data.frame(row.names = colnames(exprs(object)),
stat.data <- base::data.frame(row.names = colnames(exprs(object)),
Segment = colnames(exprs(object)),
Annotation = Biobase::pData(object)[, ann.of.interest],
Q3 = unlist(apply(exprs(object), 2,
quantile, 0.75, na.rm = TRUE)),
NegProbe = exprs(object)[neg.probes, ])

Stat.data.m <- melt(Stat.data, measures.vars = c("Q3", "NegProbe"),
stat.data.m <- melt(stat.data, measures.vars = c("Q3", "NegProbe"),
variable.name = "Statistic", value.name = "Value")


plt1 <- ggplot(Stat.data.m,
plt1 <- ggplot(stat.data.m,
aes(x = Value, fill = Statistic)) +
geom_histogram(bins = 40) + theme_bw() +
scale_x_continuous(trans = "log2") +
facet_wrap(~Annotation, nrow = 1) +
scale_fill_brewer(palette = 3, type = "qual") +
labs(x = "Counts", y = "Segments, #")

plt2 <- ggplot(Stat.data,
plt2 <- ggplot(stat.data,
aes(x = NegProbe, y = Q3, color = Annotation)) +
geom_abline(intercept = 0, slope = 1, lty = "dashed", color = "darkgray") +
geom_point() + guides(color = "none") + theme_bw() +
Expand All @@ -76,7 +74,7 @@ geomxnorm <- function(object, norm) {
theme(aspect.ratio = 1) +
labs(x = "Negative Probe GeoMean, Counts", y = "Q3 Value, Counts")

plt3 <- ggplot(Stat.data,
plt3 <- ggplot(stat.data,
aes(x = NegProbe, y = Q3 / NegProbe, color = Annotation)) +
geom_hline(yintercept = 1, lty = "dashed", color = "darkgray") +
geom_point() + theme_bw() +
Expand Down
Loading

0 comments on commit 6873033

Please sign in to comment.