Skip to content

Commit

Permalink
Begin transitioning to the new simplified world.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Nov 29, 2023
1 parent 7c24f10 commit ae611bc
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 155 deletions.
5 changes: 4 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

export(emptyRowRanges)
export(loadSummarizedExperiment)
export(readSummarizedExperiment)
exportMethods(saveObject)
exportMethods(stageObject)
import(alabaster.base)
import(methods)
Expand All @@ -11,7 +13,6 @@ importFrom(IRanges,PartitioningByEnd)
importFrom(S4Vectors,"mcols<-")
importFrom(S4Vectors,make_zero_col_DFrame)
importFrom(S4Vectors,mcols)
importFrom(SummarizedExperiment,"assays<-")
importFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(SummarizedExperiment,assay)
importFrom(SummarizedExperiment,assayNames)
Expand All @@ -20,5 +21,7 @@ importFrom(SummarizedExperiment,rowData)
importFrom(SummarizedExperiment,rowRanges)
importFrom(alabaster.base,.stageObject)
importFrom(alabaster.base,.writeMetadata)
importFrom(jsonlite,fromJSON)
importFrom(jsonlite,toJSON)
importMethodsFrom(alabaster.matrix,stageObject)
importMethodsFrom(alabaster.ranges,stageObject)
64 changes: 51 additions & 13 deletions R/loadSummarizedExperiment.R → R/readSummarizedExperiment.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
#' Load a SummarizedExperiment
#' Read a SummarizedExperiment from disk
#'
#' Default loading of \linkS4class{SummarizedExperiment}s based on the metadata stored by the corresponding \code{\link{stageObject}} method.
#' Read a \linkS4class{SummarizedExperiment} from its on-disk representation.
#'
#' @param exp.info Named list containing the metadata for this experiment.
#' @param project Any argument accepted by the acquisition functions, see \code{?\link{acquireFile}}.
#' By default, this should be a string containing the path to a staging directory.
#' @param path String containing a path to a directory, itself created using the \code{\link{stageObject}} method for \linkS4class{SummarizedExperiment} objects.
#' @param ... Further arguments passed to internal \code{\link{altReadObject}} calls.
#'
#' @return A \linkS4class{SummarizedExperiment} or \linkS4class{RangedSummarizedExperiment} object.
#' @return A \linkS4class{SummarizedExperiment} object.
#'
#' @author Aaron Lun
#' @seealso
#' \code{"\link{saveObject,SummarizedExperiment-method}"}, to save the SummarizedExperiment to disk.
#'
#' @examples
#' # Mocking up an experiment:
Expand All @@ -21,18 +22,55 @@
#' rowData(se)$blah <- runif(1000)
#' metadata(se)$whee <- "YAY"
#'
#' # Staging it:
#' tmp <- tempfile()
#' dir.create(tmp)
#' info <- stageObject(se, dir=tmp, "rna-seq")
#'
#' # And loading it back in:
#' loadSummarizedExperiment(info, tmp)
#' saveObject(se, tmp)
#' readSummarizedExperiment(tmp)
#'
#' @export
#' @importFrom SummarizedExperiment SummarizedExperiment assays<-
#' @aliases loadSummarizedExperiment
#'
#' @importFrom SummarizedExperiment SummarizedExperiment
#' @importFrom jsonlite fromJSON
#' @importFrom S4Vectors make_zero_col_DFrame
#' @import alabaster.base
readSummarizedExperiment <- function(path, ...) {
info <- fromJSON(file.path(path, "summarized_experiment.json"))
ass.names <- fromJSON(file.path(path, "assays", "names.json"))

all.assays <- list()
for (y in seq_along(ass.names)) {
all.assays[[ass.names[y]]] <- altReadObject(file.path(path, "assays", y - 1L), ...)
}

cd.path <- file.path(path, "column_data")
if (file.exists(cd.path)) {
cd <- altReadObject(cd.path, ...)
} else {
cd <- make_zero_col_DFrame(info$dimensions[2])
}

rd.path <- file.path(path, "row_data")
if (file.exists(rd.path)) {
rd <- altReadObject(rd.path, ...)
} else {
rd <- make_zero_col_DFrame(info$dimensions[1])
}

se <- SummarizedExperiment(all.assays, colData=cd, rowData=rd, checkDimnames=FALSE)

# Need to force the dimnames to match the DFs, because if they're NULL,
# the dimnames from the assays end up being used instead.
rownames(se) <- rownames(rd)
colnames(se) <- rownames(cd)

readMetadata(se, mcols.path=NULL, metadata.path = file.path(path, "other_data"))
}

##################################
######### OLD STUFF HERE #########
##################################

#' @export
loadSummarizedExperiment <- function(exp.info, project) {
all.assays <- list()
for (y in seq_along(exp.info$summarized_experiment$assays)) {
Expand Down
97 changes: 72 additions & 25 deletions R/stageSummarizedExperiment.R → R/saveSummarizedExperiment.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,16 @@
#' Stage a SummarizedExperiment
#' Save a SummarizedExperiment to disk
#'
#' Save a \linkS4class{SummarizedExperiment} to file inside the staging directory.
#' Save a \linkS4class{SummarizedExperiment} to its on-disk representation.
#'
#' @param x A \linkS4class{SummarizedExperiment} object or one of its subclasses.
#' @inheritParams alabaster.base::stageObject
#' @param meta.name String containing the name of the metadata file.
#' @param ... Further arguments to pass to the \linkS4class{SummarizedExperiment} method.
#' For the SummarizedExperiment itself, all further arguments are just ignored.
#' @param skip.ranges Logical scalar indicating whether to avoid saving the \code{\link{rowRanges}}.
#' @inheritParams alabaster.base::saveObject
#' @param summarizedexperiment.allow.dataframe.assay Logical scalar indicating whether to allow data frames as assays of \code{x}.
#' @param ... Further arguments to pass to internal \code{\link{altSaveObject}} calls.
#'
#' @return A named list of metadata that follows the \code{summarized_experiment} schema.
#' The contents of \code{x} are saved into a \code{path} subdirectory inside \code{dir}.
#' @return \code{x} is saved into \code{path} and \code{NULL} is invisibly returned.
#'
#' @details
#' \code{meta.name} is only needed to set up the output \code{path}, for consistency with the \code{\link{stageObject}} contract.
#' Callers should make sure to write the metadata to the same path by using \code{\link{.writeMetadata}} to create the JSON file.
#'
#' If \code{skip.ranges=TRUE}, the RangedSummarizedExperiment method just calls the SummarizedExperiment method, i.e., \code{\link{rowRanges}} are not saved.
#' This avoids the hassle of switching classes and the associated problems, e.g., \url{https://github.com/Bioconductor/SummarizedExperiment/issues/29}.
#' Note that any subsequent \code{\link{loadObject}} call on the staged assets will return a non-ranged SummarizedExperiment.
#'
#' If \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries),
#' If \code{rangedsummarizedexperiment.skip.empty.ranges=TRUE} and \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries),
#' \code{stageObject} will save it to file without any genomic range information.
#' This means that any subsequent \code{\link{loadObject}} on the staged assets will return a non-ranged SummarizedExperiment.
#'
Expand All @@ -30,11 +20,10 @@
#' Note that this only works for \linkS4class{DataFrame} objects - data.frame objects will not be saved correctly.
#'
#' @author Aaron Lun
#' @seealso
#' \code{\link{readSummarizedExperiment}}, to read the SummarizedExperiment back into the R session.
#'
#' @examples
#' tmp <- tempfile()
#' dir.create(tmp)
#'
#' mat <- matrix(rpois(10000, 10), ncol=10)
#' colnames(mat) <- letters[1:10]
#' rownames(mat) <- sprintf("GENE_%i", seq_len(nrow(mat)))
Expand All @@ -44,16 +33,75 @@
#' rowData(se)$blah <- runif(1000)
#' metadata(se)$whee <- "YAY"
#'
#' dir.create(tmp)
#' stageObject(se, dir=tmp, "rna-seq")
#' list.files(file.path(tmp, "rna-seq"))
#' tmp <- tempfile()
#' saveObject(se, tmp)
#' list.files(tmp, recursive=TRUE)
#'
#' @export
#' @rdname stageSummarizedExperiment
#' @aliases stageObject,SummarizedExperiment-method
#' @name saveSummarizedExperiment
#' @importFrom SummarizedExperiment colData rowData
#' @importFrom S4Vectors make_zero_col_DFrame
#' @importFrom jsonlite toJSON
#' @import alabaster.base
#' @import methods
setMethod("saveObject", "SummarizedExperiment", function(x, path, summarizedexperiment.allow.dataframe.assay=FALSE, ...) {
dir.create(path)
write(file=file.path(path, "OBJECT"), "summarized_experiment")
write(toJSON(list(dimensions=dim(x), version="1.0"), auto_unbox=TRUE), file=file.path(path, "summarized_experiment.json"))
args <- list(summarizedexperiment.allow.dataframe.assay=summarizedexperiment.allow.dataframe.assay, ...)

cd <- colData(x)
empty.cd <- make_zero_col_DFrame(nrow(cd))
if (!identical(cd, empty.cd)) { # respect row names, metadata, mcols...
tryCatch({
do.call(altSaveObject, c(list(cd, file.path(path, "column_data")), args))
}, error=function(e) {
stop("failed to stage 'colData(<", class(x)[1], ">)'\n - ", e$message)
})
}

rd <- rowData(x)
empty.rd <- make_zero_col_DFrame(nrow(rd))
if (!identical(rd, empty.rd)) { # respect row names, metadata, mcols...
tryCatch({
do.call(altSaveObject, c(list(rd, file.path(path, "row_data")), args))
}, error=function(e) {
stop("failed to stage 'rowData(<", class(x)[1], ">)'\n - ", e$message)
})
}

adir <- file.path(path, "assays")
dir.create(adir)
ass.names <- assayNames(x)
if (anyDuplicated(ass.names)) {
stop("assays should be uniquely named")
}
write(toJSON(ass.names), file=file.path(adir, "names.json"))

for (i in seq_along(ass.names)) {
aname <- as.character(i - 1L)
curmat <- assay(x, i, withDimnames=FALSE)

if (is.data.frame(curmat) || (is(curmat, "DataFrame") && !summarizedexperiment.allow.dataframe.assay)) {
stop("assays should not contain data frames, see ?'saveObject,SummarizedExperiment-method'")
}

tryCatch({
do.call(altSaveObject, c(list(curmat, file.path(adir, aname)), args))
}, error=function(e) {
stop("failed to stage 'assay(<", class(x)[1], ">, ", i, ")'\n - ", e$message)
})
}

saveMetadata(x, metadata.path=file.path(path, "other_data"), mcols.path=NULL)
})

##################################
######### OLD STUFF HERE #########
##################################

#' @export
setMethod("stageObject", "SummarizedExperiment", function(x, dir, path, child=FALSE, meta.name="experiment.json", ...) {
dir.create(file.path(dir, path), showWarnings=FALSE)

Expand Down Expand Up @@ -127,7 +175,6 @@ setMethod("stageObject", "SummarizedExperiment", function(x, dir, path, child=FA
}

#' @export
#' @rdname stageSummarizedExperiment
#' @importFrom SummarizedExperiment rowRanges
#' @importFrom alabaster.base .stageObject .writeMetadata
#' @importMethodsFrom alabaster.ranges stageObject
Expand Down
43 changes: 0 additions & 43 deletions man/loadSummarizedExperiment.Rd

This file was deleted.

42 changes: 42 additions & 0 deletions man/readSummarizedExperiment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

55 changes: 55 additions & 0 deletions man/saveSummarizedExperiment.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit ae611bc

Please sign in to comment.