From ae611bce3beb1cd10d88aa340de69cf021c03f71 Mon Sep 17 00:00:00 2001 From: LTLA Date: Tue, 28 Nov 2023 17:14:53 -0800 Subject: [PATCH] Begin transitioning to the new simplified world. --- NAMESPACE | 5 +- ...xperiment.R => readSummarizedExperiment.R} | 64 +++++++++--- ...xperiment.R => saveSummarizedExperiment.R} | 97 ++++++++++++++----- man/loadSummarizedExperiment.Rd | 43 -------- man/readSummarizedExperiment.Rd | 42 ++++++++ man/saveSummarizedExperiment.Rd | 55 +++++++++++ man/stageSummarizedExperiment.Rd | 73 -------------- 7 files changed, 224 insertions(+), 155 deletions(-) rename R/{loadSummarizedExperiment.R => readSummarizedExperiment.R} (53%) rename R/{stageSummarizedExperiment.R => saveSummarizedExperiment.R} (59%) delete mode 100644 man/loadSummarizedExperiment.Rd create mode 100644 man/readSummarizedExperiment.Rd create mode 100644 man/saveSummarizedExperiment.Rd delete mode 100644 man/stageSummarizedExperiment.Rd diff --git a/NAMESPACE b/NAMESPACE index 4224cbe..d693950 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,8 @@ export(emptyRowRanges) export(loadSummarizedExperiment) +export(readSummarizedExperiment) +exportMethods(saveObject) exportMethods(stageObject) import(alabaster.base) import(methods) @@ -11,7 +13,6 @@ importFrom(IRanges,PartitioningByEnd) importFrom(S4Vectors,"mcols<-") importFrom(S4Vectors,make_zero_col_DFrame) importFrom(S4Vectors,mcols) -importFrom(SummarizedExperiment,"assays<-") importFrom(SummarizedExperiment,SummarizedExperiment) importFrom(SummarizedExperiment,assay) importFrom(SummarizedExperiment,assayNames) @@ -20,5 +21,7 @@ importFrom(SummarizedExperiment,rowData) importFrom(SummarizedExperiment,rowRanges) importFrom(alabaster.base,.stageObject) importFrom(alabaster.base,.writeMetadata) +importFrom(jsonlite,fromJSON) +importFrom(jsonlite,toJSON) importMethodsFrom(alabaster.matrix,stageObject) importMethodsFrom(alabaster.ranges,stageObject) diff --git a/R/loadSummarizedExperiment.R b/R/readSummarizedExperiment.R similarity index 53% rename from R/loadSummarizedExperiment.R rename to R/readSummarizedExperiment.R index c2e3eba..71f4f91 100644 --- a/R/loadSummarizedExperiment.R +++ b/R/readSummarizedExperiment.R @@ -1,14 +1,15 @@ -#' Load a SummarizedExperiment +#' Read a SummarizedExperiment from disk #' -#' Default loading of \linkS4class{SummarizedExperiment}s based on the metadata stored by the corresponding \code{\link{stageObject}} method. +#' Read a \linkS4class{SummarizedExperiment} from its on-disk representation. #' -#' @param exp.info Named list containing the metadata for this experiment. -#' @param project Any argument accepted by the acquisition functions, see \code{?\link{acquireFile}}. -#' By default, this should be a string containing the path to a staging directory. +#' @param path String containing a path to a directory, itself created using the \code{\link{stageObject}} method for \linkS4class{SummarizedExperiment} objects. +#' @param ... Further arguments passed to internal \code{\link{altReadObject}} calls. #' -#' @return A \linkS4class{SummarizedExperiment} or \linkS4class{RangedSummarizedExperiment} object. +#' @return A \linkS4class{SummarizedExperiment} object. #' #' @author Aaron Lun +#' @seealso +#' \code{"\link{saveObject,SummarizedExperiment-method}"}, to save the SummarizedExperiment to disk. #' #' @examples #' # Mocking up an experiment: @@ -21,18 +22,55 @@ #' rowData(se)$blah <- runif(1000) #' metadata(se)$whee <- "YAY" #' -#' # Staging it: #' tmp <- tempfile() -#' dir.create(tmp) -#' info <- stageObject(se, dir=tmp, "rna-seq") -#' -#' # And loading it back in: -#' loadSummarizedExperiment(info, tmp) +#' saveObject(se, tmp) +#' readSummarizedExperiment(tmp) #' #' @export -#' @importFrom SummarizedExperiment SummarizedExperiment assays<- +#' @aliases loadSummarizedExperiment +#' +#' @importFrom SummarizedExperiment SummarizedExperiment +#' @importFrom jsonlite fromJSON #' @importFrom S4Vectors make_zero_col_DFrame #' @import alabaster.base +readSummarizedExperiment <- function(path, ...) { + info <- fromJSON(file.path(path, "summarized_experiment.json")) + ass.names <- fromJSON(file.path(path, "assays", "names.json")) + + all.assays <- list() + for (y in seq_along(ass.names)) { + all.assays[[ass.names[y]]] <- altReadObject(file.path(path, "assays", y - 1L), ...) + } + + cd.path <- file.path(path, "column_data") + if (file.exists(cd.path)) { + cd <- altReadObject(cd.path, ...) + } else { + cd <- make_zero_col_DFrame(info$dimensions[2]) + } + + rd.path <- file.path(path, "row_data") + if (file.exists(rd.path)) { + rd <- altReadObject(rd.path, ...) + } else { + rd <- make_zero_col_DFrame(info$dimensions[1]) + } + + se <- SummarizedExperiment(all.assays, colData=cd, rowData=rd, checkDimnames=FALSE) + + # Need to force the dimnames to match the DFs, because if they're NULL, + # the dimnames from the assays end up being used instead. + rownames(se) <- rownames(rd) + colnames(se) <- rownames(cd) + + readMetadata(se, mcols.path=NULL, metadata.path = file.path(path, "other_data")) +} + +################################## +######### OLD STUFF HERE ######### +################################## + +#' @export loadSummarizedExperiment <- function(exp.info, project) { all.assays <- list() for (y in seq_along(exp.info$summarized_experiment$assays)) { diff --git a/R/stageSummarizedExperiment.R b/R/saveSummarizedExperiment.R similarity index 59% rename from R/stageSummarizedExperiment.R rename to R/saveSummarizedExperiment.R index 35598ad..5722347 100644 --- a/R/stageSummarizedExperiment.R +++ b/R/saveSummarizedExperiment.R @@ -1,26 +1,16 @@ -#' Stage a SummarizedExperiment +#' Save a SummarizedExperiment to disk #' -#' Save a \linkS4class{SummarizedExperiment} to file inside the staging directory. +#' Save a \linkS4class{SummarizedExperiment} to its on-disk representation. #' #' @param x A \linkS4class{SummarizedExperiment} object or one of its subclasses. -#' @inheritParams alabaster.base::stageObject -#' @param meta.name String containing the name of the metadata file. -#' @param ... Further arguments to pass to the \linkS4class{SummarizedExperiment} method. -#' For the SummarizedExperiment itself, all further arguments are just ignored. -#' @param skip.ranges Logical scalar indicating whether to avoid saving the \code{\link{rowRanges}}. +#' @inheritParams alabaster.base::saveObject +#' @param summarizedexperiment.allow.dataframe.assay Logical scalar indicating whether to allow data frames as assays of \code{x}. +#' @param ... Further arguments to pass to internal \code{\link{altSaveObject}} calls. #' -#' @return A named list of metadata that follows the \code{summarized_experiment} schema. -#' The contents of \code{x} are saved into a \code{path} subdirectory inside \code{dir}. +#' @return \code{x} is saved into \code{path} and \code{NULL} is invisibly returned. #' #' @details -#' \code{meta.name} is only needed to set up the output \code{path}, for consistency with the \code{\link{stageObject}} contract. -#' Callers should make sure to write the metadata to the same path by using \code{\link{.writeMetadata}} to create the JSON file. -#' -#' If \code{skip.ranges=TRUE}, the RangedSummarizedExperiment method just calls the SummarizedExperiment method, i.e., \code{\link{rowRanges}} are not saved. -#' This avoids the hassle of switching classes and the associated problems, e.g., \url{https://github.com/Bioconductor/SummarizedExperiment/issues/29}. -#' Note that any subsequent \code{\link{loadObject}} call on the staged assets will return a non-ranged SummarizedExperiment. -#' -#' If \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries), +#' If \code{rangedsummarizedexperiment.skip.empty.ranges=TRUE} and \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries), #' \code{stageObject} will save it to file without any genomic range information. #' This means that any subsequent \code{\link{loadObject}} on the staged assets will return a non-ranged SummarizedExperiment. #' @@ -30,11 +20,10 @@ #' Note that this only works for \linkS4class{DataFrame} objects - data.frame objects will not be saved correctly. #' #' @author Aaron Lun +#' @seealso +#' \code{\link{readSummarizedExperiment}}, to read the SummarizedExperiment back into the R session. #' #' @examples -#' tmp <- tempfile() -#' dir.create(tmp) -#' #' mat <- matrix(rpois(10000, 10), ncol=10) #' colnames(mat) <- letters[1:10] #' rownames(mat) <- sprintf("GENE_%i", seq_len(nrow(mat))) @@ -44,16 +33,75 @@ #' rowData(se)$blah <- runif(1000) #' metadata(se)$whee <- "YAY" #' -#' dir.create(tmp) -#' stageObject(se, dir=tmp, "rna-seq") -#' list.files(file.path(tmp, "rna-seq")) +#' tmp <- tempfile() +#' saveObject(se, tmp) +#' list.files(tmp, recursive=TRUE) #' #' @export -#' @rdname stageSummarizedExperiment +#' @aliases stageObject,SummarizedExperiment-method +#' @name saveSummarizedExperiment #' @importFrom SummarizedExperiment colData rowData #' @importFrom S4Vectors make_zero_col_DFrame +#' @importFrom jsonlite toJSON #' @import alabaster.base #' @import methods +setMethod("saveObject", "SummarizedExperiment", function(x, path, summarizedexperiment.allow.dataframe.assay=FALSE, ...) { + dir.create(path) + write(file=file.path(path, "OBJECT"), "summarized_experiment") + write(toJSON(list(dimensions=dim(x), version="1.0"), auto_unbox=TRUE), file=file.path(path, "summarized_experiment.json")) + args <- list(summarizedexperiment.allow.dataframe.assay=summarizedexperiment.allow.dataframe.assay, ...) + + cd <- colData(x) + empty.cd <- make_zero_col_DFrame(nrow(cd)) + if (!identical(cd, empty.cd)) { # respect row names, metadata, mcols... + tryCatch({ + do.call(altSaveObject, c(list(cd, file.path(path, "column_data")), args)) + }, error=function(e) { + stop("failed to stage 'colData(<", class(x)[1], ">)'\n - ", e$message) + }) + } + + rd <- rowData(x) + empty.rd <- make_zero_col_DFrame(nrow(rd)) + if (!identical(rd, empty.rd)) { # respect row names, metadata, mcols... + tryCatch({ + do.call(altSaveObject, c(list(rd, file.path(path, "row_data")), args)) + }, error=function(e) { + stop("failed to stage 'rowData(<", class(x)[1], ">)'\n - ", e$message) + }) + } + + adir <- file.path(path, "assays") + dir.create(adir) + ass.names <- assayNames(x) + if (anyDuplicated(ass.names)) { + stop("assays should be uniquely named") + } + write(toJSON(ass.names), file=file.path(adir, "names.json")) + + for (i in seq_along(ass.names)) { + aname <- as.character(i - 1L) + curmat <- assay(x, i, withDimnames=FALSE) + + if (is.data.frame(curmat) || (is(curmat, "DataFrame") && !summarizedexperiment.allow.dataframe.assay)) { + stop("assays should not contain data frames, see ?'saveObject,SummarizedExperiment-method'") + } + + tryCatch({ + do.call(altSaveObject, c(list(curmat, file.path(adir, aname)), args)) + }, error=function(e) { + stop("failed to stage 'assay(<", class(x)[1], ">, ", i, ")'\n - ", e$message) + }) + } + + saveMetadata(x, metadata.path=file.path(path, "other_data"), mcols.path=NULL) +}) + +################################## +######### OLD STUFF HERE ######### +################################## + +#' @export setMethod("stageObject", "SummarizedExperiment", function(x, dir, path, child=FALSE, meta.name="experiment.json", ...) { dir.create(file.path(dir, path), showWarnings=FALSE) @@ -127,7 +175,6 @@ setMethod("stageObject", "SummarizedExperiment", function(x, dir, path, child=FA } #' @export -#' @rdname stageSummarizedExperiment #' @importFrom SummarizedExperiment rowRanges #' @importFrom alabaster.base .stageObject .writeMetadata #' @importMethodsFrom alabaster.ranges stageObject diff --git a/man/loadSummarizedExperiment.Rd b/man/loadSummarizedExperiment.Rd deleted file mode 100644 index 3666cd3..0000000 --- a/man/loadSummarizedExperiment.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/loadSummarizedExperiment.R -\name{loadSummarizedExperiment} -\alias{loadSummarizedExperiment} -\title{Load a SummarizedExperiment} -\usage{ -loadSummarizedExperiment(exp.info, project) -} -\arguments{ -\item{exp.info}{Named list containing the metadata for this experiment.} - -\item{project}{Any argument accepted by the acquisition functions, see \code{?\link{acquireFile}}. -By default, this should be a string containing the path to a staging directory.} -} -\value{ -A \linkS4class{SummarizedExperiment} or \linkS4class{RangedSummarizedExperiment} object. -} -\description{ -Default loading of \linkS4class{SummarizedExperiment}s based on the metadata stored by the corresponding \code{\link{stageObject}} method. -} -\examples{ -# Mocking up an experiment: -mat <- matrix(rpois(10000, 10), ncol=10) -colnames(mat) <- letters[1:10] -rownames(mat) <- sprintf("GENE_\%i", seq_len(nrow(mat))) - -se <- SummarizedExperiment(list(counts=mat)) -se$stuff <- LETTERS[1:10] -rowData(se)$blah <- runif(1000) -metadata(se)$whee <- "YAY" - -# Staging it: -tmp <- tempfile() -dir.create(tmp) -info <- stageObject(se, dir=tmp, "rna-seq") - -# And loading it back in: -loadSummarizedExperiment(info, tmp) - -} -\author{ -Aaron Lun -} diff --git a/man/readSummarizedExperiment.Rd b/man/readSummarizedExperiment.Rd new file mode 100644 index 0000000..1292497 --- /dev/null +++ b/man/readSummarizedExperiment.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readSummarizedExperiment.R +\name{readSummarizedExperiment} +\alias{readSummarizedExperiment} +\alias{loadSummarizedExperiment} +\title{Read a SummarizedExperiment from disk} +\usage{ +readSummarizedExperiment(path, ...) +} +\arguments{ +\item{path}{String containing a path to a directory, itself created using the \code{\link{stageObject}} method for \linkS4class{SummarizedExperiment} objects.} + +\item{...}{Further arguments passed to internal \code{\link{altReadObject}} calls.} +} +\value{ +A \linkS4class{SummarizedExperiment} object. +} +\description{ +Read a \linkS4class{SummarizedExperiment} from its on-disk representation. +} +\examples{ +# Mocking up an experiment: +mat <- matrix(rpois(10000, 10), ncol=10) +colnames(mat) <- letters[1:10] +rownames(mat) <- sprintf("GENE_\%i", seq_len(nrow(mat))) + +se <- SummarizedExperiment(list(counts=mat)) +se$stuff <- LETTERS[1:10] +rowData(se)$blah <- runif(1000) +metadata(se)$whee <- "YAY" + +tmp <- tempfile() +saveObject(se, tmp) +readSummarizedExperiment(tmp) + +} +\seealso{ +\code{"\link{saveObject,SummarizedExperiment-method}"}, to save the SummarizedExperiment to disk. +} +\author{ +Aaron Lun +} diff --git a/man/saveSummarizedExperiment.Rd b/man/saveSummarizedExperiment.Rd new file mode 100644 index 0000000..18550d4 --- /dev/null +++ b/man/saveSummarizedExperiment.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/saveSummarizedExperiment.R +\name{saveSummarizedExperiment} +\alias{saveSummarizedExperiment} +\alias{stageObject,SummarizedExperiment-method} +\title{Save a SummarizedExperiment to disk} +\usage{ +\S4method{saveObject}{SummarizedExperiment}(x, path, summarizedexperiment.allow.dataframe.assay = FALSE, ...) +} +\arguments{ +\item{x}{A \linkS4class{SummarizedExperiment} object or one of its subclasses.} + +\item{path}{String containing the path to a directory in which to save \code{x}.} + +\item{summarizedexperiment.allow.dataframe.assay}{Logical scalar indicating whether to allow data frames as assays of \code{x}.} + +\item{...}{Further arguments to pass to internal \code{\link{altSaveObject}} calls.} +} +\value{ +\code{x} is saved into \code{path} and \code{NULL} is invisibly returned. +} +\description{ +Save a \linkS4class{SummarizedExperiment} to its on-disk representation. +} +\details{ +If \code{rangedsummarizedexperiment.skip.empty.ranges=TRUE} and \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries), +\code{stageObject} will save it to file without any genomic range information. +This means that any subsequent \code{\link{loadObject}} on the staged assets will return a non-ranged SummarizedExperiment. + +By default, we consider the presence of data frames in the assays to be an error. +Users should coerce these into an appropriate matrix type, e.g., a dense matrix or a sparse dgCMatrix. +If a DataFrame as an assay is truly desired, users may set \code{\link{options}(alabaster.se.reject_data.frames=FALSE)} to skip the error. +Note that this only works for \linkS4class{DataFrame} objects - data.frame objects will not be saved correctly. +} +\examples{ +mat <- matrix(rpois(10000, 10), ncol=10) +colnames(mat) <- letters[1:10] +rownames(mat) <- sprintf("GENE_\%i", seq_len(nrow(mat))) + +se <- SummarizedExperiment(list(counts=mat)) +se$stuff <- LETTERS[1:10] +rowData(se)$blah <- runif(1000) +metadata(se)$whee <- "YAY" + +tmp <- tempfile() +saveObject(se, tmp) +list.files(tmp, recursive=TRUE) + +} +\seealso{ +\code{\link{readSummarizedExperiment}}, to read the SummarizedExperiment back into the R session. +} +\author{ +Aaron Lun +} diff --git a/man/stageSummarizedExperiment.Rd b/man/stageSummarizedExperiment.Rd deleted file mode 100644 index 3629dea..0000000 --- a/man/stageSummarizedExperiment.Rd +++ /dev/null @@ -1,73 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/stageSummarizedExperiment.R -\name{stageObject,SummarizedExperiment-method} -\alias{stageObject,SummarizedExperiment-method} -\alias{stageObject,RangedSummarizedExperiment-method} -\title{Stage a SummarizedExperiment} -\usage{ -\S4method{stageObject}{SummarizedExperiment}(x, dir, path, child = FALSE, meta.name = "experiment.json", ...) - -\S4method{stageObject}{RangedSummarizedExperiment}(x, dir, path, child = FALSE, ..., skip.ranges = FALSE) -} -\arguments{ -\item{x}{A \linkS4class{SummarizedExperiment} object or one of its subclasses.} - -\item{dir}{String containing the path to the staging directory.} - -\item{path}{String containing a prefix of the relative path inside \code{dir} where \code{x} is to be saved. -The actual path used to save \code{x} may include additional components, see Details.} - -\item{child}{Logical scalar indicating whether \code{x} is a child of a larger object.} - -\item{meta.name}{String containing the name of the metadata file.} - -\item{...}{Further arguments to pass to the \linkS4class{SummarizedExperiment} method. -For the SummarizedExperiment itself, all further arguments are just ignored.} - -\item{skip.ranges}{Logical scalar indicating whether to avoid saving the \code{\link{rowRanges}}.} -} -\value{ -A named list of metadata that follows the \code{summarized_experiment} schema. -The contents of \code{x} are saved into a \code{path} subdirectory inside \code{dir}. -} -\description{ -Save a \linkS4class{SummarizedExperiment} to file inside the staging directory. -} -\details{ -\code{meta.name} is only needed to set up the output \code{path}, for consistency with the \code{\link{stageObject}} contract. -Callers should make sure to write the metadata to the same path by using \code{\link{.writeMetadata}} to create the JSON file. - -If \code{skip.ranges=TRUE}, the RangedSummarizedExperiment method just calls the SummarizedExperiment method, i.e., \code{\link{rowRanges}} are not saved. -This avoids the hassle of switching classes and the associated problems, e.g., \url{https://github.com/Bioconductor/SummarizedExperiment/issues/29}. -Note that any subsequent \code{\link{loadObject}} call on the staged assets will return a non-ranged SummarizedExperiment. - -If \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries), -\code{stageObject} will save it to file without any genomic range information. -This means that any subsequent \code{\link{loadObject}} on the staged assets will return a non-ranged SummarizedExperiment. - -By default, we consider the presence of data frames in the assays to be an error. -Users should coerce these into an appropriate matrix type, e.g., a dense matrix or a sparse dgCMatrix. -If a DataFrame as an assay is truly desired, users may set \code{\link{options}(alabaster.se.reject_data.frames=FALSE)} to skip the error. -Note that this only works for \linkS4class{DataFrame} objects - data.frame objects will not be saved correctly. -} -\examples{ -tmp <- tempfile() -dir.create(tmp) - -mat <- matrix(rpois(10000, 10), ncol=10) -colnames(mat) <- letters[1:10] -rownames(mat) <- sprintf("GENE_\%i", seq_len(nrow(mat))) - -se <- SummarizedExperiment(list(counts=mat)) -se$stuff <- LETTERS[1:10] -rowData(se)$blah <- runif(1000) -metadata(se)$whee <- "YAY" - -dir.create(tmp) -stageObject(se, dir=tmp, "rna-seq") -list.files(file.path(tmp, "rna-seq")) - -} -\author{ -Aaron Lun -}