From ae611bce3beb1cd10d88aa340de69cf021c03f71 Mon Sep 17 00:00:00 2001
From: LTLA <infinite.monkeys.with.keyboards@gmail.com>
Date: Tue, 28 Nov 2023 17:14:53 -0800
Subject: [PATCH] Begin transitioning to the new simplified world.

---
 NAMESPACE                                     |  5 +-
 ...xperiment.R => readSummarizedExperiment.R} | 64 +++++++++---
 ...xperiment.R => saveSummarizedExperiment.R} | 97 ++++++++++++++-----
 man/loadSummarizedExperiment.Rd               | 43 --------
 man/readSummarizedExperiment.Rd               | 42 ++++++++
 man/saveSummarizedExperiment.Rd               | 55 +++++++++++
 man/stageSummarizedExperiment.Rd              | 73 --------------
 7 files changed, 224 insertions(+), 155 deletions(-)
 rename R/{loadSummarizedExperiment.R => readSummarizedExperiment.R} (53%)
 rename R/{stageSummarizedExperiment.R => saveSummarizedExperiment.R} (59%)
 delete mode 100644 man/loadSummarizedExperiment.Rd
 create mode 100644 man/readSummarizedExperiment.Rd
 create mode 100644 man/saveSummarizedExperiment.Rd
 delete mode 100644 man/stageSummarizedExperiment.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 4224cbe..d693950 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,8 @@
 
 export(emptyRowRanges)
 export(loadSummarizedExperiment)
+export(readSummarizedExperiment)
+exportMethods(saveObject)
 exportMethods(stageObject)
 import(alabaster.base)
 import(methods)
@@ -11,7 +13,6 @@ importFrom(IRanges,PartitioningByEnd)
 importFrom(S4Vectors,"mcols<-")
 importFrom(S4Vectors,make_zero_col_DFrame)
 importFrom(S4Vectors,mcols)
-importFrom(SummarizedExperiment,"assays<-")
 importFrom(SummarizedExperiment,SummarizedExperiment)
 importFrom(SummarizedExperiment,assay)
 importFrom(SummarizedExperiment,assayNames)
@@ -20,5 +21,7 @@ importFrom(SummarizedExperiment,rowData)
 importFrom(SummarizedExperiment,rowRanges)
 importFrom(alabaster.base,.stageObject)
 importFrom(alabaster.base,.writeMetadata)
+importFrom(jsonlite,fromJSON)
+importFrom(jsonlite,toJSON)
 importMethodsFrom(alabaster.matrix,stageObject)
 importMethodsFrom(alabaster.ranges,stageObject)
diff --git a/R/loadSummarizedExperiment.R b/R/readSummarizedExperiment.R
similarity index 53%
rename from R/loadSummarizedExperiment.R
rename to R/readSummarizedExperiment.R
index c2e3eba..71f4f91 100644
--- a/R/loadSummarizedExperiment.R
+++ b/R/readSummarizedExperiment.R
@@ -1,14 +1,15 @@
-#' Load a SummarizedExperiment
+#' Read a SummarizedExperiment from disk
 #'
-#' Default loading of \linkS4class{SummarizedExperiment}s based on the metadata stored by the corresponding \code{\link{stageObject}} method.
+#' Read a \linkS4class{SummarizedExperiment} from its on-disk representation.
 #'
-#' @param exp.info Named list containing the metadata for this experiment.
-#' @param project Any argument accepted by the acquisition functions, see \code{?\link{acquireFile}}. 
-#' By default, this should be a string containing the path to a staging directory.
+#' @param path String containing a path to a directory, itself created using the \code{\link{saveObject}} method for \linkS4class{SummarizedExperiment} objects.
+#' @param ... Further arguments passed to internal \code{\link{altReadObject}} calls.
 #' 
-#' @return A \linkS4class{SummarizedExperiment} or \linkS4class{RangedSummarizedExperiment} object.
+#' @return A \linkS4class{SummarizedExperiment} object.
 #'
 #' @author Aaron Lun
+#' @seealso
+#' \code{"\link{saveObject,SummarizedExperiment-method}"}, to save the SummarizedExperiment to disk.
 #'
 #' @examples
 #' # Mocking up an experiment:
@@ -21,18 +22,55 @@
 #' rowData(se)$blah <- runif(1000)
 #' metadata(se)$whee <- "YAY"
 #' 
-#' # Staging it:
 #' tmp <- tempfile()
-#' dir.create(tmp)
-#' info <- stageObject(se, dir=tmp, "rna-seq") 
-#'
-#' # And loading it back in:
-#' loadSummarizedExperiment(info, tmp)
+#' saveObject(se, tmp)
+#' readSummarizedExperiment(tmp)
 #'
 #' @export
-#' @importFrom SummarizedExperiment SummarizedExperiment assays<-
+#' @aliases loadSummarizedExperiment
+#'
+#' @importFrom SummarizedExperiment SummarizedExperiment
+#' @importFrom jsonlite fromJSON
 #' @importFrom S4Vectors make_zero_col_DFrame
 #' @import alabaster.base
+readSummarizedExperiment <- function(path, ...) { # Rebuild a SummarizedExperiment from the files under 'path'; '...' is forwarded to every altReadObject() call.
+    info <- fromJSON(file.path(path, "summarized_experiment.json")) # Only info$dimensions is consulted below, as a fallback for missing row/column data.
+    ass.names <- fromJSON(file.path(path, "assays", "names.json")) # Assay names; position y in this vector maps to the subdirectory named y - 1.
+
+    all.assays <- list()
+    for (y in seq_along(ass.names)) {
+        all.assays[[ass.names[y]]] <- altReadObject(file.path(path, "assays", y - 1L), ...) # Assay directories are named by 0-based position.
+    }
+
+    cd.path <- file.path(path, "column_data")
+    if (file.exists(cd.path)) {
+        cd <- altReadObject(cd.path, ...)
+    } else {
+        cd <- make_zero_col_DFrame(info$dimensions[2]) # Nothing saved for the columns: use a zero-column DFrame sized by the stored column count.
+    }
+
+    rd.path <- file.path(path, "row_data")
+    if (file.exists(rd.path)) {
+        rd <- altReadObject(rd.path, ...)
+    } else {
+        rd <- make_zero_col_DFrame(info$dimensions[1]) # Nothing saved for the rows: use a zero-column DFrame sized by the stored row count.
+    }
+
+    se <- SummarizedExperiment(all.assays, colData=cd, rowData=rd, checkDimnames=FALSE)
+
+    # Need to force the dimnames to match the DFs, because if they're NULL,
+    # the dimnames from the assays end up being used instead.
+    rownames(se) <- rownames(rd)
+    colnames(se) <- rownames(cd)
+
+    readMetadata(se, mcols.path=NULL, metadata.path = file.path(path, "other_data")) # The value of readMetadata() is returned; presumably 'se' with metadata restored from "other_data" - TODO confirm against alabaster.base.
+}
+
+##################################
+######### OLD STUFF HERE #########
+##################################
+
+#' @export
 loadSummarizedExperiment <- function(exp.info, project) {
     all.assays <- list()
     for (y in seq_along(exp.info$summarized_experiment$assays)) {
diff --git a/R/stageSummarizedExperiment.R b/R/saveSummarizedExperiment.R
similarity index 59%
rename from R/stageSummarizedExperiment.R
rename to R/saveSummarizedExperiment.R
index 35598ad..5722347 100644
--- a/R/stageSummarizedExperiment.R
+++ b/R/saveSummarizedExperiment.R
@@ -1,26 +1,16 @@
-#' Stage a SummarizedExperiment
+#' Save a SummarizedExperiment to disk
 #'
-#' Save a \linkS4class{SummarizedExperiment} to file inside the staging directory.
+#' Save a \linkS4class{SummarizedExperiment} to its on-disk representation.
 #' 
 #' @param x A \linkS4class{SummarizedExperiment} object or one of its subclasses.
-#' @inheritParams alabaster.base::stageObject
-#' @param meta.name String containing the name of the metadata file.
-#' @param ... Further arguments to pass to the \linkS4class{SummarizedExperiment} method.
-#' For the SummarizedExperiment itself, all further arguments are just ignored.
-#' @param skip.ranges Logical scalar indicating whether to avoid saving the \code{\link{rowRanges}}.
+#' @inheritParams alabaster.base::saveObject
+#' @param summarizedexperiment.allow.dataframe.assay Logical scalar indicating whether to allow \linkS4class{DataFrame} objects as assays of \code{x}; base data.frame assays are always rejected.
+#' @param ... Further arguments to pass to internal \code{\link{altSaveObject}} calls.
 #'
-#' @return A named list of metadata that follows the \code{summarized_experiment} schema.
-#' The contents of \code{x} are saved into a \code{path} subdirectory inside \code{dir}.
+#' @return \code{x} is saved into \code{path} and \code{NULL} is invisibly returned.
 #'
 #' @details
-#' \code{meta.name} is only needed to set up the output \code{path}, for consistency with the \code{\link{stageObject}} contract.
-#' Callers should make sure to write the metadata to the same path by using \code{\link{.writeMetadata}} to create the JSON file.
-#'
-#' If \code{skip.ranges=TRUE}, the RangedSummarizedExperiment method just calls the SummarizedExperiment method, i.e., \code{\link{rowRanges}} are not saved.
-#' This avoids the hassle of switching classes and the associated problems, e.g., \url{https://github.com/Bioconductor/SummarizedExperiment/issues/29}.
-#' Note that any subsequent \code{\link{loadObject}} call on the staged assets will return a non-ranged SummarizedExperiment.
-#'
-#' If \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries),
+#' If \code{rangedsummarizedexperiment.skip.empty.ranges=TRUE} and \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries),
 #' \code{stageObject} will save it to file without any genomic range information.
 #' This means that any subsequent \code{\link{loadObject}} on the staged assets will return a non-ranged SummarizedExperiment.
 #'
@@ -30,11 +20,10 @@
 #' Note that this only works for \linkS4class{DataFrame} objects - data.frame objects will not be saved correctly.
 #'
 #' @author Aaron Lun
+#' @seealso
+#' \code{\link{readSummarizedExperiment}}, to read the SummarizedExperiment back into the R session.
 #' 
 #' @examples
-#' tmp <- tempfile()
-#' dir.create(tmp)
-#'
 #' mat <- matrix(rpois(10000, 10), ncol=10)
 #' colnames(mat) <- letters[1:10]
 #' rownames(mat) <- sprintf("GENE_%i", seq_len(nrow(mat)))
@@ -44,16 +33,75 @@
 #' rowData(se)$blah <- runif(1000)
 #' metadata(se)$whee <- "YAY"
 #' 
-#' dir.create(tmp)
-#' stageObject(se, dir=tmp, "rna-seq") 
-#' list.files(file.path(tmp, "rna-seq"))
+#' tmp <- tempfile()
+#' saveObject(se, tmp)
+#' list.files(tmp, recursive=TRUE)
 #' 
 #' @export
-#' @rdname stageSummarizedExperiment
+#' @aliases stageObject,SummarizedExperiment-method
+#' @name saveSummarizedExperiment
 #' @importFrom SummarizedExperiment colData rowData
 #' @importFrom S4Vectors make_zero_col_DFrame
+#' @importFrom jsonlite toJSON
 #' @import alabaster.base
 #' @import methods
+setMethod("saveObject", "SummarizedExperiment", function(x, path, summarizedexperiment.allow.dataframe.assay=FALSE, ...) { # Write 'x' into a directory at 'path': marker files, optional column/row data, numbered assays, then metadata.
+    dir.create(path) # Default (non-recursive) call, so the parent of 'path' must already exist.
+    write(file=file.path(path, "OBJECT"), "summarized_experiment") # Records the object type string; presumably the tag that readers dispatch on - TODO confirm.
+    write(toJSON(list(dimensions=dim(x), version="1.0"), auto_unbox=TRUE), file=file.path(path, "summarized_experiment.json")) # Stored dimensions let the reader size empty row/column data.
+    args <- list(summarizedexperiment.allow.dataframe.assay=summarizedexperiment.allow.dataframe.assay, ...) # Bundled so that every altSaveObject() call below receives the same extra arguments.
+
+    cd <- colData(x)
+    empty.cd <- make_zero_col_DFrame(nrow(cd))
+    if (!identical(cd, empty.cd)) { # Skip only when colData is identical to a bare zero-column DFrame, so that row names, metadata and mcols are respected.
+        tryCatch({
+            do.call(altSaveObject, c(list(cd, file.path(path, "column_data")), args))
+        }, error=function(e) {
+            stop("failed to stage 'colData(<", class(x)[1], ">)'\n  - ", e$message) # Re-throw with the failing component prepended to the original message.
+        })
+    }
+
+    rd <- rowData(x)
+    empty.rd <- make_zero_col_DFrame(nrow(rd))
+    if (!identical(rd, empty.rd)) { # Same rule as for colData: skip only when rowData is identical to a bare zero-column DFrame.
+        tryCatch({
+            do.call(altSaveObject, c(list(rd, file.path(path, "row_data")), args))
+        }, error=function(e) {
+            stop("failed to stage 'rowData(<", class(x)[1], ">)'\n  - ", e$message) # Re-throw with the failing component prepended to the original message.
+        })
+    }
+
+    adir <- file.path(path, "assays")
+    dir.create(adir)
+    ass.names <- assayNames(x)
+    if (anyDuplicated(ass.names)) {
+        stop("assays should be uniquely named")
+    }
+    write(toJSON(ass.names), file=file.path(adir, "names.json")) # The order of names here defines the numbering of the assay subdirectories.
+
+    for (i in seq_along(ass.names)) { # NOTE(review): the loop length comes from the names, so unnamed assays would presumably never be written - confirm whether assayNames(x) can be NULL here.
+        aname <- as.character(i - 1L) # Directory name is the 0-based assay index.
+        curmat <- assay(x, i, withDimnames=FALSE)
+
+        if (is.data.frame(curmat) || (is(curmat, "DataFrame") && !summarizedexperiment.allow.dataframe.assay)) { # Base data.frames are always rejected; DataFrames are rejected unless the flag is TRUE.
+            stop("assays should not contain data frames, see ?'saveObject,SummarizedExperiment-method'")
+        }
+
+        tryCatch({
+            do.call(altSaveObject, c(list(curmat, file.path(adir, aname)), args))
+        }, error=function(e) {
+            stop("failed to stage 'assay(<", class(x)[1], ">, ", i, ")'\n  - ", e$message) # Re-throw with the failing assay index prepended to the original message.
+        })
+    }
+
+    saveMetadata(x, metadata.path=file.path(path, "other_data"), mcols.path=NULL) # NOTE(review): the method returns whatever saveMetadata() returns, while the roxygen @return promises an invisible NULL - confirm.
+})
+
+##################################
+######### OLD STUFF HERE #########
+##################################
+
+#' @export
 setMethod("stageObject", "SummarizedExperiment", function(x, dir, path, child=FALSE, meta.name="experiment.json", ...) {
     dir.create(file.path(dir, path), showWarnings=FALSE)
 
@@ -127,7 +175,6 @@ setMethod("stageObject", "SummarizedExperiment", function(x, dir, path, child=FA
 }
 
 #' @export
-#' @rdname stageSummarizedExperiment
 #' @importFrom SummarizedExperiment rowRanges
 #' @importFrom alabaster.base .stageObject .writeMetadata
 #' @importMethodsFrom alabaster.ranges stageObject
diff --git a/man/loadSummarizedExperiment.Rd b/man/loadSummarizedExperiment.Rd
deleted file mode 100644
index 3666cd3..0000000
--- a/man/loadSummarizedExperiment.Rd
+++ /dev/null
@@ -1,43 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/loadSummarizedExperiment.R
-\name{loadSummarizedExperiment}
-\alias{loadSummarizedExperiment}
-\title{Load a SummarizedExperiment}
-\usage{
-loadSummarizedExperiment(exp.info, project)
-}
-\arguments{
-\item{exp.info}{Named list containing the metadata for this experiment.}
-
-\item{project}{Any argument accepted by the acquisition functions, see \code{?\link{acquireFile}}. 
-By default, this should be a string containing the path to a staging directory.}
-}
-\value{
-A \linkS4class{SummarizedExperiment} or \linkS4class{RangedSummarizedExperiment} object.
-}
-\description{
-Default loading of \linkS4class{SummarizedExperiment}s based on the metadata stored by the corresponding \code{\link{stageObject}} method.
-}
-\examples{
-# Mocking up an experiment:
-mat <- matrix(rpois(10000, 10), ncol=10)
-colnames(mat) <- letters[1:10]
-rownames(mat) <- sprintf("GENE_\%i", seq_len(nrow(mat)))
-
-se <- SummarizedExperiment(list(counts=mat))
-se$stuff <- LETTERS[1:10]
-rowData(se)$blah <- runif(1000)
-metadata(se)$whee <- "YAY"
-
-# Staging it:
-tmp <- tempfile()
-dir.create(tmp)
-info <- stageObject(se, dir=tmp, "rna-seq") 
-
-# And loading it back in:
-loadSummarizedExperiment(info, tmp)
-
-}
-\author{
-Aaron Lun
-}
diff --git a/man/readSummarizedExperiment.Rd b/man/readSummarizedExperiment.Rd
new file mode 100644
index 0000000..1292497
--- /dev/null
+++ b/man/readSummarizedExperiment.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/readSummarizedExperiment.R
+\name{readSummarizedExperiment}
+\alias{readSummarizedExperiment}
+\alias{loadSummarizedExperiment}
+\title{Read a SummarizedExperiment from disk}
+\usage{
+readSummarizedExperiment(path, ...)
+}
+\arguments{
+\item{path}{String containing a path to a directory, itself created using the \code{\link{saveObject}} method for \linkS4class{SummarizedExperiment} objects.}
+
+\item{...}{Further arguments passed to internal \code{\link{altReadObject}} calls.}
+}
+\value{
+A \linkS4class{SummarizedExperiment} object.
+}
+\description{
+Read a \linkS4class{SummarizedExperiment} from its on-disk representation.
+}
+\examples{
+# Mocking up an experiment:
+mat <- matrix(rpois(10000, 10), ncol=10)
+colnames(mat) <- letters[1:10]
+rownames(mat) <- sprintf("GENE_\%i", seq_len(nrow(mat)))
+
+se <- SummarizedExperiment(list(counts=mat))
+se$stuff <- LETTERS[1:10]
+rowData(se)$blah <- runif(1000)
+metadata(se)$whee <- "YAY"
+
+tmp <- tempfile()
+saveObject(se, tmp)
+readSummarizedExperiment(tmp)
+
+}
+\seealso{
+\code{"\link{saveObject,SummarizedExperiment-method}"}, to save the SummarizedExperiment to disk.
+}
+\author{
+Aaron Lun
+}
diff --git a/man/saveSummarizedExperiment.Rd b/man/saveSummarizedExperiment.Rd
new file mode 100644
index 0000000..18550d4
--- /dev/null
+++ b/man/saveSummarizedExperiment.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/saveSummarizedExperiment.R
+\name{saveSummarizedExperiment}
+\alias{saveSummarizedExperiment}
+\alias{stageObject,SummarizedExperiment-method}
+\title{Save a SummarizedExperiment to disk}
+\usage{
+\S4method{saveObject}{SummarizedExperiment}(x, path, summarizedexperiment.allow.dataframe.assay = FALSE, ...)
+}
+\arguments{
+\item{x}{A \linkS4class{SummarizedExperiment} object or one of its subclasses.}
+
+\item{path}{String containing the path to a directory in which to save \code{x}.}
+
+\item{summarizedexperiment.allow.dataframe.assay}{Logical scalar indicating whether to allow \linkS4class{DataFrame} objects as assays of \code{x}; base data.frame assays are always rejected.}
+
+\item{...}{Further arguments to pass to internal \code{\link{altSaveObject}} calls.}
+}
+\value{
+\code{x} is saved into \code{path} and \code{NULL} is invisibly returned.
+}
+\description{
+Save a \linkS4class{SummarizedExperiment} to its on-disk representation.
+}
+\details{
+If \code{rangedsummarizedexperiment.skip.empty.ranges=TRUE} and \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries),
+\code{stageObject} will save it to file without any genomic range information.
+This means that any subsequent \code{\link{loadObject}} on the staged assets will return a non-ranged SummarizedExperiment.
+
+By default, we consider the presence of data frames in the assays to be an error.
+Users should coerce these into an appropriate matrix type, e.g., a dense matrix or a sparse dgCMatrix.
+If a DataFrame as an assay is truly desired, users may set \code{\link{options}(alabaster.se.reject_data.frames=FALSE)} to skip the error.
+Note that this only works for \linkS4class{DataFrame} objects - data.frame objects will not be saved correctly.
+}
+\examples{
+mat <- matrix(rpois(10000, 10), ncol=10)
+colnames(mat) <- letters[1:10]
+rownames(mat) <- sprintf("GENE_\%i", seq_len(nrow(mat)))
+
+se <- SummarizedExperiment(list(counts=mat))
+se$stuff <- LETTERS[1:10]
+rowData(se)$blah <- runif(1000)
+metadata(se)$whee <- "YAY"
+
+tmp <- tempfile()
+saveObject(se, tmp)
+list.files(tmp, recursive=TRUE)
+
+}
+\seealso{
+\code{\link{readSummarizedExperiment}}, to read the SummarizedExperiment back into the R session.
+}
+\author{
+Aaron Lun
+}
diff --git a/man/stageSummarizedExperiment.Rd b/man/stageSummarizedExperiment.Rd
deleted file mode 100644
index 3629dea..0000000
--- a/man/stageSummarizedExperiment.Rd
+++ /dev/null
@@ -1,73 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/stageSummarizedExperiment.R
-\name{stageObject,SummarizedExperiment-method}
-\alias{stageObject,SummarizedExperiment-method}
-\alias{stageObject,RangedSummarizedExperiment-method}
-\title{Stage a SummarizedExperiment}
-\usage{
-\S4method{stageObject}{SummarizedExperiment}(x, dir, path, child = FALSE, meta.name = "experiment.json", ...)
-
-\S4method{stageObject}{RangedSummarizedExperiment}(x, dir, path, child = FALSE, ..., skip.ranges = FALSE)
-}
-\arguments{
-\item{x}{A \linkS4class{SummarizedExperiment} object or one of its subclasses.}
-
-\item{dir}{String containing the path to the staging directory.}
-
-\item{path}{String containing a prefix of the relative path inside \code{dir} where \code{x} is to be saved.
-The actual path used to save \code{x} may include additional components, see Details.}
-
-\item{child}{Logical scalar indicating whether \code{x} is a child of a larger object.}
-
-\item{meta.name}{String containing the name of the metadata file.}
-
-\item{...}{Further arguments to pass to the \linkS4class{SummarizedExperiment} method.
-For the SummarizedExperiment itself, all further arguments are just ignored.}
-
-\item{skip.ranges}{Logical scalar indicating whether to avoid saving the \code{\link{rowRanges}}.}
-}
-\value{
-A named list of metadata that follows the \code{summarized_experiment} schema.
-The contents of \code{x} are saved into a \code{path} subdirectory inside \code{dir}.
-}
-\description{
-Save a \linkS4class{SummarizedExperiment} to file inside the staging directory.
-}
-\details{
-\code{meta.name} is only needed to set up the output \code{path}, for consistency with the \code{\link{stageObject}} contract.
-Callers should make sure to write the metadata to the same path by using \code{\link{.writeMetadata}} to create the JSON file.
-
-If \code{skip.ranges=TRUE}, the RangedSummarizedExperiment method just calls the SummarizedExperiment method, i.e., \code{\link{rowRanges}} are not saved.
-This avoids the hassle of switching classes and the associated problems, e.g., \url{https://github.com/Bioconductor/SummarizedExperiment/issues/29}.
-Note that any subsequent \code{\link{loadObject}} call on the staged assets will return a non-ranged SummarizedExperiment.
-
-If \code{x} is a RangedSummarizedExperiment with \dQuote{empty} \code{\link{rowRanges}} (i.e., a \linkS4class{GRangesList} with zero-length entries),
-\code{stageObject} will save it to file without any genomic range information.
-This means that any subsequent \code{\link{loadObject}} on the staged assets will return a non-ranged SummarizedExperiment.
-
-By default, we consider the presence of data frames in the assays to be an error.
-Users should coerce these into an appropriate matrix type, e.g., a dense matrix or a sparse dgCMatrix.
-If a DataFrame as an assay is truly desired, users may set \code{\link{options}(alabaster.se.reject_data.frames=FALSE)} to skip the error.
-Note that this only works for \linkS4class{DataFrame} objects - data.frame objects will not be saved correctly.
-}
-\examples{
-tmp <- tempfile()
-dir.create(tmp)
-
-mat <- matrix(rpois(10000, 10), ncol=10)
-colnames(mat) <- letters[1:10]
-rownames(mat) <- sprintf("GENE_\%i", seq_len(nrow(mat)))
-
-se <- SummarizedExperiment(list(counts=mat))
-se$stuff <- LETTERS[1:10]
-rowData(se)$blah <- runif(1000)
-metadata(se)$whee <- "YAY"
-
-dir.create(tmp)
-stageObject(se, dir=tmp, "rna-seq") 
-list.files(file.path(tmp, "rna-seq"))
-
-}
-\author{
-Aaron Lun
-}