Skip to content

Commit

Permalink
Minor bug fix for rare edge case
Browse files Browse the repository at this point in the history
  • Loading branch information
deryannis committed Sep 20, 2023
1 parent e50400e commit 33fab16
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 33 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: BERT
Title: Hierarchical Batch-Effect Adjustment with Trees
Version: 0.99.4
Version: 0.99.5
Authors@R:
c(person("Yannis", "Schumann", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2379-200X")),
Expand Down
29 changes: 21 additions & 8 deletions R/bert.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,17 @@ parallel_bert <- function(
#' labels. The default is "Batch".
#' @param referencename A string containing the name of the column to use as
#' reference labels. The default is "Reference".
#' @param samplename A string containing the name of the column to use as sample
#' name. The default is "Sample".
#' @param covariatename A vector containing the names of columns with
#' categorical covariables. The default is NULL, for which all columns with
#' the pattern "Cov" will be selected.
#' @return None
validate_bert_input <- function(data, cores, combatmode,
qualitycontrol, verify, mpi, stopParBatches,
corereduction, backend, method, labelname,
batchname, referencename, covariatename) {
batchname, referencename, samplename,
covariatename) {
if(!(methods::is(data, "SummarizedExperiment") || is.data.frame(data) ||
is.matrix(data))){
logging::logerror(paste("Input data for BERT must be either data.frame",
Expand All @@ -187,6 +190,10 @@ validate_bert_input <- function(data, cores, combatmode,
logging::logerror(paste("Parameter labelname for BERT must be string"))
stop()
}
if(!is.character(samplename)){
logging::logerror(paste("Parameter samplename for BERT must be string"))
stop()
}
if(!is.character(batchname)){
logging::logerror(paste("Parameter batchname for BERT must be string"))
stop()
Expand Down Expand Up @@ -297,6 +304,8 @@ validate_bert_input <- function(data, cores, combatmode,
#' labels. The default is "Batch".
#' @param referencename A string containing the name of the column to use as ref.
#' labels. The default is "Reference".
#' @param samplename A string containing the name of the column to use as sample
#' name. The default is "Sample".
#' @param covariatename A vector containing the names of columns with
#' categorical covariables. The default is NULL, for which all columns with
#' the pattern "Cov" will be selected.
Expand All @@ -322,6 +331,7 @@ BERT <- function(
labelname="Label",
batchname="Batch",
referencename="Reference",
samplename="Sample",
covariatename=NULL){

# dummy code to suppress bioccheck warning
Expand All @@ -330,10 +340,7 @@ BERT <- function(
validate_bert_input(data, cores, combatmode,
qualitycontrol, verify, mpi, stopParBatches,
corereduction, backend, method, labelname,
batchname, referencename, covariatename)

# rename columns according to user input
old_names <- colnames(data)
batchname, referencename, samplename, covariatename)


# store original cores
Expand All @@ -350,7 +357,7 @@ BERT <- function(

# format dataframe
if(verify){
data <- format_DF(data, labelname, batchname, referencename,
data <- format_DF(data, labelname, batchname, referencename, samplename,
covariatename)
}else{
logging::loginfo("Skipping initial DF formatting")
Expand Down Expand Up @@ -501,8 +508,14 @@ BERT <- function(
}
}

# rename everything back
names(data) <- old_names
# rename again
colnames(data)[colnames(data)=="Batch"] <- batchname
colnames(data)[colnames(data)=="Label"] <- labelname
colnames(data)[colnames(data)=="Sample"] <- samplename

for(x in covariatename){
colnames(data)[colnames(data)==paste("Cov_",x, sep="")] <- x
}

# if SummarizedExperiment, return as such object as well
if(methods::is(data, "SummarizedExperiment") ){
Expand Down
19 changes: 12 additions & 7 deletions R/format_Data.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,17 @@ replace_missing <- function(data){
#' labels. The default is "Batch".
#' @param referencename A string containing the name of the column to use as ref.
#' labels. The default is "Reference".
#' @param samplename A string containing the name of the column to use as sample
#' name. The default is "Sample".
#' @param covariatename A vector containing the names of columns with
#' categorical covariables. The default is NULL, for which all columns with
#' the pattern "Cov" will be selected.
#' Additional column names are "Batch", "Cov_X" (were X may be any number),
#' "Label" and "Sample".
#' @return The formatted matrix.
format_DF <- function(data, labelname="Label",batchname="Batch",
referencename="Reference", covariatename=NULL){
referencename="Reference", samplename="Sample",
covariatename=NULL){
logging::loginfo("Formatting Data.")

if(methods::is(data, "SummarizedExperiment")){
Expand All @@ -71,16 +74,18 @@ format_DF <- function(data, labelname="Label",batchname="Batch",
# in columns
raw_data <- data.frame(t(SummarizedExperiment::assay(data)))
# obtain batch/label/sample/reference column
raw_data["Batch"] <- SummarizedExperiment::colData(data)$Batch
raw_data["Batch"] <- SummarizedExperiment::colData(data)[batchname][,1]
if("Sample" %in% names(SummarizedExperiment::colData(data))){
raw_data["Sample"] <- SummarizedExperiment::colData(data)$Sample
raw_data["Sample"] <- SummarizedExperiment::colData(
data)[samplename][,1]
}
if("Label" %in% names(SummarizedExperiment::colData(data))){
raw_data["Label"] <- SummarizedExperiment::colData(data)$Label
if(labelname %in% names(SummarizedExperiment::colData(data))){
raw_data["Label"] <- SummarizedExperiment::colData(
data)[labelname][,1]
}
if("Reference" %in% names(SummarizedExperiment::colData(data))){
if(referencename %in% names(SummarizedExperiment::colData(data))){
raw_data["Reference"] <- SummarizedExperiment::colData(
data)$Reference
data)[referencename][,1]
}
# potential covariables
cov_names <- names(
Expand Down
4 changes: 4 additions & 0 deletions man/BERT.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/format_DF.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/validate_bert_input.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 28 additions & 17 deletions tests/testthat/test_bert.R
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,17 @@ test_that("BERT allows the user to specify custom names for references", {
expect_true(all.equal(y_1, y_2[rownames(y_2), colnames(y_2)]))
})

test_that("BERT removes empty columns and still renames everything back", {
nrows <- 8
ncols <- 3
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
y = data.frame(counts)
y[,1] = NA
y["t"] = c(1,1,1,1,2,2,2,2)
y2 = BERT(y, batchname = "t")
expect_true(all.equal(c("X2", "X3", "t"), colnames(y2)))
})

test_that("BERT allows the user to specify custom names for covariables", {
y <- generate_dataset(5,2,25,0.1,2)
y["Cov_1"] = y$Label
Expand All @@ -211,41 +222,41 @@ test_that("bert validates all user input -- BERT", {
y <- generate_dataset(100,5,10,0.1,2)
# this should work
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, 2,
"file", "None", "X", "B", "R", NULL), NA)
"file", "None", "X", "B", "R", "S",NULL), NA)
expect_error(BERT(y, 1, 1,"ComBat", TRUE, FALSE, FALSE, 1, 2, "file", "X",
"B", "R", NULL), NA)
"B", "R", "S", NULL), NA)
# this should crash
expect_error(validate_bert_input("blubb", 1, 1, TRUE, FALSE, FALSE, 1, 2,
"file", "None", "X", "B", "R", NULL))
"file", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, -1, 1, TRUE, FALSE, FALSE, 1, 2,
"file", "None", "X", "B", "R", NULL))
"file", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 10, TRUE, FALSE, FALSE, 1, 2,
"file", "None", "X", "B", "R", NULL))
"file", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, "TRUE", FALSE, FALSE, 1, 2,
"file", "None", "X", "B", "R", NULL))
"file", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, "", FALSE, 1, 2,
"file", "None", "X", "B", "R", NULL))
"file", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, -10, 1, 2,
"file", "None", "X", "B", "R", NULL))
"file", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, FALSE, 2,
"file", "None", "X", "B", "R", NULL))
"file", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, "",
"file", "None", "X", "B", "R", NULL))
"file", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, 2,
"f", "None", "X", "B", "R", NULL))
"f", "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, 2,
-1, "None", "X", "B", "R", NULL))
-1, "None", "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, 2,
"file", FALSE, "X", "B", "R", NULL))
"file", FALSE, "X", "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, 2,
"file", "None", -1, "B", "R", NULL))
"file", "None", -1, "B", "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, 2,
"file", "None", "X", FALSE, "R", NULL))
"file", "None", "X", FALSE, "R", "S", NULL))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, 2,
"file", "None", "X", FALSE, "R", c()))
"file", "None", "X", FALSE, "R", "S", c()))
expect_error(validate_bert_input(y, 1, 1, TRUE, FALSE, FALSE, 1, 2,
"file", "None", "X", FALSE, "R",
c("c", 1)))
"S", c("c", 1)))

})

Expand Down

0 comments on commit 33fab16

Please sign in to comment.