From e631c4c22dea24ce8b19116defd85eed99bd53db Mon Sep 17 00:00:00 2001 From: Lorenzo Gaborini Date: Fri, 31 May 2019 14:50:30 +0200 Subject: [PATCH] Fix bug in prior elicitation when background item is seen only once --- DESCRIPTION | 2 +- NEWS.md | 5 +++ R/two_level_functions.R | 36 +++++++++++++++++----- man/inv_pd.Rd | 2 +- man/samesource_C.Rd | 2 +- man/two.level.multivariate.calculate.UC.Rd | 10 ++++-- 6 files changed, 44 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2331c92..255fa7e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: bayessource Type: Package Title: Bayesian approach to evaluate whether two datasets come from the same population -Version: 0.3 +Version: 0.3.1 Authors@R: c(person("Lorenzo", "Gaborini", role = c("aut", "cre"), email = "lorenzo.gaborini@gmail.com"), person("Silvia", "Bozza", role = c("ctb", "cph"))) diff --git a/NEWS.md b/NEWS.md index d940d0b..7e53911 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# bayessource 0.3.1 + +* Fig bug in `make_priors_and_init` when a background item is seen only once. + Now warns or fails gracefully. + # bayessource 0.3 * Add prior and init elicitation functions. diff --git a/R/two_level_functions.R b/R/two_level_functions.R index dfa360c..344f503 100644 --- a/R/two_level_functions.R +++ b/R/two_level_functions.R @@ -9,14 +9,16 @@ #' #' Two-level function: item is chosen with the third parameter. #' +#' Items who are observed only once cannot contribute to within covariance estimation. +#' In that case, either we skip its contribute (default) or fail. +#' #' Multivariate functions supporting paper: #' Bozza, A., Taroni, F., Marquis, R., M. Schmittbuhl #' -#' This function is RNG-free. -#' #' @param population matrix or dataframe containing data columns, item column and other columns #' @param idx.variables data columns (indexes or character vectors) #' @param idx.item item column (index or character vector) +#' @param singular.action action to take when one of the items is observed once; one of `'skip'` (default) or `'fail'` #' @importFrom stats cov #' @importFrom utils modifyList #' @return list with items: @@ -25,7 +27,9 @@ #' - `W` (the within covariance matrix) #' - `B` (the between covariance matrix) #' @keywords internal -two.level.multivariate.calculate.UC <- function(population, idx.variables, idx.item) { +two.level.multivariate.calculate.UC <- function(population, idx.variables, idx.item, singular.action = 'skip') { + + stopifnot(singular.action %in% c('skip', 'fail')) # Convert to matrix mtx_population <- as.matrix(population[, idx.variables]) @@ -37,10 +41,14 @@ two.level.multivariate.calculate.UC <- function(population, idx.variables, idx.i all.items <- as.vector(unique(item_population)) n.items <- length(all.items) + if (n.items == 1) { + stop('two.level.multivariate.calculate.UC: only one item in population') + } + variable.names <- colnames(population)[idx.variables] n.variables <- length(idx.variables) if (n.variables < 2) { - stop("cannot handle fewer than two variables with this function") + stop("two.level.multivariate.calculate.UC: cannot handle fewer than two variables with this function") } # How many replicates per item @@ -64,6 +72,7 @@ two.level.multivariate.calculate.UC <- function(population, idx.variables, idx.i colnames(group.means) <- variable.names n.pop <- dim(population)[1] + n.items.skip <- 0 # for each unique item for (i.item in 1:n.items) { @@ -75,12 +84,22 @@ two.level.multivariate.calculate.UC <- function(population, idx.variables, idx.i n.replicates <- length(idx.replicates) # pick out the measurements for the item - tempdat <- mtx_population[idx.replicates, ] + tempdat <- mtx_population[idx.replicates, , drop = FALSE] + # assign values to the group means for the population means <- colMeans(tempdat) group.means[i.item, ] <- means + if (n.replicates == 1) { + if (identical(singular.action, 'skip')) { + message(paste0('two.level.multivariate.calculate.UC: item "', i.item, '" has 1 replicate: cannot contribute to within covariance matrix estimation. Skipping.')) + n.items.skip <- n.items.skip + 1 + next + } + stop(paste0('two.level.multivariate.calculate.UC: item "', i.item, '" has 1 replicate: cannot contribute to within covariance matrix estimation. Failing.')) + } + # sum the within and between item means S.this <- t(means - all.means) %*% (means - all.means) S <- S + S.this @@ -93,11 +112,14 @@ two.level.multivariate.calculate.UC <- function(population, idx.variables, idx.i Sw <- Sw + Cov.this } + # Only these items have contributed to S, Sw + n.items.contributing <- n.items - n.items.skip + # convert matrix Sw to matrix U - W <- Sw/(n.pop - n.items) + W <- Sw/(n.pop - n.items.contributing) # convert matrix S to matrix C - B <- S/(n.items - 1) + B <- S/(n.items.contributing - 1) op <- list(group.means = group.means, all.means = all.means, W = W, B = B) return(op) diff --git a/man/inv_pd.Rd b/man/inv_pd.Rd index 6bd9083..bddaeeb 100644 --- a/man/inv_pd.Rd +++ b/man/inv_pd.Rd @@ -13,5 +13,5 @@ inv_pd(X) the inverse of X } \description{ -Compute the inverse of a positive-definite matrix using Cholesky composition. +Compute the inverse of a positive-definite matrix using Cholesky decomposition. } diff --git a/man/samesource_C.Rd b/man/samesource_C.Rd index 86b4c18..833cb40 100644 --- a/man/samesource_C.Rd +++ b/man/samesource_C.Rd @@ -33,7 +33,7 @@ samesource_C(quest, ref, n.iter, B.inv, W.inv.1, W.inv.2, U, nw, mu, \item{marginals}{return the marginal likelihoods in the LR formula (default: FALSE)} } \value{ -the log-LR value, or a list with the marginals: \code{list(value, log_ml_Hp, log_ml_Hd_ref, log_ml_Hd_quest)} +the log-BF value (base e), or a list with the log-BF and the marginals: \code{list(value, log_ml_Hp, log_ml_Hd_ref, log_ml_Hd_quest)} } \description{ Implemented in C. diff --git a/man/two.level.multivariate.calculate.UC.Rd b/man/two.level.multivariate.calculate.UC.Rd index 4c90e18..1e84f00 100644 --- a/man/two.level.multivariate.calculate.UC.Rd +++ b/man/two.level.multivariate.calculate.UC.Rd @@ -4,7 +4,8 @@ \alias{two.level.multivariate.calculate.UC} \title{Calculate Between/Within covariance matrices} \usage{ -two.level.multivariate.calculate.UC(population, idx.variables, idx.item) +two.level.multivariate.calculate.UC(population, idx.variables, idx.item, + singular.action = "skip") } \arguments{ \item{population}{matrix or dataframe containing data columns, item column and other columns} @@ -12,6 +13,8 @@ two.level.multivariate.calculate.UC(population, idx.variables, idx.item) \item{idx.variables}{data columns (indexes or character vectors)} \item{idx.item}{item column (index or character vector)} + +\item{singular.action}{action to take when one of the items is observed once; one of \code{'skip'} (default) or \code{'fail'}} } \value{ list with items: @@ -26,9 +29,10 @@ list with items: Two-level function: item is chosen with the third parameter. } \details{ +Items who are observed only once cannot contribute to within covariance estimation. +In that case, either we skip its contribute (default) or fail. + Multivariate functions supporting paper: Bozza, A., Taroni, F., Marquis, R., M. Schmittbuhl - -This function is RNG-free. } \keyword{internal}