diff --git a/DESCRIPTION b/DESCRIPTION
index 71b1097..c7de546 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,8 +2,8 @@ Encoding: UTF-8
Package: bigsnpr
Type: Package
Title: Analysis of Massive SNP Arrays
-Version: 1.12.9
-Date: 2024-05-15
+Version: 1.12.10
+Date: 2024-08-13
Authors@R: c(
person("Florian", "Privé", email = "florian.prive.21@gmail.com", role = c("aut", "cre")),
person("Michael", "Blum", role = "ths"),
diff --git a/NEWS.md b/NEWS.md
index 7597a48..f1b1711 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+## bigsnpr 1.12.10
+
+- In function `snp_ancestry_summary()`, add parameter `sum_to_one` to optionally allows for ancestry coefficients to have a sum lower than 1 (when `FALSE`; default is `TRUE`).
+
## bigsnpr 1.12.9
- In function `snp_modifyBuild()`, you can now provide `local_chain` as a vector of two, for when using `check_reverse`. You can now also modify the `base_url` from where to download the chain files.
diff --git a/R/ancestry-summary.R b/R/ancestry-summary.R
index 1f1400d..6822bc8 100644
--- a/R/ancestry-summary.R
+++ b/R/ancestry-summary.R
@@ -15,6 +15,8 @@
#' Default is 0.4. When correlation is lower, an error is returned.
#' For individual genotypes, this should be larger than 0.6.
#' For allele frequencies, this should be larger than 0.9.
+#' @param sum_to_one Whether to force ancestry coefficients to sum to 1?
+#' Default is `TRUE` (otherwise, the sum can be lower than 1).
#'
#' @return Vector of coefficients representing the ancestry proportions.
#' Also (as attributes) `cor_each`, the correlation between input
@@ -27,7 +29,7 @@
#' @example examples/example-ancestry-summary.R
#'
snp_ancestry_summary <- function(freq, info_freq_ref, projection, correction,
- min_cor = 0.4) {
+ min_cor = 0.4, sum_to_one = TRUE) {
assert_package("quadprog")
assert_nona(freq)
@@ -53,9 +55,9 @@ snp_ancestry_summary <- function(freq, info_freq_ref, projection, correction,
res <- quadprog::solve.QP(
Dmat = cp_X_pd$mat,
dvec = crossprod(y, X),
- Amat = cbind(1, diag(ncol(X))),
- bvec = c(1, rep(0, ncol(X))),
- meq = 1
+ Amat = cbind(-1, diag(ncol(X))),
+ bvec = c(-1, rep(0, ncol(X))),
+ meq = `if`(sum_to_one, 1, 0)
)
cor_pred <- drop(cor(drop(X0 %*% res$solution), freq))
diff --git a/docs/news/index.html b/docs/news/index.html
index e7febc0..1738759 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -17,7 +17,7 @@
bigsnpr
- 1.12.9
+ 1.12.10
@@ -91,6 +91,10 @@
Changelog
Source: NEWS.md
+
+
+
- In function
snp_ancestry_summary()
, add parameter sum_to_one
to optionally allows for ancestry coefficients to have a sum lower than 1 (when FALSE
; default is TRUE
).
+
- In function
snp_modifyBuild()
, you can now provide local_chain
as a vector of two, for when using check_reverse
. You can now also modify the base_url
from where to download the chain files.
diff --git a/man/snp_ancestry_summary.Rd b/man/snp_ancestry_summary.Rd
index 4749293..c099f20 100644
--- a/man/snp_ancestry_summary.Rd
+++ b/man/snp_ancestry_summary.Rd
@@ -9,7 +9,8 @@ snp_ancestry_summary(
info_freq_ref,
projection,
correction,
- min_cor = 0.4
+ min_cor = 0.4,
+ sum_to_one = TRUE
)
}
\arguments{
@@ -27,6 +28,9 @@ project allele frequencies.}
Default is 0.4. When correlation is lower, an error is returned.
For individual genotypes, this should be larger than 0.6.
For allele frequencies, this should be larger than 0.9.}
+
+\item{sum_to_one}{Whether to force ancestry coefficients to sum to 1?
+Default is \code{TRUE} (otherwise, the sum can be lower than 1).}
}
\value{
Vector of coefficients representing the ancestry proportions.
diff --git a/tests/testthat/test-5-match.R b/tests/testthat/test-5-match.R
index 4e5eed2..7412a3a 100644
--- a/tests/testthat/test-5-match.R
+++ b/tests/testthat/test-5-match.R
@@ -148,6 +148,13 @@ test_that("snp_ancestry_summary() works (with no projection here)", {
res <- snp_ancestry_summary(y, X, Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)))
expect_equal(res, prop, check.attributes = FALSE)
expect_equal(attr(res, "cor_pred"), 1)
+ expect_equal(sum(res), 1)
+
+ res.2 <- snp_ancestry_summary(y, X, Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)),
+ sum_to_one = FALSE)
+ expect_equal(res.2, prop, check.attributes = FALSE)
+ expect_equal(attr(res.2, "cor_pred"), 1)
+ expect_equal(sum(res.2), 1)
expect_warning(res2 <- snp_ancestry_summary(
y, X[, -1], Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X))),
@@ -156,6 +163,24 @@ test_that("snp_ancestry_summary() works (with no projection here)", {
expect_equal(res2, prop[-1], tolerance = 0.1, check.attributes = FALSE)
expect_equal(attr(res2, "cor_pred"), drop(cor(y, X[, -1] %*% res2)))
+ expect_warning(res2.2 <- snp_ancestry_summary(
+ y, X[, -1], Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)), sum_to_one = FALSE),
+ "The solution does not perfectly match the frequencies.")
+ expect_true(all(res2.2 > prop[-1]))
+ expect_equal(res2.2, prop[-1], tolerance = 0.1, check.attributes = FALSE)
+ expect_equal(attr(res2.2, "cor_pred"), drop(cor(y, X[, -1] %*% res2.2)))
+ # better than when forced to sum to 1
+ expect_true(all(res2.2 < res2))
+ expect_gt(attr(res2.2, "cor_pred"), attr(res2, "cor_pred"))
+ expect_lt(sum(res2.2), 1)
+
+ res.3 <- snp_ancestry_summary(y, cbind(X, rbeta(1000, 1, 3)),
+ Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)),
+ sum_to_one = sample(c(TRUE, FALSE), 1))
+ expect_equal(res.3, c(prop, 0), check.attributes = FALSE)
+ expect_equal(attr(res.3, "cor_pred"), 1)
+ expect_equal(sum(res.3), 1)
+
expect_error(res3 <- snp_ancestry_summary(
y, X[, -3], Matrix::Diagonal(nrow(X), 1), rep(1, nrow(X)), min_cor = 0.6),
"Correlation between frequencies is too low:")