From 8324249533cfa6ba205f4fa043bca19e8bb5b0d0 Mon Sep 17 00:00:00 2001 From: Ben Schneider Date: Sun, 11 Jun 2023 14:54:52 -0400 Subject: [PATCH 1/7] Fix #159, expand tests accordingly --- R/subset_svy_vars.R | 21 ++++++++++++--- tests/testthat/test_survey_statistics.r | 34 +++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/R/subset_svy_vars.R b/R/subset_svy_vars.R index f5f33ea..686e473 100644 --- a/R/subset_svy_vars.R +++ b/R/subset_svy_vars.R @@ -12,7 +12,11 @@ subset_svy_vars.survey.design2 <- function(x, ..., .preserve = FALSE) { if (is.calibrated(x) || is.pps(x)){ ## Set weights to zero: no memory saving possible ## Will always be numeric because srvyr's construction - x$prob[-row_numbers] <- Inf + if (length(row_numbers) == 0) { + x$prob <- rep(Inf, length(x$prob)) + } else { + x$prob[-row_numbers] <- Inf + } index <- is.finite(x$prob) psu <- !duplicated(x$cluster[index, 1]) @@ -68,9 +72,18 @@ subset_svy_vars.twophase2 <- function(x, ..., .preserve = FALSE) { ## Set weights to zero: don't try to save memory ## Will always have numeric because of srvyr's structure - x$prob[-row_numbers] <- Inf - x$phase2$prob[-row_numbers] <- Inf - x$dcheck <- lapply(x$dcheck, function(m) {m[-row_numbers, -row_numbers] <- 0; m}) + if (length(row_numbers) == 0) { + x$prob <- rep(Inf, length(x$prob)) + x$phase2$prob <- rep(Inf, length(x$phase2$prob)) + x$dcheck <- lapply(x$dcheck, function(m) { + m[seq_len(nrow(m)), seq_len(ncol(m))] <- 0 + m + }) + } else { + x$prob[-row_numbers] <- Inf + x$phase2$prob[-row_numbers] <- Inf + x$dcheck <- lapply(x$dcheck, function(m) {m[-row_numbers, -row_numbers] <- 0; m}) + } index <- is.finite(x$prob) psu <- !duplicated(x$phase2$cluster[index, 1]) diff --git a/tests/testthat/test_survey_statistics.r b/tests/testthat/test_survey_statistics.r index b0ad9c3..ef73672 100644 --- a/tests/testthat/test_survey_statistics.r +++ b/tests/testthat/test_survey_statistics.r @@ -729,6 +729,8 @@ test_that("unweighted allows named arguments", { test_that( "unweighted works with filtered data in calibrated or PPS designs", { + + # First check for calibrated designs data(api, package = "survey") dclus1 <- as_survey_design(apiclus1, id = dnum, weights = pw, fpc = fpc) @@ -739,6 +741,7 @@ test_that( sample.margins = list(~stype,~sch.wide), population.margins = list(pop.types, pop.schwide)) + # Check when filtering returns at least one row out_calib <- raked_design %>% filter(sch.wide == "Yes") %>% group_by(stype) %>% @@ -752,11 +755,28 @@ test_that( expect_equal(out_calib[['sample_size']], out_noncalib[['sample_size']]) + # Check when filtering returns zero rows + + out_calib <- raked_design %>% + filter(sch.wide == "Fake Category") %>% + summarize(sample_size = unweighted(n())) + + out_noncalib <- dclus1 %>% + filter(sch.wide == "Fake Category") %>% + summarize(sample_size = unweighted(n())) + + expect_equal(out_calib[['sample_size']], + expected = 0) + expect_equal(out_noncalib[['sample_size']], + expected = 0) + + # Next check for PPS design data(election, package = "survey") non_pps_design <- as_survey_design(election_pps, id = 1) pps_design <- as_survey_design(election_pps, id = 1, fpc = p, pps = "brewer") + # Check correct results when filtering returns at least one row out_nonpps <- non_pps_design %>% filter(County == "Los Angeles") %>% summarize(n_rows = unweighted(n())) @@ -767,6 +787,20 @@ test_that( expect_equal(out_pps[['n_rows']], out_nonpps[['n_rows']]) + + # Check correct results when filtering returns zero rows + out_nonpps <- non_pps_design %>% + filter(County == "Fake Category") %>% + summarize(n_rows = unweighted(n())) + + out_pps <- pps_design %>% + filter(County == "Fake Category") %>% + summarize(n_rows = unweighted(n())) + + expect_equal(out_pps[['n_rows']], + expected = 0) + expect_equal(out_nonpps[['n_rows']], + expected = 0) } ) From 265f8234898e13de6e7b9c9ae10db0cd39899528 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 27 Sep 2023 10:41:46 +0200 Subject: [PATCH 2/7] some speedup --- R/survey_statistics_helpers.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/survey_statistics_helpers.R b/R/survey_statistics_helpers.R index d140748..a9264c6 100644 --- a/R/survey_statistics_helpers.R +++ b/R/survey_statistics_helpers.R @@ -43,7 +43,7 @@ set_survey_vars <- function( out$phase1$sample$variables[[name]] <- x } else { if (!add) { - out$variables <- select(out$variables, dplyr::one_of(group_vars(out))) + out$variables <- out$variables[, group_vars(out), drop = FALSE] } out$variables[[name]] <- x } @@ -83,7 +83,7 @@ get_var_est <- function( se <- survey::SE(stat) # Needed for grouped quantile if (!inherits(se, "data.frame")) { - se <- data.frame(matrix(se, ncol = out_width)) + se <- as.data.frame(se) } names(se) <- "_se" se @@ -122,7 +122,7 @@ get_var_est <- function( } }) - coef <- data.frame(matrix(coef(stat), ncol = out_width)) + coef <- as.data.frame(coef(stat)) names(coef) <- "coef" out <- c(list(coef), out) @@ -136,7 +136,7 @@ get_var_est <- function( out <- c(out, list(deff)) } - as_srvyr_result_df(dplyr::bind_cols(out)) + as_srvyr_result_df(do.call(cbind, out)) } # Largely the same as get_var_est(), but need to handle the fact that there can be From d3540772b530dff86fa0b4bdbd8ecbbe46d37272 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 27 Sep 2023 10:57:10 +0200 Subject: [PATCH 3/7] fix test --- R/survey_statistics_helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/survey_statistics_helpers.R b/R/survey_statistics_helpers.R index a9264c6..1792bbb 100644 --- a/R/survey_statistics_helpers.R +++ b/R/survey_statistics_helpers.R @@ -122,7 +122,7 @@ get_var_est <- function( } }) - coef <- as.data.frame(coef(stat)) + coef <- as.data.frame(unclass(coef(stat))) names(coef) <- "coef" out <- c(list(coef), out) From e3c03587bd9c17c6b5f292a57bd28da286e115ae Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 12 Oct 2023 13:33:42 +0200 Subject: [PATCH 4/7] try additional speedup --- R/survey_statistics_helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/survey_statistics_helpers.R b/R/survey_statistics_helpers.R index 1792bbb..0fab7d1 100644 --- a/R/survey_statistics_helpers.R +++ b/R/survey_statistics_helpers.R @@ -43,7 +43,7 @@ set_survey_vars <- function( out$phase1$sample$variables[[name]] <- x } else { if (!add) { - out$variables <- out$variables[, group_vars(out), drop = FALSE] + out$variables <- out$variables[group_vars(out)] } out$variables[[name]] <- x } From 9cea023d95d3918581daa65243844db9d218eed4 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 12 Oct 2023 14:36:34 +0200 Subject: [PATCH 5/7] Revert "try additional speedup" This reverts commit e3c03587bd9c17c6b5f292a57bd28da286e115ae. --- R/survey_statistics_helpers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/survey_statistics_helpers.R b/R/survey_statistics_helpers.R index 0fab7d1..1792bbb 100644 --- a/R/survey_statistics_helpers.R +++ b/R/survey_statistics_helpers.R @@ -43,7 +43,7 @@ set_survey_vars <- function( out$phase1$sample$variables[[name]] <- x } else { if (!add) { - out$variables <- out$variables[group_vars(out)] + out$variables <- out$variables[, group_vars(out), drop = FALSE] } out$variables[[name]] <- x } From 38f87565bf12f77ef8fc648ff9858387abddcd7e Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Sun, 23 Jul 2023 09:16:12 -0500 Subject: [PATCH 6/7] bump version --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index b8a17c4..272fec2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,7 @@ Title: 'dplyr'-Like Syntax for Summary Statistics of Survey Data Description: Use piping, verbs like 'group_by' and 'summarize', and other 'dplyr' inspired syntactic style when calculating summary statistics on survey data using functions from the 'survey' package. -Version: 1.2.0 +Version: 1.2.0.9000 Date: 2023-02-20 Authors@R: c(person("Greg", "Freedman Ellis", email = "greg.freedman@gmail.com", role = c("aut", "cre")), person("Thomas", "Lumley", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index c29f165..83f271d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# srvyr (development version) + # srvyr 1.2.0 * `survey_prop()` now uses proportions as the default, which should confidence interval improve coverage, but does mean results may slightly change (#141, #142, thanks @szimmer) * New function `survey_corr()` calculates the correlation between 2 variables, (#150, #151, thanks @szimmer & @bschneidr) From ebad9ab6eef0345ee8fa7afcfc20e51b7e626759 Mon Sep 17 00:00:00 2001 From: Greg Freedman Ellis Date: Mon, 16 Oct 2023 14:40:42 -0500 Subject: [PATCH 7/7] appease cran --- R/srvyr.r | 3 +-- man/srvyr.Rd | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/R/srvyr.r b/R/srvyr.r index d46072c..3ae4bef 100644 --- a/R/srvyr.r +++ b/R/srvyr.r @@ -46,6 +46,5 @@ #' Within summarise, you can also use \code{\link{unweighted}}, which calculates #' a function without taking into consideration the survey weighting. #' -#' @docType package #' @name srvyr -NULL +"_PACKAGE" diff --git a/man/srvyr.Rd b/man/srvyr.Rd index 6a52ad0..14daa53 100644 --- a/man/srvyr.Rd +++ b/man/srvyr.Rd @@ -3,6 +3,7 @@ \docType{package} \name{srvyr} \alias{srvyr} +\alias{srvyr-package} \title{srvyr: A package for 'dplyr'-Like Syntax for Summary Statistics of Survey Data.} \description{ The srvyr package provides a new way of calculating summary statistics @@ -57,3 +58,28 @@ Within summarise, you can also use \code{\link{unweighted}}, which calculates a function without taking into consideration the survey weighting. } +\seealso{ +Useful links: +\itemize{ + \item \url{http://gdfe.co/srvyr/} + \item \url{https://github.com/gergness/srvyr/} + \item Report bugs at \url{https://github.com/gergness/srvyr/issues} +} + +} +\author{ +\strong{Maintainer}: Greg Freedman Ellis \email{greg.freedman@gmail.com} + +Authors: +\itemize{ + \item Ben Schneider [contributor] +} + +Other contributors: +\itemize{ + \item Thomas Lumley [contributor] + \item Tomasz Żółtak [contributor] + \item Pavel N. Krivitsky \email{pavel@statnet.org} [contributor] +} + +}