Skip to content

Commit

Permalink
Merge branch 'main' into add-degf-option
Browse files Browse the repository at this point in the history
  • Loading branch information
gergness authored Jul 14, 2024
2 parents 003d4fb + 43d2072 commit e676e2e
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 12 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Title: 'dplyr'-Like Syntax for Summary Statistics of Survey Data
Description: Use piping, verbs like 'group_by' and 'summarize', and other
'dplyr' inspired syntactic style when calculating summary statistics on survey
data using functions from the 'survey' package.
Version: 1.2.0
Version: 1.2.0.9000
Date: 2023-02-20
Authors@R: c(person("Greg", "Freedman Ellis", email = "[email protected]", role = c("aut", "cre")),
person("Thomas", "Lumley", role = "ctb"),
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (development)
# srvyr (development version)
* `as_survey_rep()` now has an argument `degf`, corresponding to the same argument in the survey function `svrepdesign()`. This argument can be useful for large data sets, since specifying a value for `degf` avoids a calculation which can be slow for very large data sets.

# srvyr 1.2.0
Expand Down
3 changes: 1 addition & 2 deletions R/srvyr.r
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,5 @@
#' Within summarise, you can also use \code{\link{unweighted}}, which calculates
#' a function without taking into consideration the survey weighting.
#'
#' @docType package
#' @name srvyr
NULL
"_PACKAGE"
21 changes: 17 additions & 4 deletions R/subset_svy_vars.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ subset_svy_vars.survey.design2 <- function(x, ..., .preserve = FALSE) {
if (is.calibrated(x) || is.pps(x)){
## Set weights to zero: no memory saving possible
## Will always be numeric because srvyr's construction
x$prob[-row_numbers] <- Inf
if (length(row_numbers) == 0) {
x$prob <- rep(Inf, length(x$prob))
} else {
x$prob[-row_numbers] <- Inf
}

index <- is.finite(x$prob)
psu <- !duplicated(x$cluster[index, 1])
Expand Down Expand Up @@ -68,9 +72,18 @@ subset_svy_vars.twophase2 <- function(x, ..., .preserve = FALSE) {

## Set weights to zero: don't try to save memory
## Will always have numeric because of srvyr's structure
x$prob[-row_numbers] <- Inf
x$phase2$prob[-row_numbers] <- Inf
x$dcheck <- lapply(x$dcheck, function(m) {m[-row_numbers, -row_numbers] <- 0; m})
if (length(row_numbers) == 0) {
x$prob <- rep(Inf, length(x$prob))
x$phase2$prob <- rep(Inf, length(x$phase2$prob))
x$dcheck <- lapply(x$dcheck, function(m) {
m[seq_len(nrow(m)), seq_len(ncol(m))] <- 0
m
})
} else {
x$prob[-row_numbers] <- Inf
x$phase2$prob[-row_numbers] <- Inf
x$dcheck <- lapply(x$dcheck, function(m) {m[-row_numbers, -row_numbers] <- 0; m})
}

index <- is.finite(x$prob)
psu <- !duplicated(x$phase2$cluster[index, 1])
Expand Down
8 changes: 4 additions & 4 deletions R/survey_statistics_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ set_survey_vars <- function(
out$phase1$sample$variables[[name]] <- x
} else {
if (!add) {
out$variables <- select(out$variables, dplyr::one_of(group_vars(out)))
out$variables <- out$variables[, group_vars(out), drop = FALSE]
}
out$variables[[name]] <- x
}
Expand Down Expand Up @@ -83,7 +83,7 @@ get_var_est <- function(
se <- survey::SE(stat)
# Needed for grouped quantile
if (!inherits(se, "data.frame")) {
se <- data.frame(matrix(se, ncol = out_width))
se <- as.data.frame(se)
}
names(se) <- "_se"
se
Expand Down Expand Up @@ -122,7 +122,7 @@ get_var_est <- function(
}
})

coef <- data.frame(matrix(coef(stat), ncol = out_width))
coef <- as.data.frame(unclass(coef(stat)))
names(coef) <- "coef"
out <- c(list(coef), out)

Expand All @@ -136,7 +136,7 @@ get_var_est <- function(
out <- c(out, list(deff))
}

as_srvyr_result_df(dplyr::bind_cols(out))
as_srvyr_result_df(do.call(cbind, out))
}

# Largely the same as get_var_est(), but need to handle the fact that there can be
Expand Down
26 changes: 26 additions & 0 deletions man/srvyr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 34 additions & 0 deletions tests/testthat/test_survey_statistics.r
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,8 @@ test_that("unweighted allows named arguments", {

test_that(
"unweighted works with filtered data in calibrated or PPS designs", {

# First check for calibrated designs
data(api, package = "survey")
dclus1 <- as_survey_design(apiclus1, id = dnum, weights = pw, fpc = fpc)

Expand All @@ -739,6 +741,7 @@ test_that(
sample.margins = list(~stype,~sch.wide),
population.margins = list(pop.types, pop.schwide))

# Check when filtering returns at least one row
out_calib <- raked_design %>%
filter(sch.wide == "Yes") %>%
group_by(stype) %>%
Expand All @@ -752,11 +755,28 @@ test_that(
expect_equal(out_calib[['sample_size']],
out_noncalib[['sample_size']])

# Check when filtering returns zero rows

out_calib <- raked_design %>%
filter(sch.wide == "Fake Category") %>%
summarize(sample_size = unweighted(n()))

out_noncalib <- dclus1 %>%
filter(sch.wide == "Fake Category") %>%
summarize(sample_size = unweighted(n()))

expect_equal(out_calib[['sample_size']],
expected = 0)
expect_equal(out_noncalib[['sample_size']],
expected = 0)

# Next check for PPS design
data(election, package = "survey")

non_pps_design <- as_survey_design(election_pps, id = 1)
pps_design <- as_survey_design(election_pps, id = 1, fpc = p, pps = "brewer")

# Check correct results when filtering returns at least one row
out_nonpps <- non_pps_design %>%
filter(County == "Los Angeles") %>%
summarize(n_rows = unweighted(n()))
Expand All @@ -767,6 +787,20 @@ test_that(

expect_equal(out_pps[['n_rows']],
out_nonpps[['n_rows']])

# Check correct results when filtering returns zero rows
out_nonpps <- non_pps_design %>%
filter(County == "Fake Category") %>%
summarize(n_rows = unweighted(n()))

out_pps <- pps_design %>%
filter(County == "Fake Category") %>%
summarize(n_rows = unweighted(n()))

expect_equal(out_pps[['n_rows']],
expected = 0)
expect_equal(out_nonpps[['n_rows']],
expected = 0)
}
)

Expand Down

0 comments on commit e676e2e

Please sign in to comment.