diff --git a/.Rbuildignore b/.Rbuildignore index 442e33c..9f82338 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,4 +1,3 @@ -^ipccheckr\.Rproj$ ^\.Rproj\.user$ ^LICENSE\.md$ ^README\.Rmd$ @@ -14,6 +13,9 @@ ^README_files$ ^mwana\.Rproj$ ^vignettes/plausibility_files$ +ˆvingettes/plausibility.html$ ^vignettes/prevalence_files$ +ˆvignettes/prevalence.html$ ^vignettes/sample_size_files$ +ˆvignettes/sample_size.html$ ^CODE_OF_CONDUCT\.md$ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 4c48be5..586ae71 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, dev] pull_request: - branches: [main, master] + branches: [main, dev] name: R-CMD-check diff --git a/DESCRIPTION b/DESCRIPTION index 0e7c747..181cc94 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package -Package: ipccheckr +Package: mwana Title: Utilities for Analysing Children's Nutritional Status -Version: 0.0.0.9000 +Version: 0.1.0.9000 Authors@R: c( person("Tomás", "Zaba", , "tomas.zaba@outlook.com", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0002-7079-3574")), @@ -12,7 +12,7 @@ Description: A streamlined and comprehensive implementation of the Standardized Monitoring and Assessment of Relief and Transition (SMART) Methodology guidelines for data quality checks and prevalence estimation, with enhanced programmable process particularly when - handling large multiple datasets. + handling multiple area datasets. License: GPL (>= 3) URL: https://github.com/nutriverse/mwana, https://nutriverse.io/mwana Imports: @@ -24,7 +24,8 @@ Imports: srvyr, stats, zscorer, - tibble + tibble, + methods Suggests: knitr, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index b3fa0d8..21319a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,24 +1,24 @@ # Generated by roxygen2: do not edit by hand -export(age_ratio_test) -export(check_plausibility_mfaz) -export(check_plausibility_muac) -export(check_plausibility_wfhz) -export(check_sample_size) -export(classify_overall_quality) export(compute_combined_prevalence) export(compute_muac_prevalence) -export(compute_quality_score) export(compute_wfhz_prevalence) export(define_wasting) export(flag_outliers) -export(generate_pretty_table_mfaz) -export(generate_pretty_table_muac) -export(generate_pretty_table_wfhz) -export(process_age) -export(process_muac_data) -export(process_wfhz_data) +export(get_age_months) +export(mw_check_ipcamn_ssreq) +export(mw_neat_output_mfaz) +export(mw_neat_output_muac) +export(mw_neat_output_wfhz) +export(mw_plausibility_check_mfaz) +export(mw_plausibility_check_muac) +export(mw_plausibility_check_wfhz) +export(mw_stattest_ageratio) +export(mw_wrangle_age) +export(mw_wrangle_muac) +export(mw_wrangle_wfhz) export(recode_muac) +export(remove_flags) importFrom(dplyr,across) importFrom(dplyr,case_when) importFrom(dplyr,ends_with) @@ -31,12 +31,15 @@ importFrom(dplyr,n_distinct) importFrom(dplyr,rename) importFrom(dplyr,summarise) importFrom(lubridate,ymd) +importFrom(methods,is) importFrom(nipnTK,ageRatioTest) importFrom(nipnTK,digitPreference) importFrom(nipnTK,greensIndex) importFrom(nipnTK,sexRatioTest) importFrom(nipnTK,skewKurt) importFrom(rlang,.data) +importFrom(rlang,enquo) +importFrom(rlang,eval_tidy) importFrom(rlang,sym) importFrom(scales,label_percent) importFrom(scales,label_pvalue) @@ -47,4 +50,5 @@ importFrom(stats,pnorm) importFrom(stats,prop.test) importFrom(stats,sd) importFrom(stats,setNames) +importFrom(tibble,as_tibble) importFrom(zscorer,addWGSR) diff --git a/NEWS.md b/NEWS.md index 009bba3..b9812e5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,3 @@ -# ipccheckr v0.0.0.9000 (development version) +# mwana v0.0.0.9000 (development version) * Initial pre-release version for alpha-testing. diff --git a/R/age.R b/R/age.R deleted file mode 100644 index 0320c49..0000000 --- a/R/age.R +++ /dev/null @@ -1,162 +0,0 @@ -#' -#' Calculate child's age in days -#' -#' @param x A double vector of child's age in months. -#' -#' @returns A double vector of the same length as `x` of age in days. -#' -#' -compute_month_to_days <- function(x) { - x * (365.25 / 12) -} - - - - -#' -#' Calculate child's age in months -#' -#' @description -#' Calculate child's age in months based on date of birth and the data collection date. -#' -#' @param surv_date A vector of class `Date` for data collection date. -#' -#' @param birth_date A vector of class `Date` for child's date of birth. -#' -#' @returns A vector of class `double` for child's age in months with two decimal places. -#' Any value less than 6.0 and greater than or equal to 60.0 months will be set to `NA`. -#' -#' -compute_age_in_months <- function (surv_date, birth_date) { - avg_day <- 365.25 / 12 - int <- surv_date - birth_date - age_mo <- round(int / avg_day, digits = 2) - age_mo <- ifelse(age_mo < 6.0 | age_mo >= 60.0, NA, age_mo) -} - - - - -#' -#' Process child's age -#' -#' @description -#' Process child's age for downstream analysis. This includes calculating age -#' in months based on the date of data collection and child's date of birth and -#' setting to `NA` the age values that are less than 6.0 and greater than or equal -#' to 60.0 months old. -#' -#' @param df A dataset of class `data.frame` to process age from. -#' -#' @param svdate A vector of class `Date` for date of data collection. -#' Default is `NULL`. -#' -#' @param birdate A vector of class `Date` for child's date of birth. -#' Default is `NULL`. -#' -#' @param age A vector of class `integer` of age in months, usually estimated -#' using local event calendars. -#' -#' @returns A `data.frame` based on `df`. The variable `age` that is required to be -#' included in `df` will be filled where applicable with the age in months for -#' each row of data in `df`. A new variable for `df` named `age_days` will be -#' created. Values for `age` and `age_days` for children less than 6.0 and greater -#' than or equal to 60.0 months old will be set to `NA`. -#' -#' @examples -#' -#' ## A sample data ---- -#' -#' df <- data.frame( -#' survy_date = as.Date(c( -#' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), -#' birthdate = as.Date(c( -#' "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), -#' age = c(NA, 36, NA, NA, NA) -#' ) -#' -#' ## Apply the function ---- -#' df |> -#' process_age( -#' svdate = "survy_date", -#' birdate = "birthdate", -#' age = age -#' ) -#' -#' @export -#' - -process_age <- function(df, svdate = NULL, birdate = NULL, age) { - if (!is.null({{ birdate }}) || !is.null({{ svdate }})) { - df <- df |> - mutate( - age = ifelse( - is.na({{ age }}), - compute_age_in_months( - birth_date = !!sym({{ birdate }}), surv_date = !!sym({{ svdate }}) - ), {{ age }}), - age_days = round(age * 30.44, 2) - ) - - } else { - df <- df |> - mutate( - age_days = round({{ age }} * 30.44, 2) - ) - } - tibble::as_tibble(df) -} - - - -#' -#' Test for statistical difference between the proportion of children aged 24 to -#' 59 months old over those aged 6 to 23 months old -#' -#' @description -#' Calculate the observed age ratio of children aged 24 to 59 months old over -#' those aged 6 to 23 months old and test if there is a statistical difference -#' between the observed and the expected. -#' -#' @param age A double vector of age in months. -#' -#' @param .expectedP The expected proportion of children aged 24 to 59 months -#' old over those aged 6 to 23 months old. This is estimated to be 0.66 as in the -#' [SMART MUAC tool](https://smartmethodology.org/survey-planning-tools/updated-muac-tool/). -#' -#' @returns A vector of class `list` of three statistics: `p` for p-value of the -#' statistical difference between the observed and the expected proportion of -#' children aged 24 to 59 months old over those aged 6 to 23 months old; -#' `observedR` and `observedP` for the observed ratio and proportion respectively. -#' -#' @details -#' This function should be used specifically for assessing MUAC data. For -#' age ratio tests of children aged 6 to 29 months old over 30 to 59 months old, as -#' performed in the SMART plausibility check, use [nipnTK::ageRatioTest()] instead. -#' -#' @examples -#' -#' ## An example of application using `anthro.02` dataset ---- -#' age_ratio_test( -#' age = anthro.02$age, -#' .expectedP = 0.66 -#' ) -#' -#' @export -#' -age_ratio_test <- function(age, .expectedP = 0.66) { - - x <- ifelse(age >= 24, 1, 2) - sum_o24 <- sum(na.omit(x == 1)) - sum_u24 <- sum(na.omit(x == 2)) - total <- sum(table(na.omit(x))) - ratio <- sum_o24 / sum_u24 - prop <- sum_o24 / total - test <- prop.test(sum_o24, total, p = .expectedP, correct = FALSE) - - list( - p = test$p.value, - observedR = ratio, - observedP = prop - ) -} diff --git a/R/ipc_amn_check.R b/R/ipc_amn_check.R new file mode 100644 index 0000000..9b1a7f8 --- /dev/null +++ b/R/ipc_amn_check.R @@ -0,0 +1,72 @@ +#' +#' Check whether IPC Acute Malnutrition (IPC AMN) sample size requirements were met +#' +#' @description +#' Evidence on the prevalence of acute malnutrition used in the IPC AMN +#' can come from different sources: surveys, screenings or community-based +#' surveillance system. The IPC set minimum sample size requirements +#' for each source. This function helps in verifying whether the requirements +#' were met or not depending on the source. +#' +#' @param df A dataset object of class `data.frame` to check. +#' +#' @param cluster A vector of class `integer` or `character` of unique cluster or +#' screening or sentinel site IDs. If a `character` vector, ensure that names are +#' correct and each name represents one location for accurate counts. If the class +#' does not match the above expected type, the function will stop execution and +#' return an error message indicating the type of mismatch. +#' +#' @param .source The source of evidence. A choice between "survey" for +#' representative survey data at the area of analysis; "screening" for +#' screening data; "ssite" for community-based sentinel site data. +#' +#' @returns A summary table of class `data.frame`, of length 3 and width 1, for +#' the check results. `n_clusters` is for the total number of unique clusters or +#' screening or site IDs; `n_obs` for the correspondent total number of children +#' in the dataset; and `meet_ipc` for whether the IPC AMN requirements were met. +#' +#' @references +#' IPC Global Partners. 2021. *Integrated Food Security Phase Classification* +#' *Technical Manual Version 3.1.Evidence and Standards for Better Food Security* +#' *and Nutrition Decisions*. Rome. Available at: +#' . +#' +#' @examples +#' mw_check_ipcamn_ssreq( +#' df = anthro.01, +#' cluster = cluster, +#' .source = "survey" +#' ) +#' +#' @export +#' +mw_check_ipcamn_ssreq <- function(df, + cluster, + .source = c("survey", "screening", "ssite")) { + ## Difuse and evaluate arguments ---- + cluster <- eval_tidy(enquo(cluster), df) + + ## Enforce the options in `.source` ---- + .source <- match.arg(.source) + + ## Enforce the class of `cluster` ---- + if (!(class(cluster) %in% c("integer", "character"))) { + stop( + "`cluster` must be of class `integer` or `character`; not ", shQuote(class(cluster)), ". Please try again." + ) + } + + ## Summarize ---- + df <- df |> + summarise( + n_clusters = n_distinct({{ cluster }}), + n_obs = n(), + meet_ipc = case_when( + .source == "survey" & n_clusters >= 25 ~ "yes", + .source == "screening" & n_clusters >= 3 & n_obs >= 600 ~ "yes", + .source == "ssite" & n_clusters >= 5 & n_obs >= 200 ~ "yes", + .default = "no" + ) + ) + as_tibble(df) +} diff --git a/R/ipccheckr-package.R b/R/mwana-package.R similarity index 68% rename from R/ipccheckr-package.R rename to R/mwana-package.R index a16dff9..d1e15b2 100644 --- a/R/ipccheckr-package.R +++ b/R/mwana-package.R @@ -3,13 +3,17 @@ ## usethis namespace: start #' @importFrom dplyr across case_when group_by mutate n n_distinct rename summarise -#' @importFrom dplyr ends_with everything filter +#' @importFrom dplyr ends_with everything filter mutate +#' @importFrom dplyr group_by +#' @importFrom dplyr summarise #' @importFrom lubridate ymd +#' @importFrom methods is #' @importFrom nipnTK ageRatioTest digitPreference sexRatioTest skewKurt greensIndex -#' @importFrom rlang .data sym +#' @importFrom rlang .data sym enquo eval_tidy #' @importFrom scales label_percent label_pvalue #' @importFrom srvyr as_survey_design survey_mean #' @importFrom stats na.omit prop.test sd pnorm setNames +#' @importFrom tibble as_tibble #' @importFrom zscorer addWGSR ## usethis namespace: end NULL diff --git a/R/plausibility_check_mfaz.R b/R/plausibility_check_mfaz.R new file mode 100644 index 0000000..f96b50d --- /dev/null +++ b/R/plausibility_check_mfaz.R @@ -0,0 +1,190 @@ +#' +#' +#' Check the plausibility and acceptability of muac-for-age z-score (MFAZ) data +#' +#' @description +#' Check the overall plausibility and acceptability of MFAZ data through a +#' structured test suite encompassing sampling and measurement-related biases checks +#' in the dataset. The test suite in this function follows the recommendation made +#' by Bilukha, O., & Kianian, B. (2023) on the plausibility of +#' constructing a comprehensive plausibility check similar to WFHZ to evaluate the +#' acceptability of MUAC data when the variable age exists in the dataset. +#' +#' The function works on a data frame returned from this package's wrangling +#' function for age and for MFAZ data. +#' +#' @param df A dataset object of class `data.frame` to check. +#' +#' @param sex A vector of class `numeric` of child's sex. +#' +#' @param age A vector of class `double` of child's age in months. +#' +#' @param muac A vector of class `numeric` of child's MUAC in centimeters. +#' +#' @param flags A vector of class `numeric` of flagged records. +#' +#' @returns +#' A summarised table of class `data.frame`, of length 17 and width 1, for +#' the plausibility test results and their respective acceptability ratings. +#' +#' @references +#' Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement +#' quality of mid‐upper arm circumference data in anthropometric surveys and +#' mass nutritional screenings conducted in humanitarian and refugee settings. +#' *Maternal & Child Nutrition*, 19, e13478. +#' +#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief +#' and Transition*. Manual 2.0. Available at: . +#' +#' @seealso [mw_wrangle_age()] [mw_wrangle_muac()] [mw_stattest_ageratio()] +#' [flag_outliers()] +#' +#' @examples +#' ## First wrangle age data ---- +#' data <- mw_wrangle_age( +#' df = anthro.01, +#' dos = dos, +#' dob = dob, +#' age = age, +#' .decimals = 2 +#' ) +#' +#' ## Then wrangle MUAC data ---- +#' data_muac <- mw_wrangle_muac( +#' df = data, +#' sex = sex, +#' age = age, +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' .to = "cm" +#' ) +#' +#' ## And finally run plausibility check ---- +#' mw_plausibility_check_mfaz( +#' df = data_muac, +#' flags = flag_mfaz, +#' sex = sex, +#' muac = muac, +#' age = age +#' ) +#' +#' @export +mw_plausibility_check_mfaz <- function(df, sex, muac, age, flags) { + ## Summarise statistics ---- + df <- df |> + summarise( + n = n(), + flagged = sum({{ flags }}, na.rm = TRUE) / n(), + flagged_class = rate_propof_flagged(.data$flagged, .in = "mfaz"), + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))$p, + sex_ratio_class = rate_agesex_ratio(.data$sex_ratio), + age_ratio = mw_stattest_ageratio({{ age }}, .expectedP = 0.66)$p, + age_ratio_class = rate_agesex_ratio(.data$age_ratio), + dps = digitPreference({{ muac }}, digits = 1, values = 0:9)$dps, + dps_class = digitPreference({{ muac }}, digits = 1, values = 0:9)$dpsClass, + sd = sd(remove_flags(.data$mfaz, .from = "zscores"), na.rm = TRUE), + sd_class = rate_std(.data$sd, .of = "zscores"), + skew = skewKurt(remove_flags(.data$mfaz, .from = "zscores"))$s, + skew_class = rate_skewkurt(.data$skew), + kurt = skewKurt(remove_flags(.data$mfaz, .from = "zscores"))$k, + kurt_class = rate_skewkurt(.data$kurt), + quality_score = score_overall_quality( + cl_flags = .data$flagged_class, + cl_sex = .data$sex_ratio_class, + cl_age = .data$age_ratio_class, + cl_dps_m = .data$dps_class, + cl_std = .data$sd_class, + cl_skw = .data$skew_class, + cl_kurt = .data$kurt_class, + .for = "mfaz" + ), + quality_class = rate_overall_quality(.data$quality_score), + .groups = "drop" + ) + ## Return ---- + df +} + + +#' +#' +#' Clean and format the output table returned from the MFAZ plausibility check +#' for improved clarity and readability +#' +#' @description +#' Clean and format the output table returned from the MFAZ plausibility check +#' for improved clarity and readability. It converts scientific notations to standard +#' notations, round values and rename columns to meaningful names. +#' +#' @param df A data frame containing the summary table returned by this package's +#' MFAZ plausibility check function. Must be of class `data.frame`. +#' +#' @returns +#' A data frame of the same length and width as `df`, with column names and +#' values formatted for clarity. +#' +#' @examples +#' ## First wrangle age data ---- +#' data <- mw_wrangle_age( +#' df = anthro.01, +#' dos = dos, +#' dob = dob, +#' age = age, +#' .decimals = 2 +#' ) +#' +#' ## Then wrangle MUAC data ---- +#' data_mfaz <- mw_wrangle_muac( +#' df = data, +#' sex = sex, +#' age = age, +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' .to = "cm" +#' ) +#' +#' ## Then run plausibility check ---- +#' pl <- mw_plausibility_check_mfaz( +#' df = data_mfaz, +#' flags = flag_mfaz, +#' sex = sex, +#' muac = muac, +#' age = age +#' ) +#' +#' ## Now neat the output table ---- +#' mw_neat_output_mfaz(df = pl) +#' +#' @export +#' +mw_neat_output_mfaz <- function(df) { + ## Format data frame ---- + df <- df |> + mutate( + flagged = .data$flagged |> + label_percent(accuracy = 0.1, suffix = "%", decimal.mark = ".")(), + sex_ratio = .data$sex_ratio |> + label_pvalue()(), + age_ratio = .data$age_ratio |> + label_pvalue()(), + sd = round(.data$sd, digits = 2), + dps = round(.data$dps), + skew = round(.data$skew, digits = 2), + kurt = round(.data$kurt, digits = 2) + ) |> + ## Rename columns ---- + setNames( + c( + "Total children", "Flagged data (%)", + "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", + "Age ratio (p)", "Class. of age ratio", "DPS (#)", + "Class. of DPS", "Standard Dev* (#)", "Class. of standard dev", + "Skewness* (#)", "Class. of skewness", "Kurtosis* (#)", + "Class. of kurtosis", "Overall score", "Overall quality" + ) + ) + ## Return data frame ---- + df +} diff --git a/R/plausibility_check_muac.R b/R/plausibility_check_muac.R new file mode 100644 index 0000000..36c2702 --- /dev/null +++ b/R/plausibility_check_muac.R @@ -0,0 +1,140 @@ +#' +#' Check the plausibility and acceptability of raw MUAC data +#' +#' @description +#' Check the overall plausibility and acceptability of raw MUAC data through a +#' structured test suite encompassing sampling and measurement-related biases checks +#' in the dataset. The test suite in this function follows the recommendation made +#' by Bilukha, O., & Kianian, B. (2023). +#' +#' @param df A dataset object of class `data.frame` to check. It must have been +#' wrangled using this package's wrangling function for MUAC. +#' +#' @param sex A vector of class `numeric` of child's sex. +#' +#' @param muac A vector of class `double` of child's MUAC in centimeters. +#' +#' @param flags A vector of class `numeric` of flagged records. +#' +#' @returns A summarised table of class `data.frame`, of length 9 and width 1, for +#' the plausibility test results and their respective acceptability ratings.. +#' +#' @references +#' Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement +#' quality of mid‐upper arm circumference data in anthropometric surveys and +#' mass nutritional screenings conducted in humanitarian and refugee settings. +#' *Maternal & Child Nutrition*, 19, e13478. +#' +#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief +#' and Transition*. Manual 2.0. Available at: . +#' +#' @seealso [mw_wrangle_muac()] [flag_outliers()] +#' +#' @examples +#' ## First wranlge MUAC data ---- +#' df_muac <- mw_wrangle_muac( +#' df = anthro.01, +#' sex = sex, +#' muac = muac, +#' age = NULL, +#' .recode_sex = TRUE, +#' .recode_muac = FALSE, +#' .to = "none" +#' ) +#' +#' ## Then run the plausibility check ---- +#' mw_plausibility_check_muac( +#' df = df_muac, +#' flags = flag_muac, +#' sex = sex, +#' muac = muac +#' ) +#' +#' @export +#' +mw_plausibility_check_muac <- function(df, sex, muac, flags) { + ## Summarise statistics ---- + df <- df |> + summarise( + n = n(), + flagged = sum({{ flags }}, na.rm = TRUE) / n(), + flagged_class = rate_propof_flagged(.data$flagged, .in = "raw_muac"), + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))[["p"]], + sex_ratio_class = rate_agesex_ratio(.data$sex_ratio), + dps = digitPreference({{ muac }}, digits = 0, values = 0:9)[["dps"]], + dps_class = digitPreference({{ muac }}, digits = 0, values = 0:9)[["dpsClass"]], + sd = sd(remove_flags({{ muac }}, .from = "raw_muac"), na.rm = TRUE), + sd_class = rate_std(.data$sd, .of = "raw_muac"), + .groups = "drop" + ) + + ## Return data frame ---- + df +} + + + +#' +#' +#' +#' Clean and format the output table returned from the MUAC plausibility check +#' for improved clarity and readability. +#' +#' @description +#' Clean and format the output table returned from the plausibility check of raw +#' MUAC data for improved clarity and readability. It converts scientific notations +#' to standard notations, round values and rename columns to meaningful names. +#' +#' @param df A data frame containing the summary table returned by this package's +#' plausibility check function for raw MUAC data. Must be of class `data.frame`. +#' +#' @returns +#' A data frame of the same length and width as `df`, with column names and +#' values formatted for clarity. +#' +#' @examples +#' ## First wranlge MUAC data ---- +#' df_muac <- mw_wrangle_muac( +#' df = anthro.01, +#' sex = sex, +#' muac = muac, +#' age = NULL, +#' .recode_sex = TRUE, +#' .recode_muac = FALSE, +#' .to = "none" +#' ) +#' +#' ## Then run the plausibility check ---- +#' pl_muac <- mw_plausibility_check_muac( +#' df = df_muac, +#' flags = flag_muac, +#' sex = sex, +#' muac = muac +#' ) +#' +#' ## Neat the output table ---- +#' +#' mw_neat_output_muac(df = pl_muac) +#' +#' @export +#' +mw_neat_output_muac <- function(df) { + + ## Format data frame ---- + df <- df |> + mutate( + flagged = .data$flagged |> + label_percent(accuracy = 0.1, suffix = "%", decimal.mark = ".")(), + sex_ratio = .data$sex_ratio |> scales::label_pvalue()(), + sd = round(.data$sd, digits = 2), + dps = round(.data$dps) + ) |> + ## Rename columns ---- + setNames( + c("Total children", "Flagged data (%)", "Class. of flagged data", "Sex ratio (p)", + "Class. of sex ratio", "DPS(#)", "Class. of DPS", "Standard Dev* (#)", + "Class. of standard dev") + ) + ## Return data frame ---- + df +} diff --git a/R/plausibility_check_wfhz.R b/R/plausibility_check_wfhz.R new file mode 100644 index 0000000..18bc38c --- /dev/null +++ b/R/plausibility_check_wfhz.R @@ -0,0 +1,196 @@ +#' +#' Check the plausibility and acceptability of weight-for-height z-score (WFHZ) data +#' +#' @description +#' Check the overall plausibility and acceptability of WFHZ data through a +#' structured test suite encompassing sampling and measurement-related biases checks +#' in the dataset. The test suite, including the criteria and corresponding rating of +#' acceptability, follows the standards in the SMART plausibility check. The only +#' exception is the exclusion of MUAC checks. MUAC is checked separately using more +#' comprehensive test suite as well. +#' +#' The function works on a data frame returned from this package's wrangling +#' function for age and for WFHZ data. +#' +#' @param df A dataset object of class `data.frame` to check. +#' +#' @param sex A vector of class `numeric` of child's sex. +#' +#' @param age A vector of class `double` of child's age in months. +#' +#' @param weight A vector of class `double` of child's weight in kilograms. +#' +#' @param height A vector of class `double` of child's height in centimeters. +#' +#' @param flags A vector of class `numeric` of flagged records. +#' +#' @returns +#' A summarised table of class `data.frame`, of length 19 and width 1, for +#' the plausibility test results and their respective acceptability rates. +#' +#' @seealso [mw_plausibility_check_mfaz()] [mw_plausibility_check_muac()] +#' [mw_wrangle_age()] +#' +#' @references +#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief +#' and Transition*. Manual 2.0. Available at: . +#' +#' @examples +#' ## First wrangle age data ---- +#' data <- mw_wrangle_age( +#' df = anthro.01, +#' dos = dos, +#' dob = dob, +#' age = age, +#' .decimals = 2 +#' ) +#' +#' ## Then wrangle WFHZ data ---- +#' data_wfhz <- mw_wrangle_wfhz( +#' df = data, +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) +#' +#' ## Now run the plausibility check ---- +#' mw_plausibility_check_wfhz( +#' df = data_wfhz, +#' sex = sex, +#' age = age, +#' weight = weight, +#' height = height, +#' flags = flag_wfhz +#' ) +#' +#' +#' @export +#' +mw_plausibility_check_wfhz <- function(df, + sex, + age, + weight, + height, + flags) { + ## Summarise statistics ---- + df <- df |> + summarise( + n = n(), + flagged = sum({{ flags }}, na.rm = TRUE) / n(), + flagged_class = rate_propof_flagged(.data$flagged, .in = "wfhz"), + sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))$p, + sex_ratio_class = rate_agesex_ratio(.data$sex_ratio), + age_ratio = ageRatioTest({{ age }}, ratio = 0.85)$p, + age_ratio_class = rate_agesex_ratio(.data$age_ratio), + dps_wgt = digitPreference({{ weight }}, digits = 1)$dps, + dps_wgt_class = digitPreference({{ weight }}, digits = 1)$dpsClass, + dps_hgt = digitPreference({{ height }}, digits = 1)$dps, + dps_hgt_class = digitPreference({{ height }}, digits = 1)$dpsClass, + sd = sd(remove_flags(.data$wfhz, .from = "zscores"), na.rm = TRUE), + sd_class = rate_std(.data$sd, .of = "zscores"), + skew = skewKurt(remove_flags(.data$wfhz, .from = "zscores"))$s, + skew_class = rate_skewkurt(.data$skew), + kurt = skewKurt(remove_flags(.data$wfhz, .from = "zscores"))$k, + kurt_class = rate_skewkurt(.data$kurt), + quality_score = score_overall_quality( + cl_flags = .data$flagged_class, + cl_sex = .data$sex_ratio_class, + cl_age = .data$age_ratio_class, + cl_dps_h = .data$dps_hgt_class, + cl_dps_w = .data$dps_wgt_class, + cl_std = .data$sd_class, + cl_skw = .data$skew_class, + cl_kurt = .data$kurt_class, + .for = "wfhz" + ), + quality_class = rate_overall_quality(.data$quality_score), + .groups = "drop" + ) + + ## Return data frame ---- + df +} + +#' +#' +#' Clean and format the output table returned from the WFHZ plausibility check +#' for improved clarity and readability +#' +#' @description +#' Clean and format the output table returned from the WFHZ plausibility check +#' for improved clarity and readability. It converts scientific notations to standard +#' notations, round values and rename columns to meaningful names. +#' +#' @param df A data frame containing the summary table returned by this package's +#' WFHZ plausibility check function. Must be of class `data.frame`. +#' +#' @returns +#' A data frame of the same length and width as `df`, with column names and +#' values formatted for clarity. +#' +#' @examples +#' ## First wrangle age data ---- +#' data <- mw_wrangle_age( +#' df = anthro.01, +#' dos = dos, +#' dob = dob, +#' age = age, +#' .decimals = 2 +#' ) +#' +#' ## Then wrangle WFHZ data ---- +#' data_wfhz <- mw_wrangle_wfhz( +#' df = data, +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE +#' ) +#' +#' ## Now run the plausibility check ---- +#' pl <- mw_plausibility_check_wfhz( +#' df = data_wfhz, +#' sex = sex, +#' age = age, +#' weight = weight, +#' height = height, +#' flags = flag_wfhz +#' ) +#' +#' ## Now neat the output table ---- +#' mw_neat_output_wfhz(df = pl) +#' +#' +#' @export +mw_neat_output_wfhz <- function(df) { + +## Format data frame ---- +df <- df |> + mutate( + flagged = .data$flagged |> + label_percent(accuracy = 0.1, suffix = "%", decimal.mark = ".")(), + sex_ratio = .data$sex_ratio |> + label_pvalue()(), + age_ratio = .data$age_ratio |> + label_pvalue()(), + sd = round(.data$sd, digits = 2), + dps_wgt = round(.data$dps_wgt), + dps_hgt = round(.data$dps_hgt), + skew = round(.data$skew, digits = 2), + kurt = round(.data$kurt, digits = 2) + ) |> + ## Rename columns ---- +setNames( + c("Total children", "Flagged data (%)", "Class. of flagged data", + "Sex ratio (p)", "Class. of sex ratio", "Age ratio (p)", + "Class. of age ratio", "DPS weight (#)", "Class. DPS weight", + "DPS height (#)", "Class. DPS height", "Standard Dev* (#)", + "Class. of standard dev", "Skewness* (#)", "Class. of skewness", + "Kurtosis* (#)", "Class. of kurtosis", "Overall score", "Overall quality" + ) +) +## Return data frame ---- +df +} + diff --git a/R/pretty_tables.R b/R/pretty_tables.R deleted file mode 100644 index 2b0c7f7..0000000 --- a/R/pretty_tables.R +++ /dev/null @@ -1,178 +0,0 @@ -#' Get a formatted and presentable output table for the plausibility checkers -#' -#' @description -#' Useful to getting the output returned from the plausibility checkers -#' into a presentable format. It converts scientific notations to standard -#' notations, round values and rename columns to meaningful names. -#' -#' @param df A summary table object of class `data.frame` returned by the -#' plausibility checkers. -#' -#' @returns A `data.frame` as `df`. Columns are renamed, values formatted and -#' ready to be shared. -#' -#' @examples -#' -#' ## Check the plausibility of WFHZ data ---- -#' -#' anthro.01 |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) |> -#' check_plausibility_wfhz( -#' sex = sex, -#' age = age, -#' weight = weight, -#' height = height, -#' flags = flag_wfhz, -#' area = area -#' ) |> -#' generate_pretty_table_wfhz() -#' -#' ## Check the plausibility of MUAC data ---- -#' -#' anthro.01 |> -#' process_muac_data( -#' sex = sex, -#' muac = muac, -#' age = NULL, -#' .recode_sex = TRUE, -#' .recode_muac = FALSE, -#' unit = "none" -#' ) |> -#' check_plausibility_muac( -#' flags = flag_muac, -#' sex = sex, -#' muac = muac -#' ) |> -#' generate_pretty_table_muac() -#' -#' ## Check the plausibility of MFAZ data ---- -#' -#' anthro.01 |> -#' process_age( -#' svdate = "dos", -#' birdate = "dob", -#' age = age -#' ) |> -#' process_muac_data( -#' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" -#' ) |> -#' check_plausibility_mfaz( -#' flags = flag_mfaz, -#' sex = sex, -#' muac = muac, -#' age = age, -#' area = area -#' ) |> -#' generate_pretty_table_mfaz() -#' -#' @rdname pretty_table -#' -#' @export -#' -#' -generate_pretty_table_mfaz <- function(df) { - - ## Format data frame ---- - df <- df |> - mutate( - flagged = .data$flagged |> - label_percent(accuracy = 0.1, suffix = "%", decimal.mark = ".")(), - sex_ratio = .data$sex_ratio |> - label_pvalue()(), - age_ratio = .data$age_ratio |> - label_pvalue()(), - sd = round(.data$sd, digits = 2), - dps = round(.data$dps), - skew = round(.data$skew, digits = 2), - kurt = round(.data$kurt, digits = 2) - ) |> - ## Rename columns ---- - setNames( - c("Area", "Total children", "Flagged data (%)", - "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", - "Age ratio (p)", "Class. of age ratio", "DPS (#)", - "Class. of DPS", "Standard Dev* (#)", "Class. of standard dev", - "Skewness* (#)", "Class. of skewness", "Kurtosis* (#)", - "Class. of kurtosis", "Overall score", "Overall quality" - ) - ) - ## Return data frame ---- - df -} - - -#' -#' -#' @rdname pretty_table -#' -#' @export -#' -#' -generate_pretty_table_wfhz <- function(df) { - - ## Format data frame ---- - df <- df |> - mutate( - flagged = .data$flagged |> - label_percent(accuracy = 0.1, suffix = "%", decimal.mark = ".")(), - sex_ratio = .data$sex_ratio |> - label_pvalue()(), - age_ratio = .data$age_ratio |> - label_pvalue()(), - sd = round(.data$sd, digits = 2), - dps_wgt = round(.data$dps_wgt), - dps_hgt = round(.data$dps_hgt), - skew = round(.data$skew, digits = 2), - kurt = round(.data$kurt, digits = 2) - ) |> - ## Rename columns ---- - setNames( - c("Area", "Total children", "Flagged data (%)", "Class. of flagged data", - "Sex ratio (p)", "Class. of sex ratio", "Age ratio (p)", - "Class. of age ratio", "DPS weight (#)", "Class. DPS weight", - "DPS height (#)", "Class. DPS height", "Standard Dev* (#)", - "Class. of standard dev", "Skewness* (#)", "Class. of skewness", - "Kurtosis* (#)", "Class. of kurtosis", "Overall score", "Overall quality" - ) - ) - ## Return data frame ---- - df -} - - -#' -#' -#' @rdname pretty_table -#' -#' @export -#' -generate_pretty_table_muac <- function(df) { - - ## Format data frame ---- - df <- df |> - mutate( - flagged = .data$flagged |> - label_percent(accuracy = 0.1, suffix = "%", decimal.mark = ".")(), - sex_ratio = .data$sex_ratio |> scales::label_pvalue()(), - sd = round(.data$sd, digits = 2), - dps = round(.data$dps) - ) |> - ## Rename columns ---- - setNames( - c("Total children", "Flagged data (%)", "Class. of flagged data", "Sex ratio (p)", - "Class. of sex ratio", "DPS(#)", "Class. of DPS", "Standard Dev* (#)", - "Class. of standard dev") - ) - ## Return data frame ---- - df -} diff --git a/R/prevalence_combined.R b/R/prevalence_combined.R index c73f7b2..3b658c2 100644 --- a/R/prevalence_combined.R +++ b/R/prevalence_combined.R @@ -127,17 +127,22 @@ compute_pps_based_combined_prevalence <- function(df, #' #' ## When working on data frame with multiple survey areas ---- #' s <- anthro.03 |> -#' process_age(age = age) |> -#' process_muac_data( +#' mw_wrangle_age( +#' dos = NULL, +#' dob = NULL, +#' age = age, +#' .decimals = 2 +#' ) |> +#' mw_wrangle_muac( #' sex = sex, #' muac = muac, #' age = "age", #' .recode_sex = TRUE, #' .recode_muac = TRUE, -#' unit = "cm" +#' .to = "cm" #' ) |> -#' dplyr::mutate(muac = recode_muac(muac, unit = "mm")) |> -#' process_wfhz_data( +#' dplyr::mutate(muac = recode_muac(muac, .to = "mm")) |> +#' mw_wrangle_wfhz( #' sex = sex, #' weight = weight, #' height = height, @@ -167,9 +172,9 @@ compute_combined_prevalence <- function(df, ## Grouped summary of standard deviation classification ---- x <- summarise( df, - std_wfhz = classify_sd(sd(remove_flags(as.numeric(.data$wfhz), "zscore"), na.rm = TRUE)), - age_ratio = classify_age_sex_ratio(age_ratio_test(.data$age, .expectedP = 0.66)$p), - std_mfaz = classify_sd(sd(remove_flags(as.numeric(.data$mfaz), "zscore"), na.rm = TRUE)), + std_wfhz = rate_std(sd(remove_flags(as.numeric(.data$wfhz), "zscores"), na.rm = TRUE)), + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std_mfaz = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), muac_analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std_mfaz), .by = !!.summary_by ) @@ -177,9 +182,9 @@ compute_combined_prevalence <- function(df, ## Non-grouped summary ---- x <- summarise( df, - std_wfhz = classify_sd(sd(remove_flags(as.numeric(.data$wfhz), "zscore"), na.rm = TRUE)), - age_ratio = classify_age_sex_ratio(age_ratio_test(.data$age, .expectedP = 0.66)$p), - std_mfaz = classify_sd(sd(remove_flags(as.numeric(.data$mfaz), "zscore"), na.rm = TRUE)), + std_wfhz = rate_std(sd(remove_flags(as.numeric(.data$wfhz), "zscores"), na.rm = TRUE)), + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std_mfaz = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), muac_analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std_mfaz), ) } diff --git a/R/prevalence_mfaz.R b/R/prevalence_mfaz.R index 185bd32..1371002 100644 --- a/R/prevalence_mfaz.R +++ b/R/prevalence_mfaz.R @@ -76,14 +76,14 @@ compute_mfaz_prevalence <- function(df, ## Grouped summary of standard deviation classification ---- x <- summarise( df, - std = classify_sd(sd(remove_flags(.data$mfaz, "zscore"), na.rm = TRUE)), + std = rate_std(sd(remove_flags(.data$mfaz, "zscores"), na.rm = TRUE)), .by = !!.summary_by ) } else { ## Non-grouped summary ---- x <- summarise( df, - std = classify_sd(sd(remove_flags(.data$mfaz, "zscore"), na.rm = TRUE)) + std = rate_std(sd(remove_flags(.data$mfaz, "zscores"), na.rm = TRUE)) ) } diff --git a/R/prevalence_muac.R b/R/prevalence_muac.R index 38de555..b9ab9fd 100644 --- a/R/prevalence_muac.R +++ b/R/prevalence_muac.R @@ -54,7 +54,7 @@ tell_muac_analysis_strategy <- function(age_ratio_class, sd_class) { #' @returns A vector of class `numeric` of length and size 1. #' #' @details -#' This function is informed by the output of [age_ratio_test()]. +#' This function is informed by the output of [mw_stattest_ageratio()]. #' #' apply_cdc_age_weighting <- function(muac, age, @@ -122,7 +122,7 @@ compute_weighted_prevalence <- function(df, .edema=NULL, .summary_by = NULL) { } else { df <- df |> filter(.data$flag_mfaz == 0) |> - mutate(muac = recode_muac(.data$muac, unit = "mm")) |> + mutate(muac = recode_muac(.data$muac, .to = "mm")) |> summarise( sam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "sam"), mam = apply_cdc_age_weighting(.data$muac, .data$age, {{ .edema }}, status = "mam"), @@ -237,8 +237,8 @@ compute_muac_prevalence <- function(df, x <- df |> group_by(!!.summary_by) |> summarise( - age_ratio = classify_age_sex_ratio(age_ratio_test(.data$age, .expectedP = 0.66)$p), - std = classify_sd(sd(remove_flags(as.numeric(.data$mfaz), "zscore"), na.rm = TRUE)), + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std), .groups = "drop" ) @@ -246,8 +246,8 @@ compute_muac_prevalence <- function(df, ## Non-grouped summary of analysis approach ---- x <- df |> summarise( - age_ratio = classify_age_sex_ratio(age_ratio_test(.data$age, .expectedP = 0.66)$p), - std = classify_sd(sd(remove_flags(as.numeric(.data$mfaz), "zscore"), na.rm = TRUE)), + age_ratio = rate_agesex_ratio(mw_stattest_ageratio(.data$age, .expectedP = 0.66)$p), + std = rate_std(sd(remove_flags(as.numeric(.data$mfaz), "zscores"), na.rm = TRUE)), analysis_approach = tell_muac_analysis_strategy(.data$age_ratio, .data$std) ) } diff --git a/R/prevalence_wfhz.R b/R/prevalence_wfhz.R index b976c87..964b596 100644 --- a/R/prevalence_wfhz.R +++ b/R/prevalence_wfhz.R @@ -27,7 +27,7 @@ #' #' ### When .summary_by = NULL ---- #' anthro.03 |> -#' process_wfhz_data( +#' mw_wrangle_wfhz( #' sex = sex, #' weight = weight, #' height = height, @@ -42,7 +42,7 @@ #' ### When .summary_by is not set to NULL ---- #' #' anthro.03 |> -#' process_wfhz_data( +#' mw_wrangle_wfhz( #' sex = sex, #' weight = weight, #' height = height, @@ -82,14 +82,14 @@ compute_wfhz_prevalence <- function(df, ## Grouped summary of standard deviation classification ---- x <- summarise( df, - std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)), + std = rate_std(sd(remove_flags(.data$wfhz, "zscores"), na.rm = TRUE)), .by = !!.summary_by ) } else { ## Non-grouped summary ---- x <- summarise( df, - std = classify_sd(sd(remove_flags(.data$wfhz, "zscore"), na.rm = TRUE)) + std = rate_std(sd(remove_flags(.data$wfhz, "zscores"), na.rm = TRUE)) ) } @@ -216,7 +216,7 @@ compute_pps_based_wfhz_prevalence <- function(df, #' apply_probit_approach <- function(x, .status = c("gam", "sam")) { .status <- match.arg(.status) - mean <- mean(remove_flags(x, "zscore"), na.rm = TRUE) + mean <- mean(remove_flags(x, "zscores"), na.rm = TRUE) ## Return GAM and SAM prevalence with a SD = 1 switch( .status, diff --git a/R/quality_auditors.R b/R/quality_auditors.R deleted file mode 100644 index 2174280..0000000 --- a/R/quality_auditors.R +++ /dev/null @@ -1,215 +0,0 @@ -#' -#' Check the plausibility of the data -#' -#' @description -#' Verify the overall acceptability of the data through a set of -#' structured tests around sampling and measurement-related biases in the data. -#' -#' @param df A dataset object of class `data.frame` to check. It should have been -#' wrangled using this package's wranglers. -#' -#' @param sex A vector of class `numeric` of child's sex: 1 for boy and 2 for girl. -#' -#' @param age A vector of class `double` of child's age in months. -#' -#' @param muac A vector of class `double` of child's MUAC in centimeters. -#' -#' @param weight A vector of class `double` of child's weight in kilograms. -#' -#' @param height A vector of class `double` of child's height in centimeters. -#' -#' @param flags A vector of class `numeric` of flagged observations. -#' -#' @param area A vector of class `character` of the geographical location where -#' data was collected and for which the analysis should be aggregated. -#' -#' @returns A summarised `data.frame` of plausibility test results and their -#' respective acceptability ratings. -#' -#' @examples -#' -#' ## Check the plausibility of WFHZ data ---- -#' -#' anthro.01 |> -#' process_age( -#' svdate = "dos", -#' birdate = "dob", -#' age = age -#' ) |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) |> -#' check_plausibility_wfhz( -#' sex = sex, -#' age = age, -#' weight = weight, -#' height = height, -#' flags = flag_wfhz, -#' area = area -#' ) -#' -#' ## Check the plausibility of MFAZ data ---- -#' -#' anthro.01 |> -#' process_age( -#' svdate = "dos", -#' birdate = "dob", -#' age = age -#' ) |> -#' process_muac_data( -#' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" -#' ) |> -#' check_plausibility_mfaz( -#' flags = flag_mfaz, -#' sex = sex, -#' muac = muac, -#' age = age, -#' area = area -#' ) -#' -#' ## Check the plausibility of the absolute MUAC values ---- -#' -#' anthro.01 |> -#' process_muac_data( -#' sex = sex, -#' muac = muac, -#' age = NULL, -#' .recode_sex = TRUE, -#' .recode_muac = FALSE, -#' unit = "none" -#' ) |> -#' check_plausibility_muac( -#' flags = flag_muac, -#' sex = sex, -#' muac = muac -#' ) -#' -#' @rdname plausibility-check -#' -#' @export -#' -check_plausibility_mfaz <- function(df, sex, muac, age, flags, area) { - - ## Summarise statistics ---- - df <- df |> - dplyr::group_by({{ area }}) |> - dplyr::summarise( - n = n(), - flagged = sum({{ flags }}, na.rm = TRUE) / n(), - flagged_class = classify_percent_flagged(.data$flagged, type = "mfaz"), - sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))$p, - sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), - age_ratio = age_ratio_test({{ age }}, .expectedP = 0.66)$p, - age_ratio_class = classify_age_sex_ratio(.data$age_ratio), - dps = digitPreference({{ muac }}, digits = 1, values = 0:9)$dps, - dps_class = digitPreference({{ muac }}, digits = 1, values = 0:9)$dpsClass, - sd = sd(remove_flags(.data$mfaz, unit = "zscore"), na.rm = TRUE), - sd_class = classify_sd(.data$sd, type = "zscore"), - skew = skewKurt(remove_flags(.data$mfaz, unit = "zscore"))$s, - skew_class = classify_skew_kurt(.data$skew), - kurt = skewKurt(remove_flags(.data$mfaz, unit = "zscore"))$k, - kurt_class = classify_skew_kurt(.data$kurt), - .groups = "drop" - ) - - ## Add quality score to the data frame ---- - - df[["quality_score"]] <- df |> - group_by({{ area }}) |> - compute_quality_score(type = "mfaz") - - ## Add quality class to the data frame ---- - - df[["quality_class"]] <- df |> - group_by({{ area }}) |> - classify_overall_quality() - - ## Return data frame ---- - df -} - - -#' -#' -#' @rdname plausibility-check -#' -#' @export -#' -check_plausibility_wfhz <- function(df, sex, age, weight, height, flags, area) { - - - ## Summarise statistics ---- - df <- df |> - group_by({{ area }}) |> - summarise( - n = n(), - flagged = sum({{ flags }}, na.rm = TRUE) / n(), - flagged_class = classify_percent_flagged(.data$flagged, type = "whz"), - sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))$p, - sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), - age_ratio = ageRatioTest({{ age }}, ratio = 0.85)$p, - age_ratio_class = classify_age_sex_ratio(.data$age_ratio), - dps_wgt = digitPreference({{ weight }}, digits = 1)$dps, - dps_wgt_class = digitPreference({{ weight }}, digits = 1)$dpsClass, - dps_hgt = digitPreference({{ height }}, digits = 1)$dps, - dps_hgt_class = digitPreference({{ height }}, digits = 1)$dpsClass, - sd = sd(remove_flags(.data$wfhz, unit = "zscore"), na.rm = TRUE), - sd_class = classify_sd(.data$sd, type = "zscore"), - skew = skewKurt(remove_flags(.data$wfhz, unit = "zscore"))$s, - skew_class = classify_skew_kurt(.data$skew), - kurt = skewKurt(remove_flags(.data$wfhz, unit = "zscore"))$k, - kurt_class = classify_skew_kurt(.data$kurt), - .groups = "drop" - ) - - ## Add quality score to the data frame ---- - - df[["quality_score"]] <- df |> - group_by({{ area }}) |> - compute_quality_score(type = "whz") - - ## Add quality class to the data frame ---- - - df[["quality_class"]] <- df |> - group_by({{ area }}) |> - classify_overall_quality() - - ## Return data frame ---- - df -} - - - -#' -#' @rdname plausibility-check -#' -#' @export -#' -check_plausibility_muac <- function(df, flags, sex, muac) { - - ## Summarise statistics ---- - df <- df |> - summarise( - n = n(), - flagged = sum({{ flags }}, na.rm = TRUE) / n(), - flagged_class = classify_percent_flagged(.data$flagged, type = "crude"), - sex_ratio = sexRatioTest({{ sex }}, codes = c(1, 2))[["p"]], - sex_ratio_class = classify_age_sex_ratio(.data$sex_ratio), - dps = digitPreference({{ muac }}, digits = 0, values = 0:9)[["dps"]], - dps_class = digitPreference({{ muac }}, digits = 0, values = 0:9)[["dpsClass"]], - sd = sd(remove_flags({{ muac }}, unit = "crude"), na.rm = TRUE), - sd_class = classify_sd(.data$sd, type = "crude"), - .groups = "drop" - ) - - ## Return data frame ---- - df -} diff --git a/R/quality_raters.R b/R/quality_raters.R index 74fe00e..aeb493b 100644 --- a/R/quality_raters.R +++ b/R/quality_raters.R @@ -1,40 +1,36 @@ #' -#' Rate the acceptability of the standard deviation and the percentage of flagged -#' data #' -#' @description -#' Rate how much high is the standard deviation and the percentage of flagged -#' data in the dataset, hence it's acceptability. -#' -#' @param p A vector of class `double` of the proportions of flagged values in -#' the dataset. +#' Rate the acceptability of the proportion of flagged records #' -#' @param sd A vector of class `double` of the values of the standard deviation. -#' -#' @param type A choice between "wfhz", "mfaz" and "crude" for the basis on which -#' the rating should be done. +#' @description +#' Rate the acceptability of the proportion of flagged records in WFHZ, MFAZ, +#' and raw MUAC data following the SMART methodology criteria. #' -#' @returns A vector of class `character` for the acceptability rate. +#' @param p A vector of class `double`, containing the proportions of flagged +#' records in the dataset. If the class does not match the expected type, the +#' function will stop execution and return an error message indicating the type +#' of mismatch. #' -#' @details -#' The ranges of acceptability are: "Excellent", "Good", "Acceptable", "Problematic". -#' The cut-offs for WFHZ are as in the [SMART Methodology](https://smartmethodology.org/). -#' For the MFAZ and the absolute MUAC values, the maximum acceptable limit for -#' outliers is 2%, as recommended by -#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). -#' Cut-offs for the standard deviation of the absolute MUAC values are based on the -#' [IPC AMN guidelines](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/). +#' @param .in Specifies the dataset where the rating should be done, +#' with options: "wfhz", "mfaz", or "raw_muac". #' +#' @returns A vector of class `factor` of the same length as input, for the +#' acceptability rate. #' -#' @rdname raters +#' @keywords internal #' -classify_percent_flagged <- function(p, type = c("mfaz", "whz", "crude")) { - - type <- match.arg(type) +rate_propof_flagged <- function(p, .in = c("mfaz", "wfhz", "raw_muac")) { + ## Enforce options of `.in` ---- + .in <- match.arg(.in) - if (type == "mfaz" || type == "crude") { + ## Enforce the class of `p` ---- + if (!is.double(p)) { + stop("`p` must be of class double; not ", shQuote(class(p)), ". Please try again.") + } - ## classify percent of outliers in MFAZ ---- + ## Rate the acceptability of the proportion of flagged records ---- + if (.in == "mfaz" || .in == "raw_muac") { + ## In MFAZ or WFHZ ---- x <- cut( x = p, breaks = c(0, 0.01, 0.015, 0.02, Inf), @@ -44,9 +40,8 @@ classify_percent_flagged <- function(p, type = c("mfaz", "whz", "crude")) { ) } - if (type == "whz") { - - ## classify percent of outliers in WHZ ---- + if (.in == "wfhz") { + ## In raw MUAC values ---- x <- cut( x = p, breaks = c(0, 0.025, 0.05, 0.075, Inf), @@ -58,17 +53,39 @@ classify_percent_flagged <- function(p, type = c("mfaz", "whz", "crude")) { x } + + #' +#' Rate the acceptability of the standard deviation #' -#' @rdname raters +#' @description +#' Rate the acceptability of the standard deviation of WFHZ, MFAZ, and raw MUAC data. +#' Rating follows the SMART methodology criteria. #' -classify_sd <- function(sd, type = c("zscore", "crude")) { - - type <- match.arg(type) +#' @param sd A vector of class `double`, containing values of the standard deviation +#' from the dataset. If the class does not match the expected type, the function +#' will stop execution and return an error message indicating the type of mismatch. +#' +#' @param .of Specifies the dataset where the rating should be done, with options: +#' "wfhz", "mfaz", or "raw_muac". +#' +#' @returns A vector of class `factor` of the same length as input, for the +#' acceptability rate. +#' +#' @keywords internal +#' +#' +rate_std <- function(sd, .of = c("zscores", "raw_muac")) { + ## Enforce options of `.of` ---- + .of <- match.arg(.of) - if (type == "zscore") { + ## Enforce the class of `sd` ---- + if (!is.double(sd)) { + stop("`sd` must be of class double; not ", shQuote(class(sd)), ". Please try again.") + } - ## Classify WHZ and MFAZ-based standard deviation ---- + if (.of == "zscores") { + ## Rate the standard deviation of z-scores ---- x <- case_when( sd > 0.9 & sd < 1.1 ~ "Excellent", sd > 0.85 & sd < 1.15 ~ "Good", @@ -77,9 +94,8 @@ classify_sd <- function(sd, type = c("zscore", "crude")) { ) } - if (type == "crude") { - - ## Classify crude MUAC-based standard deviation ---- + if (.of == "raw_muac") { + ## Rate the standard deviation of raw MUAC values ---- x <- cut( x = sd, breaks = c(-Inf, 13, 14, 15, Inf), @@ -96,12 +112,22 @@ classify_sd <- function(sd, type = c("zscore", "crude")) { #' Rate the acceptability of the age and sex ratio test p-values #' #' @param p A vector of class `double` of the age or sex ratio test p-values. +#' If the class does not match the expected type, the function +#' will stop execution and return an error message indicating the type of mismatch. #' #' @returns A vector of class `character` of the same length as `p` for the #' acceptability rate. #' #' -classify_age_sex_ratio <- function(p) { +#' @keywords internal +#' +rate_agesex_ratio <- function(p) { + ## Enforce the class of `p` ---- + if (!is.double(p)) { + stop("`p` must be of class double; not ", shQuote(class(p)), ". Please try again.") + } + + ## Rate ---- case_when( p > 0.1 ~ "Excellent", p > 0.05 ~ "Good", @@ -115,12 +141,21 @@ classify_age_sex_ratio <- function(p) { #' Rate the acceptability of the skewness and kurtosis test results #' #' @param sk A vector of class `double` for skewness or kurtosis test results. +#' If the class does not match the expected type, the function +#' will stop execution and return an error message indicating the type of mismatch. #' -#' @returns A vector of class `character` of the same length as `sk` for the +#' @returns A vector of class `factor` of the same length as `sk` for the #' acceptability rate. #' +#' @keywords internal #' -classify_skew_kurt <- function(sk) { +rate_skewkurt <- function(sk) { + ## Enforce the class of `sk` ---- + if (!is.double(sk)) { + stop("`sk` must be of class double; not ", shQuote(class(sk)), ". Please try again.") + } + + ## Rate ---- cut( x = sk, breaks = c(-Inf, 0.2, 0.4, 0.6, Inf), @@ -132,43 +167,33 @@ classify_skew_kurt <- function(sk) { #' #' -#' Rate the overall acceptability score +#' Rate the overall acceptability of the data #' #' @description -#' Rate the overall acceptability score into "Excellent", "Good", "Acceptable" and -#' "Problematic". -#' -#' @param df A dataset of class `data.frame` containing a vector of the overall -#' acceptability score as yielded from [compute_quality_score()]. -#' -#' @returns A `data.frame` based on `df`. A new column `quality_class` for the -#' overall acceptability rate is created and added to `df`. -#' -#' @examples -#' ## A sample data ---- +#' Rate the overall data acceptability score into "Excellent", "Good", "Acceptable" +#' or "Problematic". #' -#' df <- data.frame( -#' quality_score = 29 -#' ) +#' @param q A vector of class `numeric` or `integer` of data acceptability scores. +#' If the class does not match the expected type, the function +#' will stop execution and return an error message indicating the type of mismatch. #' -#' ## Apply the function ---- -#' classify_overall_quality(df) +#' @returns A vector of class `factor` of the same length as `q`, providing an overall +#' rate of acceptability of the data. #' -#' @export +#' @keywords internal #' -classify_overall_quality <- function(df) { +rate_overall_quality <- function(q) { + ## Enforce the class of `q` ---- + if (!(is.numeric(q)) | is.integer(q)) { + stop("`q` must be of class numeric or integer; not ", shQuote(class(q)), ". Please try again.") + } - qclass <- with( - df, - data.frame( - quality_class <- cut( - x = quality_score, - breaks = c(0, 9, 14, 24, Inf), - labels = c("Excellent", "Good", "Acceptable", "Problematic"), - include.lowest = TRUE, - right = TRUE - ) - ) + ## Rate ---- + cut( + x = q, + breaks = c(0, 9, 14, 24, Inf), + labels = c("Excellent", "Good", "Acceptable", "Problematic"), + include.lowest = TRUE, + right = TRUE ) - qclass$quality_class } diff --git a/R/quality_scorers.R b/R/quality_scorers.R index 58e3490..c20b3c8 100644 --- a/R/quality_scorers.R +++ b/R/quality_scorers.R @@ -1,22 +1,37 @@ #' -#' Score the acceptability classification of the standard deviation and percentage -#' of flagged data test results +#' Score the acceptability rating of the check results that constitutes the +#' plausibility check suite #' #' @description -#' Attribute a penalty point based on the acceptability classification in which -#' the plausibility test result falls. +#' Attribute a score, also known as penalty point, for a given rate of acceptability +#' of the standard deviation, proportion of flagged records, age and sex ratio, +#' skewness, kurtosis and digit preference score check results. #' -#' @param x A vector of class `character` of acceptability classification of the -#' plausibility test results. +#' The scoring criteria and thresholds follows the standards in the SMART +#' plausibility check. #' -#' @returns A vector of class `integer` of the same length as `x` for the score. +#' @param x A vector of class `character` containing the acceptability rate of +#' a given test check. If the class does not match the expected type, the function +#' will stop execution and return an error message indicating the type of mismatch. #' -#' @details -#' The scoring criteria is as in [SMART Plausibility checks](https://smartmethodology.org/). +#' @returns A vector of class `integer` of the same length as `x` for the +#' acceptability score. +#' +#' @references +#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief +#' and Transition*. Manual 2.0. Available at: . #' #' @rdname scorer #' -assign_penalty_points_flags_and_sd <- function(x) { +#' @keywords internal +#' +score_std_flags <- function(x) { + ## Enforce the class of `x` ---- + if (!(is.character(x) | is.factor(x))) { + stop("`x` must be of class `character` or `factor`; not ", shQuote(class(x)), ". Please try again.") + } + + ## Score ---- case_when( x == "Excellent" ~ 0, x == "Good" ~ 5, @@ -30,7 +45,15 @@ assign_penalty_points_flags_and_sd <- function(x) { #' #' @rdname scorer #' -assign_penalty_points_age_sex_ratio <- function(x) { +#' @keywords internal +#' +score_agesexr_dps <- function(x) { + ## Enforce the class of `x` ---- + if (!(is.character(x) | is.factor(x))) { + stop("`x` must be of class `character` or `factor`; not ", shQuote(class(x)), ". Please try again.") + } + + ## Score ---- case_when( x == "Excellent" ~ 0, x == "Good" ~ 2, @@ -43,7 +66,15 @@ assign_penalty_points_age_sex_ratio <- function(x) { #' #' @rdname scorer #' -assign_penalty_points_skew_kurt <- function(x) { +#' @keywords internal +#' +score_skewkurt <- function(x) { + ## Enforce the class of `x` ---- + if (!(is.character(x) | is.factor(x))) { + stop("`x` must be of class `character` or `factor`; not ", shQuote(class(x)), ". Please try again.") + } + + ## Score ---- case_when( x == "Excellent" ~ 0, x == "Good" ~ 1, @@ -54,105 +85,52 @@ assign_penalty_points_skew_kurt <- function(x) { #' #' -#' Get the overall acceptability score from the acceptability classification scores -#' -#' @description -#' Calculate the total amount of penalty points based on each plausibility test -#' result acceptability classification for WFHZ and MFAZ. -#' -#' @param df A dataset object of class `data.frame` to calculate from. +#' Get the overall acceptability score from the acceptability rate scores #' -#' @param type A choice between "wfhz" and "mfaz" for the basis on which the +#' @param .for A choice between "wfhz" and "mfaz" for the basis on which the #' calculations should be made. #' -#' @returns A `data.frame` based on `df` with a new column named `"quality_score"` -#' for the overall of acceptability (of quality) score. -#' -#' @examples -#' -#' ## A sample data ---- -#' -#' df <- data.frame( -#' flagged_class = "Excellent", -#' age_ratio_class = "Good", -#' sex_ratio_class = "Problematic", -#' dps_class = "Excellent", -#' sd_class = "Excellent", -#' skew_class = "Good", -#' kurt_class = "Acceptable" -#' ) -#' -#' ## Apply the function ---- -#' compute_quality_score(df, type = "mfaz") -#' -#' @export -#' -compute_quality_score <- function(df, type = c("mfaz", "whz")) { - type <- match.arg(type) - - if (type == "mfaz") { +#' @returns A vector of class `numeric`, of length 1, for the overall +#' data quality (acceptability) score. +#' +#' @keywords internal +#' +score_overall_quality <- function(cl_flags, + cl_sex, + cl_age, + cl_dps_m = NULL, + cl_dps_w = NULL, + cl_dps_h = NULL, + cl_std, + cl_skw, + cl_kurt, + .for = c("wfhz", "mfaz")) { + ## Enforce options in `.for` ---- + .for <- match.arg(.for) - ### Get MFAZ's quality score ---- - qscore <- df |> - summarise( - quality_score = sum( - across( - .cols = c( - .data$flagged_class, - .data$sd_class - ), - .fns = assign_penalty_points_flags_and_sd - ), - across( - .cols = c( - .data$sex_ratio_class, - .data$age_ratio_class, - .data$dps_class - ), - .fns = assign_penalty_points_age_sex_ratio - ), - across( - .cols = c( - .data$skew_class, - .data$kurt_class - ), - .fns = assign_penalty_points_skew_kurt - ) - ) + switch(.for, + "wfhz" = { + qs <- sum( + score_std_flags(cl_flags), + score_agesexr_dps(cl_sex), + score_agesexr_dps(cl_age), + score_agesexr_dps(cl_dps_w), + score_agesexr_dps(cl_dps_h), + score_std_flags(cl_std), + score_skewkurt(cl_skw), + score_skewkurt(cl_kurt) ) - qscore[["quality_score"]] - - } else { - ### Get WHZ's quality score (REVISE)---- - qscore <- df |> - summarise( - quality_score = sum( - across( - .cols = c( - .data$flagged_class, - .data$sd_class - ), - .fns = assign_penalty_points_flags_and_sd - ), - across( - .cols = c( - .data$sex_ratio_class, - .data$age_ratio_class, - .data$dps_wgt_class, - .data$dps_hgt_class - ), - .fns = assign_penalty_points_age_sex_ratio - ), - across( - .cols = c( - .data$skew_class, - .data$kurt_class - ), - .fns = assign_penalty_points_skew_kurt - ) - ) + }, + "mfaz" = { + sum( + score_std_flags(cl_flags), + score_agesexr_dps(cl_sex), + score_agesexr_dps(cl_age), + score_agesexr_dps(cl_dps_m), + score_std_flags(cl_std), + score_skewkurt(cl_skw), + score_skewkurt(cl_kurt) ) - qscore[["quality_score"]] - } + } + ) } - diff --git a/R/sample_size.R b/R/sample_size.R deleted file mode 100644 index a1645dd..0000000 --- a/R/sample_size.R +++ /dev/null @@ -1,55 +0,0 @@ -#' -#' Check whether the IPC Acute Malnutrition sample size requirements were met -#' -#' @description -#' Verify whether the minimum sample size requirements for the area of analysis -#' were met, in accordance with the IPC Acute Malnutrition (IPC AMN) protocols. -#' -#' @param df A dataset of class `data.frame` to check. -#' -#' @param .group A vector of class `integer` of the cluster ID's for survey, -#' screening or site ID's for screenings and sentinel sites. -#' -#' @param .data_type A choice between "survey" for survey data, "screening" for -#' screening data or "ssite" for community-based sentinel site data. -#' -#' @returns A summarised table of three columns: `groups` for the total number -#' of unique cluster or screening or site IDs; `n_obs` for the respective total -#' number of children; and `meet_ipc` for whether the IPC AMN requirements were met. -#' -#' @details -#' [The IPC Manual](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/). -#' -#' -#' @examples -#' -#' anthro.01 |> -#' dplyr::group_by(area) |> -#' check_sample_size( -#' .group = cluster, -#' .data_type = "survey" -#' ) -#' -#' @export -#' -check_sample_size <- function(df, - .group, - .data_type = c("survey", "screening", "ssite")) { - - ## Match arguments ---- - data_type <- match.arg(.data_type) - - ## Summarize unique PSU's and total observations per PSU ---- - df <- df |> - summarise( - groups = n_distinct({{ .group }}), - n_obs = n(), - meet_ipc = case_when( - data_type == "survey" & groups >= 25 ~ "yes", - data_type == "screening" & groups >= 3 & n_obs >= 600 ~ "yes", - data_type == "ssite" & groups >= 5 & n_obs >= 200 ~ "yes", - .default = "no" - ) - ) - tibble::as_tibble(df) -} diff --git a/R/stattests.R b/R/stattests.R new file mode 100644 index 0000000..f7cd642 --- /dev/null +++ b/R/stattests.R @@ -0,0 +1,62 @@ +#' +#' Test for statistical difference between the proportion of children aged 24 to +#' 59 months old over those aged 6 to 23 months old +#' +#' @description +#' Calculate the observed age ratio of children aged 24 to 59 months old over +#' those aged 6 to 23 months old and test if there is a statistical difference +#' between the observed and the expected. +#' +#' @param age A vector of class `numeric` of child's age in months. If different +#' than expected, the function will stop execution and return an error message +#' indicating the type of mismatch. +#' +#' @param .expectedP The expected proportion of children aged 24 to 59 months +#' old over those aged 6 to 23 months old. This is estimated to be 0.66. +#' +#' @returns A vector of class `list` of three statistics: `p` for p-value of the +#' statistical difference between the observed and the expected proportion of +#' children aged 24 to 59 months old over those aged 6 to 23 months old; +#' `observedR` and `observedP` for the observed ratio and proportion respectively. +#' +#' @details +#' This function should be used specifically when assessing the quality of MUAC data. +#' For age ratio test of children aged 6 to 29 months old over 30 to 59 months old, as +#' performed in the SMART plausibility check, use [nipnTK::ageRatioTest()] instead. +#' +#' @references +#' SMART Initiative. *Updated MUAC data collection tool*. Available at: +#' +#' +#' @examples +#' mw_stattest_ageratio( +#' age = anthro.02$age, +#' .expectedP = 0.66 +#' ) +#' +#' @export +#' +mw_stattest_ageratio <- function(age, .expectedP = 0.66) { + ## Enforce the class of `age` ---- + if (!is.numeric(age)) { + stop("`age` must be of class 'numeric'; not ", shQuote(class(age)), ". Please try again.") + } + + ## Calculate observed proportion and ratio ---- + x <- ifelse(age >= 24, 1, 2) + sum_o24 <- sum(na.omit(x == 1)) + sum_u24 <- sum(na.omit(x == 2)) + total <- sum(table(na.omit(x))) + ratio <- sum_o24 / sum_u24 + prop <- sum_o24 / total + + ## Stats test with Yates continuity correction set to false ---- + test <- prop.test(sum_o24, total, p = .expectedP, correct = FALSE) + + ## Return ---- + list( + p = test$p.value, + observedR = ratio, + observedP = prop + ) +} diff --git a/R/sysdata.rda b/R/sysdata.rda deleted file mode 100644 index d774818..0000000 Binary files a/R/sysdata.rda and /dev/null differ diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..340c718 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,271 @@ +#' +#' Calculate child's age in months +#' +#' @description +#' Calculate child's age in months based on the date of birth and the date of +#' data collection. +#' +#' @param dos A vector of class `Date` for the date of data collection. If the class +#' is different than expected, the function will stop execution and return an error +#' message indicating the type of mismatch. +#' +#' @param dob A vector of class `Date` for the child's date of birth. If the class +#' is different than expected, the function will stop execution and return an error +#' message indicating the type of mismatch. +#' +#' @returns A vector of class `numeric` for child's age in months. Any value less +#' than 6.0 and greater than or equal to 60.0 months will be set to `NA`. +#' +#' @examples +#' ## Take two vectors of class "Date" ---- +#' surv_date <- as.Date( +#' c( +#' "2024-01-05", "2024-01-05", "2024-01-05", "2024-01-08", "2024-01-08", +#' "2024-01-08", "2024-01-10", "2024-01-10", "2024-01-10", "2024-01-11" +#' ) +#' ) +#' bir_date <- as.Date( +#' c( +#' "2022-04-04", "2021-05-01", "2023-05-24", "2017-12-12", NA, +#' "2020-12-12", "2022-04-04", "2021-05-01", "2023-05-24", "2020-12-12" +#' ) +#' ) +#' +#' ## Apply the function ---- +#' get_age_months( +#' dos = surv_date, +#' dob = bir_date +#' ) +#' +#' @export +#' +get_age_months <- function(dos, dob) { + ## Enforce the class of `dos` ---- + if (!is(dos, "Date")) { + stop("`dos` must be a vector of class 'Date'; not ", shQuote(class(dos)), ". Please try again.") + } + + ## Enforce the class of `dob` ---- + if (!is(dob, "Date")) { + stop("`dob` must be a vector of class 'Date'; not ", shQuote(class(dob)), ". Please try again.") + } + + ## Calculate age in months ---- + int <- dos - dob + age_mo <- int / (365.25 / 12) + age_mo <- ifelse(age_mo < 6.0 | age_mo >= 60.0, NA, age_mo) + age_mo +} + + +#' +#' +#' Identify, flag outliers and remove them +#' +#' @description +#' Identify outlier z-scores for weight-for-height (WFHZ) and MUAC-for-age (MFAZ) +#' following the SMART methodology. The function can also be used to detect +#' outliers for height-for-age (HFAZ) and weight-for-age (WFAZ) z-scores +#' following the same approach. +#' +#' For raw MUAC values, outliers constitute values that are less than 100 +#' millimeters or greater than 200 millimeters. +#' +#' Removing outliers consist in setting the outlier record to `NA` and not necessarily +#' to delete it from the dataset. This is useful in the analysis procedures +#' where outliers must be removed, such as the analysis of the standard deviation. +#' +#' @param x A vector of class `numeric` of WFHZ, MFAZ, HFAZ, WFAZ or raw MUAC values. +#' The latter should be in millimeters. If the class is different than expected, +#' the function will stop execution and return an error message indicating the +#' type of mismatch. +#' +#' @param .from A choice between `zscores` and `raw_muac` for where outliers should be +#' detected and flagged from. +#' +#' @return A vector of the same length as `x` for flagged records coded as +#' `1` for is a flag and `0` not a flag. +#' +#' @details +#' For z-score-based detection, flagged records represent outliers that deviate +#' substantially from the sample's z-score mean, making them unlikely to reflect +#' accurate measurements. For raw MUAC values, flagged records are those that fall +#' outside the acceptable fixed range. Including such outliers in the analysis could +#' compromise the accuracy and precision of the resulting estimates. +#' +#' The flagging criterion used for raw MUAC values is based on a recommendation +#' by Bilukha, O., & Kianian, B. (2023). +#' +#' @references +#' Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement +#' quality of mid‐upper arm circumference data in anthropometric surveys and +#' mass nutritional screenings conducted in humanitarian and refugee settings. +#' *Maternal & Child Nutrition*, 19, e13478. Available at +#' +#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief +#' and Transition*. Manual 2.0. Available at: . +#' +#' +#' @examples +#' ## Sample data of raw MUAC values ---- +#' x <- anthro.01$muac +#' +#' ## Apply the function with `.from` set to "raw_muac" ---- +#' flag_outliers(x, .from = "raw_muac") +#' +#' ## Sample data of z-scores (be it WFHZ, MFAZ, HFAZ or WFAZ) ---- +#' x <- anthro.02$mfaz +#' +#' # Apply the function with `.from` set to "zscores" ---- +#' flag_outliers(x, .from = "zscores") +#' +#' @rdname outliers +#' @export +#' +flag_outliers <- function(x, .from = c("zscores", "raw_muac")) { + ## Enforce the options in `.from` ---- + .from <- match.arg(.from) + + ## Enforce the class of `x` ---- + if (!is.numeric(x)) { + stop("`x` must be of class numeric; not ", shQuote(class(x)), ". Please try again.") + } + + ## Identify and flag outliers from zscores ---- + if (.from == "zscores") { + mean_zscore <- mean(x, na.rm = TRUE) + flags <- ifelse(x < (mean_zscore - 3) | x > (mean_zscore + 3), 1, 0) + flags <- ifelse(is.na(x), NA, flags) + flags + + ## Identify and flag outliers from raw MUAC values ---- + } else { + flags <- ifelse(x < 100 | x > 200, 1, 0) + flags <- ifelse(is.na(x), NA, flags) + flags + } +} + + +#' +#' +#' Remove outliers +#' +#' @examples +#' ## With `.from` set to "zscores" ---- +#' remove_flags( +#' x = wfhz.01$wfhz, +#' .from = "zscores" +#' ) +#' +#' ## With `.from` set to "raw_muac" ---- +#' remove_flags( +#' x = mfaz.01$muac, +#' .from = "raw_muac" +#' ) +#' +#' @rdname outliers +#' +#' @export +#' +remove_flags <- function(x, .from = c("zscores", "raw_muac")) { + ## Enforce options in `.from` ---- + .from <- match.arg(.from) + + ## Enforce the class of `x` ---- + if (!is.numeric(x)) { + stop("`x` must be of class numeric; not ", shQuote(class(x)), ". Please try again.") + } + + ## Control flow based on `.from` ---- + switch(.from, + ### Remove flags when `.from` = "zscores" ---- + "zscores" = { + mean_x <- mean(x, na.rm = TRUE) + zs <- ifelse((x < (mean_x - 3) | x > (mean_x + 3)) | is.na(x), NA_real_, x) + zs + }, + ### Remove flags when `.from` = "raw_muac" ---- + "raw_muac" = { + cr <- ifelse(x < 100 | x > 200 | is.na(x), NA_integer_, x) + cr + } + ) +} + + +#' +#' +#' +#' Convert MUAC values to either centimeters or millimeters +#' +#' @description +#' Convert MUAC values to either centimeters or millimeters as required. +#' Before to covert, the function checks if the supplied MUAC +#' values are in the opposite unit of the intended conversion. If not, +#' execution stops and an error message is returned. +#' +#' @param x A vector of the raw MUAC values. The class can either be +#' `double` or `numeric` or `integer`. If different than expected, the function +#' will stop execution and return an error message indicating the type of mismatch. +#' +#' @param .to A choice between `cm` (centimeters) and `mm` (millimeters) for the +#' measuring unit to convert MUAC values to. Before to execute the conversion, +#' the function checks if values are in the opposite unit; in case not, the +#' execution stops and an error message is returned. Strive to address the error +#' and try again. +#' +#' @returns A `numeric` vector of the same length as `x`, with values converted +#' to the chosen measuring unit. +#' +#' @examples +#' ## Recode from millimeters to centimeters ---- +#' muac_cm <- recode_muac( +#' x = anthro.01$muac, +#' .to = "cm" +#' ) +#' +#' ## Using the `muac_cm` object to recode it back to "mm" ---- +#' muac_mm <- recode_muac( +#' x = muac_cm, +#' .to = "mm" +#' ) +#' +#' @export +#' +recode_muac <- function(x, .to = c("cm", "mm")) { + ## Enfornce the options in `.to` ---- + .to <- match.arg(.to) + + ## Enforce the class of `x` ---- + if (!(is.numeric(x) | is.double(x) | is.integer(x))) { + stop( + "`x` must be of class 'numeric' or `integer` or 'double'; not ", shQuote(class(x)), ". Please try again." + ) + } + + ## Recode muac conditionally ---- + switch(.to, + ### Recode to centimeters ---- + "cm" = { + #### Enforce measuring unit is in "cm" ---- + if (any(grepl("\\.", as.character(x)))) { + stop("MUAC values are not in millimeters. Please try again.") + } + #### Convert MUAC to cm ---- + z <- (x / 10) + z + }, + + ### Recode to millimeters ---- + "mm" = { + #### Enforce measuring unit is in "cm" ---- + if (all(!grepl("\\.", as.character(x)))) { + stop("MUAC values are not in centimeter. Please try again.") + } + #### Convert MUAC to mm ---- + z <- (x * 10) + z + } + ) +} diff --git a/R/wrangle_age.R b/R/wrangle_age.R new file mode 100644 index 0000000..1f360bf --- /dev/null +++ b/R/wrangle_age.R @@ -0,0 +1,100 @@ +#' +#' +#' Wrangle child's age +#' +#' @description +#' Wrangle child's age for downstream analysis. This includes calculating age +#' in months based on the date of data collection and the child's date of birth, and +#' setting to `NA` the age values that are less than 6.0 and greater than or equal +#' to 60.0 months old. +#' +#' @param df A dataset of class `data.frame` to wrangle age from. +#' +#' @param dos A vector of class `Date` for date of data collection from the +#' `df`. Default is `NULL`. +#' +#' @param dob A vector of class `Date` for child's date of birth from the `df`. +#' Default is `NULL`. +#' +#' @param age A vector of class `numeric` of child's age in months. In most +#' cases this will be estimated using local event calendars; in some other +#' cases it can be a mix of the former and the one based on the child's +#' date of birth and the date of data collection. +#' +#' @param .decimals The number of decimals places to which the age should be rounded. +#' Default is 2. +#' +#' @returns A `data.frame` based on `df`. The variable `age` will be automatically +#' filled in each row where age value was missing and both the child's +#' date of birth and the date of data collection are available. Rows where `age` +#' is less than 6.0 and greater than or equal to 60.0 months old will be set to `NA`. +#' Additionally, a new variable for `df` named `age_days`, of class `double`, will +#' be created. +#' +#' @examples +#' +#' ## A sample data ---- +#' df <- data.frame( +#' surv_date = as.Date(c( +#' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01" +#' )), +#' birth_date = as.Date(c( +#' "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25" +#' )), +#' age = c(NA, 36, NA, NA, NA) +#' ) +#' +#' ## Apply the function ---- +#' mw_wrangle_age( +#' df = df, +#' dos = surv_date, +#' dob = birth_date, +#' age = age, +#' .decimals = 3 +#' ) +#' +#' @export +#' +mw_wrangle_age <- function(df, + dos = NULL, + dob = NULL, + age, + .decimals = 2) { + ## Difuse and evaluate arguments ---- + dos <- eval_tidy(enquo(dos), df) + dob <- eval_tidy(enquo(dob), df) + age <- eval_tidy(enquo(age), df) + + + ## Calculate child's age in months then in days ---- + if (!is.null(dob) | !is.null(dos)) { + ## Check if the class of vector "age" is "numeric" ---- + if (!is.numeric(age)) { + stop("`age` must be of class 'numeric'; not ", shQuote(class(age)), ". Please try again.") + } + + ## Calculate age in months ---- + df <- df |> + mutate( + age = ifelse( + is.na(!!age), + get_age_months(dob = dob, dos = dos), age + ), + age_days = round(.data$age * (365.25 / 12), .decimals) + ) + } else { + ## Enforce the class of `age` ---- + if (!is.numeric(age)) { + stop("`age` must be of class 'numeric'; not ", shQuote(class(age)), ". Please try again.") + } + + ## Calculate age in months ---- + df <- df |> + mutate( + age_days = round(age * (365.25 / 12), .decimals) + ) + } + + ## Return df ---- + as_tibble(df) +} diff --git a/R/wrangle_muac.R b/R/wrangle_muac.R new file mode 100644 index 0000000..9fd6d44 --- /dev/null +++ b/R/wrangle_muac.R @@ -0,0 +1,158 @@ +#' +#' Wrangle MUAC data +#' +#' @description +#' Calculate z-scores for MUAC-for-age (MFAZ) and identify outliers based on +#' the SMART methodology. When age is not supplied, wrangling will consist only +#' in detecting outliers from the raw MUAC values. The function only works after +#' the age has been wrangled. +#' +#' @param df A dataset object of class `data.frame` to wrangle data from. +#' +#' @param sex A `numeric` or `character` vector of child's sex. Code values should +#' only be 1 or "m" for males and 2 or "f" for females. Make sure sex values +#' are coded in either of the aforementioned before calling the function. If input +#' codes are different than expected, the function will stop execution and +#' return an error message with the type of mismatch. +#' +#' @param .recode_sex Logical. Set to `TRUE` if the values for `sex` are not coded +#' as 1 (for males) or 2 (for females). Otherwise, set to `FALSE` (default). +#' +#' @param age A vector of class `numeric` of child's age in months. +#' +#' @param muac A vector of class `numeric` of child's age in months. If the class +#' is different than expected, the function will stop execution and return an error +#' message indicating the type of mismatch. +#' +#' @param .recode_muac Logical. Set to `TRUE` if the values for raw MUAC should be +#' converted to either centimeters or millimeters. Otherwise, set to `FALSE` +#' (default) +#' +#' @param .to A choice of the measuring unit to which the MUAC values should be converted; +#' "cm" for centimeters, "mm" for millimeters and "none" to leave as it is. +#' +#' @param .decimals The number of decimals places the z-scores should have. +#' Default is 3. +#' +#' @returns A data frame based on `df`. New variables named `mfaz` and +#' `flag_mfaz`, of child's MFAZ and detected outliers, will be created. When age +#' is not supplied, only `flag_muac` variable is created. This refers to outliers +#' detected based on the raw MUAC values. +#' +#' @references +#' Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement +#' quality of mid‐upper arm circumference data in anthropometric surveys and +#' mass nutritional screenings conducted in humanitarian and refugee settings. +#' *Maternal & Child Nutrition*, 19, e13478. +#' +#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief +#' and Transition*. Manual 2.0. Available at: . +#' +#' @seealso +#' [flag_outliers()] [remove_flags()] [mw_wrangle_age()] +#' +#' +#' @examples +#' ## When age is available, wrangle it first before calling the function ---- +#' w <- mw_wrangle_age( +#' df = anthro.02, +#' dos = NULL, +#' dob = NULL, +#' age = age, +#' .decimals = 2 +#' ) +#' +#' ### Then apply the function to wrangle MUAC data ---- +#' mw_wrangle_muac( +#' df = w, +#' sex = sex, +#' age = age, +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' .to = "cm", +#' .decimals = 3 +#' ) +#' +#' ## When age is not available ---- +#' mw_wrangle_muac( +#' df = anthro.02, +#' sex = sex, +#' age = NULL, +#' muac = muac, +#' .recode_sex = TRUE, +#' .recode_muac = TRUE, +#' .to = "cm", +#' .decimals = 3 +#' ) +#' +#' @export +#' +mw_wrangle_muac <- function(df, + sex, + muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = c("cm", "mm", "none"), + .decimals = 3) { + + ## Enforce options in `.to` ---- + .to <- match.arg(.to) + + ## Difuse sex variable for NSE---- + sex <- eval_tidy(enquo(sex), df) + + ## Enforce code values in `sex` ---- + x <- as.factor(as.character(sex)) + if (!(all(levels(x) %in% c("m", "f")) | all(levels(x) %in% c("1", "2")))) { + stop("Values for sex should either be 'm', 'f' or 1 and 2 for male and female respectively") + } + + ## Capture expressions to evaluate later ---- + recode_sex <- quote( + if (.recode_sex) { + sex <- ifelse({{ sex }} == "m", 1, 2) + } else {{{ sex }}} + ) + + ## Capture expressions to evaluate later ---- + rec_muac <- quote( + if (.recode_muac && .to == "cm") { + muac <- recode_muac({{ muac }}, .to = "cm") + } else if (.recode_muac && .to == "mm") { + muac <- recode_muac({{ muac }}, .to = "mm") + } else {{{ muac }}} + ) + + ## Difuse arguments for NSE ---- + age <- eval_tidy(enquo(age), df) + + if (!is.null(age)) { + ## Calculate z-scores and identify outliers on MFAZ ---- + df <- df |> + mutate( + muac = !!rec_muac, + sex = !!recode_sex, + ) |> + addWGSR( + sex = {{ "sex" }}, + firstPart = {{ "muac" }}, + secondPart = "age_days", + index = "mfa", + digits = .decimals + ) |> + mutate( + flag_mfaz = do.call(flag_outliers, list(.data$mfaz, .from = "zscores")) + ) + } else { + ## Identify outliers on raw MUAC values ---- + df <- df |> + mutate( + sex = !!recode_sex, + flag_muac = do.call(flag_outliers, list({{ muac }}, .from = "raw_muac")) + ) + } + ## Return ---- + as_tibble(df) +} diff --git a/R/wrangle_wfhz.R b/R/wrangle_wfhz.R new file mode 100644 index 0000000..74740e0 --- /dev/null +++ b/R/wrangle_wfhz.R @@ -0,0 +1,106 @@ +#' +#' Wrangle weight-for-height data +#' +#' @description +#' Calculate z-scores for weight-for-height (WFHZ) and identify outliers based on +#' the SMART methodology. +#' +#' @param df A dataset object of class `data.frame` to wrangle data from. +#' +#' @param sex A `numeric` or `character` vector of child's sex. Code values should +#' only be 1 or "m" for males and 2 or "f" for females. Make sure sex values +#' are coded in either of the aforementioned before to call the function. If input +#' codes are neither of the above, the function will stop execution and +#' return an error message with the type of mismatch. +#' +#' @param .recode_sex Logical. Set to `TRUE` if the values for `sex` are not coded +#' as 1 (for males) or 2 (for females). Otherwise, set to `FALSE` (default). +#' +#' @param weight A vector of class `double` of child's weight in kilograms. If the input +#' is of a different class, the function will stop execution and return an error +#' message indicating the type of mismatch. +#' +#' @param height A vector of class `double` of child's height in centimeters. If the input +#' is of a different class, the function will stop execution and return an error +#' message indicating the type of mismatch. +#' +#' @param .decimals The number of decimals places the z-scores should have. +#' Default is 3. +#' +#' @returns A data frame based on `df`. New variables named `wfhz` and +#' `flag_wfhz`, of child's WFHZ and detected outliers, will be created. +#' +#' @references +#' SMART Initiative (2017). *Standardized Monitoring and Assessment for Relief +#' and Transition*. Manual 2.0. Available at: . +#' +#' @seealso +#' [flag_outliers()] [remove_flags()] +#' +#' @examples +#' mw_wrangle_wfhz( +#' df = anthro.01, +#' sex = sex, +#' weight = weight, +#' height = height, +#' .recode_sex = TRUE, +#' .decimals = 2 +#' ) +#' +#' @export +#' +mw_wrangle_wfhz <- function(df, + sex, + weight, + height, + .recode_sex = TRUE, + .decimals = 3) { + ## Difuse arguments to be evaluated later ---- + weight <- eval_tidy(enquo(weight), df) + height <- eval_tidy(enquo(height), df) + + ## Check if the class of vector weight is "double" ---- + if (!is.double(weight)) { + stop("`weight` must be of class 'double'; not ", shQuote(class(weight)), ". Please try again.") + } + + ## Check if the class of vector height is "double" ---- + if (!is.double(height)) { + stop("`height` must be of class 'double'; not ", shQuote(class(height)), ". Please try again.") + } + + ## Difuse sex variable for NSE---- + sex <- eval_tidy(enquo(sex), df) + + ## Enforce code value of `sex` ---- + x <- as.factor(as.character(sex)) + if (!(all(levels(x) %in% c("m", "f")) | all(levels(x) %in% c("1", "2")))) { + stop("Values for sex should either be 'm', 'f' or 1 and 2 for male and female respectively") + } + + ## Capture expressions to evaluate later ---- + recode_sex <- quote( + if (.recode_sex) { + sex <- ifelse({{ sex }} == "m", 1, 2) + } else {{{ sex }}} + ) + + ## Compute z-scores ---- + df <- df |> + mutate( + sex = !!recode_sex + ) |> + addWGSR( + sex = "sex", + firstPart = "weight", + secondPart = "height", + index = "wfh", + digits = .decimals + ) |> + ## Identify and flag outliers ---- + mutate( + flag_wfhz = do.call(flag_outliers, list(.data$wfhz, .from = "zscores")) + ) + ## Return --- + as_tibble(df) +} diff --git a/R/wranglers.R b/R/wranglers.R deleted file mode 100644 index 8026225..0000000 --- a/R/wranglers.R +++ /dev/null @@ -1,318 +0,0 @@ -#' -#' -#' Identify and flag outliers -#' -#' @description -#' Outliers are extreme values that deviate remarkably from the survey mean, making -#' them unlikely to be accurate measurements. This function detects and signals -#' them based on a criterion set for the WFHZ, the MFAZ and for the absolute MUAC -#' values. -#' -#' @param x A vector of class `double` of WFHZ or MFAZ or absolute MUAC values. -#' The latter should be in millimeters. -#' -#' @param type A choice between `zscore` and `crude` for where outliers should be -#' detected and flagged from. -#' -#' @param unit A choice between `zscore` and `crude` for where outliers should be -#' detected and flagged from. -#' -#' @return A vector of the same length as `x` of flagged observations that are -#' outliers: 1 for is a flag and 0 is not a flag. -#' -#' @details -#' The flagging criterion used for the WFHZ and the MFAZ is as in -#' [SMART plausibility check](https://smartmethodology.org/). A fixed flagging -#' criterion is used for the absolute MUAC values. This is as recommended by -#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478) -#' -#' -#' @examples -#' -#' ## Sample data for absolute MUAC values ---- -#' x <- anthro.01$muac -#' -#' ## Apply the function with type set to "crude" ---- -#' flag_outliers(x, type = "crude") -#' -#' ## Sample data for MFAZ or for WFHZ values ---- -#' x <- anthro.02$mfaz -#' -#' # Apply the function with type set to "zscore" ---- -#' flag_outliers(x, type = "zscore") -#' -#' @rdname outliers -#' @export -#' -flag_outliers <- function(x, type = c("zscore", "crude")) { - type <- match.arg(type) - - if (type == "zscore") { - mean_zscore <- mean(x, na.rm = TRUE) - flags <- ifelse((x < (mean_zscore - 3) | x > (mean_zscore + 3)), 1, 0) - flags <- ifelse(is.na(x), NA, flags) - flags - - } else { - flags <- ifelse(x < 100 | x > 200, 1, 0) - flags <- ifelse(is.na(x), NA, flags) - flags - } -} - - -#' -#' -#' Remove outliers -#' -#' @rdname outliers -#' -remove_flags <- function(x, unit = c("zscore", "crude")) { - - ## Match arguments ---- - unit <- match.arg(unit) - - ## Control flow based on unit ---- - switch( - unit, - ### Remove flags when unit = "zscore" ---- - "zscore" = { - mean_x <- mean(x, na.rm = TRUE) - zs <- ifelse((x < (mean_x - 3) | x > (mean_x + 3)) | is.na(x), NA_real_, x) - }, - ### Remove flags when unit = "crude" ---- - "crude" = { - cr <- ifelse(x < 100 | x > 200 | is.na(x), NA_integer_, x) - } - ) -} - - -#' -#' -#' -#' Convert MUAC values to either centimeters or millimeters -#' -#' @description -#' Recode the MUAC values to either centimeters or millimeters as required. -#' -#' @param muac A vector of class `double` or `integer` of the absolute MUAC values. -#' -#' @param unit A choice of the unit to which the MUAC values should be converted. -#' -#' @returns A numeric vector of the same length `muac`, with values converted -#' to the chosen unit. -#' -#' @examples -#' -#' ## Recode from millimeters to centimeters ---- -#' muac <- anthro.01$muac -#' muac_cm <- recode_muac(muac, unit = "cm") -#' -#' ## Using the `muac_cm` object to recode it back to "mm" ---- -#' muac_mm <- recode_muac(muac_cm, unit = "mm") -#' -#' @export -#' -recode_muac <- function(muac, unit = c("cm", "mm")) { - - ## Check if unit's arguments match ---- - stopifnot(unit %in% c("cm", "mm")) - - ## Recode muac conditionally ---- - switch( - unit, - ### Recode to millimeters ---- - "mm" = {muac <- muac * 10}, - ### Recode to centimeters ---- - "cm" = {muac <- muac / 10}, - stop("Invalid 'units' argument. Please choose either 'cm' or 'mm'.") - ) -} - - -#' -#' -#' Wrangle weight-for-height and MUAC data -#' -#' @description -#' This function performs data wrangling by calculating weight-for-height -#' and MUAC-for-age z-scores, followed by the detection and flagging of outliers. -#' For MUAC data, if age is not supplies, z-scores do not get computed. In such -#' cases, outlier detection and flagging are based on the absolute MUAC values. -#' -#' @param df A dataset of class `data.frame` to wrangle data from. -#' -#' @param sex A numeric or character vector of child's sex. Code values should -#' be 1 or "m" for boy and 2 or "f" for girl. The variable name must be sex, -#' otherwise it will not work. -#' -#' @param .recode_sex Logical. Default is `FALSE`. Setting to `TRUE` assumes that -#' the sex variable is a character vector of values "m" for boys and "f" for girls -#' and will recode them to 1 and 2 respectively. -#' -#' @param muac A vector of class `double` or `integer` of the absolute MUAC values. -#' -#' @param .recode_muac Logical. Default is `FALSE`. Set to `TRUE` if MUAC values -#' should be converted to either centimeters or millimeters. -#' -#' @param unit A choice of the unit to which the MUAC values should be converted. -#' "cm" for centimeters, "mm" for millimeters and "none" to leave as it is. -#' -#' @param age A double vector of child's age in months. It must be named age, -#' otherwise it will not work. -#' -#' @param weight A vector of class `double` of child's weight in kilograms. -#' -#' @param height A vector of class `double` of child's height in centimeters. -#' -#' @returns A data frame based on `df`. New variables named `wfhz` and -#' `flag_wfhz`, of child's weight-for-height z-scores and flags, or `mfaz` and -#' `flag_mfaz`, of child's MUAC-for-age z-scores and flags, will be created. For -#' MUAC, when age is not supplied only `flag_muac` variable is created. -#' This refers to flags based on the absolute MUAC values as recommended by -#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478). -#' -#' @details -#' The flagging criterion used for the WFHZ and MFAZ is as in -#' [SMART plausibility check](https://smartmethodology.org/). A fixed flagging -#' criterion is used for the absolute MUAC values. This is as recommended by -#' [Bilukha, O., & Kianian, B. (2023).](https://doi.org/10.1111/mcn.13478) -#' -#' @examples -#' -#' ## An example application of `process_wfhz_data()` ---- -#' -#' anthro.01 |> -#' process_wfhz_data( -#' sex = sex, -#' weight = weight, -#' height = height, -#' .recode_sex = TRUE -#' ) -#' -#' ## An example application of `process_muac_data()` ---- -#' -#' ### Sample data ---- -#' df <- data.frame( -#' survey_date = as.Date(c( -#' "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), -#' birthdate = as.Date(c( -#' "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), -#' age = c(NA, 36, NA, NA, NA), -#' sex = c("m", "f", "m", "m", "f"), -#' muac = c(110, 130, 300, 123, 125) -#' ) -#' -#' ### The application of the function ---- -#' -#' df |> -#' process_age( -#' svdate = "survey_date", -#' birdate = "birthdate", -#' age = age -#' ) |> -#' process_muac_data( -#' sex = sex, -#' age = "age", -#' muac = muac, -#' .recode_sex = TRUE, -#' .recode_muac = TRUE, -#' unit = "cm" -#' ) -#' -#' @rdname wrangler -#' -#' @export -#' - -process_wfhz_data <- function(df, - sex, - weight, - height, - .recode_sex = TRUE) { - - recode_sex <- quote( - if (.recode_sex) { - sex <- ifelse({{ sex }} == "m", 1, 2) - } else { - {{ sex }} - } - ) - - df <- df |> - mutate( - sex = !!recode_sex - ) |> - addWGSR( - sex = {{ "sex" }}, - firstPart = {{ "weight" }}, - secondPart = {{ "height" }}, - index = "wfh", - digits = 3 - ) |> - mutate( - flag_wfhz = do.call(flag_outliers, list(.data$wfhz, type = "zscore")) - ) - tibble::as_tibble(df) -} - - - -#' -#' @rdname wrangler -#' -#' @export -#' -process_muac_data <- function(df, - sex, - muac, - age = NULL, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = c("cm", "mm", "none")) { - unit <- match.arg(unit) - - recode_sex <- quote( - if (.recode_sex) { - sex <- ifelse({{ sex }} == "m", 1, 2) - } else { - {{ sex }} - } - ) - - rec_muac <- quote( - if (.recode_muac && unit == "cm") { - muac <- recode_muac({{ muac }}, unit = "cm") - } else if (.recode_muac && unit == "mm") { - muac <- recode_muac({{ muac }}, unit = "mm") - } else { - {{ muac }} - } - ) - - if (!is.null({{ age }})) { - df <- df |> - mutate( - muac = !!rec_muac, - sex = !!recode_sex, - ) |> - addWGSR( - sex = "sex", - firstPart = "muac", - secondPart = "age_days", - index = "mfa", - digits = 3 - )|> - mutate( - flag_mfaz = do.call(flag_outliers, list(.data$mfaz, type = "zscore")) - ) - } else { - df <- df |> - mutate( - sex = !!recode_sex, - flag_muac = do.call(flag_outliers, list({{ muac }}, type = "crude")) - ) - } - tibble::as_tibble(df) -} diff --git a/README.md b/README.md index 2a4ae5b..59c2b21 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ -# `mwana`: Utilities for analysing children’s nutritional status +# `mwana`: Utilities for analysing children’s nutritional status @@ -16,6 +16,7 @@ experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](h [![test-coverage](https://github.com/nutriverse/mwana/actions/workflows/test-coverage.yaml/badge.svg)](https://github.com/nutriverse/mwana/actions/workflows/test-coverage.yaml) [![codecov](https://codecov.io/gh/nutriverse/mwana/graph/badge.svg?token=kUUp1WOlSi)](https://codecov.io/gh/nutriverse/mwana) [![CodeFactor](https://www.codefactor.io/repository/github/nutriverse/mwana/badge.png)](https://www.codefactor.io/repository/github/nutriverse/mwana) +[![DOI](https://zenodo.org/badge/867609177.svg)](https://zenodo.org/badge/latestdoi/867609177) Child anthropometric assessments, implemented routinely in most @@ -72,22 +73,23 @@ summaries of the outputs. - `mwana` performs plausibility checks on weight-for-height z-score (WFHZ)-based data by mimicking the SMART plausibility checkers in ENA - for SMART software, their scoring and classification criterion. + for SMART software, their scoring and classification criterion. Read + guide + [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-wfhz-data). - It performs, as well, plausibility checks on MUAC data. For this, `mwana` integrates recent advances in using MUAC-for-age z-score - (MFAZ) for auditing the plausibility of MUAC data. In this way, when - the variable age is available: `mwana` performs plausibility checks - similar to those in WFHZ, however with few differences in the scoring. - Otherwise, when the variables age is missing, a similar test suit used - in the current version of ENA is performed. Read details here. + (MFAZ) for assessing the plausibility and the acceptability of MUAC + data. In this way, when the variable age is available: `mwana` + performs plausibility checks similar to those in WFHZ, with a few + differences in the scoring criteria for flagged data. Otherwise, when + the variables age is missing, a similar test suit used in the current + version of ENA is performed. Read guide + [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-mfaz-data). #### A useful workflow for plausibility check using `mwana` - PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable. - - + ### Prevalence analysis @@ -102,18 +104,21 @@ result. `mwana` computes prevalence for: -- Wasting on the basis of WFHZ and/edema (Read vignettes) -- Wasting on the basis of the absolute values of MUAC and/or edema: - here, when variable age is available, mwana applies MFAZ flags, - otherwise it applies the flagging criteria around the absolute values - of MUAC, to exclude outliers before computing prevalence, but the - actual prevalence is done on the absolute values. (Read link to the - specific section in the vignettes) +- Wasting on the basis of WFHZ and/edema; read the guide + [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-wfhz) +- Wasting on the basis of raw MUAC values and/or edema: here, when + variable age is available, mwana applies MFAZ flags, otherwise it + applies the flagging criteria around the raw MUAC values, to exclude + outliers before computing prevalence, but the actual prevalence is + done on the raw values; read the guide + [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-muac). - Wasting on the basis of MFAZ and/edema: outliers excluded using MFAZ - flags. (Read link to the specific section in the vignettes) + flags; read guide + [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-prevalence-of-wasting-based-on-mfaz). - Combined prevalence of wasting: here a concept of combined flags is - used to streamline the flags removed in WFHZ and those in MUAC. (Read - link to the specific section in the vignettes). + used to streamline the flags removed in WFHZ and those in MUAC; read + guide + [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-combined-prevalence-of-wasting). `mwana` provides weighted prevalence analysis, if needed. And this is controlled by the user. This is possible in all calculators, including @@ -123,13 +128,14 @@ In the context of IPC Acute Malnutrition (IPC AMN) analysis workflow, `mwana` provides a handy function for checking if the minimum sample size requirements in a given area were met on the basis of the methodology used to collect the data: survey, screening or sentinel -sites. (Check out the vignette). +sites. Read guide +[here](https://nutriverse.io/mwana/articles/sample_size.html). > [!TIP] > -> If you are undertaking a research and you want to censor your data +> If you are undertaking a research and you want to wrangle your data > before including in your statistical models, etc, `mwana` is a great -> helper, as it identifies flags out of your anthro data. +> helper. > [!WARNING] > @@ -140,7 +146,7 @@ sites. (Check out the vignette). ## Installation -`mwana` is not yet on CRAN but can be installed fromthe [nutriverse R +`mwana` is not yet on CRAN but can be installed from the [nutriverse R Universe](https://nutriverse.r-universe.dev) as follows: ``` r @@ -153,7 +159,7 @@ install.packages( Then load to in memory with ``` r -library(ipccheckr) +library(mwana) ``` # Citation @@ -163,25 +169,24 @@ cite using the suggested citation provided by a call to `citation` function as follows: ``` r -citation("ipccheckr") +citation("mwana") +#> To cite mwana: in publications use: +#> +#> Tomás Zaba, Ernest Guevarra (2024). _mwana: Utilities for Analysing +#> Children's Nutritional Status_. R package version 0.0.0.9000, +#> . +#> +#> A BibTeX entry for LaTeX users is +#> +#> @Manual{, +#> title = {mwana: Utilities for Analysing Children's Nutritional Status}, +#> author = {{Tomás Zaba} and {Ernest Guevarra}}, +#> year = {2024}, +#> note = {R package version 0.0.0.9000}, +#> url = {https://github.com/nutriverse/mwana}, +#> } ``` - To cite ipccheckr: in publications use: - - Tomás Zaba, Ernest Guevarra (2024). _ipccheckr: Toolkit for - Performing IPC Acute Malnutrition-related Data Checks_. R package - version 0.0.0.9000, . - - A BibTeX entry for LaTeX users is - - @Manual{, - title = {ipccheckr: Toolkit for Performing IPC Acute Malnutrition-related Data Checks}, - author = {{Tomás Zaba} and {Ernest Guevarra}}, - year = {2024}, - note = {R package version 0.0.0.9000}, - url = {https://github.com/tomaszaba/ipccheckr}, - } - # Community guidelines Feedback, bug reports and feature requests are welcome; file issues or diff --git a/README.qmd b/README.qmd index 9dc16c9..d6ee535 100644 --- a/README.qmd +++ b/README.qmd @@ -1,18 +1,21 @@ --- format: gfm +knitr: + opts_chunk: + collapse: true + comment: "#>" + fig.path: "man/figures/README-" --- - ```{r} -#| label: global_setup +#| label: load_library #| include: false -#| collapse: true -library(ipccheckr) +library(mwana) ``` -# `mwana`: Utilities for analysing children's nutritional status +# `mwana`: Utilities for analysing children's nutritional status [![Project Status: WIP – Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](https://www.repostatus.org/badges/latest/wip.svg)](https://www.repostatus.org/#wip) @@ -22,6 +25,7 @@ library(ipccheckr) [![test-coverage](https://github.com/nutriverse/mwana/actions/workflows/test-coverage.yaml/badge.svg)](https://github.com/nutriverse/mwana/actions/workflows/test-coverage.yaml) [![codecov](https://codecov.io/gh/nutriverse/mwana/graph/badge.svg?token=kUUp1WOlSi)](https://codecov.io/gh/nutriverse/mwana) [![CodeFactor](https://www.codefactor.io/repository/github/nutriverse/mwana/badge)](https://www.codefactor.io/repository/github/nutriverse/mwana) +[![DOI](https://zenodo.org/badge/867609177.svg)](https://zenodo.org/badge/latestdoi/867609177) Child anthropometric assessments, implemented routinely in most countries worldwide, are the cornerstones of child nutrition and food security surveillance around the world. Ensuring the quality of child anthropometric data, the accuracy of child undernutrition prevalence estimates, and the timeliness of reporting is therefore critical in establishing accurate, robust, and up-to-date child undernutrition status globally. @@ -44,14 +48,15 @@ It automates plausibility checks and prevalence analyses and respective summarie ### Plausibility checks. - + `mwana` performs plausibility checks on weight-for-height z-score (WFHZ)-based data by mimicking the SMART plausibility checkers in ENA for SMART software, their scoring and classification criterion. + + `mwana` performs plausibility checks on weight-for-height z-score (WFHZ)-based data by mimicking the SMART plausibility checkers in ENA for SMART software, their scoring and classification criterion. Read guide [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-wfhz-data). - + It performs, as well, plausibility checks on MUAC data. For this, `mwana` integrates recent advances in using MUAC-for-age z-score (MFAZ) for auditing the plausibility of MUAC data. In this way, when the variable age is available: `mwana` performs plausibility checks similar to those in WFHZ, however with few differences in the scoring. Otherwise, when the variables age is missing, a similar test suit used in the current version of ENA is performed. Read details here. + + It performs, as well, plausibility checks on MUAC data. For this, `mwana` integrates recent advances in using MUAC-for-age z-score (MFAZ) for assessing the plausibility and the acceptability of MUAC data. In this way, when the variable age is available: `mwana` performs plausibility checks similar to those in WFHZ, with a few differences in the scoring criteria for flagged data. Otherwise, when the variables age is missing, a similar test suit used in the current version of ENA is performed. Read guide [here](https://nutriverse.io/mwana/articles/plausibility.html#plausibility-check-on-mfaz-data). #### A useful workflow for plausibility check using `mwana` ```{r} #| label: workflow -#| echo: false +#| echo: false +#| warning: false #| fig-align: center DiagrammeR::grViz(" @@ -65,10 +70,10 @@ digraph mwana { node3 [label = 'Indicator', shape = diamond, color = lightgoldenrod]; node4 [label = 'WFHZ', shape = note]; node5 [label = 'MFAZ', shape = note]; - node6 [label = 'Absolute MUAC', shape = note]; - node7 [label = 'Process age', shape = box]; - node8 [label = 'Process anthro data', shape = box]; - node9 [label = 'Check plausibility', shape = box]; + node6 [label = 'Raw MUAC', shape = note]; + node7 [label = 'Wrangle age', shape = box]; + node8 [label = 'Wrangle anthro data', shape = box]; + node9 [label = 'Plausibility check', shape = box]; node10 [label = 'End of workflow', shape = oval, color = salmon]; ## Data process ---- @@ -104,17 +109,17 @@ digraph mwana { `mwana` computes prevalence for: - + Wasting on the basis of WFHZ and/edema (Read vignettes) - + Wasting on the basis of the absolute values of MUAC and/or edema: here, when variable age is available, mwana applies MFAZ flags, otherwise it applies the flagging criteria around the absolute values of MUAC, to exclude outliers before computing prevalence, but the actual prevalence is done on the absolute values. (Read link to the specific section in the vignettes) - + Wasting on the basis of MFAZ and/edema: outliers excluded using MFAZ flags. (Read link to the specific section in the vignettes) - + Combined prevalence of wasting: here a concept of combined flags is used to streamline the flags removed in WFHZ and those in MUAC. (Read link to the specific section in the vignettes). + + Wasting on the basis of WFHZ and/edema; read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-wfhz) + + Wasting on the basis of raw MUAC values and/or edema: here, when variable age is available, mwana applies MFAZ flags, otherwise it applies the flagging criteria around the raw MUAC values, to exclude outliers before computing prevalence, but the actual prevalence is done on the raw values; read the guide [here](https://nutriverse.io/mwana/articles/prevalence.html#sec-prevalence-muac). + + Wasting on the basis of MFAZ and/edema: outliers excluded using MFAZ flags; read guide [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-prevalence-of-wasting-based-on-mfaz). + + Combined prevalence of wasting: here a concept of combined flags is used to streamline the flags removed in WFHZ and those in MUAC; read guide [here](https://nutriverse.io/mwana/articles/prevalence.html#estimation-of-the-combined-prevalence-of-wasting). `mwana` provides weighted prevalence analysis, if needed. And this is controlled by the user. This is possible in all calculators, including for MUAC, combined, which is not currently available in ENA for SMART. -In the context of IPC Acute Malnutrition (IPC AMN) analysis workflow, `mwana` provides a handy function for checking if the minimum sample size requirements in a given area were met on the basis of the methodology used to collect the data: survey, screening or sentinel sites. (Check out the vignette). +In the context of IPC Acute Malnutrition (IPC AMN) analysis workflow, `mwana` provides a handy function for checking if the minimum sample size requirements in a given area were met on the basis of the methodology used to collect the data: survey, screening or sentinel sites. Read guide [here](https://nutriverse.io/mwana/articles/sample_size.html). :::{.callout-tip} -If you are undertaking a research and you want to censor your data before including in your statistical models, etc, `mwana` is a great helper, as it identifies flags out of your anthro data. +If you are undertaking a research and you want to wrangle your data before including in your statistical models, etc, `mwana` is a great helper. ::: :::{.callout-warning} @@ -123,7 +128,7 @@ Please note that `mwana` is still highly experimental and is undergoing a lot of ## Installation -`mwana` is not yet on CRAN but can be installed fromthe [nutriverse R Universe](https://nutriverse.r-universe.dev) as follows: +`mwana` is not yet on CRAN but can be installed from the [nutriverse R Universe](https://nutriverse.r-universe.dev) as follows: ```{r} #| label: installation @@ -139,7 +144,7 @@ Then load to in memory with ```{r} #| label: example -library(ipccheckr) +library(mwana) ``` # Citation @@ -150,7 +155,7 @@ If you were enticed to use `mwana` package and found it useful, please cite usin #| label: citation #| eval: true -citation("ipccheckr") +citation("mwana") ``` # Community guidelines diff --git a/README_files/figure-commonmark/workflow-1.png b/README_files/figure-commonmark/workflow-1.png deleted file mode 100644 index e3ef041..0000000 Binary files a/README_files/figure-commonmark/workflow-1.png and /dev/null differ diff --git a/inst/CITATION b/inst/CITATION index 1e21a78..3ec7bfa 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,9 +1,9 @@ bibentry( bibtype = "Manual", - header = "To cite ipccheckr: in publications use:", - title = "ipccheckr: Toolkit for Performing IPC Acute Malnutrition-related Data Checks", + header = "To cite mwana: in publications use:", + title = "mwana: Utilities for Analysing Children's Nutritional Status", author = c(person("Tomás Zaba"), person("Ernest Guevarra")), year = 2024, note = "R package version 0.0.0.9000", - url = "https://github.com/tomaszaba/ipccheckr", + url = "https://github.com/nutriverse/mwana", ) diff --git a/inst/WORDLIST b/inst/WORDLIST index 76e342e..7302988 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -5,10 +5,11 @@ Bilukha CMD Cahora Chiúta -Codecov +CodeFactor ENA Edema Elómwè +HFAZ IOF IPC Inquérito @@ -21,13 +22,14 @@ Maravia Metuge ORCID Orçamento +WFAZ WFHZ WIP -anthro ao callout centimeters cflags +codecov comunas ditricts dob diff --git a/man/apply_cdc_age_weighting.Rd b/man/apply_cdc_age_weighting.Rd index ef4bea0..b29f4de 100644 --- a/man/apply_cdc_age_weighting.Rd +++ b/man/apply_cdc_age_weighting.Rd @@ -25,5 +25,5 @@ children under 2 years to twice the proportion of children over 2 and then dividing by 3. } \details{ -This function is informed by the output of \code{\link[=age_ratio_test]{age_ratio_test()}}. +This function is informed by the output of \code{\link[=mw_stattest_ageratio]{mw_stattest_ageratio()}}. } diff --git a/man/check_sample_size.Rd b/man/check_sample_size.Rd deleted file mode 100644 index f2cb5d9..0000000 --- a/man/check_sample_size.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sample_size.R -\name{check_sample_size} -\alias{check_sample_size} -\title{Check whether the IPC Acute Malnutrition sample size requirements were met} -\usage{ -check_sample_size(df, .group, .data_type = c("survey", "screening", "ssite")) -} -\arguments{ -\item{df}{A dataset of class \code{data.frame} to check.} - -\item{.group}{A vector of class \code{integer} of the cluster ID's for survey, -screening or site ID's for screenings and sentinel sites.} - -\item{.data_type}{A choice between "survey" for survey data, "screening" for -screening data or "ssite" for community-based sentinel site data.} -} -\value{ -A summarised table of three columns: \code{groups} for the total number -of unique cluster or screening or site IDs; \code{n_obs} for the respective total -number of children; and \code{meet_ipc} for whether the IPC AMN requirements were met. -} -\description{ -Verify whether the minimum sample size requirements for the area of analysis -were met, in accordance with the IPC Acute Malnutrition (IPC AMN) protocols. -} -\details{ -\href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{The IPC Manual}. -} -\examples{ - -anthro.01 |> -dplyr::group_by(area) |> -check_sample_size( -.group = cluster, -.data_type = "survey" -) - -} diff --git a/man/classify_overall_quality.Rd b/man/classify_overall_quality.Rd deleted file mode 100644 index 7a61406..0000000 --- a/man/classify_overall_quality.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_raters.R -\name{classify_overall_quality} -\alias{classify_overall_quality} -\title{Rate the overall acceptability score} -\usage{ -classify_overall_quality(df) -} -\arguments{ -\item{df}{A dataset of class \code{data.frame} containing a vector of the overall -acceptability score as yielded from \code{\link[=compute_quality_score]{compute_quality_score()}}.} -} -\value{ -A \code{data.frame} based on \code{df}. A new column \code{quality_class} for the -overall acceptability rate is created and added to \code{df}. -} -\description{ -Rate the overall acceptability score into "Excellent", "Good", "Acceptable" and -"Problematic". -} -\examples{ -## A sample data ---- - -df <- data.frame( -quality_score = 29 -) - -## Apply the function ---- -classify_overall_quality(df) - -} diff --git a/man/combined_prevalence.Rd b/man/combined_prevalence.Rd index 131e552..9f6f540 100644 --- a/man/combined_prevalence.Rd +++ b/man/combined_prevalence.Rd @@ -78,17 +78,22 @@ print(x) ## When working on data frame with multiple survey areas ---- s <- anthro.03 |> -process_age(age = age) |> -process_muac_data( +mw_wrangle_age( +dos = NULL, +dob = NULL, +age = age, +.decimals = 2 +) |> +mw_wrangle_muac( sex = sex, muac = muac, age = "age", .recode_sex = TRUE, .recode_muac = TRUE, -unit = "cm" +.to = "cm" ) |> -dplyr::mutate(muac = recode_muac(muac, unit = "mm")) |> -process_wfhz_data( +dplyr::mutate(muac = recode_muac(muac, .to = "mm")) |> +mw_wrangle_wfhz( sex = sex, weight = weight, height = height, diff --git a/man/compute_age_in_months.Rd b/man/compute_age_in_months.Rd deleted file mode 100644 index 4e96d75..0000000 --- a/man/compute_age_in_months.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/age.R -\name{compute_age_in_months} -\alias{compute_age_in_months} -\title{Calculate child's age in months} -\usage{ -compute_age_in_months(surv_date, birth_date) -} -\arguments{ -\item{surv_date}{A vector of class \code{Date} for data collection date.} - -\item{birth_date}{A vector of class \code{Date} for child's date of birth.} -} -\value{ -A vector of class \code{double} for child's age in months with two decimal places. -Any value less than 6.0 and greater than or equal to 60.0 months will be set to \code{NA}. -} -\description{ -Calculate child's age in months based on date of birth and the data collection date. -} diff --git a/man/compute_month_to_days.Rd b/man/compute_month_to_days.Rd deleted file mode 100644 index 790fd25..0000000 --- a/man/compute_month_to_days.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/age.R -\name{compute_month_to_days} -\alias{compute_month_to_days} -\title{Calculate child's age in days} -\usage{ -compute_month_to_days(x) -} -\arguments{ -\item{x}{A double vector of child's age in months.} -} -\value{ -A double vector of the same length as \code{x} of age in days. -} -\description{ -Calculate child's age in days -} diff --git a/man/compute_quality_score.Rd b/man/compute_quality_score.Rd deleted file mode 100644 index ebe4266..0000000 --- a/man/compute_quality_score.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_scorers.R -\name{compute_quality_score} -\alias{compute_quality_score} -\title{Get the overall acceptability score from the acceptability classification scores} -\usage{ -compute_quality_score(df, type = c("mfaz", "whz")) -} -\arguments{ -\item{df}{A dataset object of class \code{data.frame} to calculate from.} - -\item{type}{A choice between "wfhz" and "mfaz" for the basis on which the -calculations should be made.} -} -\value{ -A \code{data.frame} based on \code{df} with a new column named \code{"quality_score"} -for the overall of acceptability (of quality) score. -} -\description{ -Calculate the total amount of penalty points based on each plausibility test -result acceptability classification for WFHZ and MFAZ. -} -\examples{ - -## A sample data ---- - -df <- data.frame( -flagged_class = "Excellent", -age_ratio_class = "Good", -sex_ratio_class = "Problematic", -dps_class = "Excellent", -sd_class = "Excellent", -skew_class = "Good", -kurt_class = "Acceptable" -) - -## Apply the function ---- -compute_quality_score(df, type = "mfaz") - -} diff --git a/man/figures/README-ipccheckr_workflow-1.png b/man/figures/README-ipccheckr_workflow-1.png deleted file mode 100644 index 5364604..0000000 Binary files a/man/figures/README-ipccheckr_workflow-1.png and /dev/null differ diff --git a/man/figures/README-mermaid-diagram-1.png b/man/figures/README-mermaid-diagram-1.png deleted file mode 100644 index 0b99287..0000000 Binary files a/man/figures/README-mermaid-diagram-1.png and /dev/null differ diff --git a/man/figures/README-worflow-1.png b/man/figures/README-worflow-1.png deleted file mode 100644 index 836a08d..0000000 Binary files a/man/figures/README-worflow-1.png and /dev/null differ diff --git a/man/figures/README-workflow-1.png b/man/figures/README-workflow-1.png new file mode 100644 index 0000000..7a93fe1 Binary files /dev/null and b/man/figures/README-workflow-1.png differ diff --git a/man/figures/logo.png b/man/figures/logo.png index ef717d2..83f11b1 100644 Binary files a/man/figures/logo.png and b/man/figures/logo.png differ diff --git a/man/get_age_months.Rd b/man/get_age_months.Rd new file mode 100644 index 0000000..3e8bd10 --- /dev/null +++ b/man/get_age_months.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{get_age_months} +\alias{get_age_months} +\title{Calculate child's age in months} +\usage{ +get_age_months(dos, dob) +} +\arguments{ +\item{dos}{A vector of class \code{Date} for the date of data collection. If the class +is different than expected, the function will stop execution and return an error +message indicating the type of mismatch.} + +\item{dob}{A vector of class \code{Date} for the child's date of birth. If the class +is different than expected, the function will stop execution and return an error +message indicating the type of mismatch.} +} +\value{ +A vector of class \code{numeric} for child's age in months. Any value less +than 6.0 and greater than or equal to 60.0 months will be set to \code{NA}. +} +\description{ +Calculate child's age in months based on the date of birth and the date of +data collection. +} +\examples{ +## Take two vectors of class "Date" ---- +surv_date <- as.Date( + c( + "2024-01-05", "2024-01-05", "2024-01-05", "2024-01-08", "2024-01-08", + "2024-01-08", "2024-01-10", "2024-01-10", "2024-01-10", "2024-01-11" + ) +) +bir_date <- as.Date( + c( + "2022-04-04", "2021-05-01", "2023-05-24", "2017-12-12", NA, + "2020-12-12", "2022-04-04", "2021-05-01", "2023-05-24", "2020-12-12" + ) +) + +## Apply the function ---- +get_age_months( + dos = surv_date, + dob = bir_date +) + +} diff --git a/man/mw_check_ipcamn_ssreq.Rd b/man/mw_check_ipcamn_ssreq.Rd new file mode 100644 index 0000000..96c218a --- /dev/null +++ b/man/mw_check_ipcamn_ssreq.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ipc_amn_check.R +\name{mw_check_ipcamn_ssreq} +\alias{mw_check_ipcamn_ssreq} +\title{Check whether IPC Acute Malnutrition (IPC AMN) sample size requirements were met} +\usage{ +mw_check_ipcamn_ssreq(df, cluster, .source = c("survey", "screening", "ssite")) +} +\arguments{ +\item{df}{A dataset object of class \code{data.frame} to check.} + +\item{cluster}{A vector of class \code{integer} or \code{character} of unique cluster or +screening or sentinel site IDs. If a \code{character} vector, ensure that names are +correct and each name represents one location for accurate counts. If the class +does not match the above expected type, the function will stop execution and +return an error message indicating the type of mismatch.} + +\item{.source}{The source of evidence. A choice between "survey" for +representative survey data at the area of analysis; "screening" for +screening data; "ssite" for community-based sentinel site data.} +} +\value{ +A summary table of class \code{data.frame}, of length 3 and width 1, for +the check results. \code{n_clusters} is for the total number of unique clusters or +screening or site IDs; \code{n_obs} for the correspondent total number of children +in the dataset; and \code{meet_ipc} for whether the IPC AMN requirements were met. +} +\description{ +Evidence on the prevalence of acute malnutrition used in the IPC AMN +can come from different sources: surveys, screenings or community-based +surveillance system. The IPC set minimum sample size requirements +for each source. This function helps in verifying whether the requirements +were met or not depending on the source. +} +\examples{ +mw_check_ipcamn_ssreq( + df = anthro.01, + cluster = cluster, + .source = "survey" +) + +} +\references{ +IPC Global Partners. 2021. \emph{Integrated Food Security Phase Classification} +\emph{Technical Manual Version 3.1.Evidence and Standards for Better Food Security} +\emph{and Nutrition Decisions}. Rome. Available at: +\url{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}. +} diff --git a/man/mw_neat_output_mfaz.Rd b/man/mw_neat_output_mfaz.Rd new file mode 100644 index 0000000..0690f6d --- /dev/null +++ b/man/mw_neat_output_mfaz.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plausibility_check_mfaz.R +\name{mw_neat_output_mfaz} +\alias{mw_neat_output_mfaz} +\title{Clean and format the output table returned from the MFAZ plausibility check +for improved clarity and readability} +\usage{ +mw_neat_output_mfaz(df) +} +\arguments{ +\item{df}{A data frame containing the summary table returned by this package's +MFAZ plausibility check function. Must be of class \code{data.frame}.} +} +\value{ +A data frame of the same length and width as \code{df}, with column names and +values formatted for clarity. +} +\description{ +Clean and format the output table returned from the MFAZ plausibility check +for improved clarity and readability. It converts scientific notations to standard +notations, round values and rename columns to meaningful names. +} +\examples{ +## First wrangle age data ---- +data <- mw_wrangle_age( + df = anthro.01, + dos = dos, + dob = dob, + age = age, + .decimals = 2 +) + +## Then wrangle MUAC data ---- +data_mfaz <- mw_wrangle_muac( + df = data, + sex = sex, + age = age, + muac = muac, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" +) + +## Then run plausibility check ---- +pl <- mw_plausibility_check_mfaz( + df = data_mfaz, + flags = flag_mfaz, + sex = sex, + muac = muac, + age = age +) + +## Now neat the output table ---- +mw_neat_output_mfaz(df = pl) + +} diff --git a/man/mw_neat_output_muac.Rd b/man/mw_neat_output_muac.Rd new file mode 100644 index 0000000..c6a1396 --- /dev/null +++ b/man/mw_neat_output_muac.Rd @@ -0,0 +1,47 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plausibility_check_muac.R +\name{mw_neat_output_muac} +\alias{mw_neat_output_muac} +\title{Clean and format the output table returned from the MUAC plausibility check +for improved clarity and readability.} +\usage{ +mw_neat_output_muac(df) +} +\arguments{ +\item{df}{A data frame containing the summary table returned by this package's +plausibility check function for raw MUAC data. Must be of class \code{data.frame}.} +} +\value{ +A data frame of the same length and width as \code{df}, with column names and +values formatted for clarity. +} +\description{ +Clean and format the output table returned from the plausibility check of raw +MUAC data for improved clarity and readability. It converts scientific notations +to standard notations, round values and rename columns to meaningful names. +} +\examples{ +## First wranlge MUAC data ---- +df_muac <- mw_wrangle_muac( + df = anthro.01, + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" +) + +## Then run the plausibility check ---- +pl_muac <- mw_plausibility_check_muac( + df = df_muac, + flags = flag_muac, + sex = sex, + muac = muac +) + +## Neat the output table ---- + +mw_neat_output_muac(df = pl_muac) + +} diff --git a/man/mw_neat_output_wfhz.Rd b/man/mw_neat_output_wfhz.Rd new file mode 100644 index 0000000..d8b3c0b --- /dev/null +++ b/man/mw_neat_output_wfhz.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plausibility_check_wfhz.R +\name{mw_neat_output_wfhz} +\alias{mw_neat_output_wfhz} +\title{Clean and format the output table returned from the WFHZ plausibility check +for improved clarity and readability} +\usage{ +mw_neat_output_wfhz(df) +} +\arguments{ +\item{df}{A data frame containing the summary table returned by this package's +WFHZ plausibility check function. Must be of class \code{data.frame}.} +} +\value{ +A data frame of the same length and width as \code{df}, with column names and +values formatted for clarity. +} +\description{ +Clean and format the output table returned from the WFHZ plausibility check +for improved clarity and readability. It converts scientific notations to standard +notations, round values and rename columns to meaningful names. +} +\examples{ +## First wrangle age data ---- +data <- mw_wrangle_age( + df = anthro.01, + dos = dos, + dob = dob, + age = age, + .decimals = 2 +) + +## Then wrangle WFHZ data ---- +data_wfhz <- mw_wrangle_wfhz( + df = data, + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE +) + +## Now run the plausibility check ---- +pl <- mw_plausibility_check_wfhz( + df = data_wfhz, + sex = sex, + age = age, + weight = weight, + height = height, + flags = flag_wfhz +) + +## Now neat the output table ---- +mw_neat_output_wfhz(df = pl) + + +} diff --git a/man/mw_plausibility_check_mfaz.Rd b/man/mw_plausibility_check_mfaz.Rd new file mode 100644 index 0000000..8a4d59f --- /dev/null +++ b/man/mw_plausibility_check_mfaz.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plausibility_check_mfaz.R +\name{mw_plausibility_check_mfaz} +\alias{mw_plausibility_check_mfaz} +\title{Check the plausibility and acceptability of muac-for-age z-score (MFAZ) data} +\usage{ +mw_plausibility_check_mfaz(df, sex, muac, age, flags) +} +\arguments{ +\item{df}{A dataset object of class \code{data.frame} to check.} + +\item{sex}{A vector of class \code{numeric} of child's sex.} + +\item{muac}{A vector of class \code{numeric} of child's MUAC in centimeters.} + +\item{age}{A vector of class \code{double} of child's age in months.} + +\item{flags}{A vector of class \code{numeric} of flagged records.} +} +\value{ +A summarised table of class \code{data.frame}, of length 17 and width 1, for +the plausibility test results and their respective acceptability ratings. +} +\description{ +Check the overall plausibility and acceptability of MFAZ data through a +structured test suite encompassing sampling and measurement-related biases checks +in the dataset. The test suite in this function follows the recommendation made +by Bilukha, O., & Kianian, B. (2023) on the plausibility of +constructing a comprehensive plausibility check similar to WFHZ to evaluate the +acceptability of MUAC data when the variable age exists in the dataset. + +The function works on a data frame returned from this package's wrangling +function for age and for MFAZ data. +} +\examples{ +## First wrangle age data ---- +data <- mw_wrangle_age( + df = anthro.01, + dos = dos, + dob = dob, + age = age, + .decimals = 2 +) + +## Then wrangle MUAC data ---- +data_muac <- mw_wrangle_muac( + df = data, + sex = sex, + age = age, + muac = muac, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" +) + +## And finally run plausibility check ---- +mw_plausibility_check_mfaz( + df = data_muac, + flags = flag_mfaz, + sex = sex, + muac = muac, + age = age +) + +} +\references{ +Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement +quality of mid‐upper arm circumference data in anthropometric surveys and +mass nutritional screenings conducted in humanitarian and refugee settings. +\emph{Maternal & Child Nutrition}, 19, e13478. \url{https://doi.org/10.1111/mcn.13478} + +SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief +and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. +} +\seealso{ +\code{\link[=mw_wrangle_age]{mw_wrangle_age()}} \code{\link[=mw_wrangle_muac]{mw_wrangle_muac()}} \code{\link[=mw_stattest_ageratio]{mw_stattest_ageratio()}} +\code{\link[=flag_outliers]{flag_outliers()}} +} diff --git a/man/mw_plausibility_check_muac.Rd b/man/mw_plausibility_check_muac.Rd new file mode 100644 index 0000000..9686240 --- /dev/null +++ b/man/mw_plausibility_check_muac.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plausibility_check_muac.R +\name{mw_plausibility_check_muac} +\alias{mw_plausibility_check_muac} +\title{Check the plausibility and acceptability of raw MUAC data} +\usage{ +mw_plausibility_check_muac(df, sex, muac, flags) +} +\arguments{ +\item{df}{A dataset object of class \code{data.frame} to check. It must have been +wrangled using this package's wrangling function for MUAC.} + +\item{sex}{A vector of class \code{numeric} of child's sex.} + +\item{muac}{A vector of class \code{double} of child's MUAC in centimeters.} + +\item{flags}{A vector of class \code{numeric} of flagged records.} +} +\value{ +A summarised table of class \code{data.frame}, of length 9 and width 1, for +the plausibility test results and their respective acceptability ratings.. +} +\description{ +Check the overall plausibility and acceptability of raw MUAC data through a +structured test suite encompassing sampling and measurement-related biases checks +in the dataset. The test suite in this function follows the recommendation made +by Bilukha, O., & Kianian, B. (2023). +} +\examples{ +## First wranlge MUAC data ---- +df_muac <- mw_wrangle_muac( + df = anthro.01, + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" +) + +## Then run the plausibility check ---- +mw_plausibility_check_muac( + df = df_muac, + flags = flag_muac, + sex = sex, + muac = muac +) + +} +\references{ +Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement +quality of mid‐upper arm circumference data in anthropometric surveys and +mass nutritional screenings conducted in humanitarian and refugee settings. +\emph{Maternal & Child Nutrition}, 19, e13478. \url{https://doi.org/10.1111/mcn.13478} + +SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief +and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. +} +\seealso{ +\code{\link[=mw_wrangle_muac]{mw_wrangle_muac()}} \code{\link[=flag_outliers]{flag_outliers()}} +} diff --git a/man/mw_plausibility_check_wfhz.Rd b/man/mw_plausibility_check_wfhz.Rd new file mode 100644 index 0000000..455b57d --- /dev/null +++ b/man/mw_plausibility_check_wfhz.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plausibility_check_wfhz.R +\name{mw_plausibility_check_wfhz} +\alias{mw_plausibility_check_wfhz} +\title{Check the plausibility and acceptability of weight-for-height z-score (WFHZ) data} +\usage{ +mw_plausibility_check_wfhz(df, sex, age, weight, height, flags) +} +\arguments{ +\item{df}{A dataset object of class \code{data.frame} to check.} + +\item{sex}{A vector of class \code{numeric} of child's sex.} + +\item{age}{A vector of class \code{double} of child's age in months.} + +\item{weight}{A vector of class \code{double} of child's weight in kilograms.} + +\item{height}{A vector of class \code{double} of child's height in centimeters.} + +\item{flags}{A vector of class \code{numeric} of flagged records.} +} +\value{ +A summarised table of class \code{data.frame}, of length 19 and width 1, for +the plausibility test results and their respective acceptability rates. +} +\description{ +Check the overall plausibility and acceptability of WFHZ data through a +structured test suite encompassing sampling and measurement-related biases checks +in the dataset. The test suite, including the criteria and corresponding rating of +acceptability, follows the standards in the SMART plausibility check. The only +exception is the exclusion of MUAC checks. MUAC is checked separately using more +comprehensive test suite as well. + +The function works on a data frame returned from this package's wrangling +function for age and for WFHZ data. +} +\examples{ +## First wrangle age data ---- +data <- mw_wrangle_age( + df = anthro.01, + dos = dos, + dob = dob, + age = age, + .decimals = 2 +) + +## Then wrangle WFHZ data ---- +data_wfhz <- mw_wrangle_wfhz( + df = data, + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE +) + +## Now run the plausibility check ---- +mw_plausibility_check_wfhz( + df = data_wfhz, + sex = sex, + age = age, + weight = weight, + height = height, + flags = flag_wfhz +) + + +} +\references{ +SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief +and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. +} +\seealso{ +\code{\link[=mw_plausibility_check_mfaz]{mw_plausibility_check_mfaz()}} \code{\link[=mw_plausibility_check_muac]{mw_plausibility_check_muac()}} +\code{\link[=mw_wrangle_age]{mw_wrangle_age()}} +} diff --git a/man/age_ratio_test.Rd b/man/mw_stattest_ageratio.Rd similarity index 57% rename from man/age_ratio_test.Rd rename to man/mw_stattest_ageratio.Rd index 9b8fe1c..6b79781 100644 --- a/man/age_ratio_test.Rd +++ b/man/mw_stattest_ageratio.Rd @@ -1,41 +1,44 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/age.R -\name{age_ratio_test} -\alias{age_ratio_test} +% Please edit documentation in R/stattests.R +\name{mw_stattest_ageratio} +\alias{mw_stattest_ageratio} \title{Test for statistical difference between the proportion of children aged 24 to 59 months old over those aged 6 to 23 months old} \usage{ -age_ratio_test(age, .expectedP = 0.66) +mw_stattest_ageratio(age, .expectedP = 0.66) } \arguments{ -\item{age}{A double vector of age in months.} +\item{age}{A vector of class \code{numeric} of child's age in months. If different +than expected, the function will stop execution and return an error message +indicating the type of mismatch.} \item{.expectedP}{The expected proportion of children aged 24 to 59 months -old over those aged 6 to 23 months old. This is estimated to be 0.66 as in the -\href{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/}{SMART MUAC tool}.} +old over those aged 6 to 23 months old. This is estimated to be 0.66.} } \value{ A vector of class \code{list} of three statistics: \code{p} for p-value of the statistical difference between the observed and the expected proportion of children aged 24 to 59 months old over those aged 6 to 23 months old; \code{observedR} and \code{observedP} for the observed ratio and proportion respectively. - -@details -This function should be used specifically for assessing MUAC data. For -age ratio tests of children aged 6 to 29 months old over 30 to 59 months old, as -performed in the SMART plausibility check, use \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}} instead. } \description{ Calculate the observed age ratio of children aged 24 to 59 months old over those aged 6 to 23 months old and test if there is a statistical difference between the observed and the expected. } +\details{ +This function should be used specifically when assessing the quality of MUAC data. +For age ratio test of children aged 6 to 29 months old over 30 to 59 months old, as +performed in the SMART plausibility check, use \code{\link[nipnTK:ageRatioTest]{nipnTK::ageRatioTest()}} instead. +} \examples{ - -## An example of application using `anthro.02` dataset ---- -age_ratio_test( -age = anthro.02$age, -.expectedP = 0.66 +mw_stattest_ageratio( + age = anthro.02$age, + .expectedP = 0.66 ) } +\references{ +SMART Initiative. \emph{Updated MUAC data collection tool}. Available at: +\url{https://smartmethodology.org/survey-planning-tools/updated-muac-tool/} +} diff --git a/man/mw_wrangle_age.Rd b/man/mw_wrangle_age.Rd new file mode 100644 index 0000000..7f8bc52 --- /dev/null +++ b/man/mw_wrangle_age.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrangle_age.R +\name{mw_wrangle_age} +\alias{mw_wrangle_age} +\title{Wrangle child's age} +\usage{ +mw_wrangle_age(df, dos = NULL, dob = NULL, age, .decimals = 2) +} +\arguments{ +\item{df}{A dataset of class \code{data.frame} to wrangle age from.} + +\item{dos}{A vector of class \code{Date} for date of data collection from the +\code{df}. Default is \code{NULL}.} + +\item{dob}{A vector of class \code{Date} for child's date of birth from the \code{df}. +Default is \code{NULL}.} + +\item{age}{A vector of class \code{numeric} of child's age in months. In most +cases this will be estimated using local event calendars; in some other +cases it can be a mix of the former and the one based on the child's +date of birth and the date of data collection.} + +\item{.decimals}{The number of decimals places to which the age should be rounded. +Default is 2.} +} +\value{ +A \code{data.frame} based on \code{df}. The variable \code{age} will be automatically +filled in each row where age value was missing and both the child's +date of birth and the date of data collection are available. Rows where \code{age} +is less than 6.0 and greater than or equal to 60.0 months old will be set to \code{NA}. +Additionally, a new variable for \code{df} named \code{age_days}, of class \code{double}, will +be created. +} +\description{ +Wrangle child's age for downstream analysis. This includes calculating age +in months based on the date of data collection and the child's date of birth, and +setting to \code{NA} the age values that are less than 6.0 and greater than or equal +to 60.0 months old. +} +\examples{ + +## A sample data ---- +df <- data.frame( + surv_date = as.Date(c( + "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01" + )), + birth_date = as.Date(c( + "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25" + )), + age = c(NA, 36, NA, NA, NA) +) + +## Apply the function ---- +mw_wrangle_age( + df = df, + dos = surv_date, + dob = birth_date, + age = age, + .decimals = 3 +) + +} diff --git a/man/mw_wrangle_muac.Rd b/man/mw_wrangle_muac.Rd new file mode 100644 index 0000000..ba26ae0 --- /dev/null +++ b/man/mw_wrangle_muac.Rd @@ -0,0 +1,104 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrangle_muac.R +\name{mw_wrangle_muac} +\alias{mw_wrangle_muac} +\title{Wrangle MUAC data} +\usage{ +mw_wrangle_muac( + df, + sex, + muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = c("cm", "mm", "none"), + .decimals = 3 +) +} +\arguments{ +\item{df}{A dataset object of class \code{data.frame} to wrangle data from.} + +\item{sex}{A \code{numeric} or \code{character} vector of child's sex. Code values should +only be 1 or "m" for males and 2 or "f" for females. Make sure sex values +are coded in either of the aforementioned before calling the function. If input +codes are different than expected, the function will stop execution and +return an error message with the type of mismatch.} + +\item{muac}{A vector of class \code{numeric} of child's age in months. If the class +is different than expected, the function will stop execution and return an error +message indicating the type of mismatch.} + +\item{age}{A vector of class \code{numeric} of child's age in months.} + +\item{.recode_sex}{Logical. Set to \code{TRUE} if the values for \code{sex} are not coded +as 1 (for males) or 2 (for females). Otherwise, set to \code{FALSE} (default).} + +\item{.recode_muac}{Logical. Set to \code{TRUE} if the values for raw MUAC should be +converted to either centimeters or millimeters. Otherwise, set to \code{FALSE} +(default)} + +\item{.to}{A choice of the measuring unit to which the MUAC values should be converted; +"cm" for centimeters, "mm" for millimeters and "none" to leave as it is.} + +\item{.decimals}{The number of decimals places the z-scores should have. +Default is 3.} +} +\value{ +A data frame based on \code{df}. New variables named \code{mfaz} and +\code{flag_mfaz}, of child's MFAZ and detected outliers, will be created. When age +is not supplied, only \code{flag_muac} variable is created. This refers to outliers +detected based on the raw MUAC values. +} +\description{ +Calculate z-scores for MUAC-for-age (MFAZ) and identify outliers based on +the SMART methodology. When age is not supplied, wrangling will consist only +in detecting outliers from the raw MUAC values. The function only works after +the age has been wrangled. +} +\examples{ +## When age is available, wrangle it first before calling the function ---- +w <- mw_wrangle_age( + df = anthro.02, + dos = NULL, + dob = NULL, + age = age, + .decimals = 2 +) + +### Then apply the function to wrangle MUAC data ---- +mw_wrangle_muac( + df = w, + sex = sex, + age = age, + muac = muac, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm", + .decimals = 3 +) + +## When age is not available ---- +mw_wrangle_muac( + df = anthro.02, + sex = sex, + age = NULL, + muac = muac, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm", + .decimals = 3 +) + +} +\references{ +Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement +quality of mid‐upper arm circumference data in anthropometric surveys and +mass nutritional screenings conducted in humanitarian and refugee settings. +\emph{Maternal & Child Nutrition}, 19, e13478. \url{https://doi.org/10.1111/mcn.13478} + +SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief +and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. +} +\seealso{ +\code{\link[=flag_outliers]{flag_outliers()}} \code{\link[=remove_flags]{remove_flags()}} \code{\link[=mw_wrangle_age]{mw_wrangle_age()}} +} diff --git a/man/mw_wrangle_wfhz.Rd b/man/mw_wrangle_wfhz.Rd new file mode 100644 index 0000000..9d95ab2 --- /dev/null +++ b/man/mw_wrangle_wfhz.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrangle_wfhz.R +\name{mw_wrangle_wfhz} +\alias{mw_wrangle_wfhz} +\title{Wrangle weight-for-height data} +\usage{ +mw_wrangle_wfhz(df, sex, weight, height, .recode_sex = TRUE, .decimals = 3) +} +\arguments{ +\item{df}{A dataset object of class \code{data.frame} to wrangle data from.} + +\item{sex}{A \code{numeric} or \code{character} vector of child's sex. Code values should +only be 1 or "m" for males and 2 or "f" for females. Make sure sex values +are coded in either of the aforementioned before to call the function. If input +codes are neither of the above, the function will stop execution and +return an error message with the type of mismatch.} + +\item{weight}{A vector of class \code{double} of child's weight in kilograms. If the input +is of a different class, the function will stop execution and return an error +message indicating the type of mismatch.} + +\item{height}{A vector of class \code{double} of child's height in centimeters. If the input +is of a different class, the function will stop execution and return an error +message indicating the type of mismatch.} + +\item{.recode_sex}{Logical. Set to \code{TRUE} if the values for \code{sex} are not coded +as 1 (for males) or 2 (for females). Otherwise, set to \code{FALSE} (default).} + +\item{.decimals}{The number of decimals places the z-scores should have. +Default is 3.} +} +\value{ +A data frame based on \code{df}. New variables named \code{wfhz} and +\code{flag_wfhz}, of child's WFHZ and detected outliers, will be created. +} +\description{ +Calculate z-scores for weight-for-height (WFHZ) and identify outliers based on +the SMART methodology. +} +\examples{ +mw_wrangle_wfhz( + df = anthro.01, + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE, + .decimals = 2 +) + +} +\references{ +SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief +and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. +} +\seealso{ +\code{\link[=flag_outliers]{flag_outliers()}} \code{\link[=remove_flags]{remove_flags()}} +} diff --git a/man/ipccheckr-package.Rd b/man/mwana-package.Rd similarity index 76% rename from man/ipccheckr-package.Rd rename to man/mwana-package.Rd index cd46222..54c6ba3 100644 --- a/man/ipccheckr-package.Rd +++ b/man/mwana-package.Rd @@ -1,14 +1,14 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ipccheckr-package.R +% Please edit documentation in R/mwana-package.R \docType{package} -\name{ipccheckr-package} -\alias{ipccheckr} -\alias{ipccheckr-package} -\title{ipccheckr: Utilities for Analysing Children's Nutritional Status} +\name{mwana-package} +\alias{mwana} +\alias{mwana-package} +\title{mwana: Utilities for Analysing Children's Nutritional Status} \description{ \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} -A streamlined and comprehensive implementation of the Standardized Monitoring and Assessment of Relief and Transition (SMART) Methodology \url{https://smartmethodology.org/} guidelines for data quality checks and prevalence estimation, with enhanced programmable process particularly when handling large multiple datasets. +A streamlined and comprehensive implementation of the Standardized Monitoring and Assessment of Relief and Transition (SMART) Methodology \url{https://smartmethodology.org/} guidelines for data quality checks and prevalence estimation, with enhanced programmable process particularly when handling multiple area datasets. } \seealso{ Useful links: diff --git a/man/outliers.Rd b/man/outliers.Rd index 43c063c..f7c1538 100644 --- a/man/outliers.Rd +++ b/man/outliers.Rd @@ -1,52 +1,82 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/wranglers.R +% Please edit documentation in R/utils.R \name{flag_outliers} \alias{flag_outliers} \alias{remove_flags} -\title{Identify and flag outliers} +\title{Identify, flag outliers and remove them} \usage{ -flag_outliers(x, type = c("zscore", "crude")) +flag_outliers(x, .from = c("zscores", "raw_muac")) -remove_flags(x, unit = c("zscore", "crude")) +remove_flags(x, .from = c("zscores", "raw_muac")) } \arguments{ -\item{x}{A vector of class \code{double} of WFHZ or MFAZ or absolute MUAC values. -The latter should be in millimeters.} +\item{x}{A vector of class \code{numeric} of WFHZ, MFAZ, HFAZ, WFAZ or raw MUAC values. +The latter should be in millimeters. If the class is different than expected, +the function will stop execution and return an error message indicating the +type of mismatch.} -\item{type}{A choice between \code{zscore} and \code{crude} for where outliers should be -detected and flagged from.} - -\item{unit}{A choice between \code{zscore} and \code{crude} for where outliers should be +\item{.from}{A choice between \code{zscores} and \code{raw_muac} for where outliers should be detected and flagged from.} } \value{ -A vector of the same length as \code{x} of flagged observations that are -outliers: 1 for is a flag and 0 is not a flag. +A vector of the same length as \code{x} for flagged records coded as +\code{1} for is a flag and \code{0} not a flag. } \description{ -Outliers are extreme values that deviate remarkably from the survey mean, making -them unlikely to be accurate measurements. This function detects and signals -them based on a criterion set for the WFHZ, the MFAZ and for the absolute MUAC -values. +Identify outlier z-scores for weight-for-height (WFHZ) and MUAC-for-age (MFAZ) +following the SMART methodology. The function can also be used to detect +outliers for height-for-age (HFAZ) and weight-for-age (WFAZ) z-scores +following the same approach. + +For raw MUAC values, outliers constitute values that are less than 100 +millimeters or greater than 200 millimeters. + +Removing outliers consist in setting the outlier record to \code{NA} and not necessarily +to delete it from the dataset. This is useful in the analysis procedures +where outliers must be removed, such as the analysis of the standard deviation. } \details{ -The flagging criterion used for the WFHZ and the MFAZ is as in -\href{https://smartmethodology.org/}{SMART plausibility check}. A fixed flagging -criterion is used for the absolute MUAC values. This is as recommended by -\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).} +For z-score-based detection, flagged records represent outliers that deviate +substantially from the sample's z-score mean, making them unlikely to reflect +accurate measurements. For raw MUAC values, flagged records are those that fall +outside the acceptable fixed range. Including such outliers in the analysis could +compromise the accuracy and precision of the resulting estimates. + +The flagging criterion used for raw MUAC values is based on a recommendation +by Bilukha, O., & Kianian, B. (2023). } \examples{ - -## Sample data for absolute MUAC values ---- +## Sample data of raw MUAC values ---- x <- anthro.01$muac -## Apply the function with type set to "crude" ---- -flag_outliers(x, type = "crude") +## Apply the function with `.from` set to "raw_muac" ---- +flag_outliers(x, .from = "raw_muac") -## Sample data for MFAZ or for WFHZ values ---- +## Sample data of z-scores (be it WFHZ, MFAZ, HFAZ or WFAZ) ---- x <- anthro.02$mfaz -# Apply the function with type set to "zscore" ---- -flag_outliers(x, type = "zscore") +# Apply the function with `.from` set to "zscores" ---- +flag_outliers(x, .from = "zscores") + +## With `.from` set to "zscores" ---- +remove_flags( + x = wfhz.01$wfhz, + .from = "zscores" +) + +## With `.from` set to "raw_muac" ---- +remove_flags( + x = mfaz.01$muac, + .from = "raw_muac" +) + +} +\references{ +Bilukha, O., & Kianian, B. (2023). Considerations for assessment of measurement +quality of mid‐upper arm circumference data in anthropometric surveys and +mass nutritional screenings conducted in humanitarian and refugee settings. +\emph{Maternal & Child Nutrition}, 19, e13478. Available at \url{https://doi.org/10.1111/mcn.13478} +SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief +and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. } diff --git a/man/plausibility-check.Rd b/man/plausibility-check.Rd deleted file mode 100644 index 8c78981..0000000 --- a/man/plausibility-check.Rd +++ /dev/null @@ -1,108 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_auditors.R -\name{check_plausibility_mfaz} -\alias{check_plausibility_mfaz} -\alias{check_plausibility_wfhz} -\alias{check_plausibility_muac} -\title{Check the plausibility of the data} -\usage{ -check_plausibility_mfaz(df, sex, muac, age, flags, area) - -check_plausibility_wfhz(df, sex, age, weight, height, flags, area) - -check_plausibility_muac(df, flags, sex, muac) -} -\arguments{ -\item{df}{A dataset object of class \code{data.frame} to check. It should have been -wrangled using this package's wranglers.} - -\item{sex}{A vector of class \code{numeric} of child's sex: 1 for boy and 2 for girl.} - -\item{muac}{A vector of class \code{double} of child's MUAC in centimeters.} - -\item{age}{A vector of class \code{double} of child's age in months.} - -\item{flags}{A vector of class \code{numeric} of flagged observations.} - -\item{area}{A vector of class \code{character} of the geographical location where -data was collected and for which the analysis should be aggregated.} - -\item{weight}{A vector of class \code{double} of child's weight in kilograms.} - -\item{height}{A vector of class \code{double} of child's height in centimeters.} -} -\value{ -A summarised \code{data.frame} of plausibility test results and their -respective acceptability ratings. -} -\description{ -Verify the overall acceptability of the data through a set of -structured tests around sampling and measurement-related biases in the data. -} -\examples{ - -## Check the plausibility of WFHZ data ---- - -anthro.01 |> -process_age( -svdate = "dos", -birdate = "dob", -age = age -) |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -check_plausibility_wfhz( -sex = sex, -age = age, -weight = weight, -height = height, -flags = flag_wfhz, -area = area -) - -## Check the plausibility of MFAZ data ---- - -anthro.01 |> -process_age( -svdate = "dos", -birdate = "dob", -age = age -) |> -process_muac_data( -sex = sex, -age = "age", -muac = muac, -.recode_sex = TRUE, -.recode_muac = TRUE, -unit = "cm" -) |> -check_plausibility_mfaz( -flags = flag_mfaz, -sex = sex, -muac = muac, -age = age, -area = area -) - -## Check the plausibility of the absolute MUAC values ---- - -anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) |> -check_plausibility_muac( -flags = flag_muac, -sex = sex, -muac = muac -) - -} diff --git a/man/pretty_table.Rd b/man/pretty_table.Rd deleted file mode 100644 index 4b334e9..0000000 --- a/man/pretty_table.Rd +++ /dev/null @@ -1,92 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/pretty_tables.R -\name{generate_pretty_table_mfaz} -\alias{generate_pretty_table_mfaz} -\alias{generate_pretty_table_wfhz} -\alias{generate_pretty_table_muac} -\title{Get a formatted and presentable output table for the plausibility checkers} -\usage{ -generate_pretty_table_mfaz(df) - -generate_pretty_table_wfhz(df) - -generate_pretty_table_muac(df) -} -\arguments{ -\item{df}{A summary table object of class \code{data.frame} returned by the -plausibility checkers.} -} -\value{ -A \code{data.frame} as \code{df}. Columns are renamed, values formatted and -ready to be shared. -} -\description{ -Useful to getting the output returned from the plausibility checkers -into a presentable format. It converts scientific notations to standard -notations, round values and rename columns to meaningful names. -} -\examples{ - -## Check the plausibility of WFHZ data ---- - -anthro.01 |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) |> -check_plausibility_wfhz( -sex = sex, -age = age, -weight = weight, -height = height, -flags = flag_wfhz, -area = area -) |> -generate_pretty_table_wfhz() - -## Check the plausibility of MUAC data ---- - -anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) |> -check_plausibility_muac( -flags = flag_muac, -sex = sex, -muac = muac -) |> -generate_pretty_table_muac() - -## Check the plausibility of MFAZ data ---- - -anthro.01 |> -process_age( -svdate = "dos", -birdate = "dob", -age = age -) |> -process_muac_data( -sex = sex, -age = "age", -muac = muac, -.recode_sex = TRUE, -.recode_muac = TRUE, -unit = "cm" -) |> -check_plausibility_mfaz( -flags = flag_mfaz, -sex = sex, -muac = muac, -age = age, -area = area -) |> -generate_pretty_table_mfaz() - -} diff --git a/man/prevalence.Rd b/man/prevalence.Rd index d28b34c..06e74e1 100644 --- a/man/prevalence.Rd +++ b/man/prevalence.Rd @@ -66,7 +66,7 @@ print(p) ### When .summary_by = NULL ---- anthro.03 |> -process_wfhz_data( +mw_wrangle_wfhz( sex = sex, weight = weight, height = height, @@ -81,7 +81,7 @@ compute_wfhz_prevalence( ### When .summary_by is not set to NULL ---- anthro.03 |> -process_wfhz_data( +mw_wrangle_wfhz( sex = sex, weight = weight, height = height, diff --git a/man/process_age.Rd b/man/process_age.Rd deleted file mode 100644 index 417b9a1..0000000 --- a/man/process_age.Rd +++ /dev/null @@ -1,54 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/age.R -\name{process_age} -\alias{process_age} -\title{Process child's age} -\usage{ -process_age(df, svdate = NULL, birdate = NULL, age) -} -\arguments{ -\item{df}{A dataset of class \code{data.frame} to process age from.} - -\item{svdate}{A vector of class \code{Date} for date of data collection. -Default is \code{NULL}.} - -\item{birdate}{A vector of class \code{Date} for child's date of birth. -Default is \code{NULL}.} - -\item{age}{A vector of class \code{integer} of age in months, usually estimated -using local event calendars.} -} -\value{ -A \code{data.frame} based on \code{df}. The variable \code{age} that is required to be -included in \code{df} will be filled where applicable with the age in months for -each row of data in \code{df}. A new variable for \code{df} named \code{age_days} will be -created. Values for \code{age} and \code{age_days} for children less than 6.0 and greater -than or equal to 60.0 months old will be set to \code{NA}. -} -\description{ -Process child's age for downstream analysis. This includes calculating age -in months based on the date of data collection and child's date of birth and -setting to \code{NA} the age values that are less than 6.0 and greater than or equal -to 60.0 months old. -} -\examples{ - -## A sample data ---- - -df <- data.frame( -survy_date = as.Date(c( -"2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), -birthdate = as.Date(c( -"2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), -age = c(NA, 36, NA, NA, NA) -) - -## Apply the function ---- -df |> -process_age( -svdate = "survy_date", -birdate = "birthdate", -age = age -) - -} diff --git a/man/classify_age_sex_ratio.Rd b/man/rate_agesex_ratio.Rd similarity index 63% rename from man/classify_age_sex_ratio.Rd rename to man/rate_agesex_ratio.Rd index ffb2aaa..b4ef564 100644 --- a/man/classify_age_sex_ratio.Rd +++ b/man/rate_agesex_ratio.Rd @@ -1,13 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality_raters.R -\name{classify_age_sex_ratio} -\alias{classify_age_sex_ratio} +\name{rate_agesex_ratio} +\alias{rate_agesex_ratio} \title{Rate the acceptability of the age and sex ratio test p-values} \usage{ -classify_age_sex_ratio(p) +rate_agesex_ratio(p) } \arguments{ -\item{p}{A vector of class \code{double} of the age or sex ratio test p-values.} +\item{p}{A vector of class \code{double} of the age or sex ratio test p-values. +If the class does not match the expected type, the function +will stop execution and return an error message indicating the type of mismatch.} } \value{ A vector of class \code{character} of the same length as \code{p} for the @@ -16,3 +18,4 @@ acceptability rate. \description{ Rate the acceptability of the age and sex ratio test p-values } +\keyword{internal} diff --git a/man/rate_overall_quality.Rd b/man/rate_overall_quality.Rd new file mode 100644 index 0000000..b49c90e --- /dev/null +++ b/man/rate_overall_quality.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quality_raters.R +\name{rate_overall_quality} +\alias{rate_overall_quality} +\title{Rate the overall acceptability of the data} +\usage{ +rate_overall_quality(q) +} +\arguments{ +\item{q}{A vector of class \code{numeric} or \code{integer} of data acceptability scores. +If the class does not match the expected type, the function +will stop execution and return an error message indicating the type of mismatch.} +} +\value{ +A vector of class \code{factor} of the same length as \code{q}, providing an overall +rate of acceptability of the data. +} +\description{ +Rate the overall data acceptability score into "Excellent", "Good", "Acceptable" +or "Problematic". +} +\keyword{internal} diff --git a/man/rate_propof_flagged.Rd b/man/rate_propof_flagged.Rd new file mode 100644 index 0000000..7de21f4 --- /dev/null +++ b/man/rate_propof_flagged.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quality_raters.R +\name{rate_propof_flagged} +\alias{rate_propof_flagged} +\title{Rate the acceptability of the proportion of flagged records} +\usage{ +rate_propof_flagged(p, .in = c("mfaz", "wfhz", "raw_muac")) +} +\arguments{ +\item{p}{A vector of class \code{double}, containing the proportions of flagged +records in the dataset. If the class does not match the expected type, the +function will stop execution and return an error message indicating the type +of mismatch.} + +\item{.in}{Specifies the dataset where the rating should be done, +with options: "wfhz", "mfaz", or "raw_muac".} +} +\value{ +A vector of class \code{factor} of the same length as input, for the +acceptability rate. +} +\description{ +Rate the acceptability of the proportion of flagged records in WFHZ, MFAZ, +and raw MUAC data following the SMART methodology criteria. +} +\keyword{internal} diff --git a/man/classify_skew_kurt.Rd b/man/rate_skewkurt.Rd similarity index 54% rename from man/classify_skew_kurt.Rd rename to man/rate_skewkurt.Rd index f8a08ae..ff7265e 100644 --- a/man/classify_skew_kurt.Rd +++ b/man/rate_skewkurt.Rd @@ -1,18 +1,21 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality_raters.R -\name{classify_skew_kurt} -\alias{classify_skew_kurt} +\name{rate_skewkurt} +\alias{rate_skewkurt} \title{Rate the acceptability of the skewness and kurtosis test results} \usage{ -classify_skew_kurt(sk) +rate_skewkurt(sk) } \arguments{ -\item{sk}{A vector of class \code{double} for skewness or kurtosis test results.} +\item{sk}{A vector of class \code{double} for skewness or kurtosis test results. +If the class does not match the expected type, the function +will stop execution and return an error message indicating the type of mismatch.} } \value{ -A vector of class \code{character} of the same length as \code{sk} for the +A vector of class \code{factor} of the same length as \code{sk} for the acceptability rate. } \description{ Rate the acceptability of the skewness and kurtosis test results } +\keyword{internal} diff --git a/man/rate_std.Rd b/man/rate_std.Rd new file mode 100644 index 0000000..54bca22 --- /dev/null +++ b/man/rate_std.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quality_raters.R +\name{rate_std} +\alias{rate_std} +\title{Rate the acceptability of the standard deviation} +\usage{ +rate_std(sd, .of = c("zscores", "raw_muac")) +} +\arguments{ +\item{sd}{A vector of class \code{double}, containing values of the standard deviation +from the dataset. If the class does not match the expected type, the function +will stop execution and return an error message indicating the type of mismatch.} + +\item{.of}{Specifies the dataset where the rating should be done, with options: +"wfhz", "mfaz", or "raw_muac".} +} +\value{ +A vector of class \code{factor} of the same length as input, for the +acceptability rate. +} +\description{ +Rate the acceptability of the standard deviation of WFHZ, MFAZ, and raw MUAC data. +Rating follows the SMART methodology criteria. +} +\keyword{internal} diff --git a/man/raters.Rd b/man/raters.Rd deleted file mode 100644 index 43284fb..0000000 --- a/man/raters.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/quality_raters.R -\name{classify_percent_flagged} -\alias{classify_percent_flagged} -\alias{classify_sd} -\title{Rate the acceptability of the standard deviation and the percentage of flagged -data} -\usage{ -classify_percent_flagged(p, type = c("mfaz", "whz", "crude")) - -classify_sd(sd, type = c("zscore", "crude")) -} -\arguments{ -\item{p}{A vector of class \code{double} of the proportions of flagged values in -the dataset.} - -\item{type}{A choice between "wfhz", "mfaz" and "crude" for the basis on which -the rating should be done.} - -\item{sd}{A vector of class \code{double} of the values of the standard deviation.} -} -\value{ -A vector of class \code{character} for the acceptability rate. -} -\description{ -Rate how much high is the standard deviation and the percentage of flagged -data in the dataset, hence it's acceptability. -} -\details{ -The ranges of acceptability are: "Excellent", "Good", "Acceptable", "Problematic". -The cut-offs for WFHZ are as in the \href{https://smartmethodology.org/}{SMART Methodology}. -For the MFAZ and the absolute MUAC values, the maximum acceptable limit for -outliers is 2\%, as recommended by -\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}. -Cut-offs for the standard deviation of the absolute MUAC values are based on the -\href{https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/}{IPC AMN guidelines}. -} diff --git a/man/recode_muac.Rd b/man/recode_muac.Rd index 086b0da..a993a2e 100644 --- a/man/recode_muac.Rd +++ b/man/recode_muac.Rd @@ -1,30 +1,43 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/wranglers.R +% Please edit documentation in R/utils.R \name{recode_muac} \alias{recode_muac} \title{Convert MUAC values to either centimeters or millimeters} \usage{ -recode_muac(muac, unit = c("cm", "mm")) +recode_muac(x, .to = c("cm", "mm")) } \arguments{ -\item{muac}{A vector of class \code{double} or \code{integer} of the absolute MUAC values.} +\item{x}{A vector of the raw MUAC values. The class can either be +\code{double} or \code{numeric} or \code{integer}. If different than expected, the function +will stop execution and return an error message indicating the type of mismatch.} -\item{unit}{A choice of the unit to which the MUAC values should be converted.} +\item{.to}{A choice between \code{cm} (centimeters) and \code{mm} (millimeters) for the +measuring unit to convert MUAC values to. Before to execute the conversion, +the function checks if values are in the opposite unit; in case not, the +execution stops and an error message is returned. Strive to address the error +and try again.} } \value{ -A numeric vector of the same length \code{muac}, with values converted -to the chosen unit. +A \code{numeric} vector of the same length as \code{x}, with values converted +to the chosen measuring unit. } \description{ -Recode the MUAC values to either centimeters or millimeters as required. +Convert MUAC values to either centimeters or millimeters as required. +Before to covert, the function checks if the supplied MUAC +values are in the opposite unit of the intended conversion. If not, +execution stops and an error message is returned. } \examples{ - ## Recode from millimeters to centimeters ---- -muac <- anthro.01$muac -muac_cm <- recode_muac(muac, unit = "cm") +muac_cm <- recode_muac( + x = anthro.01$muac, + .to = "cm" +) ## Using the `muac_cm` object to recode it back to "mm" ---- -muac_mm <- recode_muac(muac_cm, unit = "mm") +muac_mm <- recode_muac( + x = muac_cm, + .to = "mm" +) } diff --git a/man/score_overall_quality.Rd b/man/score_overall_quality.Rd new file mode 100644 index 0000000..01d30e5 --- /dev/null +++ b/man/score_overall_quality.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/quality_scorers.R +\name{score_overall_quality} +\alias{score_overall_quality} +\title{Get the overall acceptability score from the acceptability rate scores} +\usage{ +score_overall_quality( + cl_flags, + cl_sex, + cl_age, + cl_dps_m = NULL, + cl_dps_w = NULL, + cl_dps_h = NULL, + cl_std, + cl_skw, + cl_kurt, + .for = c("wfhz", "mfaz") +) +} +\arguments{ +\item{.for}{A choice between "wfhz" and "mfaz" for the basis on which the +calculations should be made.} +} +\value{ +A vector of class \code{numeric}, of length 1, for the overall +data quality (acceptability) score. +} +\description{ +Get the overall acceptability score from the acceptability rate scores +} +\keyword{internal} diff --git a/man/scorer.Rd b/man/scorer.Rd index 93590de..db869d0 100644 --- a/man/scorer.Rd +++ b/man/scorer.Rd @@ -1,29 +1,37 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/quality_scorers.R -\name{assign_penalty_points_flags_and_sd} -\alias{assign_penalty_points_flags_and_sd} -\alias{assign_penalty_points_age_sex_ratio} -\alias{assign_penalty_points_skew_kurt} -\title{Score the acceptability classification of the standard deviation and percentage -of flagged data test results} +\name{score_std_flags} +\alias{score_std_flags} +\alias{score_agesexr_dps} +\alias{score_skewkurt} +\title{Score the acceptability rating of the check results that constitutes the +plausibility check suite} \usage{ -assign_penalty_points_flags_and_sd(x) +score_std_flags(x) -assign_penalty_points_age_sex_ratio(x) +score_agesexr_dps(x) -assign_penalty_points_skew_kurt(x) +score_skewkurt(x) } \arguments{ -\item{x}{A vector of class \code{character} of acceptability classification of the -plausibility test results.} +\item{x}{A vector of class \code{character} containing the acceptability rate of +a given test check. If the class does not match the expected type, the function +will stop execution and return an error message indicating the type of mismatch.} } \value{ -A vector of class \code{integer} of the same length as \code{x} for the score. +A vector of class \code{integer} of the same length as \code{x} for the +acceptability score. } \description{ -Attribute a penalty point based on the acceptability classification in which -the plausibility test result falls. +Attribute a score, also known as penalty point, for a given rate of acceptability +of the standard deviation, proportion of flagged records, age and sex ratio, +skewness, kurtosis and digit preference score check results. + +The scoring criteria and thresholds follows the standards in the SMART +plausibility check. } -\details{ -The scoring criteria is as in \href{https://smartmethodology.org/}{SMART Plausibility checks}. +\references{ +SMART Initiative (2017). \emph{Standardized Monitoring and Assessment for Relief +and Transition}. Manual 2.0. Available at: \url{https://smartmethodology.org}. } +\keyword{internal} diff --git a/man/wrangler.Rd b/man/wrangler.Rd deleted file mode 100644 index 97df312..0000000 --- a/man/wrangler.Rd +++ /dev/null @@ -1,108 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/wranglers.R -\name{process_wfhz_data} -\alias{process_wfhz_data} -\alias{process_muac_data} -\title{Wrangle weight-for-height and MUAC data} -\usage{ -process_wfhz_data(df, sex, weight, height, .recode_sex = TRUE) - -process_muac_data( - df, - sex, - muac, - age = NULL, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = c("cm", "mm", "none") -) -} -\arguments{ -\item{df}{A dataset of class \code{data.frame} to wrangle data from.} - -\item{sex}{A numeric or character vector of child's sex. Code values should -be 1 or "m" for boy and 2 or "f" for girl. The variable name must be sex, -otherwise it will not work.} - -\item{weight}{A vector of class \code{double} of child's weight in kilograms.} - -\item{height}{A vector of class \code{double} of child's height in centimeters.} - -\item{.recode_sex}{Logical. Default is \code{FALSE}. Setting to \code{TRUE} assumes that -the sex variable is a character vector of values "m" for boys and "f" for girls -and will recode them to 1 and 2 respectively.} - -\item{muac}{A vector of class \code{double} or \code{integer} of the absolute MUAC values.} - -\item{age}{A double vector of child's age in months. It must be named age, -otherwise it will not work.} - -\item{.recode_muac}{Logical. Default is \code{FALSE}. Set to \code{TRUE} if MUAC values -should be converted to either centimeters or millimeters.} - -\item{unit}{A choice of the unit to which the MUAC values should be converted. -"cm" for centimeters, "mm" for millimeters and "none" to leave as it is.} -} -\value{ -A data frame based on \code{df}. New variables named \code{wfhz} and -\code{flag_wfhz}, of child's weight-for-height z-scores and flags, or \code{mfaz} and -\code{flag_mfaz}, of child's MUAC-for-age z-scores and flags, will be created. For -MUAC, when age is not supplied only \code{flag_muac} variable is created. -This refers to flags based on the absolute MUAC values as recommended by -\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).}. -} -\description{ -This function performs data wrangling by calculating weight-for-height -and MUAC-for-age z-scores, followed by the detection and flagging of outliers. -For MUAC data, if age is not supplies, z-scores do not get computed. In such -cases, outlier detection and flagging are based on the absolute MUAC values. -} -\details{ -The flagging criterion used for the WFHZ and MFAZ is as in -\href{https://smartmethodology.org/}{SMART plausibility check}. A fixed flagging -criterion is used for the absolute MUAC values. This is as recommended by -\href{https://doi.org/10.1111/mcn.13478}{Bilukha, O., & Kianian, B. (2023).} -} -\examples{ - -## An example application of `process_wfhz_data()` ---- - -anthro.01 |> -process_wfhz_data( -sex = sex, -weight = weight, -height = height, -.recode_sex = TRUE -) - -## An example application of `process_muac_data()` ---- - -### Sample data ---- -df <- data.frame( - survey_date = as.Date(c( - "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), - birthdate = as.Date(c( - "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), - age = c(NA, 36, NA, NA, NA), - sex = c("m", "f", "m", "m", "f"), - muac = c(110, 130, 300, 123, 125) - ) - - ### The application of the function ---- - - df |> - process_age( - svdate = "survey_date", - birdate = "birthdate", - age = age - ) |> - process_muac_data( - sex = sex, - age = "age", - muac = muac, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = "cm" - ) - -} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 2ff18c9..5d805d1 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -7,9 +7,11 @@ development: template: bootstrap: 5 - bootswatch: cosmo - theme: pygments + bootswatch: pulse + theme: breeze-light ganalytics: G-94YZ36XGLH + bslib: + pkgdown-nav-height: 100px navbar: bg: success @@ -27,7 +29,7 @@ navbar: - text: "Estimating the prevalence of wasting" href: articles/prevalence.html - text: "Checking if IPC Acute Malnutrition sample size requirements were met" - href: articles/sample_size.html + href: articles/ipc_amn_check.html website: icon: "fa globe fa-lg" href: https://nutriverse.io @@ -43,27 +45,11 @@ home: href: https://www.ipcinfo.org/ # reference: -# - title: Age +# - title: Description # contents: -# - compute_age_in_months -# - compute_month_to_days -# - proxess_age +# - mwana -# - title: Classifiers -# contents: -# - classify_age_sex_ratio -# - classify_overall_quality -# - classify_skew_kurt -# - classify_wasting_for_cdc_approach - -# - title: Case definitions -# contents: -# - define_wasting_cases_muac -# - define_wasting_cases_whz -# - define_wasting_cases_combined -# - define_wasting - -# - title: Datasets +# - title: Built-in datasets # contents: # - anthro.01 # - anthro.02 @@ -73,3 +59,37 @@ home: # - mfaz.02 # - wfhz.01 +# - title: Wrangle data +# contents: +# - mw_wrangle_age +# - mw_wrangle_wfhz +# - mw_wrangle_muac + +# - title: Statistical tests +# contents: +# - mw_stattest_ageratio + +# - title: Check IPC AMN sample size requirements +# contents: +# - mw_check_ipcamn_ssreq + +# - title: Check plausibility +# contents: +# - mw_plausibility_check_wfhz +# - mw_plausibility_check_mfaz +# - mw_plausibility_check_muac + +# - title: Neat output tables +# contents: +# - mw_neat_output_wfhz +# - mw_neat_output_mfaz +# - mw_neat_output_muac + +# - title: Utilities +# contents: +# - get_age_months +# - recode_muac +# - flag_outliers +# - remove_flags + + diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png deleted file mode 100644 index 328a2f1..0000000 Binary files a/pkgdown/favicon/apple-touch-icon-120x120.png and /dev/null differ diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png deleted file mode 100644 index ec72023..0000000 Binary files a/pkgdown/favicon/apple-touch-icon-152x152.png and /dev/null differ diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png deleted file mode 100644 index ab471d5..0000000 Binary files a/pkgdown/favicon/apple-touch-icon-180x180.png and /dev/null differ diff --git a/pkgdown/favicon/apple-touch-icon-60x60.png b/pkgdown/favicon/apple-touch-icon-60x60.png deleted file mode 100644 index d8299ca..0000000 Binary files a/pkgdown/favicon/apple-touch-icon-60x60.png and /dev/null differ diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png deleted file mode 100644 index 797948e..0000000 Binary files a/pkgdown/favicon/apple-touch-icon-76x76.png and /dev/null differ diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png index 31c08e2..523b112 100644 Binary files a/pkgdown/favicon/apple-touch-icon.png and b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-16x16.png b/pkgdown/favicon/favicon-16x16.png deleted file mode 100644 index 8c677c3..0000000 Binary files a/pkgdown/favicon/favicon-16x16.png and /dev/null differ diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png deleted file mode 100644 index 7f3e599..0000000 Binary files a/pkgdown/favicon/favicon-32x32.png and /dev/null differ diff --git a/pkgdown/favicon/favicon-96x96.png b/pkgdown/favicon/favicon-96x96.png new file mode 100644 index 0000000..5c6061f Binary files /dev/null and b/pkgdown/favicon/favicon-96x96.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico index e3c76de..18c79eb 100644 Binary files a/pkgdown/favicon/favicon.ico and b/pkgdown/favicon/favicon.ico differ diff --git a/pkgdown/favicon/favicon.svg b/pkgdown/favicon/favicon.svg new file mode 100644 index 0000000..d7219cf --- /dev/null +++ b/pkgdown/favicon/favicon.svg @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/pkgdown/favicon/site.webmanifest b/pkgdown/favicon/site.webmanifest new file mode 100644 index 0000000..4ebda26 --- /dev/null +++ b/pkgdown/favicon/site.webmanifest @@ -0,0 +1,21 @@ +{ + "name": "", + "short_name": "", + "icons": [ + { + "src": "/web-app-manifest-192x192.png", + "sizes": "192x192", + "type": "image/png", + "purpose": "maskable" + }, + { + "src": "/web-app-manifest-512x512.png", + "sizes": "512x512", + "type": "image/png", + "purpose": "maskable" + } + ], + "theme_color": "#ffffff", + "background_color": "#ffffff", + "display": "standalone" +} \ No newline at end of file diff --git a/pkgdown/favicon/web-app-manifest-192x192.png b/pkgdown/favicon/web-app-manifest-192x192.png new file mode 100644 index 0000000..ea1d4f5 Binary files /dev/null and b/pkgdown/favicon/web-app-manifest-192x192.png differ diff --git a/pkgdown/favicon/web-app-manifest-512x512.png b/pkgdown/favicon/web-app-manifest-512x512.png new file mode 100644 index 0000000..09cd5d7 Binary files /dev/null and b/pkgdown/favicon/web-app-manifest-512x512.png differ diff --git a/tests/testthat.R b/tests/testthat.R index ac9493d..81d5c70 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -7,6 +7,6 @@ # * https://testthat.r-lib.org/articles/special-files.html library(testthat) -library(ipccheckr) +library(mwana) -test_check("ipccheckr") +test_check("mwana") diff --git a/tests/testthat/test-age.R b/tests/testthat/test-age.R deleted file mode 100644 index e9e3098..0000000 --- a/tests/testthat/test-age.R +++ /dev/null @@ -1,126 +0,0 @@ -### Test check: compute_month_to_days() ---- -local( - { - #### Sample data ---- - age_mo <- seq(6,23) - df <- data.frame(age_mo) - - #### Expected results ---- - df[["expected_results"]] <- c( - 182.6250, 213.0625, 243.5000, 273.9375, 304.3750, 334.8125, 365.2500, - 395.6875, 426.1250, 456.5625, 487.0000, 517.4375, 547.8750, 578.3125, - 608.7500, 639.1875, 669.6250, 700.0625 - ) - - #### Observed results ---- - age_days <- compute_month_to_days(age_mo) - df[["age_days"]] <- age_days - - #### The test ---- - testthat::test_that( - "compute_month_to_days() does the job as expected", - { - testthat::expect_vector(df[["age_days"]], size = 18) - testthat::expect_equal(df[["age_days"]], df[["expected_results"]]) - } - ) - } -) - -### Test check: compute_age_in_months() ---- - -local( - { - #### Sample data ---- - surv_date <- as.Date(c( - "2024-01-05", "2024-01-05", "2024-01-05", "2024-01-08", "2024-01-08", - "2024-01-08", "2024-01-10", "2024-01-10", "2024-01-10", "2024-01-11", - "2024-01-11", "2024-01-11", "2024-01-12", "2024-01-12", "2024-01-12" - )) - bir_date <- as.Date(c( - "2022-04-04", "2021-05-01", "2023-05-24", "2017-12-12", NA, - "2020-12-12", "2022-04-04", "2021-05-01", "2023-05-24", "2020-12-12", - "2021-05-01","2020-12-12", "2022-04-04", "2021-05-01", "2023-05-24" - )) - - age <- NA_integer_ - df <- data.frame(surv_date, bir_date, age) - - #### Expected results ---- - expected_results <- c( - 21.06, 32.16, 7.43, NA, NA, 36.86, 21.22, - 32.33, 7.59, 36.96, 32.36, 36.96, 21.29, - 32.39, 7.66 - ) - - #### Observed results ---- - df <- df |> - mutate( - age_mo = compute_age_in_months(surv_date = df[["surv_date"]], - birth_date = df[["bir_date"]]) - ) - - #### The test ---- - testthat::test_that( - "compute_age_in_months() does the job as expected", - { - testthat::expect_vector(df[["age_mo"]], size = 15) - testthat::expect_equal(df[["age_mo"]], expected_results) - } - ) - } -) - -### Test check: process_age() ---- - -local( - { - #### Sample data ---- - df <- data.frame( - survy_date = as.Date(c( - "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01")), - birthdate = as.Date(c( - "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25")), - age = c(NA, 36, NA, NA, NA) - ) - - #### Expected results ---- - expected_results <- c(1461.12, 1095.84, 1748.17, 1153.07, 616.11) - - #### Observed results ---- - df <- df |> - process_age( - svdate = "survy_date", - birdate = "birthdate", - age = df$age - ) - - #### The test ---- - testthat::test_that( - "process_age() does the right calculation for age in days", - { - testthat::expect_vector(df[["age_days"]], size = 5) - testthat::expect_equal(df[["age_days"]], expected_results) - } - ) - } -) - -### Test check: age_ratio_test() ---- -local( - { - #### Observed results ---- - obsrv <- age_ratio_test(anthro.01[["age"]], .expectedP = 0.66) - - #### The test ---- - testthat::test_that( - "age_ratio_test() returns a list", - { - testthat::expect_type(obsrv, "list") - testthat::expect_vector(obsrv) - testthat::expect_named(obsrv, c("p", "observedR", "observedP") - ) - } - ) - } -) diff --git a/tests/testthat/test-ipc_amn_check.R b/tests/testthat/test-ipc_amn_check.R new file mode 100644 index 0000000..4faf6c9 --- /dev/null +++ b/tests/testthat/test-ipc_amn_check.R @@ -0,0 +1,27 @@ +# Test check: mw_check_ipcamn_ssreq() ---- +testthat::test_that( + "mw_check_ipcamn_ssreq() works as expected", + { + ## Observed results ---- + x <- mw_check_ipcamn_ssreq( + df = anthro.01, + cluster = cluster, + .source = "survey" + ) + + ## Tests ---- + testthat::expect_s3_class(object = x, class = "tbl_df", exact = FALSE) + testthat::expect_true(all(c("n_clusters", "n_obs", "meet_ipc") %in% names(x))) + testthat::expect_error( + mw_check_ipcamn_ssreq( + df = anthro.01, + cluster = weight, + .source = "survey" + ), + regexp = paste0( + "`cluster` must be of class `integer` or `character`; not ", + shQuote(class(anthro.01$weight)), ". Please try again." + ) + ) + } +) diff --git a/tests/testthat/test-plausibility_check_mfaz.R b/tests/testthat/test-plausibility_check_mfaz.R new file mode 100644 index 0000000..56976c0 --- /dev/null +++ b/tests/testthat/test-plausibility_check_mfaz.R @@ -0,0 +1,91 @@ +# Test check: mw_plausibility_check_mfaz() ---- +testthat::test_that( + "mw_plausibility_check_mfaz() works as expected", + { + ## Wrangle age and MUAC data ---- + df <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = "age", + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) + + ## Observed results ---- + pl <- df |> + mw_plausibility_check_mfaz( + flags = flag_mfaz, + sex = sex, + muac = muac, + age = age + ) + + ## Tests ---- + testthat::expect_s3_class(pl, "tbl_df") + testthat::expect_vector(pl) + testthat::expect_equal(ncol(pl), 17) + testthat::expect_equal(nrow(pl), 1) + testthat::expect_true( + all(c( + "n", "flagged", "flagged_class", "sex_ratio", + "sex_ratio_class", "age_ratio", "age_ratio_class", + "dps", "dps_class", "sd", "sd_class", "skew", "skew_class", + "kurt", "kurt_class", "quality_score", "quality_class" + ) %in% names(pl)) + ) + } +) + +# Test check: mw_neat_output_mfaz() ---- +testthat::test_that( + "mw_neat_output_mfaz() works", + { + ## Workflow ---- + quality <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + age = age, + muac = muac, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) |> + mw_plausibility_check_mfaz( + flags = flag_mfaz, + sex = sex, + muac = muac, + age = age + ) |> + mw_neat_output_mfaz() + + ## Tests ---- + testthat::expect_s3_class(quality, "tbl_df") + testthat::expect_equal(ncol(quality), 17) + testthat::expect_equal(nrow(quality), 1) + testthat::expect_true( + all(c("Total children", "Flagged data (%)", + "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", + "Age ratio (p)", "Class. of age ratio", "DPS (#)", + "Class. of DPS", "Standard Dev* (#)", "Class. of standard dev", + "Skewness* (#)", "Class. of skewness", "Kurtosis* (#)", + "Class. of kurtosis", "Overall score", "Overall quality" + ) %in% names(quality) + + ) + ) + } +) diff --git a/tests/testthat/test-plausibility_check_muac.R b/tests/testthat/test-plausibility_check_muac.R new file mode 100644 index 0000000..d6fec16 --- /dev/null +++ b/tests/testthat/test-plausibility_check_muac.R @@ -0,0 +1,72 @@ +# Test check: mw_plausibility_check_muac() ---- +testthat::test_that( + "mw_plausibility_check_muac() return a df with expected lentgh and columns", + { + ## Workflow ---- + df <- anthro.01 |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) |> + mw_plausibility_check_muac( + sex = sex, + muac = muac, + flags = flag_muac + ) + + ## Tests ---- + testthat::expect_s3_class(df, "data.frame") + testthat::expect_vector(df) + testthat::expect_equal(ncol(df), 9) + testthat::expect_equal(nrow(df), 1) + testthat::expect_true( + all(c( + "n", "flagged", "flagged_class", "sex_ratio", + "sex_ratio_class", "dps", "dps_class", + "sd", "sd_class" + ) %in% names(df) + + ) + ) + } +) + +# Test check: mw_neat_output_muac()---- +testthat::test_that( + "mw_neat_output_muac() works", + { + ## Workflow ---- + quality <- anthro.01 |> + mw_wrangle_muac( + sex = sex, + muac = muac, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) |> + mw_plausibility_check_muac( + flags = flag_muac, + sex = sex, + muac = muac + )|> + mw_neat_output_muac() + + ## Tests ---- + testthat::expect_s3_class(quality, "data.frame") + testthat::expect_equal(ncol(quality), 9) + testthat::expect_equal(nrow(quality), 1) + testthat::expect_true( + all(c( + "Total children", "Flagged data (%)", "Class. of flagged data", + "Sex ratio (p)", "Class. of sex ratio", "DPS(#)", "Class. of DPS", + "Standard Dev* (#)", "Class. of standard dev" + ) %in% names(quality) + ) + ) + + } +) diff --git a/tests/testthat/test-plausibility_check_wfhz.R b/tests/testthat/test-plausibility_check_wfhz.R new file mode 100644 index 0000000..cb3cc3e --- /dev/null +++ b/tests/testthat/test-plausibility_check_wfhz.R @@ -0,0 +1,90 @@ +# Test check: mw_plausibility_check_wfhz() ---- +testthat::test_that( + "mw_plausibility_check_wfhz() works as expected", + { + ## Wrangle WFHZ data ---- + df <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE + ) + + ## Observed plausibility ---- + x <- df |> + mw_plausibility_check_wfhz( + sex = sex, + age = age, + weight = weight, + height = height, + flags = flag_wfhz + ) + + ## Tests ---- + testthat::expect_s3_class(x, "tbl_df") + testthat::expect_vector(x) + testthat::expect_equal(ncol(x), 19) + testthat::expect_equal(nrow(x), 1) + testthat::expect_true( + all(c( + "n", "flagged", "flagged_class", "sex_ratio", + "sex_ratio_class", "age_ratio", "age_ratio_class", + "dps_wgt", "dps_wgt_class", "dps_hgt", "dps_hgt_class", + "sd", "sd_class", "skew", "skew_class", "kurt", "kurt_class", + "quality_score", "quality_class" + ) %in% names(x)) + ) + } +) + +# Test check: ---- +testthat::test_that( + "mw_neat_output_wfhz() works", + { + quality <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE + ) |> + mw_plausibility_check_wfhz( + flags = flag_wfhz, + sex = sex, + age = age, + weight = weight, + height = height + ) |> + mw_neat_output_wfhz() + + ## Tests ---- + testthat::expect_s3_class(quality, "tbl_df") + testthat::expect_equal(ncol(quality), 19) + testthat::expect_equal(nrow(quality), 1) + testthat::expect_true( + all(c("Total children", "Flagged data (%)", + "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", + "Age ratio (p)", "Class. of age ratio", "DPS weight (#)", + "Class. DPS weight", "DPS height (#)", "Class. DPS height", + "Standard Dev* (#)", "Class. of standard dev", + "Skewness* (#)", "Class. of skewness", "Kurtosis* (#)", + "Class. of kurtosis", "Overall score", "Overall quality" + ) %in% names(quality) + + ) + ) + } +) diff --git a/tests/testthat/test-pretty_tables.R b/tests/testthat/test-pretty_tables.R deleted file mode 100644 index bed7f87..0000000 --- a/tests/testthat/test-pretty_tables.R +++ /dev/null @@ -1,139 +0,0 @@ -# Test checks: Pretty outputers ------------------------------------------------ - -## Test check: generate_pretty_table_muac() ---- - -local( - { - quality <- anthro.01 |> - process_muac_data( - sex = sex, - muac = muac, - .recode_sex = TRUE, - .recode_muac = FALSE, - unit = "none" - ) |> - check_plausibility_muac( - flags = flag_muac, - sex = sex, - muac = muac - )|> - generate_pretty_table_muac() - - ### The test ---- - testthat::test_that( - "generate_pretty_table_muac() works", - { - testthat::expect_s3_class(quality, "data.frame") - testthat::expect_equal(ncol(quality), 9) - testthat::expect_equal(nrow(quality), 1) - testthat::expect_true( - all(c( - "Total children", "Flagged data (%)", "Class. of flagged data", - "Sex ratio (p)", "Class. of sex ratio", "DPS(#)", "Class. of DPS", - "Standard Dev* (#)", "Class. of standard dev" - ) %in% names(quality) - ) - ) - } - ) - } -) - -## Test check: generate_pretty_table_mfaz() ---- -local( - { - quality <- anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( - sex = sex, - age = "age", - muac = muac, - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = "cm" - ) |> - check_plausibility_mfaz( - flags = flag_mfaz, - sex = sex, - muac = muac, - age = age, - area = area - ) |> - generate_pretty_table_mfaz() - - ### The test ---- - testthat::test_that( - "generate_pretty_table_mfaz() works", - { - testthat::expect_s3_class(quality, "tbl_df") - testthat::expect_equal(ncol(quality), 18) - testthat::expect_equal(nrow(quality), 2) - testthat::expect_true( - all(c("Area", "Total children", "Flagged data (%)", - "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", - "Age ratio (p)", "Class. of age ratio", "DPS (#)", - "Class. of DPS", "Standard Dev* (#)", "Class. of standard dev", - "Skewness* (#)", "Class. of skewness", "Kurtosis* (#)", - "Class. of kurtosis", "Overall score", "Overall quality" - ) %in% names(quality) - - ) - ) - } - ) - } -) - -## Test check: generate_pretty_table_whz() ---- - -local( - { - quality <- anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_wfhz_data( - sex = sex, - weight = weight, - height = height, - .recode_sex = TRUE - ) |> - check_plausibility_wfhz( - flags = flag_wfhz, - sex = sex, - age = age, - weight = weight, - height = height, - area = area - ) |> - generate_pretty_table_wfhz() - - ### The test ---- - testthat::test_that( - "generate_pretty_table_whz() works", - { - testthat::expect_s3_class(quality, "tbl_df") - testthat::expect_equal(ncol(quality), 20) - testthat::expect_equal(nrow(quality), 2) - testthat::expect_true( - all(c("Area", "Total children", "Flagged data (%)", - "Class. of flagged data", "Sex ratio (p)", "Class. of sex ratio", - "Age ratio (p)", "Class. of age ratio", "DPS weight (#)", - "Class. DPS weight", "DPS height (#)", "Class. DPS height", - "Standard Dev* (#)", "Class. of standard dev", - "Skewness* (#)", "Class. of skewness", "Kurtosis* (#)", - "Class. of kurtosis", "Overall score", "Overall quality" - ) %in% names(quality) - - ) - ) - } - ) - } -) diff --git a/tests/testthat/test-prevalence_combined.R b/tests/testthat/test-prevalence_combined.R index 6ab6e88..c3079c0 100644 --- a/tests/testthat/test-prevalence_combined.R +++ b/tests/testthat/test-prevalence_combined.R @@ -211,17 +211,20 @@ local({ ### Get the prevalence estimates ---- p <- anthro.03 |> - process_age(age = age) |> - process_muac_data( + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( sex = sex, muac = muac, age = "age", .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" + .to = "cm" ) |> - dplyr::mutate(muac = recode_muac(muac, unit = "mm")) |> - process_wfhz_data( + dplyr::mutate(muac = recode_muac(muac, .to = "mm")) |> + mw_wrangle_wfhz( sex = sex, weight = weight, height = height, diff --git a/tests/testthat/test-prevalence_muac.R b/tests/testthat/test-prevalence_muac.R index b7c065d..742cab7 100644 --- a/tests/testthat/test-prevalence_muac.R +++ b/tests/testthat/test-prevalence_muac.R @@ -65,17 +65,19 @@ local({ local({ #### Input data ---- x <- mfaz.01 |> - process_age(age = age) |> - process_muac_data( + mw_wrangle_age( + age = age + ) |> + mw_wrangle_muac( sex = sex, muac = muac, - age = "age", + age = age, .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" + .to = "cm" ) |> subset(flag_mfaz == 0) |> - dplyr::mutate(muac = recode_muac(muac, unit = "mm")) + mutate(muac = recode_muac(muac, .to = "mm")) #### Expected results calculated in the CDC/SMART MUAC tool ---- @@ -114,18 +116,21 @@ local({ local({ #### Input data ---- x <- mfaz.01 |> - process_age(age = age) |> - process_muac_data( + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( sex = sex, muac = muac, - age = "age", + age = age, .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" + .to = "cm" ) |> subset(flag_mfaz == 0) |> - dplyr::mutate( - muac = recode_muac(muac, unit = "mm")) + mutate( + muac = recode_muac(muac, .to = "mm")) #### Expected results calculated in the CDC/SMART MUAC tool ---- diff --git a/tests/testthat/test-prevalence_wfhz.R b/tests/testthat/test-prevalence_wfhz.R index 4c77256..a4fb50d 100644 --- a/tests/testthat/test-prevalence_wfhz.R +++ b/tests/testthat/test-prevalence_wfhz.R @@ -1,7 +1,7 @@ # Test check: apply_probit_approach() ---- local({ x <- anthro.03 |> - process_wfhz_data(sex, weight, height, .recode_sex = TRUE) |> + mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> subset(district == "Metuge") p_gam <- apply_probit_approach(x$wfhz, .status = "gam") @@ -19,7 +19,7 @@ local({ # Test check: compute_probit_prevalence() ---- local({ p <- anthro.03 |> - process_wfhz_data(sex, weight, height, .recode_sex = TRUE) |> + mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> subset(district == "Metuge") |> compute_probit_prevalence() @@ -37,7 +37,7 @@ local({ # Test check: compute_probit_prevalence(.summary_by = district) ---- local({ p <- anthro.03 |> - process_wfhz_data(sex, weight, height, .recode_sex = TRUE) |> + mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> subset(district == "Metuge" | district == "Maravia") |> compute_probit_prevalence(.summary_by = district) @@ -267,7 +267,7 @@ local({ ### Get the prevalence estimates ---- p <- anthro.03 |> - process_wfhz_data(sex, weight, height, .recode_sex = TRUE) |> + mw_wrangle_wfhz(sex, weight, height, .recode_sex = TRUE) |> compute_wfhz_prevalence(.edema = edema, .summary_by = district) ### Subset the dataframe for the district "Metuge" with problematic SD ---- diff --git a/tests/testthat/test-quality_auditors.R b/tests/testthat/test-quality_auditors.R deleted file mode 100644 index 79ae891..0000000 --- a/tests/testthat/test-quality_auditors.R +++ /dev/null @@ -1,143 +0,0 @@ -# Tests checks: Quality Checkers------------------------------------------------ -## Test check: check_plausibility_mfaz() ---- - -local( - { - ### Input data ---- - df <- anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( - sex = sex, - muac = muac, - age = "age", - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = "cm" - ) |> - check_plausibility_mfaz( - flags = flag_mfaz, - sex = sex, - muac = muac, - age = age, - area = area - ) - - ### The test ---- - testthat::test_that( - "evaluate_quality_mfaz() return a df with expected lentgh and width", - { - testthat::expect_s3_class(df, "tbl_df") - testthat::expect_vector(df) - testthat::expect_equal(ncol(df), 18) - testthat::expect_equal(nrow(df), 2) - testthat::expect_true( - all(c( - "area", "n", "flagged", "flagged_class", "sex_ratio", - "sex_ratio_class", "age_ratio", "age_ratio_class", - "dps", "dps_class", "sd", "sd_class", "skew", "skew_class", - "kurt", "kurt_class", "quality_score", "quality_class" - ) %in% names(df) - - ) - ) - } - ) - } -) - -## Test check: check_plausibility_whz() ---- - -local( - { - ### Input data ---- - df <- anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_wfhz_data( - sex = sex, - weight = weight, - height = height, - .recode_sex = TRUE - ) |> - check_plausibility_wfhz( - sex = sex, - age = age, - weight = weight, - height = height, - flags = flag_wfhz, - area = area - ) - - ### The test ---- - testthat::test_that( - "check_plausibility_whz() return a df with expected lentgh and columns", - { - testthat::expect_s3_class(df, "tbl_df") - testthat::expect_vector(df) - testthat::expect_equal(ncol(df), 20) - testthat::expect_equal(nrow(df), 2) - testthat::expect_true( - all(c( - "area", "n", "flagged", "flagged_class", "sex_ratio", - "sex_ratio_class", "age_ratio", "age_ratio_class", - "dps_wgt", "dps_wgt_class", "dps_hgt", "dps_hgt_class", - "sd", "sd_class", "skew", "skew_class", "kurt", "kurt_class", - "quality_score", "quality_class" - ) %in% names(df) - - ) - ) - } - ) - } -) - -## Test check: check_plausibility_crude_muac() ---- - -local( - { - ### Input data ---- - df <- anthro.01 |> - process_muac_data( - sex = sex, - muac = muac, - age = NULL, - .recode_sex = TRUE, - .recode_muac = FALSE, - unit = "none" - ) |> - check_plausibility_muac( - sex = sex, - muac = muac, - flags = flag_muac - ) - - ### The test ---- - testthat::test_that( - "check_plausibility_muac() return a df with expected lentgh and columns", - { - testthat::expect_s3_class(df, "data.frame") - testthat::expect_vector(df) - testthat::expect_equal(ncol(df), 9) - testthat::expect_equal(nrow(df), 1) - testthat::expect_true( - all(c( - "n", "flagged", "flagged_class", "sex_ratio", - "sex_ratio_class", "dps", "dps_class", - "sd", "sd_class" - ) %in% names(df) - - ) - ) - } - ) - } -) - diff --git a/tests/testthat/test-quality_raters.R b/tests/testthat/test-quality_raters.R index 37426e0..af572cd 100644 --- a/tests/testthat/test-quality_raters.R +++ b/tests/testthat/test-quality_raters.R @@ -1,171 +1,205 @@ -# Test checks: Classifiers------------------------------------------------------ - -## Test check: classify_percent_flagged() ---- -### Test check: classify_percent_flagged() with method set to "mfaz" ---- - -local( +# Test check: rate_propof_flagged() ---- +## With `.in` set to "mfaz" ---- +testthat::test_that( + "rate_propof_flagged() with `.in` set to 'mfaz' returns the + expected output and correct rating", { - #### Sample data ---- + ### Sample data ---- props <- c( - 0.0, 0.0, 0.01, 0.015, 0.2, - 0.015, 0.016, 0.017, 0.05, 0.06, - 0.03, 0.03, 0.04, 0.000001, 0 + 0.0, 0.0, 0.01, 0.015, 0.2, 0.015, 0.016, 0.017, 0.05, 0.06, 0.03, + 0.03, 0.04, 0.000001, 0 ) - #### Expected results ---- - expected_results <- c( + ### Expected results ---- + exp <- c( "Excellent", "Excellent", "Excellent", "Good", "Problematic", "Good", "Acceptable", "Acceptable", "Problematic", "Problematic", "Problematic", "Problematic", "Problematic", "Excellent", "Excellent" ) |> factor(levels = c("Excellent", "Good", "Acceptable", "Problematic")) - #### Observed results ---- - class_flagged_data <- classify_percent_flagged(props, type = "mfaz") - - #### The test ---- - testthat::test_that( - "classify_percent_flagged() with type set to 'mfaz' returns - expected output and correct classifications", - { - testthat::expect_vector( - class_flagged_data, - ptype = factor( - c("Excellent", "Good", "Acceptable", "Problematic"), - levels = c("Excellent", "Good", "Acceptable", "Problematic") - ), - size = 15 - ) - testthat::expect_equal(class_flagged_data, expected_results) - } + obs <- rate_propof_flagged(props, .in = "mfaz") + + ### Tests ---- + testthat::expect_vector( + object = obs, + ptype = factor( + x = c("Excellent", "Good", "Acceptable", "Problematic"), + levels = c("Excellent", "Good", "Acceptable", "Problematic") + ), + size = 15 + ) + testthat::expect_equal(obs, exp) + testthat::expect_error( + rate_propof_flagged(as.character(props), .in = "mfaz"), + regexp = paste0( + "`p` must be of class double; not ", shQuote(class(as.character(props))), + ". Please try again." + ) + ) + testthat::expect_error( + rate_propof_flagged(as.integer(props), .in = "mfaz"), + regexp = paste0( + "`p` must be of class double; not ", shQuote(class(as.integer(props))), + ". Please try again." + ) ) } ) -### Test check: classify_percent_of_outliers() with method set to "crude" ---- - -local( +## With `.in` set to "wfhz" ---- +testthat::test_that( + "rate_propof_flagged() with with `.in` set to 'wfhz' returns the + expected output and correct rating", { - #### Sample data ---- + ### Sample data ---- props <- c( - 0.0, 0.0, 0.01, 0.015, 0.2, - 0.015, 0.016, 0.017, 0.05, 0.06, - 0.03, 0.03, 0.04, 0.000001, 0 + 0.0, 0.0, 0.01, 0.015, 0.2, 0.015, 0.016, 0.017, 0.05, 0.06, 0.03, + 0.03, 0.04, 0.000001, 0 ) - #### Expected results ---- - expected_results <- c( - "Excellent", "Excellent", "Excellent", "Good", "Problematic", - "Good", "Acceptable", "Acceptable", "Problematic", "Problematic", - "Problematic", "Problematic", "Problematic", "Excellent", "Excellent" + ### Expected results ---- + exp <- c( + "Excellent", "Excellent", "Excellent", "Excellent", "Problematic", + "Excellent", "Excellent", "Excellent", "Good", "Acceptable", + "Good", "Good", "Good", "Excellent", "Excellent" ) |> factor(levels = c("Excellent", "Good", "Acceptable", "Problematic")) - #### Observed results ---- - class_flagged_data <- classify_percent_flagged(props, type = "crude") - - #### The test ---- - testthat::test_that( - "classify_percent_flagged() with type set to 'crude' returns - expected output and correct classifications", - { - testthat::expect_vector( - class_flagged_data, - ptype = factor( - c("Excellent", "Good", "Acceptable", "Problematic"), - levels = c("Excellent", "Good", "Acceptable", "Problematic") - ), - size = 15 - ) - testthat::expect_equal(class_flagged_data, expected_results) - } + ### Observed results ---- + obs <- rate_propof_flagged(props, .in = "wfhz") + + ### Tests ---- + testthat::expect_vector( + object = obs, + ptype = factor( + x = c("Excellent", "Good", "Acceptable", "Problematic"), + levels = c("Excellent", "Good", "Acceptable", "Problematic") + ), + size = 15 + ) + testthat::expect_equal(obs, exp) + testthat::expect_error( + rate_propof_flagged(as.character(props), .in = "wfhz"), + regexp = paste0( + "`p` must be of class double; not ", shQuote(class(as.character(props))), + ". Please try again." + ) + ) + testthat::expect_error( + rate_propof_flagged(as.integer(props), .in = "wfhz"), + regexp = paste0( + "`p` must be of class double; not ", shQuote(class(as.integer(props))), + ". Please try again." + ) ) } ) -## Test check: classify_age_sex_ratio() ---- -local( +## With `.in` set to "raw_muac" ---- +testthat::test_that( + "rate_propof_flagged() with with `.in` set to 'raw_muac' returns the + expected output and correct rating", { ### Sample data ---- - pvalues <- c( - 0, 0, 0.01, 0.011, 0.2, - 0.015, 0.016, 0.017, 0.05, 0.06, - 0.03, 0.03, 0.04, 0.000001, 0.07 + props <- c( + 0.0, 0.0, 0.01, 0.015, 0.2, 0.015, 0.016, 0.017, 0.05, 0.06, + 0.03, 0.03, 0.04, 0.000001, 0 ) ### Expected results ---- - expected_results <- c( - "Problematic", "Problematic", "Acceptable", "Acceptable", "Excellent", - "Acceptable", "Acceptable", "Acceptable", "Acceptable", "Good", - "Acceptable", "Acceptable", "Acceptable", "Problematic", "Good" - ) + exp <- c( + "Excellent", "Excellent", "Excellent", "Good", "Problematic", + "Good", "Acceptable", "Acceptable", "Problematic", "Problematic", + "Problematic", "Problematic", "Problematic", "Excellent", "Excellent" + ) |> + factor(levels = c("Excellent", "Good", "Acceptable", "Problematic")) ### Observed results ---- - class_age_sex_ratio <- classify_age_sex_ratio(pvalues) - - ### The test ---- - testthat::test_that( - "classify_age_sex_ratio returns expected outpout and correct - classifications", - { - testthat::expect_vector( - class_age_sex_ratio, - ptype = character(), - size = 15 - ) - testthat::expect_equal(class_age_sex_ratio, expected_results) - } + obs <- rate_propof_flagged(props, .in = "raw_muac") + + ### Tests ---- + testthat::expect_vector( + object = obs, + ptype = factor( + x = c("Excellent", "Good", "Acceptable", "Problematic"), + levels = c("Excellent", "Good", "Acceptable", "Problematic") + ), + size = 15 + ) + testthat::expect_equal(obs, exp) + testthat::expect_error( + rate_propof_flagged(as.character(props), .in = "raw_muac"), + regexp = paste0( + "`p` must be of class double; not ", shQuote(class(as.character(props))), + ". Please try again." + ) + ) + testthat::expect_error( + rate_propof_flagged(as.integer(props), .in = "raw_muac"), + regexp = paste0( + "`p` must be of class double; not ", shQuote(class(as.integer(props))), + ". Please try again." + ) ) } ) -## Test check: classify_sd() ---- -### Test check: classify_sd() with method set to "zscore" ---- -local( + +# Test check: rate_std() +## With `.of` set "zscores" ---- +testthat::test_that( + "Rating of acceptability of standard deviation of z-scores works as expected", { - #### Sample data ---- - sdvalues <- c( - 1.253, 1.037, 0.876, 0.861, 0.8, - 1.083, 1.5, 0.922, 1.269, 0.797, + ### Sample data ---- + stds <- c( + 1.253, 1.037, 0.876, 0.861, 0.8, 1.083, 1.5, 0.922, 1.269, 0.797, 0.880, 0.853, 1.041, 1.247, 0.9 ) #### Expected results ---- - expected_results <- c( + exp <- c( "Problematic", "Excellent", "Good", "Good", "Problematic", "Excellent", "Problematic", "Excellent", "Problematic", "Problematic", "Good", "Good", "Excellent", "Problematic", "Good" ) #### Observed results ---- - class_sd_mfaz <- classify_sd(sdvalues, type = "zscore") - - #### The test ---- - testthat::test_that( - "classify_sd with method set to 'zscore' returns expected outpout - and correct classifications", - { - testthat::expect_vector( - class_sd_mfaz, - ptype = character(), - size = 15 - ) - testthat::expect_equal(class_sd_mfaz, expected_results) - } + s <- rate_std(stds, .of = "zscores") + + ### Tests ---- + testthat::expect_vector(s, + ptype = character(), + size = 15 + ) + testthat::expect_equal(s, exp) + testthat::expect_error( + rate_std(as.integer(stds), .of = "zscores"), + regexp = paste0( + "`sd` must be of class double; not ", shQuote(class(as.integer(stds))), + ". Please try again." + ) + ) + testthat::expect_error( + rate_std(as.character(stds), .of = "zscores"), + regexp = paste0( + "`sd` must be of class double; not ", shQuote(class(as.character(stds))), + ". Please try again." + ) ) } ) -### Test check: classify_sd() with method set to "crude" ---- - -local( +## With `.of` set to "raw_muac" ---- +testthat::test_that( + "Rating of acceptability of standard deviation of raw MUAC values works as expected", { - #### Sample data ---- - sdvalues <- c(12, 12, 13, 11, 13, 17, 14, 11, 16, 13, 15, 17, 17, 11, 20) + ### Sample data ---- + val <- c(12, 12, 13, 11, 13, 17, 14, 11, 16, 13, 15, 17, 17, 11, 20) - #### Expected results ---- - expected_results <- c( + ### Expeected results ---- + exp <- c( "Excellent", "Excellent", "Acceptable", "Excellent", "Acceptable", "Problematic", "Poor", "Excellent", "Problematic", "Acceptable", "Problematic", "Problematic", "Problematic", "Excellent", "Problematic" @@ -173,47 +207,144 @@ local( factor(levels = c("Excellent", "Acceptable", "Poor", "Problematic")) #### Observed results ---- - class_sd_muac <- classify_sd(sdvalues, type = "crude") - - #### The test ---- - testthat::test_that( - "classify_sd() returns expected outpout and correct classifications", - { - testthat::expect_vector( - class_sd_muac, - ptype = factor( - c("Excellent", "Acceptable", "Poor", "Problematic"), - levels = c("Excellent", "Acceptable", "Poor", "Problematic") - ), - size = 15 - ) - testthat::expect_equal(class_sd_muac, expected_results) - } + rm <- rate_std(val, .of = "raw_muac") + + ### Tests ---- + testthat::expect_vector( + rm, + ptype = factor( + c("Excellent", "Acceptable", "Poor", "Problematic"), + levels = c("Excellent", "Acceptable", "Poor", "Problematic") + ), + size = 15 + ) + testthat::expect_equal(rm, exp) + testthat::expect_error( + rate_std(as.integer(val), .of = "raw_muac"), + regexp = paste0( + "`sd` must be of class double; not ", shQuote(class(as.integer(val))), + ". Please try again." + ) + ) + testthat::expect_error( + rate_std(as.character(val), .of = "raw_muac"), + regexp = paste0( + "`sd` must be of class double; not ", shQuote(class(as.character(val))), + ". Please try again." + ) ) } ) -### Test check: classify_overall_quality() ---- -local( +# Test check: rate_agesex_ratio() ---- +testthat::test_that( + "Rate of the acceptability of the age and sex ratio test's p-values works well", { - #### Sample data ---- - df <- data.frame(quality_score = 29) + ### Sample data --- + pval <- c( + 0, 0, 0.01, 0.011, 0.2, 0.015, 0.016, 0.017, 0.05, 0.06, + 0.03, 0.03, 0.04, 0.000001, 0.07 + ) - #### Expected result ---- - expected_r <- dplyr::tibble(quality_class = "Problematic") + ### Expected results ---- + exp <- c( + "Problematic", "Problematic", "Acceptable", "Acceptable", "Excellent", + "Acceptable", "Acceptable", "Acceptable", "Acceptable", "Good", + "Acceptable", "Acceptable", "Acceptable", "Problematic", "Good" + ) - #### Observed results ---- - obs <- dplyr::tibble(classify_overall_quality(df)) - - #### The test ---- - testthat::test_that( - "classify_overall_quality() works", - { - testthat::expect_s3_class(obs, "tbl") - testthat::expect_equal(names(obs[[1]]), names(expected_r[[1]])) - testthat::expect_equal(obs[[1]] == "Problematic", expected_r[[1]] == "Problematic") - } + ### Observed results ---- + rp <- rate_agesex_ratio(pval) + + ### Tests ---- + testthat::expect_vector( + rp, + ptype = character(), + size = 15 + ) + testthat::expect_equal(rp, exp) + testthat::expect_error( + rate_agesex_ratio(as.character(pval)), + regexp = paste0( + "`p` must be of class double; not ", shQuote(class(as.character(pval))), + ". Please try again." + ) + ) + testthat::expect_error( + rate_agesex_ratio(as.integer(pval)), + regexp = paste0( + "`p` must be of class double; not ", shQuote(class(as.integer(pval))), + ". Please try again." + ) + ) + } +) + +# Test check: rate_skewkurt() ---- +testthat::test_that( + "Rate of acceptability of skewness and kurtosis is as expected", + { + ## Sample data ---- + sk <- seq(0.1, 0.9, 0.09) + + ## Expected results ---- + exp <- c( + "Excellent", "Excellent", "Good", "Good", "Acceptable", "Acceptable", + "Problematic", "Problematic", "Problematic" + ) |> + factor(levels = c("Excellent", "Good", "Acceptable", "Problematic")) + + ## Observed results ---- + r <- rate_skewkurt(sk) + + ## Tests ---- + testthat::expect_vector(r, factor( + levels = c("Excellent", "Good", "Acceptable", "Problematic") + ), 9) + testthat::expect_equal(r, exp) + testthat::expect_error( + rate_skewkurt(as.character(sk)), + regexp = paste0( + "`sk` must be of class double; not ", shQuote(class(as.character(sk))), + ". Please try again." + ) + ) + testthat::expect_error( + rate_skewkurt(as.integer(sk)), + regexp = paste0( + "`sk` must be of class double; not ", shQuote(class(as.integer(sk))), + ". Please try again." + ) + ) + } +) + +# Test check: classify_overall_quality() ---- +testthat::test_that( + "rate_overall_quality() works", + { + ## Sample data ---- + q <- c(29, 17, 11, 5, 13, 14, 19, 26) + + ## Expected result ---- + exp <- c( + "Problematic", "Acceptable", "Good", "Excellent", "Good", "Good", + "Acceptable", "Problematic" + ) + + ## Observed results ---- + obs <- rate_overall_quality(q) + + ## Tests ---- + testthat::expect_true(is(obs, "factor")) + testthat::expect_equal(as.character(obs), as.character(exp)) + testthat::expect_error( + rate_overall_quality(as.character(q)), + regexp = paste0( + "`q` must be of class numeric or integer; not ", + shQuote(class(as.character(q))), ". Please try again." + ) ) } ) diff --git a/tests/testthat/test-quality_scorers.R b/tests/testthat/test-quality_scorers.R index bc06430..1f4b854 100644 --- a/tests/testthat/test-quality_scorers.R +++ b/tests/testthat/test-quality_scorers.R @@ -1,72 +1,96 @@ -# Tests check: Quality scorers ------------------------------------------------- - -## Test check: assign_penalty_points_flags_and_sd() ---- +# Tests check: score_std_flags() ---- +testthat::test_that( + "Quality scorer works well", + { + ## Sample data ---- + std <- c("Problematic", "Excellent", "Good", "Good", "Problematic", + "Excellent", "Problematic", "Excellent", "Problematic", "Problematic", + "Good", "Good", "Excellent", "Problematic", "Good") + + ## Expected results ---- + exp <- c(20, 0, 5, 5, 20, 0, 20, 0, 20, 20, 5, 5, 0, 20, 5) + + ## Observed results ---- + cl <- score_std_flags(std) + + ## Tests ---- + testthat::expect_vector(cl, ptype = numeric(), 15) + testthat::expect_equal(cl, exp) + testthat::expect_error( + score_std_flags(as.numeric(exp)), + regexp = paste0( + "`x` must be of class `character` or `factor`; not ", shQuote(class(exp)), ". Please try again." + ) + ) + } +) -local( +# Test check: score_agesex_ratio() ---- +testthat::test_that( + "score_agesexr_dps() works as expected", { - ### Sample data ---- - class_sd_mfaz <- c( - "Problematic", "Excellent", "Good", "Good", "Problematic", - "Excellent", "Problematic", "Excellent", "Problematic", "Problematic", - "Good", "Good", "Excellent", "Problematic", "Good" + ## Sample data ---- + cl <- c( + "Problematic", "Problematic", "Acceptable", "Acceptable", "Excellent", + "Acceptable", "Acceptable", "Acceptable", "Acceptable", "Good", + "Acceptable", "Acceptable", "Acceptable", "Problematic", "Good" ) - ### Expected results ---- - expected_sd_mfaz_scores <- c( - 20, 0, 5, 5, 20, 0, 20, - 0, 20, 20, 5, 5, 0, 20, 5 - ) + ## Expected results ---- + e <- c(10, 10, 4, 4, 0, 4, 4, 4, 4, 2, 4, 4, 4, 10, 2) - ### Observed results ---- - scores_sd_mfaz <- assign_penalty_points_flags_and_sd(class_sd_mfaz) + ## Observed results ---- + o <- score_agesexr_dps(cl) - ### The test ---- - testthat::test_that( - "scores are equal to expected", - { - testthat::expect_equal(scores_sd_mfaz, expected_sd_mfaz_scores) - testthat::expect_type(scores_sd_mfaz, "double") - } + ## Tests ---- + testthat::expect_vector(o, numeric(), 15) + testthat::expect_equal(o, e) + testthat::expect_error( + score_agesexr_dps(e), + regexp = paste0( + "`x` must be of class `character` or `factor`; not ", shQuote(class(e)), ". Please try again." + ) ) } ) - -## Test check: assign_penalty_points_skew_kurt() ---- - -local( +# Test check: score_skewkurt() ---- +testthat::test_that( + "score_skewkurt() works fine", { - class_skew_kurt <- c( + ## Sample data ---- + sk <- c( "Excellent", "Excellent", "Good", "Excellent", "Good", "Problematic", "Acceptable", "Excellent", "Problematic", "Good", "Problematic", "Problematic", "Problematic", "Excellent", "Problematic" ) - ### Expected results ---- - expected_skew_kurt_scores <- c( - 0, 0, 1, 0, 1, 5, 3, - 0, 5, 1, 5, 5, 5, 0, 5 - ) + ## Expected results ---- + exp <- c(0, 0, 1, 0, 1, 5, 3, 0, 5, 1, 5, 5, 5, 0, 5) - ### Observed results ---- - scores_skew_kurt <- assign_penalty_points_skew_kurt(class_skew_kurt) + ## Observed results ---- + o <- score_skewkurt(sk) - ### The test ---- - testthat::test_that( - "scores are equal to expected", - { - testthat::expect_equal(scores_skew_kurt, expected_skew_kurt_scores) - testthat::expect_type(scores_skew_kurt, "double") - } + ## Tests ---- + testthat::expect_vector(o, numeric(), 15) + testthat::expect_equal(o, exp) + testthat::expect_error( + score_skewkurt(exp), + regexp = paste0( + "`x` must be of class `character` or `factor`; not ", shQuote(class(exp)), ". Please try again." + ) ) } ) -## Test check: get_quality_score() ---- -local( + +# Test check: score_overall_quality() ---- +## With `.for` set to "mfaz" ---- +testthat::test_that( + "score_overall_quality() return the correct values for a given classification", { - ### Sample data ---- + ## Sample data ---- df <- data.frame( flagged_class = "Problematic", sd_class = "Good", @@ -77,20 +101,63 @@ local( sex_ratio_class = "Problematic" ) - ### Expected results ---- - expected_score <- 41 + ## Expected results ---- + score <- 41 + + ## Observed results ---- + obs <- score_overall_quality( + cl_flags = df$flagged_class, + cl_sex = df$sex_ratio_class, + cl_age = df$age_ratio_class, + cl_dps_m = df$dps_class, + cl_std = df$sd_class, + cl_skw = df$skew_class, + cl_kurt = df$kurt_class, + .for = "mfaz" + ) - ### Observed results ---- - overall_score <- compute_quality_score(df, type = "mfaz") + ## Tests ---- + testthat::expect_vector(object = obs, ptype = numeric(), size = 1) + testthat::expect_equal(obs, score) + } +) - ### The test ---- - testthat::test_that( - "get_quality_score() return the correct values for a given classification", - { - testthat::expect_vector(overall_score) - testthat::expect_equal(overall_score, expected_score) +## With `.for` set to "mfaz" ---- +testthat::test_that( + "score_overall_quality() return the correct values for a given classification", + { + ## Sample data ---- + df <- data.frame( + flagged_class = "Good", + sd_class = "Good", + skew_class = "Excellent", + kurt_class = "Excellent", + age_ratio_class = "Good", + sex_ratio_class = "Problematic", + dps_h_class = "Excellent", + dps_w_class = "Excellent" + ) - } + ## Expected results ---- + score <- 22 + + ## Observed results ---- + obs <- score_overall_quality( + cl_flags = df$flagged_class, + cl_sex = df$sex_ratio_class, + cl_age = df$age_ratio_class, + cl_dps_m = NULL, + cl_std = df$sd_class, + cl_skw = df$skew_class, + cl_kurt = df$kurt_class, + cl_dps_w = df$dps_h_class, + cl_dps_h = df$dps_h_class, + .for = "wfhz" ) + + ## Tests ---- + testthat::expect_vector(object = obs, ptype = numeric(), size = 1) + testthat::expect_equal(obs, score) } ) + diff --git a/tests/testthat/test-sample_size.R b/tests/testthat/test-sample_size.R deleted file mode 100644 index 584002c..0000000 --- a/tests/testthat/test-sample_size.R +++ /dev/null @@ -1,27 +0,0 @@ -# Test check: Sample Size Requirement Checker ---------------------------------- - -## - -local({ - ## Observed results ---- - x <- check_sample_size( - df = anthro.01, - .group = cluster, - .data_type = "survey" - ) - - ## The test ---- - testthat::test_that( - "check_sample_size() returns a data frame object", - { - testthat::expect_s3_class(object = x, class = "tbl_df", exact = FALSE) - testthat::expect_true(all(c("groups", "n_obs", "meet_ipc") %in% names(x))) - } - ) -}) - -## Sentinel sites ---- - - -## Screening ----- - diff --git a/tests/testthat/test-stattests.R b/tests/testthat/test-stattests.R new file mode 100644 index 0000000..a4e7e44 --- /dev/null +++ b/tests/testthat/test-stattests.R @@ -0,0 +1,22 @@ +# Test check: mw_stattest_ageratio() ---- +testthat::test_that( + "mw_stattest_ageratio() works as expected", + { + ## Sample data ---- + age <- anthro.01[["age"]] + ager <- as.character(age) + ## Observed results ---- + x <- mw_stattest_ageratio(age, .expectedP = 0.66) + + ## Tests ---- + testthat::expect_type(x, "list") + testthat::expect_vector(x) + testthat::expect_named(x, c("p", "observedR", "observedP")) + testthat::expect_error( + mw_stattest_ageratio(ager, .expectedP = 0.66), + regexp = paste0( + "`age` must be of class 'numeric'; not ", shQuote(class(ager)) , ". Please try again." + ) + ) + } +) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R new file mode 100644 index 0000000..1f344cf --- /dev/null +++ b/tests/testthat/test-utils.R @@ -0,0 +1,251 @@ +# Test check: get_age_months() ---- +testthat::test_that( + "calculate_age_in_months() does the job as expected", + { + ## Sample data ---- + + df <- data.frame( + surv_date = as.Date( + c( + "2024-01-05", "2024-01-05", "2024-01-05", "2024-01-08", "2024-01-08", + "2024-01-08", "2024-01-10", "2024-01-10", "2024-01-10", "2024-01-11" + ) + ), + bir_date = as.Date( + c( + "2022-04-04", "2021-05-01", "2023-05-24", "2017-12-12", NA, + "2020-12-12", "2022-04-04", "2021-05-01", "2023-05-24", "2020-12-12" + ) + ), + svdate = c( + "2024-01-05", "2024-01-05", "2024-01-05", "2024-01-08", "2024-01-08", + "2024-01-08", "2024-01-10", "2024-01-10", "2024-01-10", "2024-01-11" + ), + birdate = c( + "2022-04-04", "2021-05-01", "2023-05-24", "2017-12-12", NA, + "2020-12-12", "2022-04-04", "2021-05-01", "2023-05-24", "2020-12-12" + ) + ) + + ## Observed results ---- + x <- df |> + mutate( + age_mo = get_age_months(dos = surv_date, dob = bir_date) + ) + + k <- get_age_months(dos = df[["surv_date"]], dob = df[["bir_date"]]) + + ## Tests ---- + testthat::expect_vector(x[["age_mo"]], size = 10) + testthat::expect_true(is.numeric(k)) + testthat::expect_error( + get_age_months(df[["surv_date"]], df[["birdate"]]), + regexp = paste0( + "`dob` must be a vector of class 'Date'; not ", + shQuote(class(df[["birdate"]])), ". Please try again." + ) + ) + testthat::expect_error( + get_age_months(df[["svdate"]], df[["bir_date"]]), + regexp = paste0( + "`dos` must be a vector of class 'Date'; not ", + shQuote(class(df[["svdate"]])), ". Please try again." + ) + ) + } +) + +# Test check: "flag_outliers()" ---- +## flag_outliers with '.from' set to "zscores" ---- +testthat::test_that( + "flag_outliers() works as expected when .from = 'zscore'", + { + ### Sample data ---- + mfaz <- seq(-0.6, 0.9, by = 0.003) |> + sample(size = 50, replace = TRUE) + + wrong_vector <- as.character(mfaz) + + ### Mean MFAZ ---- + x <- mean(mfaz) + + ### Expected results ---- + y <- ifelse(mfaz < (x - 3) | mfaz > (x + 3), 1, 0) + + ### Observed results ---- + z <- flag_outliers(mfaz, .from = "zscores") + + ### Tests ---- + testthat::expect_vector(z, size = 50) + testthat::expect_equal(z, y) + testthat::expect_true(is.numeric(z)) + testthat::expect_error( + flag_outliers(wrong_vector, .from = "zscores"), + regexp = paste0( + "`x` must be of class numeric; not ", + shQuote(class(wrong_vector)), ". Please try again." + ) + ) + } +) + + +## flag_outliers() with '.from' set to "raw_muac" ---- +testthat::test_that( + "flag_outliers() works as expected when .from = 'raw_muac'", + { + ### Sample data ---- + muac <- seq(80, 270, by = 4) |> + sample(size = 20, replace = TRUE) + + wrong_vector <- as.character(muac) + + ### Expected results ---- + x <- ifelse(muac < 100 | muac > 200, 1, 0) + + ### Observed results ---- + y <- flag_outliers(muac, .from = "raw_muac") + + ### Tests ---- + testthat::expect_vector(y, size = 20) + testthat::expect_equal(y, x) + testthat::expect_true(is.numeric(y)) + testthat::expect_error( + flag_outliers(wrong_vector, .from = "raw_muac"), + regexp = paste0( + "`x` must be of class numeric; not ", + shQuote(class(wrong_vector)), ". Please try again." + ) + ) + } +) + +# Test check: remove_flags() ----- +## With .from set to "raw_muac" ---- +testthat::test_that( + "remove_flags() assign NA's when flaggs are identified", + { + ### Sample data ---- + muac <- c( + 88, 160, 196, 260, 204, 232, 220, 128, 204, + 84, 160, 128, 88, 156, 96, 160, 204, 220, 120, 228 + ) + + wrong_vector <- as.character(muac) + + ### Expected results ---- + x <- c( + NA, 160, 196, NA, NA, NA, NA, 128, NA, NA, + 160, 128, NA, 156, NA, 160, NA, NA, 120, NA + ) + + ### Observed results ---- + w <- remove_flags(muac, "raw_muac") + + ### Tests ---- + testthat::expect_length(w, 20) + testthat::expect_equal(x, w) + testthat::expect_true(is.numeric(w)) + testthat::expect_error( + remove_flags(wrong_vector, "raw_muac"), + regexp = paste0( + "`x` must be of class numeric; not ", + shQuote(class(wrong_vector)), ". Please try again." + ) + ) + } +) + +## With .from set to "zscores" ---- +testthat::test_that( + "remove_flags() assign NA's when flaggs are identified", + { + ### Sample data without NA's ---- + zsc <- wfhz.01$wfhz + + ### A Sample data of a wrong class ---- + wrong_vector <- as.character(zsc) + + ### Observed results ---- + w <- remove_flags(zsc, "zscores") + + ### Tests ---- + testthat::expect_length(w, 303) + testthat::expect_contains(is.na(w), "TRUE") + testthat::expect_true(is.numeric(w)) + testthat::expect_error( + remove_flags(wrong_vector, "zscores"), + regexp = paste0( + "`x` must be of class numeric; not ", + shQuote(class(wrong_vector)), ". Please try again." + ) + ) + } +) + +# Test check: recode_muac() ---- +## With .to set to "cm" ---- +testthat::test_that( + "recode_muac() works well when .to = 'cm'", + { + ### Sample data ---- + x <- anthro.02$muac + + ### A sample data in centimeters ---- + e <- seq(10.3, 20.3, 0.7) + + ### Expected results ---- + p <- x / 10 + + ### Observed results ---- + w <- recode_muac(x, .to = "cm") + + #### Tests ---- + testthat::expect_vector(w, 2267) + testthat::expect_equal(w, p) + testthat::expect_true(is.numeric(w)) + testthat::expect_error( + recode_muac(as.character(x), .to = "cm"), + regexp = paste0( + "`x` must be of class 'numeric' or `integer` or 'double'; not ", + shQuote(class(as.character(x))), ". Please try again." + ) + ) + testthat::expect_error( + recode_muac(e, .to = "cm"), + regexp = paste0("MUAC values are not in millimeters. Please try again.") + ) + } +) + +## With .to set to "mm" ---- +testthat::test_that( + "recode_muac() works well when .to = 'mm'", + { + ### Sample data ---- + x <- anthro.02$muac + + ### Expected results ---- + p <- x / 10 + m <- p * 10 + + ### Observed results ---- + w <- recode_muac(p, .to = "mm") + + ### Tests ---- + testthat::expect_vector(w, 2267) + testthat::expect_equal(w, m) + testthat::expect_true(is.numeric(w)) + testthat::expect_error( + recode_muac(as.character(m), .to = "mm"), + regexp = paste0( + "`x` must be of class 'numeric' or `integer` or 'double'; not ", + shQuote(class(as.character(x))), ". Please try again." + ) + ) + testthat::expect_error( + recode_muac(x, .to = "mm"), + regexp = "MUAC values are not in centimeter. Please try again." + ) + } +) diff --git a/tests/testthat/test-wrangle_age.R b/tests/testthat/test-wrangle_age.R new file mode 100644 index 0000000..84da07e --- /dev/null +++ b/tests/testthat/test-wrangle_age.R @@ -0,0 +1,101 @@ +# Test check: mw_wrangle_age() ---- +## When date of data collection and date of birth are available ---- +testthat::test_that( + "mw_wrangle_age() works as expected", + { + ### Sample data ---- + df <- data.frame( + surdate = as.Date(c( + "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01" + )), + birdate = as.Date(c( + "2019-01-01", NA, "2018-03-20", "2019-11-05", "2021-04-25" + )), + age = c(NA, 36, NA, NA, NA), + bdate = c("2019-01-01", "2019-11-05", "2018-03-20", "2019-11-05", "2021-04-25"), + age_ = as.character(c(10, 20, 13.6, 59.7, 30.1)), + svdate = c("2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01", "2023-01-01") + ) + + ### Expected results ---- + w <- c(1461.00, 1095.75, 1748.00, 1153.00, 616.00) + + ### Observed results ---- + x <- df |> + mw_wrangle_age( + dos = surdate, + dob = birdate, + age = age, + .decimals = 2 + ) + + ### Tests ---- + testthat::expect_type(x, "list") + testthat::expect_vector(x[["age_days"]], size = 5) + testthat::expect_equal(x[["age_days"]], w) + testthat::expect_true(is.double(x[["age_days"]])) + testthat::expect_error( + mw_wrangle_age(df, surdate, birdate, age_, 2), + regexp = paste0( + "age` must be of class 'numeric'; not ", + shQuote(class(df[["age_"]])), ". Please try again." + ) + ) + } +) + +## When date of data collection and date of birth are not available ---- +testthat::test_that( + "mw_wrangle_age() works as expected", + { + ### Sample data ---- + df <- data.frame( + months = c(10, 20, 13.6, 59.7, 30.1), + month = as.character(c(10, 20, 13.6, 59.7, 30.1)) + ) + + ### Expected results ---- + w <- c(304.375, 608.750, 413.950, 1817.119, 916.169) + + ### Observed results ---- + x <- df |> + mw_wrangle_age( + dos = NULL, + dob = NULL, + age = months, + .decimals = 3 + ) + + ### Tests ---- + testthat::expect_type(x, "list") + testthat::expect_vector(x[["age_days"]], size = 5) + testthat::expect_equal(x[["age_days"]], w) + testthat::expect_true(is.double(x[["age_days"]])) + testthat::expect_error( + mw_wrangle_age(df, dos = NULL, dob = NULL, age = month, 2), + regexp = paste0( + "age` must be of class 'numeric'; not ", + shQuote(class(df[["month"]])), ". Please try again." + ) + ) + } +) + + +## Test check: mw_wrangle_age() ---- +testthat::test_that( + "mw_wrangle_age() works as expected when a vector of wrong class if supplied", + { + ### Sample data ---- + df <- data.frame(age = as.character(c(6, 36, 40, 39.6, 10))) + + ### Tests ---- + testthat::expect_error( + mw_wrangle_age(df, dos = NULL, dob = NULL, age = age, 2), + regexp = paste0( + "age` must be of class 'numeric'; not ", + shQuote(class(df[["age"]])), ". Please try again." + ) + ) + } +) diff --git a/tests/testthat/test-wrangle_muac.R b/tests/testthat/test-wrangle_muac.R new file mode 100644 index 0000000..2c111ec --- /dev/null +++ b/tests/testthat/test-wrangle_muac.R @@ -0,0 +1,124 @@ +# Test check: mw_wrangle_muac() ---- +## When age is available ---- +testthat::test_that( + "mw_wrangle_muac() works well when age is supplied", + { + ### Wrangle MUAC data ---- + df <- anthro.03 |> + mw_wrangle_age( + dos = NULL, + dob = NULL, + age = age, + .decimals = 3 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = age, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm", + .decimals = 3 + ) + + ### Tests ---- + testthat::expect_true(is(df, "tbl")) + testthat::expect_vector(df[["mfaz"]], size = 943) + testthat::expect_true(is.double(df[["mfaz"]])) + testthat::expect_vector(df[["flag_mfaz"]], size = 943) + testthat::expect_true(is.numeric(df[["flag_mfaz"]])) + } +) + +# Test check: mw_wrangle_muac() ---- +## When age = NULL ---- +testthat::test_that( + "mw_wrangle_muac() works well when age is not supplied", + { + ### Wrangle MUAC data ---- + df <- anthro.03 |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) + + ### Tests ---- + testthat::expect_true(is(df, "tbl")) + testthat::expect_vector(df[["flag_muac"]], size = 943) + testthat::expect_true(is.numeric(df[["flag_muac"]])) + } +) + +## Check if function errors when wrong input for sex is supplied ---- +testthat::test_that( + "mw_wrangle_muac() throws error when wrong sex input is supplied", + { + ### Sample data of sex code as "m" and "f" ---- + df <- data.frame( + sex1 = c("m", "m", "m", "f", "f", "f", "f", "fe", "male", "female"), + sex2 = c(1, 1, 1, 2, 2, 2, 2, 2, 3, 4), + muac = seq(100, 300, 21), + age = seq(6, 59, 5.8) + ) + + ### Tests ---- + testthat::expect_error( + df |> + mw_wrangle_age( + dos = NULL, + dob = NULL, + age = age, + .decimals = 3 + ) |> + mw_wrangle_muac( + sex = sex1, + muac = muac, + age = age, + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm", + .decimals = 3 + ), + regexp = "Values for sex should either be 'm', 'f' or 1 and 2 for male and female respectively" + ) + } +) + +## Check if function errors when wrong input for sex is supplied ---- +testthat::test_that( + "mw_wrangle_muac() throws error when wrong sex input is supplied", + { + ### Sample data of sex code as "m" and "f" ---- + df <- data.frame( + sex1 = c("m", "m", "m", "f", "f", "f", "f", "fe", "male", "female"), + sex2 = c(1, 1, 1, 2, 2, 2, 2, 2, 3, 4), + muac = seq(100, 300, 21), + age = seq(6, 59, 5.8) + ) + + ### Tests ---- + testthat::expect_error( + df |> + mw_wrangle_age( + dos = NULL, + dob = NULL, + age = age, + .decimals = 3 + ) |> + mw_wrangle_muac( + sex = sex2, + muac = muac, + age = age, + .recode_sex = FALSE, + .recode_muac = TRUE, + .to = "cm", + .decimals = 3 + ), + regexp = "Values for sex should either be 'm', 'f' or 1 and 2 for male and female respectively" + ) + } +) diff --git a/tests/testthat/test-wrangle_wfhz.R b/tests/testthat/test-wrangle_wfhz.R new file mode 100644 index 0000000..4d9a394 --- /dev/null +++ b/tests/testthat/test-wrangle_wfhz.R @@ -0,0 +1,94 @@ +# Test check: mw_wrangle_wfhz() ---- +testthat::test_that( + "mw_wrangle_wfhz() works as designed", + { + ## Wrangle WFHZ data ---- + df <- anthro.01 |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE, + .decimals = 3 + ) + + ## Weight of a wrong class ---- + df$w <- as.character(anthro.01$weight) + + ## Height of a wrong class ---- + df$h <- as.character(anthro.01$height) + + ## Tests ---- + testthat::expect_true(is(df, "tbl")) + testthat::expect_vector(df[["wfhz"]], size = 1191) + testthat::expect_true(is.double(df[["wfhz"]])) + testthat::expect_vector(df[["flag_wfhz"]], size = 1191) + testthat::expect_true(is.numeric(df[["flag_wfhz"]])) + testthat::expect_error( + mw_wrangle_wfhz(df, sex, w, height, TRUE), + regexp = paste0( + "`weight` must be of class 'double'; not ", + shQuote(class(df[["w"]])), ". Please try again." + ) + ) + testthat::expect_error( + mw_wrangle_wfhz(df, sex, weight, h, TRUE), + regexp = paste0( + "`height` must be of class 'double'; not ", + shQuote(class(df[["h"]])), ". Please try again." + ) + ) + } +) + +## Check if function errors when wrong input for sex is supplied ---- +testthat::test_that( + "mw_wrangle_wfhz() throws error when a wrong input is supplied", + { + ### Sample data of sex code as "m" and "f" ---- + df <- data.frame( + sex1 = c("m", "m", "m", "f", "f", "f", "f", "fe", "male", "female", "f"), + sex2 = c(1, 1, 1, 2, 2, 2, 2, 2, 3, 4, 1), + ht = seq(70, 100, 2.9), + wt = seq(6, 14, 0.8) + ) + + ### Tests ---- + testthat::expect_error( + df |> + mw_wrangle_wfhz( + sex = sex1, + weight = wt, + height = ht, + .recode_sex = TRUE + ), + regexp = "Values for sex should either be 'm', 'f' or 1 and 2 for male and female respectively" + ) + } +) + +## Check if function errors when wrong input for sex is supplied ---- +testthat::test_that( + "mw_wrangle_wfhz() throws error when a wrong input is supplied", + { + ### Sample data of sex code as "m" and "f" ---- + df <- data.frame( + sex1 = c("m", "m", "m", "f", "f", "f", "f", "fe", "male", "female", "f"), + sex2 = c(1, 1, 1, 2, 2, 2, 2, 2, 3, 4, 1), + ht = seq(70, 100, 2.9), + wt = seq(6, 14, 0.8) + ) + + ### Tests ---- + testthat::expect_error( + df |> + mw_wrangle_wfhz( + sex = sex2, + weight = wt, + height = ht, + .recode_sex = TRUE + ), + regexp = "Values for sex should either be 'm', 'f' or 1 and 2 for male and female respectively" + ) + } +) diff --git a/tests/testthat/test-wranglers.R b/tests/testthat/test-wranglers.R deleted file mode 100644 index 425674c..0000000 --- a/tests/testthat/test-wranglers.R +++ /dev/null @@ -1,236 +0,0 @@ -# Test check for input data processors ----------------------------------------- - -## Test check: "flag_outliers()" ---- -### Test check: flag_outliers with 'type' set to "zscore" ---- - -local( - { - #### Sample data ---- - mfaz <- seq(-0.6, 0.9, by = 0.003) |> - sample(size = 50, replace = TRUE) - - #### Mean of mfaz ---- - mean_mfaz <- mean(mfaz) - - #### Expected results ---- - expected_results <- ifelse( - mfaz < (mean_mfaz - 3) | mfaz > (mean_mfaz + 3), 1, 0 - ) - - #### Observed results ---- - flags_mfaz <- flag_outliers(mfaz, type = "zscore") - - #### The test ---- - testthat::test_that( - "flag_outliers() returns expected output", - { - testthat::expect_vector(flags_mfaz, size = 50) - testthat::expect_equal(flags_mfaz, expected_results) - } - ) - } -) - -### Test check: flag_outliers() with 'type' set to "crude" ---- -local( - { - #### Sample data ---- - crude <- seq(80, 270, by = 4) |> - sample(size = 20, replace = TRUE) - - #### Expected results ---- - expected_results <- ifelse(crude < 100 | crude > 200, 1, 0) - - #### Observed results ---- - flags_crude <- flag_outliers(crude, type = "crude") - - #### The test ---- - testthat::test_that( - "flag_outliers with 'type' set for 'crude' return the correct output", - { - testthat::expect_vector(flags_crude, size = 20) - testthat::expect_equal(flags_crude, expected_results) - } - ) - } -) - -## Test check: remove_flags() ----- -### Test check: remove_flags() with method set to "mfaz"---- -local({ - #### Observed results ---- - processed_df <- anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( - sex = sex, - muac = muac, - age = "age", - .recode_sex = TRUE, - .recode_muac = TRUE, - unit = "cm" - ) - - processed_df[["not_flag"]] <- remove_flags(processed_df[["mfaz"]], unit = "zscore") - - #### The test ---- - testthat::test_that( - "remove_flags() assign NA's when flaggs are identified", - { - with( - processed_df, - testthat::expect_length(processed_df[["not_flag"]], 1191) - ) - } - ) -}) - -### Test check: remove_flags() with method set to "crude"---- -local({ - #### Observed results ---- - processed_df <- anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( - sex = sex, - muac = muac, - age = NULL, - .recode_sex = TRUE, - .recode_muac = FALSE, - unit = "none" - ) - processed_df[["not_flag"]] <- remove_flags(processed_df[["muac"]], unit = "crude") - - #### The test ---- - testthat::test_that( - "remove_flags() assign NA's when flaggs are identified", - { - with( - processed_df, - testthat::expect_length(processed_df[["not_flag"]], 1191) - ) - } - ) -}) - -### Test check: recode_muac() ---- -local( - { - #### Sample data ---- - muac <- seq(90, 250, by = 4) - - #### Expected results ---- - expected_results <- muac / 10 - - ### Observed results ---- - muac_cm <- recode_muac(muac, unit = "cm") - - #### The test ---- - testthat::test_that( - "recode_muac() works well", - { - testthat::expect_vector(muac_cm, size = 41) - testthat::expect_equal(muac_cm, expected_results) - } - ) - } -) - -### Test check: recode_muac() ---- -local( - { - #### Sample data ---- - muac <- seq(9.0, 25.0, by = 0.2) - - #### Expected results ---- - expected_results <- muac * 10 - - ### Observed results ---- - muac_mm <- recode_muac(muac, unit = "mm") - - #### The test ---- - testthat::test_that( - "recode_muac() works well", - { - testthat::expect_vector(muac_mm, size = 81) - testthat::expect_equal(muac_mm, expected_results) - } - ) - } -) - -### Test check: process_muac_data() ---- - -local( - { - #### Sample data ---- - df <- data.frame( - sex = c(2, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1), - muac = c(165, 222, 176, 150, 219, 193, 196, 203, 203, - 129, 97, 158, 156, 215, 214), - age <- c(13, 56, 53, 23, 43, 55, 25,16, 44, 19, 45, 36, 11, 31,26) - ) - - #### Expected results ---- - expected_results <- c( - 1.757, 3.057, 0.902, 0.161, 3.786, 1.892, 3.249, 4.217, - 2.651, -1.484, -5.529, 0.117, 1.151, 4.151, 4.415 - ) - - #### Observed results ---- - df <- df |> - process_age( - svdate = NULL, - birdate = NULL, - age = age - ) |> - process_muac_data( - sex = sex, - muac = muac, - age = "age", - .recode_sex = FALSE, - .recode_muac = TRUE, - unit = "cm" - ) - - #### The Test ---- - testthat::test_that( - "process_muac_data() works well", - { - testthat::expect_vector(df[["mfaz"]], size = 15) - testthat::expect_vector(df[["flag_mfaz"]], size = 15) - testthat::expect_equal(df[["mfaz"]], expected_results) - } - ) - } -) - -### Test check: process_wfhz_data() ---- -local( - { - df <- anthro.01 |> - process_wfhz_data( - sex = sex, - weight = weight, - height = height, - .recode_sex = TRUE - ) - - #### The Test ---- - testthat::test_that( - "process_wfhz_data() works as designed", - { - testthat::expect_vector(df[["wfhz"]], size = 1191) - testthat::expect_vector(df[["flag_wfhz"]], size = 1191) - testthat::expect_vector(is.numeric(df[["mfaz"]])) - } - ) - - } -) diff --git a/vignettes/sample_size.qmd b/vignettes/ipc_amn_check.qmd similarity index 51% rename from vignettes/sample_size.qmd rename to vignettes/ipc_amn_check.qmd index 8fcdaec..e5f6f4d 100644 --- a/vignettes/sample_size.qmd +++ b/vignettes/ipc_amn_check.qmd @@ -10,11 +10,11 @@ vignette: > ```{r} #| label: global-setup -library(ipccheckr) +library(mwana) ``` -Evidence on the prevalence of acute malnutrition used in the IPC Acute Malnutrition (IPC AMN) can come from different sources with data collected in different ways: representative surveys, screenings or community-based surveillance system (known as sentinel sites). The IPC set minimum sample size requirements for each of these sources. Details can be read from the [IPC Manual version 3.1 ](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/). +Evidence on the prevalence of acute malnutrition used in the IPC Acute Malnutrition (IPC AMN) can come from different sources, namely: representative surveys, screenings or community-based surveillance system (known as sentinel sites). The IPC set minimum sample size requirements for each of these sources. Details can be read from the [IPC Manual version 3.1 ](https://www.ipcinfo.org/ipcinfo-website/resources/ipc-manual/en/). -In the IPC AMN analysis workflow, the very first step of a data analyst is to check if these requirements were met. This is done for each area meant to be included in the *de facto* IPC AMN analysis. For this, `mwana` provides a handy function: `check_sample_size()`. +In the IPC AMN analysis workflow, the very first step of a data analyst is to check if these requirements were met. This is done for each area meant to be included in the IPC AMN analysis. For this, `mwana` provides a handy function: `mw_check_ipcamn_ssreq()`. To demonstrate its usage, we will use a built-in sample dataset `anthro.01`. @@ -35,17 +35,17 @@ head(anthro.01) `anthro.01` contains anthropometry data from SMART surveys from anonymized locations. We can check further details about the dataset by calling `help(anthro.01)` in `R` console. -Now that we are acquainted with the dataset, we can now proceed to execute the task. To achieve this we simply do: +Now that we are acquainted with the dataset, we can proceed to execute the task. To achieve this, we simply do: ```{r} #| label: check #| echo: true #| eval: false -check_sample_size( +mw_check_ipcamn_ssreq( df = anthro.01, - .group = cluster, - .data_type = "survey" + cluster = cluster, + .source = "survey" ) ``` @@ -55,10 +55,10 @@ Or we can also choose to chain the data object to the function using the pipe op #| echo: true #| eval: false -anthro.01 |> - check_sample_size( - .group = cluster, - .data_type = "survey" +anthro.01 |> + mw_check_ipcamn_ssreq( + cluster = cluster, + .source = "survey" ) ``` @@ -67,20 +67,20 @@ Either way, the returned output will be: #| label: view_check #| echo: false -anthro.01 |> - check_sample_size( - .group = cluster, - .data_type = "survey" +anthro.01 |> + mw_check_ipcamn_ssreq( + cluster = cluster, + .source = "survey" ) ``` A table (of class `tibble`) is returned with three columns: - + Column `groups` counts the number of unique cluster ID's in the dataset. - + Column `n_obs` counts the number of children the dataset. + + Column `n_clusters` counts the number of unique cluster IDs in the dataset. + + Column `n_obs` counts the number of children in the dataset. + Column `meet_ipc` indicates whether the IPC AMN sample size requirements (for surveys in this case) were met or not. -The above output is not quite useful yet, as we often deal with multi-area dataset. We can get a summarised table by area as follows: +The above output is not quite useful yet as we often deal with multiple area dataset. We can get a summarised table by area as follows: ```{r} #| label: group_by #| echo: true @@ -90,11 +90,11 @@ The above output is not quite useful yet, as we often deal with multi-area datas library(dplyr) ## Use the group_by() function ---- -anthro.01 |> - group_by(area) |> - check_sample_size( - .group = cluster, - .data_type = "survey" +anthro.01 |> + group_by(area) |> + mw_check_ipcamn_ssreq( + cluster = cluster, + .source = "survey" ) ``` @@ -107,12 +107,12 @@ This will return: library(dplyr) -anthro.01 |> - group_by(area) |> - check_sample_size( - .group = cluster, - .data_type = "survey" +anthro.01 |> + group_by(area) |> + mw_check_ipcamn_ssreq( + cluster = cluster, + .source = "survey" ) ``` -For screening or sentinel site-based data, we approach the task the same way; we only have to change the `.data_type` parameter to "screening" or to "ssite" as appropriate, as well as to supply `.group` with the right column name of the sub-areas inside the main area (villages, localities, comunas, communities, etc). +For screening or sentinel site-based data, we approach the task the same way; we only have to change the `.source` parameter to "screening" or to "ssite" as appropriate, as well as to supply `cluster` with the right column name of the sub-areas inside the main area (villages, localities, comunas, communities, etc). diff --git a/vignettes/plausibility.qmd b/vignettes/plausibility.qmd index d8e8315..ed4e9e0 100644 --- a/vignettes/plausibility.qmd +++ b/vignettes/plausibility.qmd @@ -11,32 +11,32 @@ vignette: > #| label: setup #| collapse: true -library(ipccheckr) +library(mwana) ``` # Introduction Plausibility check is a tool that evaluates the overall quality and acceptability of anthropometric data to ensure its suitability for informing decision-making process. -`mwana` provides a set of handy functions to facilitate this evaluation. These functions allow users to assess the acceptability of weight-for-height z-score (WFHZ) and mid upper-arm circumference (MUAC) data. The evaluation of the latter can be done on the basis of MUAC-for-age z-score (MFAZ) or the absolute MUAC values. +`mwana` provides a set of handy functions to facilitate this evaluation. These functions allow users to assess the acceptability of weight-for-height z-score (WFHZ) and mid upper-arm circumference (MUAC) data. The evaluation of the latter can be done on the basis of MUAC-for-age z-score (MFAZ) or raw MUAC values. -In this vignette, we will learn how to use these functions and when to consider using MFAZ plausibility check over the one based on the absolute MUAC values. For demonstration, we will use a `mwana` built-in sample dataset named `anthro.01`. This dataset is about district level SMART surveys from anonymized locations. You can read more about it by calling `?anthro.01` in `R`console. +In this vignette, we will learn how to use these functions and when to consider using MFAZ plausibility check over the one based on raw MUAC values. For demonstration, we will use a `mwana` built-in sample dataset named `anthro.01`. This dataset is contains district level SMART surveys from anonymized locations. Do `?anthro.01` in `R` console to read more about the dataset. We will begin the demonstration with the plausibility check that you are most familiar with and then proceed to the ones you are less familiar with. -## Plausibility check on WFHZ data +## Plausibility check of WFHZ data -We check the plausibility of WFHZ data by calling the `check_plausibility_wfhz()` function. Before doing that, we need ensure the data is in the right "shape and format" that is accepted and understood by the function. Don't worry, you will soon learn how to get there. But first, let's take a moment to help you understand some key features about this function. +We check the plausibility of WFHZ data by calling the `mw_plausibility_check_wfhz()` function. Before doing that, we need ensure the data is in the right "shape and format" that is accepted and understood by the function. Don't worry, you will soon learn how to get there. But first, let's take a moment to walk you through some key features about this function. -`check_plausibility_wfhz()` is a replica of the plausibility check in ENA for SMART software of the [SMART Methodology](https://smartmethodology.org/). Under the hood, it runs the same statistical tests you already know from SMART, and it applies the same rating and scoring criteria as well. Beware though that there some small differences to have in mind: +`mw_plausibility_check_wfhz()` is a replica of the plausibility check in ENA for SMART software of the [SMART Methodology](https://smartmethodology.org/). Under the hood, it runs the same statistical tests you already know from SMART, and it applies the same rating and scoring criteria. Beware though that there are some small differences to have in mind: - (i) `check_plausibility_wfhz()` does not include MUAC in its test suit. This is simply due the fact that now you can run a more comprehensive test suit on MUAC; + (i) `mw_plausibility_check_wfhz()` does not include MUAC in its test suite. This is simply due the fact that now you can run a more comprehensive test suite for MUAC. - (ii) `check_plausibility_wfhz()` allows user to run checks on a multiple-area dataset at once, without having to repeat the same workflow over and over again for the number of areas the data holds. + (ii) `mw_plausibility_check_wfhz()` allows user to run checks on a multiple area dataset at once, without having to repeat the same workflow over and over again for the number of areas the data holds. That is it! Now we can begin delving into the "how to". -It is always a good practice to start off by inspecting the dataset. Let us check the first 6 rows of the dataset: +It is always a good practice to start off by inspecting the dataset. Let's check the first 6 rows of our dataset: ```{r} #| label: data @@ -54,58 +54,58 @@ head(anthro.01) head(anthro.01) ``` -As you see, the dataset has eleven variables and way how their respective values are presented. This is useful to inform the data wrangling workflow. - -Now let's wrangle the data. +We can see that the dataset has eleven variables, and the way how their respective values are presented. This is useful to inform the data wrangling workflow. ### Data wrangling -As mentioned somewhere above, before we supply data object to `check_plausibility_wfhz()`, we need to wrangle it first. This task is executed by `process_age()` and `process_wfhz_data()`. Read more about the technical documentation by doing this `help(process_age)` or `help(process_wfhz_data)` in `R` console. +As mentioned somewhere above, before we supply a data object to `mw_plausibility_check_wfhz()`, we need to wrangle it first. This task is executed by `mw_wrangle_age()` and `mw_wrangle_wfhz()`. Read more about the technical documentation by doing `help(mw_wrangle_age)` or `help(mw_wrangle_wfhz)` in `R` console. #### Age {#sec-age} -We use `process_age()` to calculate child's age in months based on the date of data collection and child's date of birth. This is done as follows: +We use `mw_wrangle_age()` to calculate child's age in months based on the date of data collection and child's date of birth. This is done as follows: ```{r} -#| label: process_age +#| label: wrangle_age #| echo: true #| eval: false -age_mo <- process_age( - df = anthro.01, - svdate = "dos", - birdate = "dob", - age = age -) +age_mo <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) ``` This will return: ```{r} -#| label: view_process_age +#| label: view_age #| echo: false #| eval: true -age_mo <- anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age +age_mo <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 ) head(age_mo) ``` -#### Wrangling all other required variables +#### Wrangling all other remaining variables -For this, we call `process_wfhz_data()` as follows: +For this, we call `mw_wrangle_wfhz()` as follows: ```{r} #| label: wrangle_wfhz_data #| echo: true #| eval: false -wrangled_df <- anthro.01 |> - process_wfhz_data( +wrangled_df <- anthro.01 |> + mw_wrangle_wfhz( sex = sex, weight = weight, height = height, @@ -119,7 +119,7 @@ In this example, the argument `.recode_sex` was set to `TRUE`. That is because u If by any chance your sex variable is coded in any other different way than aforementioned, then you will have to recode it outside `mwana` utilities and then set `.recode_sex` accordingly. ::: -Under the hood, after recoding (or not) the sex variables, `process_wfhz_data()` computes the z-scores, then identifies outliers and flags them and adds them to the dataset through the `wfhz` and `flag_wfhz` variables. We can see this below: +Under the hood, after recoding (or not) the sex variables, `mw_wrangle_wfhz()` computes the z-scores, then identifies outliers and adds them to the dataset. Two new variables (`wfhz` and `flag_wfhz`) are created and added to the dataset. We can see this below: ```{r} #| label: view_df @@ -127,66 +127,65 @@ Under the hood, after recoding (or not) the sex variables, `process_wfhz_data()` #| eval: true #| include: true -wrangled_df <- anthro.01 |> - process_wfhz_data( +wrangled_df <- anthro.01 |> + mw_wrangle_wfhz( sex = sex, weight = weight, height = height, .recode_sex = TRUE ) -x <- wrangled_df |> +x <- wrangled_df |> dplyr::select(area, wfhz, flag_wfhz) head(x) ``` -### On to *de facto* WFHZ plausibility check +### On to *de facto* plausibility check of WFHZ data -We now can check the plausibility of our data. We do that by calling `check_plausibility_wfhz()` function as demonstrated below: +We can check the plausibility of our data by calling `mw_plausibility_check_wfhz()` function as demonstrated below: ```{r} #| label: pl_wfhz #| echo: true #| eval: false -x <- wrangled_df |> - check_plausibility_wfhz( +x <- wrangled_df |> + mw_plausibility_check_wfhz( sex = sex, age = age, weight = weight, height = height, - flags = flag_wfhz, - area = area # This is variable name with the geographical areas to summarise at. + flags = flag_wfhz ) ``` -Or we can chain all previous functions using the pipe operator: +Or we can chain all previous functions in this way: ```{r} #| label: pipe_workflow #| echo: true #| eval: false -x <- anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_wfhz_data( +x <- anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( sex = sex, weight = weight, height = height, .recode_sex = TRUE - ) |> - check_plausibility_wfhz( + ) |> + mw_plausibility_check_wfhz( sex = sex, age = age, weight = weight, height = height, - flags = flag_wfhz, - area = area. + flags = flag_wfhz ) ``` @@ -197,105 +196,172 @@ The returned output is: #| echo: false #| eval: true -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_wfhz_data( +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( sex = sex, weight = weight, height = height, .recode_sex = TRUE - ) |> - check_plausibility_wfhz( + ) |> + mw_plausibility_check_wfhz( sex = sex, age = age, weight = weight, height = height, - flags = flag_wfhz, - area = area + flags = flag_wfhz ) ``` -As we can see, the returned output is a summary table of statistics and rating grouped by district. I think by now you do realize that this output is kind of what you used to construct through the extraction of details from each plausibility check reports from ENA for SMART software. You would do this district by district, jumping from ENA itself to Microsoft Word then to Excel spreadsheet - where you used to compile a table like the above back-and-forth. - -We can make this output more pretty and ready to share. We can achieve this by chaining `generate_pretty_table_wfhz()` to the previous pipeline: +As we can see, the returned output is a summary table of statistics and ratings. +We can neat it for more clarity and readability. We can achieve this by chaining `mw_neat_output_wfhz()` to the previous pipeline: ```{r} -#| label: pretty_table +#| label: neat_table #| echo: true #| eval: false -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_wfhz_data( +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( sex = sex, weight = weight, height = height, .recode_sex = TRUE - ) |> - check_plausibility_wfhz( + ) |> + mw_plausibility_check_wfhz( sex = sex, age = age, weight = weight, height = height, - flags = flag_wfhz, - area = area - ) |> - generate_pretty_table_wfhz() - + flags = flag_wfhz + ) |> + mw_neat_output_wfhz() ``` This will give us: ```{r} -#| label: view_pretty_table +#| label: view_neat_table #| echo: false #| eval: true -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_wfhz_data( +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( sex = sex, weight = weight, height = height, .recode_sex = TRUE - ) |> - check_plausibility_wfhz( + ) |> + mw_plausibility_check_wfhz( sex = sex, age = age, weight = weight, height = height, - flags = flag_wfhz, - area = area - ) |> - generate_pretty_table_wfhz() - + flags = flag_wfhz + ) |> + mw_neat_output_wfhz() ``` An already formatted table, with scientific notations converted to standard notations, etc. +When working on a multiple area dataset, for instance districts, we can check the plausibility of all districts in the dataset at once by simply chaining the previous workflow with `group_by()` function from the `dplyr` package: + +```{r} +#| label: pl_group_by +#| echo: true +#| eval: false +#| message: false + +## Load library ---- +library(dplyr) + +## The workflow ---- +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE + ) |> + group_by(area) |> + mw_plausibility_check_wfhz( + sex = sex, + age = age, + weight = weight, + height = height, + flags = flag_wfhz + ) |> + mw_neat_output_wfhz() +``` + +This will return the following: + +```{r} +#| label: pl_group_by_view +#| echo: false +#| eval: true +#| message: false + +library(dplyr) + +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_wfhz( + sex = sex, + weight = weight, + height = height, + .recode_sex = TRUE + ) |> + group_by(area) |> + mw_plausibility_check_wfhz( + sex = sex, + age = age, + weight = weight, + height = height, + flags = flag_wfhz + ) |> + mw_neat_output_wfhz() +``` + At this point, you have reached the end of your workflow 🎉 . -## Plausibility check on MFAZ data +## Plausibility check of MFAZ data -We will assess the plausibility of MUAC data through MFAZ if we have age variable available in our data. +We will assess the plausibility of MUAC data through MFAZ if we have age variable available in our dataset. :::{.callout-note} -The MFAZ plausibility check was built based on the insights gotten from [Bilukha, O., & Kianian, B. (2023)](https://doi.org/10.1111/mcn.13478) research presented at the [2023 High-Level Technical Assessment Workshop](https://smartmethodology.org/wp-content/uploads/2024/03/2023-High-level-Technical-Assessment-Workshop-Report.pdf) held in Nairobi, Kenya. Results from this research suggested a feasibility of applying the similar plausibility check as that in WFHZ for MFAZ, with a difference in the amount of flags to be considered: maximum of 2%. +The plausibility check for MFAZ data was built based on the insights gotten from [Bilukha, O., & Kianian, B. (2023)](https://doi.org/10.1111/mcn.13478) research presented at the [2023 High-Level Technical Assessment Workshop](https://smartmethodology.org/wp-content/uploads/2024/03/2023-High-level-Technical-Assessment-Workshop-Report.pdf) held in Nairobi, Kenya. Results from this research suggested a feasibility of applying the similar plausibility check as that in WFHZ for MFAZ, with a difference in the amount of flags to be considered: maximum of 2%. ::: -We can run MFAZ plausibility check by calling `check_plausibility_mfaz()`. As in WFHZ, we first need to ensure that the data is in the right shape and format that is accepted and understood by the function. The workflow starts with processing age; for this, we approach the same way as in @sec-age. +We can run MFAZ plausibility check by calling `mw_plausibility_check_mfaz()`. As in WFHZ, we first need to ensure that the data is in the right shape and format that is accepted and understood by the function. The workflow starts with processing age; for this, we approach the same way as in @sec-age. :::{.callout-important} ## Age ratio test in MFAZ @@ -306,30 +372,31 @@ This is different in MFAZ. The test is done on children aged 6 to 23 months over ### Wrangling MFAZ data {#sec-wrangle_mfaz} -This is the job of `process_muac_data()` function. We use it as follows: +This is the job of `mw_wrangle_muac()` function. We use it as follows: ```{r} #| label: wrangle_mfaz_data #| echo: true #| eval: false -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( sex = sex, - muac = muac, + muac = muac, age = "age", .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" + .to = "cm" ) ``` -Just as in WFHZ wrangler, under the hood, `process_muac_data()` computes the z-scores then identifies outliers and flags them. These are stored in the `mfaz` and `flag_mfaz` variables that are created and added to the dataset. +Just as in WFHZ wrangler, under the hood, `mw_wrangle_muac()` computes the z-scores then identifies outliers and flags them. These are stored in the `mfaz` and `flag_mfaz` variables that are created and added to the dataset. The above code returns: @@ -338,29 +405,30 @@ The above code returns: #| echo: false #| eval: true -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( sex = sex, - muac = muac, + muac = muac, age = "age", .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" + .to = "cm" ) ``` :::{.callout-note} -`process_muac_data()` accepts MUAC values in centimeters. This is why it takes the arguments `.recode_muac` and `unit` to control whether there is need to transform the variable `muac`function or not. Read the function documentation to learn how to control these two arguments. +`mw_wrangle_muac()` accepts MUAC values in centimeters. This is why it takes the arguments `.recode_muac` and `.to` to control whether there is need to transform the variable `muac`function or not. Read the function documentation to learn how to control these two arguments. ::: -### On to *de facto* MFAZ plausibility check +### On to *de facto* plausibility check of MFAZ data -We achieve this by calling the `check_plausibility_mfaz()` function: +We achieve this by calling the `mw_plausibility_check_mfaz()` function: ```{r} #| label: pl_mfaz @@ -370,28 +438,28 @@ We achieve this by calling the `check_plausibility_mfaz()` function: ## Load dplyr library ---- library(dplyr) -## Check plausibility ---- -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( +## The workflow ---- +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( sex = sex, - muac = muac, + muac = muac, age = "age", .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" - ) |> - mutate(muac = recode_muac(muac, unit = "mm")) |> - check_plausibility_mfaz( + .to = "cm" + ) |> + mutate(muac = recode_muac(muac, .to = "mm")) |> + mw_plausibility_check_mfaz( sex = sex, muac = muac, age = age, - flags = flag_mfaz, - area = area + flags = flag_mfaz ) ``` @@ -401,32 +469,35 @@ And this will return: #| label: view_pl_mfaz #| echo: false #| eval: true +#| message: false -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( +library(dplyr) + +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( sex = sex, - muac = muac, + muac = muac, age = "age", .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" - ) |> - dplyr::mutate(muac = recode_muac(muac, unit = "mm")) |> - check_plausibility_mfaz( + .to = "cm" + ) |> + mutate(muac = recode_muac(muac, .to = "mm")) |> + mw_plausibility_check_mfaz( sex = sex, muac = muac, age = age, - flags = flag_mfaz, - area = area + flags = flag_mfaz ) ``` -We can also make this output pretty and ready to share it out. We just need to call `generate_pretty_table_mfaz()` and chain it to the pipeline: +We can also neat this output. We just need to call `mw_neat_output_mfaz()` and chain it to the pipeline: ```{r} #| label: pretty_mfaz @@ -436,30 +507,30 @@ We can also make this output pretty and ready to share it out. We just need to c ## Load dplyr library ---- library(dplyr) -## Check plausibility ---- -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( +## The workflow ---- +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( sex = sex, - muac = muac, + muac = muac, age = "age", .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" - ) |> - mutate(muac = recode_muac(muac, unit = "mm")) |> - check_plausibility_mfaz( + .to = "cm" + ) |> + mutate(muac = recode_muac(muac, .to = "mm")) |> + mw_plausibility_check_mfaz( sex = sex, muac = muac, age = age, - flags = flag_mfaz, - area = area - ) |> - generate_pretty_table_mfaz() + flags = flag_mfaz + ) |> + mw_neat_output_mfaz() ``` This will return: @@ -470,57 +541,130 @@ This will return: #| message: false #| eval: true -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( sex = sex, - muac = muac, + muac = muac, age = "age", .recode_sex = TRUE, .recode_muac = TRUE, - unit = "cm" - ) |> - dplyr::mutate(muac = recode_muac(muac, unit = "mm")) |> - check_plausibility_mfaz( + .to = "cm" + ) |> + dplyr::mutate(muac = recode_muac(muac, .to = "mm")) |> + mw_plausibility_check_mfaz( sex = sex, muac = muac, age = age, - flags = flag_mfaz, - area = area - ) |> - generate_pretty_table_mfaz() + flags = flag_mfaz + ) |> + mw_neat_output_mfaz() +``` + +We can also run checks on a multiple area dataset as follows: + +```{r} +#| label: grouped_mfaz +#| echo: true +#| eval: false + +## Load dplyr library ---- +library(dplyr) + +## The workflow ---- +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = "age", + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) |> + mutate(muac = recode_muac(muac, .to = "mm")) |> + group_by(area) |> + mw_plausibility_check_mfaz( + sex = sex, + muac = muac, + age = age, + flags = flag_mfaz + ) |> + mw_neat_output_mfaz() +``` + +This will return: + +```{r} +#| label: grouped_mfaz_view +#| echo: false +#| message: false +#| eval: true + +library(dplyr) + +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, + muac = muac, + age = "age", + .recode_sex = TRUE, + .recode_muac = TRUE, + .to = "cm" + ) |> + mutate(muac = recode_muac(muac, .to = "mm")) |> + group_by(area) |> + mw_plausibility_check_mfaz( + sex = sex, + muac = muac, + age = age, + flags = flag_mfaz + ) |> + mw_neat_output_mfaz() ``` At this point, you have reached the end of your workflow ✨. -## Plausibility check on the absolute MUAC values +## Plausibility check of raw MUAC data -We will assess the plausibility of the MUAC data through it's absolute values when the variable age is not available in the dataset. This is a job assigned to `check_plausibility_muac()`. The workflow for this check is the shortest one. +We will assess the plausibility of raw MUAC data through it's raw values when the variable age is not available in the dataset. This is a job assigned to `mw_plausibility_check_muac()`. The workflow for this check is the shortest one. ### Data wrangling -As you can tell, z-scores cannot be computed in the absence of age. In this way, the data wrangling workflow would be quite minimal. You still set the arguments inside `process_muac_data()` as learned in @sec-wrangle_mfaz. The only difference is that here we will set `age` to `NULL`. Fundamentally, under the hood the function detects MUAC values that are outliers and flags them and stores them in `flag_muac` variable that is added to the dataset. +As you can tell, z-scores cannot be computed in the absence of age. In this way, the data wrangling workflow would be quite minimal. You still set the arguments inside `mw_wrangle_muac()` as learned in @sec-wrangle_mfaz. The only difference is that here we will set `age` to `NULL`. Fundamentally, under the hood the function detects MUAC values that are outliers and flags them and stores them in `flag_muac` variable that is added to the dataset. We will continue using the same dataset: -```{r usage.4, echo=TRUE, eval=FALSE} +```{r} #| label: wrangle_muac #| echo: true #| eval: false anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) + mw_wrangle_muac( + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) ``` This returns: @@ -531,19 +675,19 @@ This returns: #| eval: true anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) + mw_wrangle_muac( + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) ``` ### On to *de facto* plausibility check -We just have to add `check_plausibility_muac()` to the above pipeline: +We just have to add `mw_plausibility_check_muac()` to the above pipeline: ```{r} #| label: pl_muac @@ -551,15 +695,15 @@ We just have to add `check_plausibility_muac()` to the above pipeline: #| eval: false anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) |> - check_plausibility_muac( + mw_wrangle_muac( + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) |> + mw_plausibility_check_muac( sex = sex, flags = flag_muac, muac = muac @@ -574,43 +718,43 @@ And this will return: anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) |> - check_plausibility_muac( + mw_wrangle_muac( + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) |> + mw_plausibility_check_muac( sex = sex, flags = flag_muac, muac = muac ) ``` -We can also return a formatted table with `generate_pretty_table_muac()`: +We can also return a formatted table with `mw_neat_output_muac()`: ```{r} -#| label: pretty_table_muac +#| label: neat_table_muac #| echo: true #| eval: false anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) |> - check_plausibility_muac( + mw_wrangle_muac( + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) |> + mw_plausibility_check_muac( sex = sex, flags = flag_muac, muac = muac - ) |> - generate_pretty_table_muac() + ) |> + mw_neat_output_muac() ``` And we get @@ -620,26 +764,26 @@ And we get #| echo: false anthro.01 |> -process_muac_data( -sex = sex, -muac = muac, -age = NULL, -.recode_sex = TRUE, -.recode_muac = FALSE, -unit = "none" -) |> - check_plausibility_muac( + mw_wrangle_muac( + sex = sex, + muac = muac, + age = NULL, + .recode_sex = TRUE, + .recode_muac = FALSE, + .to = "none" + ) |> + mw_plausibility_check_muac( sex = sex, flags = flag_muac, muac = muac - ) |> - generate_pretty_table_muac() + ) |> + mw_neat_output_muac() ``` -When working on multiple-area data, we will have to approach it slightly different. We use `dplyr::group_by()`: +When working on multiple area data, we approach the task the same way as demonstrated above: -```{r usage.4g, echo=TRUE, eval=FALSE} +```{r} #| label: by_area #| echo: true #| eval: false @@ -649,26 +793,26 @@ library(dplyr) ## Check plausibility ---- anthro.01 |> - process_muac_data( + mw_wrangle_muac( sex = sex, muac = muac, age = NULL, .recode_sex = TRUE, .recode_muac = FALSE, - unit = "none" - ) |> - group_by(area) |> - check_plausibility_muac( + .to = "none" + ) |> + group_by(area) |> + mw_plausibility_check_muac( sex = sex, flags = flag_muac, muac = muac - ) |> - generate_pretty_table_muac() + ) |> + mw_neat_output_muac() ``` And we get: -```{r usage.4h, echo=FALSE, message=FALSE} +```{r} #| label: view_by_area #| echo: false #| message: false @@ -676,19 +820,19 @@ And we get: library(dplyr) anthro.01 |> - process_muac_data( + mw_wrangle_muac( sex = sex, muac = muac, age = NULL, .recode_sex = TRUE, .recode_muac = FALSE, - unit = "none" - ) |> - group_by(area) |> - check_plausibility_muac( + .to = "none" + ) |> + group_by(area) |> + mw_plausibility_check_muac( sex = sex, flags = flag_muac, muac = muac - ) |> - generate_pretty_table_muac() + ) |> + mw_neat_output_muac() ``` diff --git a/vignettes/prevalence.qmd b/vignettes/prevalence.qmd index 10d9c9f..7c79d5d 100644 --- a/vignettes/prevalence.qmd +++ b/vignettes/prevalence.qmd @@ -10,14 +10,14 @@ vignette: > ```{r} #| label: load_library -library(ipccheckr) +library(mwana) ``` ## Introduction This vignette demonstrates how to use the `mwana` package's functions to estimate the prevalence of wasting. The package allow users to estimate prevalence based on: + The weight-for-height z-score (WFHZ) and/or edema; - + The absolute MUAC values and/or edema; + + The raw MUAC values and/or edema; + The combined prevalence and + The MUAC-for-age z-score (MFAZ) and/or edema. @@ -37,7 +37,7 @@ Now we can begin delving into each function. ### Estimation of the prevalence of wasting based on WFHZ {#sec-prevalence-wfhz} -To estimate the prevalence of wasting based on WFHZ we use the `compute_wfhz_prevalence()` function. The dataset to supply must have been wrangled by `process_wfhz_data()`. +To estimate the prevalence of wasting based on WFHZ we use the `compute_wfhz_prevalence()` function. The dataset to supply must have been wrangled by `mw_wrangle_wfhz()`. As usual, we start off by inspecting our dataset: @@ -66,12 +66,12 @@ To achieve this we do: #| echo: true #| eval: false -anthro.02 |> +anthro.02 |> compute_wfhz_prevalence( .wt = NULL, - .edema = edema, + .edema = edema, .summary_by = NULL - ) + ) ``` This will return: @@ -82,7 +82,7 @@ This will return: compute_wfhz_prevalence( df = anthro.02, .wt = NULL, - .edema = edema, + .edema = edema, .summary_by = NULL ) ``` @@ -94,13 +94,12 @@ If for some reason the variable edema is not available in the dataset, or it's t #| echo: true #| eval: false -anthro.02 |> +anthro.02 |> compute_wfhz_prevalence( .wt = NULL, - .edema = NULL, # Setting .edema to NULL + .edema = NULL, # Setting .edema to NULL .summary_by = NULL - ) - + ) ``` And we get: @@ -112,7 +111,7 @@ And we get: compute_wfhz_prevalence( df = anthro.02, .wt = NULL, - .edema = NULL, + .edema = NULL, .summary_by = NULL ) ``` @@ -126,12 +125,12 @@ The above output summary does not show results by province. We can control that #| echo: true #| eval: false -anthro.02 |> +anthro.02 |> compute_wfhz_prevalence( .wt = NULL, - .edema = edema, - .summary_by = province ## province is the variable's name holding data on where the survey was conducted. -) + .edema = edema, + .summary_by = province # province is the variable's name holding data on where the survey was conducted. + ) ``` And _voila_ : @@ -140,12 +139,12 @@ And _voila_ : #| label: view_unwt_wast_wfhz_province #| echo: false -anthro.02 |> +anthro.02 |> compute_wfhz_prevalence( .wt = NULL, - .edema = edema, + .edema = edema, .summary_by = province -) + ) ``` A table with two rows is returned with each province's statistics. @@ -159,12 +158,12 @@ To get the weighted prevalence, we make the use of the `.wt` argument. We pass t #| echo: true #| eval: false -anthro.02 |> +anthro.02 |> compute_wfhz_prevalence( - .wt = "wtfactor", ## Passing the wtfactor to .wt - .edema = edema, + .wt = "wtfactor", # Passing the wtfactor to .wt + .edema = edema, .summary_by = province -) + ) ``` And you get: @@ -173,12 +172,12 @@ And you get: #| label: view_wt_wasting_wfhz #| echo: true -anthro.02 |> +anthro.02 |> compute_wfhz_prevalence( .wt = "wtfactor", - .edema = edema, + .edema = edema, .summary_by = province -) + ) ``` :::{.callout-note} @@ -205,15 +204,15 @@ Now let's apply the prevalence function: #| echo: true #| eval: false -anthro.03 |> - process_wfhz_data( +anthro.03 |> + mw_wrangle_wfhz( sex = sex, .recode_sex = TRUE, height = height, weight = weight - ) |> + ) |> compute_wfhz_prevalence( - .wt = NULL, + .wt = NULL, .edema = edema, .summary_by = district ) @@ -225,15 +224,15 @@ The returned output will be: #| label: view_anthro.3_prev #| echo: false -anthro.03 |> - process_wfhz_data( +anthro.03 |> + mw_wrangle_wfhz( sex = sex, .recode_sex = TRUE, height = height, weight = weight - ) |> + ) |> compute_wfhz_prevalence( - .wt = NULL, + .wt = NULL, .edema = edema, .summary_by = district ) @@ -243,14 +242,14 @@ Can you spot the differences? 😎 Yes, you're absolutely correct! While in Caho ### Estimation of the prevalence of wasting based on MFAZ -The prevalence of wasting based on MFAZ can be estimated using the `compute_mfaz_prevalence()` function. This function works and is implemented the same way as demonstrated in @sec-prevalence-wfhz, with the exception of the data wrangling that is based on MUAC. This was demonstrated in **ADD LINK TO PLAUSIBILITY MUAC**. In this way, to avoid redundancy, we will not demonstrate the workflow. +The prevalence of wasting based on MFAZ can be estimated using the `compute_mfaz_prevalence()` function. This function works and is implemented the same way as demonstrated in @sec-prevalence-wfhz, with the exception of the data wrangling that is based on MUAC. This was demonstrated in the [plausibility checks](https://nutriverse.io/mwana/articles/plausibility.html). In this way, to avoid redundancy, we will not demonstrate the workflow. -### Estimation of the prevalence of wasting based on the absolute MUAC values {#sec-prevalence-muac} +### Estimation of the prevalence of wasting based on the raw MUAC values {#sec-prevalence-muac} -This job is assigned to `compute_muac_prevalence()`. Once you call the function, before starting the prevalence estimation, it first evaluates the acceptability of the MFAZ standard deviation and the age ratio test. Yes, you read well, MFAZ's standard deviation, not on the absolute values MUAC. +This job is assigned to `compute_muac_prevalence()`. Once you call the function, before starting the prevalence estimation, it first evaluates the acceptability of the MFAZ standard deviation and the age ratio test. Yes, you read well, MFAZ's standard deviation, not on the raw values MUAC. :::{.callout-important} -Although the acceptability is evaluated on the basis of MFAZ, the actual prevalence is estimated on the basis of the absolute MUAC values. MFAZ is also used to detect outliers and flag them to be excluded from the prevalence analysis. +Although the acceptability is evaluated on the basis of MFAZ, the actual prevalence is estimated on the basis of the raw MUAC values. MFAZ is also used to detect outliers and flag them to be excluded from the prevalence analysis. ::: The MFAZ standard deviation and the age ratio test results are used to control the prevalence analysis flow in this way: @@ -288,9 +287,9 @@ As in ENA Software, make sure you run the plausibility check before you call the #| echo: true #| eval: false -anthro.04 |> +anthro.04 |> compute_muac_prevalence( - .wt = NULL, + .wt = NULL, .edema = edema, .summary_by = province ) @@ -302,9 +301,9 @@ This will return: #| label: view_prev_muac #| echo: false -anthro.04 |> +anthro.04 |> compute_muac_prevalence( - .wt = NULL, + .wt = NULL, .edema = edema, .summary_by = province ) @@ -327,21 +326,24 @@ We approach this task as follows: library(dplyr) ## Compute prevalence ---- -anthro.02 |> - process_age(age = age) |> - process_muac_data( - sex = sex, +anthro.02 |> + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, .recode_sex = FALSE, muac = muac, - .recode_muac = TRUE, - unit = "cm", + .recode_muac = TRUE, + .to = "cm", age = "age" - ) |> + ) |> mutate( - muac = recode_muac(muac, unit = "mm") - ) |> + muac = recode_muac(muac, .to = "mm") + ) |> compute_muac_prevalence( - .wt = "wtfactor", + .wt = "wtfactor", .edema = edema, .summary_by = province ) @@ -356,33 +358,36 @@ This will return: library(dplyr) -anthro.02 |> - process_age(age = age) |> - process_muac_data( - sex = sex, +anthro.02 |> + mw_wrangle_age( + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, .recode_sex = FALSE, muac = muac, - .recode_muac = TRUE, - unit = "cm", + .recode_muac = TRUE, + .to = "cm", age = "age" - ) |> + ) |> mutate( - muac = recode_muac(muac, unit = "mm") - ) |> + muac = recode_muac(muac, .to = "mm") + ) |> compute_muac_prevalence( - .wt = "wtfactor", + .wt = "wtfactor", .edema = edema, .summary_by = province ) ``` :::{.callout-warning} -You may have noticed that in the above code block, we called the `recode_muac()` function inside `mutate()`. This is because after you use `process_muac_data()`, it puts the MUAC variable in centimeters. The `compute_muac_prevalence()` function was defined to accept MUAC in millimeters. Therefore, it must be converted to millimeters. +You may have noticed that in the above code block, we called the `recode_muac()` function inside `mutate()`. This is because after you use `mw_wrangle_muac()`, it puts the MUAC variable in centimeters. The `compute_muac_prevalence()` function was defined to accept MUAC in millimeters. Therefore, it must be converted to millimeters. ::: ### Estimation of the combined prevalence of wasting -The estimation of the combined prevalence of wasting is a task attributed to the `compute_combined_prevalence()` function. The case-definition is based on the WFHZ, the absolute MUAC values and edema. From the workflow standpoint, it combines the workflow demonstrated in @sec-prevalence-wfhz and in @sec-prevalence-muac. +The estimation of the combined prevalence of wasting is a task attributed to the `compute_combined_prevalence()` function. The case-definition is based on the WFHZ, the raw MUAC values and edema. From the workflow standpoint, it combines the workflow demonstrated in @sec-prevalence-wfhz and in @sec-prevalence-muac. To demonstrate it's implementation we will use the `anthro.01` dataset. @@ -406,25 +411,26 @@ Fundamentally, it combines the wrangling workflow of WFHZ and MUAC data: library(dplyr) ## Apply the wrangling workflow ---- -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( - sex = sex, +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, .recode_sex = TRUE, muac = muac, - .recode_muac = TRUE, - unit = "cm", + .recode_muac = TRUE, + .to = "cm", age = "age" - ) |> + ) |> mutate( - muac = recode_muac(muac, unit = "mm") - ) |> - process_wfhz_data( - sex = sex, + muac = recode_muac(muac, .to = "mm") + ) |> + mw_wrangle_wfhz( + sex = sex, weight = weight, height = height, .recode_sex = FALSE @@ -440,29 +446,30 @@ This is to get the `wfhz` and `flag_wfhz` the `mfaz` and `flag_mfaz` added to th library(dplyr) -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( - sex = sex, +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, .recode_sex = TRUE, muac = muac, - .recode_muac = TRUE, - unit = "cm", + .recode_muac = TRUE, + .to = "cm", age = "age" - ) |> + ) |> mutate( - muac = recode_muac(muac, unit = "mm") - ) |> - process_wfhz_data( - sex = sex, + muac = recode_muac(muac, .to = "mm") + ) |> + mw_wrangle_wfhz( + sex = sex, weight = weight, height = height, .recode_sex = FALSE - ) |> + ) |> select(area, wfhz, flag_wfhz, mfaz, flag_mfaz) ``` @@ -498,70 +505,72 @@ Now that we understand what happens under the hood, we can now proceed to implem library(dplyr) ## Apply the workflow ---- -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( - sex = sex, +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, .recode_sex = TRUE, muac = muac, - .recode_muac = TRUE, - unit = "cm", - age = "age" - ) |> + .recode_muac = TRUE, + unit = "cm", + .to = "age" + ) |> mutate( - muac = recode_muac(muac, unit = "mm") - ) |> - process_wfhz_data( - sex = sex, + muac = recode_muac(muac, .to = "mm") + ) |> + mw_wrangle_wfhz( + sex = sex, weight = weight, height = height, .recode_sex = FALSE - ) |> + ) |> compute_combined_prevalence( - .wt = NULL, - .edema = edema, + .wt = NULL, + .edema = edema, .summary_by = area ) ``` We get this: -```{r wfhz.4c, echo = FALSE, message=FALSE} +```{r} #| label: view_cwasting #| echo: false library(dplyr) -anthro.01 |> - process_age( - svdate = "dos", - birdate = "dob", - age = age - ) |> - process_muac_data( - sex = sex, +anthro.01 |> + mw_wrangle_age( + dos = dos, + dob = dob, + age = age, + .decimals = 2 + ) |> + mw_wrangle_muac( + sex = sex, .recode_sex = TRUE, muac = muac, - .recode_muac = TRUE, - unit = "cm", + .recode_muac = TRUE, + .to = "cm", age = "age" - ) |> + ) |> mutate( - muac = recode_muac(muac, unit = "mm") - ) |> - process_wfhz_data( - sex = sex, + muac = recode_muac(muac, .to = "mm") + ) |> + mw_wrangle_wfhz( + sex = sex, weight = weight, height = height, .recode_sex = FALSE - ) |> + ) |> compute_combined_prevalence( - .wt = NULL, - .edema = edema, + .wt = NULL, + .edema = edema, .summary_by = area ) ```