New summarize_noise() (#8)

* New summarize_noise() * Bump * Style
2DegreesInvesting · Jun 17, 2024 · a48de56 · a48de56
1 parent 3a0d743
commit a48de56
Show file tree

Hide file tree

Showing 11 changed files with 272 additions and 13 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tiltAddCO2
 Title: Add CO2
-Version: 0.0.0.9001
+Version: 0.0.0.9002
 Authors@R: c(
     person("Mauro", "Lepore", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "https://orcid.org/0000-0002-1986-7988")),

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export(add_co2)
+export(summarize_noise)
 import(tiltIndicator)
 import(tiltToyData)
 importFrom(dplyr,bind_rows)
@@ -22,6 +23,7 @@ importFrom(rlang,.data)
 importFrom(rlang,.env)
 importFrom(rlang,abort)
 importFrom(rlang,ensym)
+importFrom(rlang,is_named)
 importFrom(tibble,tibble)
 importFrom(tibble,tribble)
 importFrom(tidyselect,all_of)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,9 @@
 <!-- NEWS.md is maintained by https://cynkra.github.io/fledge, do not edit -->
 
+# tiltAddCO2 0.0.0.9002 (2024-06-17)
+
+* New `summarize_noise()` (#8).
+
 # tiltAddCO2 0.0.0.9001 (2024-05-10)
 
 * Add NEWS.md, following approval of

diff --git a/R/add_co2.R b/R/add_co2.R
@@ -19,16 +19,17 @@
 #' co2 <- read_csv(file, show_col_types = FALSE)
 #' profile <- toy_profile_emissions_impl_output()
 #'
-#' with_co2 <- profile |>
-#'   add_co2(co2)
+#' with_co2 <- profile |> add_co2(co2)
 #'
-#' with_co2 |>
-#'   unnest_product() |>
-#'   relocate(matches("co2"))
+#' product <- with_co2 |> unnest_product()
+#'
+#' product |> relocate(matches(c("co2", "min", "max")))
+#'
+#' product |> summarize_noise(.by = "benchmark")
 #'
 #' with_co2 |>
 #'   unnest_company() |>
-#'   relocate(matches("co2"))
+#'   relocate(matches(c("co2")))
 add_co2 <- function(data, co2, jitter_amount = NULL) {
   data_co2 <- data |>
     add_co2_footprint(co2) |>

diff --git a/R/import-standalone-check_crucial_names.R b/R/import-standalone-check_crucial_names.R
@@ -0,0 +1,54 @@
+# Standalone file: do not edit by hand
+# Source: <https://github.com/2DegreesInvesting/tiltIndicator/blob/main/R/standalone-check_crucial_names.R>
+# ----------------------------------------------------------------------
+#
+# ---
+# repo: 2DegreesInvesting/tiltIndicator
+# file: standalone-check_crucial_names.R
+# last-updated: 2024-06-15
+# license: https://unlicense.org
+# imports: [rlang, glue]
+# ---
+
+#' @importFrom rlang is_named
+#' @importFrom rlang abort
+#' @importFrom glue glue
+NULL
+
+#' Check if a named object contains expected names
+#'
+#' Based on fgeo.tool::check_crucial_names()
+#'
+#' @param x A named object.
+#' @param expected_names String; expected names of `x`.
+#'
+#' @return Invisible `x`, or an error with informative message.
+#'
+#' Adapted from: https://github.com/RMI-PACTA/r2dii.match/blob/main/R/check_crucial_names.R
+#'
+#' @examples
+#' x <- c(a = 1)
+#' check_crucial_names(x, "a")
+#' try(check_crucial_names(x, "bad"))
+#' @noRd
+check_crucial_names <- function(x, expected_names) {
+  stopifnot(is_named(x))
+  stopifnot(is.character(expected_names))
+
+  ok <- all(unique(expected_names) %in% names(x))
+  if (!ok) {
+    abort_missing_names(sort(setdiff(expected_names, names(x))))
+  }
+
+  invisible(x)
+}
+
+abort_missing_names <- function(missing_names) {
+  abort(
+    "missing_names",
+    message = glue(
+      "Must have missing names:
+      {paste0('`', missing_names, '`', collapse = ', ')}"
+    )
+  )
+}
diff --git a/R/summarize_noise.R b/R/summarize_noise.R
@@ -0,0 +1,55 @@
+#' Summarize the mean percent noise in each gruop of jittered range of data
+#'
+#' @param data A dataset with the columns `r toString(col_summarize_noise())`.
+#' @inheritParams dplyr::summarize
+#'
+#' @return See [dplyr::summarize()].
+#' @export
+#'
+#' @examples
+#' # styler: off
+#' data <- tibble::tribble(
+#'   ~min_jitter,  ~min,   ~max, ~max_jitter, ~group,
+#'           0.8,   1.0,    2.1,         2.2,      1,
+#'           0.9,   1.1,    2.2,         3.0,      1,
+#'
+#'           0.1,   2.1,    3.1,         5.8,      2,
+#'           0.2,   2.2,    3.2,         5.9,      2,
+#' )
+#' # styler: on
+#'
+#' summarize_noise(data)
+#'
+#' summarize_noise(data, .by = "group")
+summarize_noise <- function(data, .by = NULL) {
+  crucial <- col_summarize_noise()
+  check_crucial_names(data, crucial)
+
+  data |>
+    dplyr::summarize(
+      min_noise = mean_noise(.data[[col_min()]], .data[[col_min_jitter()]]),
+      max_noise = mean_noise(.data[[col_max()]], .data[[col_max_jitter()]]),
+      .by = all_of(.by)
+    )
+}
+
+col_summarize_noise <- function() {
+  c(
+    col_min(),
+    col_min_jitter(),
+    col_max(),
+    col_max_jitter()
+  )
+}
+
+mean_noise <- function(x, noisy) {
+  mean(percent_noise(x, noisy), na.rm = TRUE)
+}
+
+toy_summarize_noise <- example_data_factory(tibble(
+  # Designed to have 20% and 10% noise for min and max, respectively
+  !!col_min_jitter() := 0.8,
+  !!col_min() := 1,
+  !!col_max() := 2,
+  !!col_max_jitter() := 2.2
+))
diff --git a/man/add_co2.Rd b/man/add_co2.Rd
diff --git a/man/summarize_noise.Rd b/man/summarize_noise.Rd
diff --git a/...stthat/_snaps/add_co2-group_benchmarks.md → tests/testthat/_snaps/group_benchmarks.md b/...stthat/_snaps/add_co2-group_benchmarks.md → tests/testthat/_snaps/group_benchmarks.md
diff --git a/tests/testthat/_snaps/summarize_noise.md b/tests/testthat/_snaps/summarize_noise.md
@@ -0,0 +1,36 @@
+# without crucial columns errors gracefully
+
+    Code
+      summarize_noise(bad)
+    Condition
+      Error in `abort_missing_names()`:
+      ! Must have missing names:
+      `min`
+
+---
+
+    Code
+      summarize_noise(bad)
+    Condition
+      Error in `abort_missing_names()`:
+      ! Must have missing names:
+      `max`
+
+---
+
+    Code
+      summarize_noise(bad)
+    Condition
+      Error in `abort_missing_names()`:
+      ! Must have missing names:
+      `min_jitter`
+
+---
+
+    Code
+      summarize_noise(bad)
+    Condition
+      Error in `abort_missing_names()`:
+      ! Must have missing names:
+      `max_jitter`
+
diff --git a/tests/testthat/test-summarize_noise.R b/tests/testthat/test-summarize_noise.R
@@ -0,0 +1,67 @@
+test_that("without crucial columns errors gracefully", {
+  data <- toy_summarize_noise()
+  crucial <- "min"
+  bad <- select(data, -all_of(crucial))
+  expect_snapshot(error = TRUE, summarize_noise(bad))
+
+  crucial <- "max"
+  bad <- select(data, -all_of(crucial))
+  expect_snapshot(error = TRUE, summarize_noise(bad))
+
+  crucial <- "min_jitter"
+  bad <- select(data, -all_of(crucial))
+  expect_snapshot(error = TRUE, summarize_noise(bad))
+
+  crucial <- "max_jitter"
+  bad <- select(data, -all_of(crucial))
+  expect_snapshot(error = TRUE, summarize_noise(bad))
+})
+
+test_that("yields columns `min_noise` and `max_noise`", {
+  data <- toy_summarize_noise()
+  out <- summarize_noise(data)
+  expect_true(hasName(out, "min_noise"))
+  expect_true(hasName(out, "max_noise"))
+})
+
+test_that("yields a summary, so fewer rows than the input data", {
+  data <- toy_summarize_noise(!!col_min() := 1:2)
+  out <- summarize_noise(data)
+  expect_true(nrow(out) < nrow(data))
+})
+
+test_that("yields the expected percent noise", {
+  data <- toy_summarize_noise()
+  out <- summarize_noise(data)
+  # Toy `data` designed with these expectations
+  expect_equal(out$min_noise, 20)
+  expect_equal(out$max_noise, 10)
+})
+
+test_that("drops missing values", {
+  data <- toy_summarize_noise(!!col_min() := c(1, NA))
+  out <- summarize_noise(data)
+  expect_false(anyNA(out))
+})
+
+test_that("is sensitive to .by", {
+  data <- toy_summarize_noise(group = 1)
+  expect_no_error(summarize_noise(data, .by = "group"))
+})
+
+test_that("yields the expected percent noise by group", {
+  data <- bind_rows(
+    # This dataset has 20% and 10% noise in min and max respectively
+    toy_summarize_noise(g = 1),
+    # This dataset has 10% and 10% noise in min and max respectively
+    toy_summarize_noise(g = 2, !!col_min_jitter() := 0.9)
+  )
+
+  out <- summarize_noise(data, .by = "g")
+
+  expect_equal(out[1, ][["min_noise"]], 20)
+  expect_equal(out[1, ][["max_noise"]], 10)
+
+  expect_equal(out[2, ][["min_noise"]], 10)
+  expect_equal(out[2, ][["max_noise"]], 10)
+})