Adapt the code to the new data set, without financial (#138)

* added new toy data set * added documentation * updated bar plot emission profile * fixed space typo * modified bar plot emission profile + added scenario & year * modified xlim() * new data set to handle NAs * modified map and tests passed * added defining global variables * added documentation * corrected map * changed tests * commented vignette for now * styled * uncommented vignette * updated changelog * removed article * added arguments description * styled * added util function * fixed potential bug * styled * aligned * use tidy style
2DegreesInvesting · Jul 15, 2024 · fb7a172 · fb7a172
1 parent a931511
commit fb7a172
Show file tree

Hide file tree

Showing 30 changed files with 637 additions and 368 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -9,12 +9,16 @@ export(map_region_risk)
 export(modes)
 export(plot_sankey)
 export(scatter_plot_financial)
+export(scenarios)
+export(scenarios_financial)
 export(theme_tiltplot)
+export(years)
 importFrom(dplyr,arrange)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
 importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
+importFrom(dplyr,first)
 importFrom(dplyr,group_by)
 importFrom(dplyr,inner_join)
 importFrom(dplyr,left_join)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,11 @@
 <!-- NEWS.md is maintained by https://cynkra.github.io/fledge, do not edit -->
 
+# tiltPlot 0.0.0.9003 (2024-07-09)
+
+* tiltPlot data fosters a new toy data set without financials (`without_financial`) (#137).
+* `bar_plot_emission_profile()` has two new arguments: `scenario` and `year`.
+* `map_region_risk()` has two new arguments: `scenario` and `year`.
+
 # tiltPlot 0.0.0.9002 (2024-06-04)
 
 * `bar_plot_emission_profile()` has now modes that the user can choose from (#134).

diff --git a/R/bar_plot_emission_profile.R b/R/bar_plot_emission_profile.R
@@ -8,6 +8,8 @@
 #' emission profiles will be plotted. The user can choose from one to several
 #' benchmark(s) to be plotted.
 #' @param mode A character vector: `r toString(modes())`.
+#' @param scenario A character vector: `r toString(scenarios())`.
+#' @param year A character vector: `r toString(years())`.
 #'
 #' @return A [ggplot] object.
 #'
@@ -18,14 +20,19 @@
 #' bar_plot_emission_profile(without_financial, benchmarks)
 bar_plot_emission_profile <- function(data,
                                       benchmarks = benchmarks(),
-                                      mode = modes()) {
+                                      mode = modes(),
+                                      scenario = scenarios(),
+                                      year = years()) {
   benchmarks <- arg_match(benchmarks, multiple = TRUE)
   mode <- mode |>
-    arg_match()
+    arg_match() |>
+    switch_mode_emission_profile()
+  scenario <- arg_match(scenario)
+  year <- year
 
   data |>
-    check_bar_plot_emission_profile() |>
-    prepare_bar_plot_emission_profile(benchmarks, mode) |>
+    check_bar_plot_emission_profile(mode) |>
+    prepare_bar_plot_emission_profile(benchmarks = benchmarks, mode = mode, scenario = scenario, year = year) |>
     plot_bar_plot_emission_profile_impl()
 }
 
@@ -35,10 +42,10 @@ bar_plot_emission_profile <- function(data,
 #'
 #' @return A data frame
 #' @noRd
-check_bar_plot_emission_profile <- function(data) {
+check_bar_plot_emission_profile <- function(data, mode) {
   crucial <- c(
     "benchmark",
-    modes(),
+    mode,
     aka("risk_category")
   )
   data |> check_crucial_names(names_matching(data, crucial))
@@ -53,14 +60,16 @@ check_bar_plot_emission_profile <- function(data) {
 #' @return A data frame.
 #'
 #' @noRd
-prepare_bar_plot_emission_profile <- function(data, benchmarks, mode) {
-  risk_var <- names_matching(data, aka("risk_category"))
+prepare_bar_plot_emission_profile <- function(data, benchmarks, mode, scenario, year) {
+  risk_var <- get_colname(data, aka("risk_category"))
 
   data <- data |>
     mutate(risk_category_var = as_risk_category(.data[[risk_var]]))
 
   data <- data |>
-    filter(.data$benchmark %in% benchmarks) |>
+    filter((.data$benchmark %in% .env$benchmarks &
+      .data$scenario == .env$scenario &
+      .data$year == .env$year)) |>
     group_by(.data$risk_category_var, .data$benchmark) |>
     summarise(total_mode = sum(.data[[mode]])) |>
     group_by(.data$benchmark) |>
@@ -80,5 +89,5 @@ plot_bar_plot_emission_profile_impl <- function(data) {
     geom_col(position = position_stack(reverse = TRUE), width = width_bar()) +
     fill_score_colors() +
     theme_tiltplot() +
-    xlim(0, 1)
+    xlim(0, NA)
 }
diff --git a/R/custom_gradient_color.R b/R/custom_gradient_color.R
@@ -24,7 +24,6 @@ custom_gradient_color <- function(risk_high = 1, risk_medium = 1, risk_low = 1)
   # interpolate the colors based on proportions : 1 is highest intensity
   final_color <- high_color * risk_high + medium_color * risk_medium + low_color * risk_low
 
-  final_color <- do.call(rgb, as.list(final_color))
-
-  return(final_color)
+  final_color <- do.call(rgb, c(as.list(final_color)))
+  final_color
 }
diff --git a/R/example_data.R b/R/example_data.R
@@ -25,16 +25,20 @@ default_financial <- function(bank_id = "a",
 default_without_financial <- function(company_name = "a",
                                       emission_profile = "medium",
                                       benchmark = "all",
-                                      equal_weight = 0.1,
-                                      worst_case = 0.1,
-                                      best_case = 0.1) {
+                                      scenario = "1.5C RPS",
+                                      year = 2030,
+                                      equal_weight_emission_profile = 0.1,
+                                      worst_case_emission_profile = 0.1,
+                                      best_case_emission_profile = 0.1) {
   tibble(
     company_name = company_name,
     emission_profile = emission_profile,
     benchmark = benchmark,
-    equal_weight = equal_weight,
-    worst_case = worst_case,
-    best_case = best_case
+    scenario = scenario,
+    year = year,
+    equal_weight_emission_profile = equal_weight_emission_profile,
+    worst_case_emission_profile = worst_case_emission_profile,
+    best_case_emission_profile = best_case_emission_profile
   )
 }
 

diff --git a/R/map_region_risk.R b/R/map_region_risk.R
@@ -10,6 +10,8 @@
 #' default mode.
 #' @param mode The mode to plot. It can be one of "equal_weight", "worst_case"
 #' or "best_case". If nothing is chosen, "equal_weight" is the default mode.
+#' @param scenario A character vector: `r toString(scenarios())`.
+#' @param year A character vector: `r toString(years())`.
 #'
 #' @return A ggplot2 object representing the country data plot.
 #' @export
@@ -22,20 +24,17 @@
 map_region_risk <- function(data,
                             # TODO: plot for other countries
                             country_code = c("DE"),
-                            benchmark = c(
-                              "all",
-                              "isic_4digit",
-                              "tilt_sector",
-                              "unit",
-                              "unit_isic_4digit",
-                              "unit_tilt_sector"
-                            ),
-                            mode = c("equal_weight", "worst_case", "best_case")) {
+                            benchmark = benchmarks(),
+                            mode = modes(),
+                            scenario = scenarios(),
+                            year = years()) {
   prepared_data <- prepare_geo_data(
     data,
     country_code,
     benchmark,
-    mode
+    mode,
+    scenario,
+    year
   )
   shp_1 <- prepared_data[[1]]
   aggregated_data <- prepared_data[[2]]

diff --git a/R/prepare_geo_data.R b/R/prepare_geo_data.R
@@ -10,23 +10,27 @@
 prepare_geo_data <- function(data,
                              country_code = c("DE"),
                              benchmark = benchmarks(),
-                             mode = c(
-                               "equal_weight",
-                               "worst_case",
-                               "best_case"
-                             )) {
+                             mode = modes(),
+                             scenario = scenarios(),
+                             year = years()) {
   benchmark <- arg_match(benchmark)
-  mode <- arg_match(mode)
+  mode <- mode |>
+    arg_match() |>
+    switch_mode_emission_profile()
   country_code <- arg_match(country_code)
+  scenario <- arg_match(scenario)
+  year <- year
 
   crucial <- c(
     aka("risk_category"),
-    "company_name",
+    aka("companies_id"),
     "postcode",
-    "benchmark"
+    "benchmark",
+    "scenario",
+    aka("year")
   )
   data |> check_crucial_names(names_matching(data, crucial))
-  risk_var <- names_matching(data, aka("risk_category"))
+  risk_var <- get_colname(data, aka("risk_category"))
   data <- data |>
     mutate(risk_category_var = as_risk_category(data[[risk_var]]))
 
@@ -50,7 +54,11 @@ prepare_geo_data <- function(data,
 
   # merge shapefile with financial data
   geo <- data |>
-    filter(benchmark == .env$benchmark) |>
+    filter(
+      .data$benchmark == .env$benchmark,
+      .data$scenario == .env$scenario,
+      .data$year == .env$year
+    ) |>
     left_join(shp_1, by = "postcode") |>
     st_as_sf()
 
@@ -59,7 +67,6 @@ prepare_geo_data <- function(data,
   list(shp_1, aggregated_data)
 }
 
-
 #' Aggregate Geo Data
 #'
 #' @param geo A data frame containing geographical data.
@@ -84,52 +91,28 @@ prepare_geo_data <- function(data,
 #'
 #' aggregate_geo(geo, mode = "worst_case")
 aggregate_geo <- function(geo, mode) {
-  if (mode %in% c("worst_case", "best_case")) {
-    aggregated_data <- geo |>
-      group_by(.data$postcode, .data$company_name) |>
-      mutate(
-        # Choose the worst or best risk category and set the others to 0.
-        proportion = calculate_case_proportions(.data$risk_category_var, mode)
-      ) |>
-      group_by(.data$postcode, .data$risk_category_var) |>
-      summarize(proportion = sum(.data$proportion)) |>
-      ungroup()
-  } else if (mode == "equal_weight") {
-    aggregated_data <- geo |>
-      group_by(.data$postcode, .data$risk_category_var) |>
-      summarize(count = n()) |>
-      # Do not group by company here since all of them have equal weights.
-      group_by(.data$postcode) |>
-      mutate(proportion = .data$count / sum(.data$count)) |>
-      ungroup()
-  }
+  aggregated_data <- geo |>
+    group_by(.data$postcode, .data$risk_category_var) |>
+    summarise(total_mode = sum(.data[[mode]])) |>
+    group_by(.data$postcode) |>
+    mutate(proportion = total_mode / sum(total_mode)) |>
+    ungroup()
 
-  # apply custom_gradient_color to each row
+  # Pivot
   aggregated_data <- aggregated_data |>
     pivot_wider(names_from = "risk_category_var", values_from = "proportion", values_fill = 0) |>
-    mutate(color = pmap(list(.data$high, .data$medium, .data$low), custom_gradient_color))
-}
+    filter(.data$total_mode != 0)
 
-#' Calculate Proportions for Worst or Best Case Scenarios
-#'
-#' @param categories A factor vector of risk categories.
-#' @param mode A character string specifying the mode.
-#'
-#' @return A numeric vector representing the calculated proportions for each
-#' category.
-#'
-#' @examples
-#' categories <- as_risk_category(c("low", "medium", "medium", "high"))
-#' calculate_case_proportions(categories, mode = "worst_case")
-#' @noRd
-calculate_case_proportions <- function(categories, mode) {
-  if (mode == "worst_case") {
-    extreme_risk <- levels(categories)[max(as.integer(categories))]
-  } else if (mode == "best_case") {
-    extreme_risk <- levels(categories)[min(as.integer(categories))]
-  }
-
-  is_extreme <- categories == extreme_risk
-  proportions <- ifelse(is_extreme, 1 / sum(is_extreme), 0)
-  proportions
+  # Calculate color row by row
+  aggregated_data <- aggregated_data |>
+    group_by(.data$postcode) |>
+    summarise(
+      total_mode = add(.data$total_mode),
+      geometry = first(.data$geometry),
+      low = add(.data$low),
+      medium = add(.data$medium),
+      high = add(.data$high)
+    ) |>
+    mutate(color = pmap(list(.data$high, .data$medium, .data$low), custom_gradient_color))
+  aggregated_data
 }
diff --git a/R/scatter_plot_financial.R b/R/scatter_plot_financial.R
@@ -21,13 +21,9 @@
 #' )
 scatter_plot_financial <- function(data,
                                    benchmarks = benchmarks(),
-                                   mode = c(
-                                     "equal_weight",
-                                     "worst_case",
-                                     "best_case"
-                                   ),
-                                   scenario = c("IPR", "WEO"),
-                                   year = c(2030, 2050)) {
+                                   mode = modes(),
+                                   scenario = scenarios_financial(),
+                                   year = years()) {
   # FIXME: .env$ instead of _arg seems to cause a bug only for benchmarks.
   benchmarks_arg <- arg_match(benchmarks, multiple = TRUE)
   scenario <- arg_match(scenario)

diff --git a/R/tiltPlot-package.R b/R/tiltPlot-package.R
@@ -7,6 +7,7 @@
 #' @importFrom dplyr case_when
 #' @importFrom dplyr distinct
 #' @importFrom dplyr filter
+#' @importFrom dplyr first
 #' @importFrom dplyr group_by
 #' @importFrom dplyr inner_join
 #' @importFrom dplyr left_join