Create a German Map showing regional risks (#73)

* added postcodes into the data * first trial of creating simple german map without any polygons * tried europstat * NUTS2 Level germany * merged financial data with geometry of map file * succeded map but have to modify the financial data in order to see something * modified financial data to see all the risks categories * added data set and documented + added CC-BY-SA license * updated code with new financial data set * updated sankey plot with new financial data * use snake-case for shapefiles * modified license and imported package functions * created gradient color for each combination of low, medium and high risk categories * fixed function argument * documented functions * documented unknown functions * added import of the new function of the germap_map function * corrected nuts_de documentation and added german_map function doc * added a little test * added test color * passed all the checks * updated README file * added copyrighted to the map * styled * styled arguments * create custom gradient color separatly * added test to the test custom gradient func * styled plot sankey and xctr arguments * replaced germap_map by map_country * added prepare_geo_data.R function * added test to prepare geo data and rename map_country * removed csv and wrote tsv file for nuts_de * rescaled the labs of the title * modified test to add risks names to the color lists * used tidy verse style
2DegreesInvesting · Oct 25, 2023 · e92b65a · e92b65a
1 parent 173964b
commit e92b65a
Show file tree

Hide file tree

Showing 36 changed files with 9,594 additions and 516 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -13,14 +13,21 @@ Depends:
     R (>= 2.10)
 Imports: 
     dplyr,
+    eurostat,
     ggalluvial,
     ggplot2,
     glue,
+    grDevices,
+    here,
+    purrr,
+    readr,
     rlang,
+    sf,
     stats,
     tibble,
     tidyr,
-    tidyselect
+    tidyselect,
+    vroom
 Suggests: 
     testthat (>= 3.0.0)
 Config/testthat/edition: 3

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,32 +1,41 @@
 # Generated by roxygen2: do not edit by hand
 
+export(map_region_risk)
 export(plot_sankey)
 export(plot_xctr)
 export(plot_xctr_financial)
 export(theme_tiltplot)
+importFrom(dplyr,arrange)
 importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
 importFrom(dplyr,filter)
 importFrom(dplyr,group_by)
+importFrom(dplyr,inner_join)
+importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n)
 importFrom(dplyr,pull)
 importFrom(dplyr,select)
 importFrom(dplyr,summarise)
 importFrom(dplyr,summarize)
+importFrom(dplyr,ungroup)
+importFrom(eurostat,get_eurostat_geospatial)
 importFrom(ggalluvial,StatStratum)
 importFrom(ggalluvial,geom_alluvium)
 importFrom(ggalluvial,geom_stratum)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,after_stat)
+importFrom(ggplot2,coord_sf)
 importFrom(ggplot2,element_blank)
 importFrom(ggplot2,element_text)
 importFrom(ggplot2,facet_wrap)
 importFrom(ggplot2,geom_bar)
 importFrom(ggplot2,geom_col)
 importFrom(ggplot2,geom_label)
+importFrom(ggplot2,geom_sf)
 importFrom(ggplot2,geom_text)
 importFrom(ggplot2,ggplot)
+importFrom(ggplot2,ggplot_build)
 importFrom(ggplot2,ggtitle)
 importFrom(ggplot2,labs)
 importFrom(ggplot2,scale_fill_manual)
@@ -38,11 +47,20 @@ importFrom(ggplot2,theme_minimal)
 importFrom(ggplot2,unit)
 importFrom(ggplot2,ylim)
 importFrom(glue,glue)
+importFrom(grDevices,rgb)
+importFrom(here,here)
+importFrom(purrr,pmap)
+importFrom(readr,read_csv2)
 importFrom(rlang,.data)
 importFrom(rlang,.env)
 importFrom(rlang,arg_match)
+importFrom(sf,st_as_sf)
 importFrom(stats,na.omit)
 importFrom(tibble,tibble)
 importFrom(tibble,tribble)
 importFrom(tidyr,drop_na)
+importFrom(tidyr,pivot_wider)
 importFrom(tidyselect,matches)
+importFrom(vroom,col_character)
+importFrom(vroom,col_integer)
+importFrom(vroom,cols)
diff --git a/R/custom_gradient_color.R b/R/custom_gradient_color.R
@@ -0,0 +1,30 @@
+#' Custom Gradient Color
+#'
+#' This function generates a custom gradient color based on the input values for
+#' "high," "medium," and "low.
+#'
+#' @param high A numeric value representing the intensity of the "high" color
+#' (0 to 1).
+#' @param medium A numeric value representing the intensity of the "medium"
+#' color (0 to 1).
+#' @param low A numeric value representing the intensity of the "low" color
+#' (0 to 1).
+#'
+#' @return A character string representing the combined RGB color code.
+#' @noRd
+#'
+#' @examples
+#' custom_gradient_color(1, 0.5, 0.2)
+custom_gradient_color <- function(high, medium, low) {
+  # define RGB values for "high," "medium," and "low"
+  high_color <- red <- c(1, 0, 0)
+  medium_color <- orange <- c(1, 0.5, 0)
+  low_color <- green <- c(0, 1, 0)
+
+  # interpolate the colors based on proportions : 1 is highest intensity
+  final_color <- high_color * high + medium_color * medium + low_color * low
+
+  final_color <- do.call(rgb, as.list(final_color))
+
+  return(final_color)
+}
diff --git a/R/map_region_risk.R b/R/map_region_risk.R
@@ -0,0 +1,46 @@
+#' Create a map with the risk color of each region (NUTS3 granularity)
+#'
+#' @param data A data frame like [financial]
+#' @param country_code Country code (ISO 3166 alpha-2) for which the map will be
+#' plotted.
+#' @param benchmark The mode of benchmark to plot.
+#' It can be one of "all", "unit" or "tilt_sec", "unit_tilt_sec", "isic_sec"
+#' or "unit_isic_sec". If nothing is chosen, "all" is the default mode.
+#'
+#' @param finance_weight The mode of financial data to plot (#TODO : fix financial columns).
+#' It can be one of "equal_weight", "worst_case" or "best_case". If nothing is
+#' chosen, "equal_weight" is the default mode.
+#'
+#' @return A ggplot2 object representing the country data plot.
+#' @export
+#'
+#' @examples
+#' # Plot a German with a "unit" benchmark and equal_weight finance
+#' map_region_risk(financial, country_code = "DE", benchmark = "unit")
+map_region_risk <- function(data,
+                            # TODO : Plot for other countries
+                            country_code = c("DE"),
+                            benchmark = c(
+                              "all",
+                              "unit",
+                              "tilt_sec",
+                              "unit_tilt_sec",
+                              "isic_sec",
+                              "unit_isic_sec"
+                            ),
+                            finance_weight = c("equal_weight", "worst_case", "best_case")) {
+  prepared_data <- prepare_geo_data(
+    data,
+    country_code,
+    benchmark,
+    finance_weight
+  )
+  shp_1 <- prepared_data[[1]]
+  aggregated_data <- prepared_data[[2]]
+
+  # create map based on financial geo with two layers; financial data and map
+  ggplot() +
+    geom_sf(data = aggregated_data, mapping = aes(fill = .data$color)) +
+    geom_sf(data = shp_1, fill = NA) +
+    coord_sf()
+}
diff --git a/R/nuts_de.R b/R/nuts_de.R
@@ -0,0 +1,8 @@
+#' Correspondence table between German postcodes and NUTS level 3 codes
+#'
+#' @source "© European Union - GISCO, 2021, postal code point dataset,
+#' Licence CC-BY-SA 4.0 available at https://ec.europa.eu/eurostat/web/gisco/geodata/reference-data."
+#' @examples
+#' nuts_de
+#' @keywords internal
+"nuts_de"
diff --git a/R/plot_sankey.R b/R/plot_sankey.R
@@ -11,11 +11,6 @@
 #' with the same highest risk, we assume equal weights again.
 #' * "best_case" - similar to the worst-case scenario but just with the
 #' lowest-risk category.
-#' * "main_activity" - sometimes banks give one sector classification to one
-#' company. However, with our data we know that sometimes the products stem from
-#' different sectors. Knowing that the bank categorizes the product in one
-#' specific sector, we could assume that the bank only finance the product in
-#' the sector that it categories the company in.
 #'
 #' @return A sankey plot of class [ggalluvial].
 #' @export
@@ -26,11 +21,12 @@
 #'
 #' # Plot with best_case weight
 #' plot_sankey(financial, mode = "best_case")
-plot_sankey <- function(data, with_company = TRUE, mode = c("equal_weight", "worst_case", "best_case", "main_activity")) {
+plot_sankey <- function(data,
+                        with_company = TRUE,
+                        mode = c("equal_weight", "worst_case", "best_case")) {
   mode <- arg_match(mode)
 
   crucial <- c(
-    "main_activity",
     "_risk_category",
     "equal_weight_finance",
     "worst_case_finance",
@@ -45,7 +41,7 @@ plot_sankey <- function(data, with_company = TRUE, mode = c("equal_weight", "wor
     data = data,
     aes(
       y = .data[[switch_mode(mode)]],
-      axis1 = .data$kg_id,
+      axis1 = .data$bank_id,
       axis3 = .data$tilt_sector,
       axis4 = as_risk_category(.data[[risk_var]]),
       fill = as_risk_category(.data[[risk_var]])

diff --git a/R/plot_xctr_financial.R b/R/plot_xctr_financial.R
@@ -6,7 +6,7 @@
 #' @param company_name (optional) The name of the specific company to plot the
 #' financial data for. If NULL, the function will plot the financial data for the portfolio.
 #' @param mode The mode of financial data to plot.
-#' It can be one of "equal_weight", "worst_case", "best_case", or "main_activity".
+#' It can be one of "equal_weight", "worst_case" or "best_case".
 #'
 #' @return A ggplot2 object representing the financial data plot.
 #'
@@ -19,15 +19,16 @@
 #' # Example 2: Plot portfolio-level financial data
 #' plot_xctr_financial(data = financial, mode = "worst_case")
 #'
-plot_xctr_financial <- function(data, company_name = NULL, mode = c("equal_weight", "worst_case", "best_case", "main_activity")) {
+plot_xctr_financial <- function(data,
+                                company_name = NULL,
+                                mode = c("equal_weight", "worst_case", "best_case")) {
   mode <- arg_match(mode)
 
   # TODO: do we want to drop NA's everywhere silently?
   data <- data |>
-    drop_na(-c(.data$equal_weight_finance, .data$worst_case_finance, .data$best_case_finance, .data$main_activity))
+    drop_na(-c(.data$equal_weight_finance, .data$worst_case_finance, .data$best_case_finance))
 
   crucial <- c(
-    "main_activity",
     "_risk_category",
     "equal_weight_finance",
     "worst_case_finance",

diff --git a/R/prepare_geo_data.R b/R/prepare_geo_data.R
@@ -0,0 +1,75 @@
+#' Prepare Geo Data for a Specific Country
+#'
+#' @inheritParams map_region_risk
+#'
+#' @return A list containing the following components:
+#'   - \code{shp_1}: Spatial data for the specified country.
+#'   - \code{aggregated_data}: Aggregated financial data.
+#' @noRd
+#'
+#' @examples
+#' prepare_geo_data(financial_data)
+prepare_geo_data <- function(data,
+                             country_code = c("DE"),
+                             benchmark = c(
+                               "all",
+                               "unit",
+                               "tilt_sec",
+                               "unit_tilt_sec",
+                               "isic_sec",
+                               "unit_isic_sec"
+                             ),
+                             finance_weight = c("equal_weight", "worst_case", "best_case")) {
+  benchmark_arg <- arg_match(benchmark)
+  finance_weight <- arg_match(finance_weight)
+
+  crucial <- c(
+    "_risk_category",
+    "equal_weight_finance",
+    "worst_case_finance",
+    "best_case_finance"
+  )
+  data |> check_crucial_names(names_matching(data, crucial))
+  risk_var <- names_matching(data, "_risk_category")
+
+  # get shapefile of European countries
+  shp_0 <- get_eurostat_geospatial(
+    resolution = 10,
+    nuts_level = 3,
+    year = 2016,
+    crs = 3035,
+    make_valid = TRUE
+  )
+
+  # filter for the specified country
+  shp_1 <- shp_0 |>
+    filter(.data$CNTR_CODE == country_code) |>
+    select(geo = "NUTS_ID", "geometry") |>
+    arrange(.data$geo) |>
+    st_as_sf()
+
+  # merge to have zip codes with NUTS file
+  shp_1 <- shp_1 |>
+    inner_join(nuts_de, by = "geo")
+
+  # merge shapefile with financial data
+  financial_geo <- data |>
+    filter(benchmark == benchmark_arg) |>
+    left_join(shp_1, by = "postcode") |>
+    st_as_sf()
+
+  # Add the code for data aggregation and color mapping
+  aggregated_data <- financial_geo |>
+    group_by(.data$postcode, .data$xctr_risk_category) |>
+    summarize(count = n()) |>
+    group_by(.data$postcode) |>
+    mutate(proportion = .data$count / sum(.data$count)) |>
+    ungroup()
+
+  # apply custom_gradient_color to each row
+  aggregated_data <- aggregated_data |>
+    pivot_wider(names_from = "xctr_risk_category", values_from = "proportion", values_fill = 0) |>
+    mutate(color = pmap(list(.data$high, .data$medium, .data$low), custom_gradient_color))
+
+  return(list(shp_1, aggregated_data))
+}
diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/R/tiltPlot-package.R b/R/tiltPlot-package.R
@@ -2,29 +2,37 @@
 "_PACKAGE"
 
 ## usethis namespace: start
+#' @importFrom dplyr arrange
 #' @importFrom dplyr bind_rows
 #' @importFrom dplyr case_when
 #' @importFrom dplyr filter
 #' @importFrom dplyr group_by
+#' @importFrom dplyr inner_join
+#' @importFrom dplyr left_join
 #' @importFrom dplyr mutate
 #' @importFrom dplyr n
 #' @importFrom dplyr pull
 #' @importFrom dplyr select
 #' @importFrom dplyr summarise
 #' @importFrom dplyr summarize
+#' @importFrom dplyr ungroup
+#' @importFrom eurostat get_eurostat_geospatial
 #' @importFrom ggalluvial geom_alluvium
 #' @importFrom ggalluvial geom_stratum
 #' @importFrom ggalluvial StatStratum
 #' @importFrom ggplot2 aes
 #' @importFrom ggplot2 after_stat
+#' @importFrom ggplot2 coord_sf
 #' @importFrom ggplot2 element_blank
 #' @importFrom ggplot2 element_text
 #' @importFrom ggplot2 facet_wrap
 #' @importFrom ggplot2 geom_bar
 #' @importFrom ggplot2 geom_col
 #' @importFrom ggplot2 geom_label
+#' @importFrom ggplot2 geom_sf
 #' @importFrom ggplot2 geom_text
 #' @importFrom ggplot2 ggplot
+#' @importFrom ggplot2 ggplot_build
 #' @importFrom ggplot2 ggtitle
 #' @importFrom ggplot2 labs
 #' @importFrom ggplot2 scale_fill_manual
@@ -36,13 +44,22 @@
 #' @importFrom ggplot2 unit
 #' @importFrom ggplot2 ylim
 #' @importFrom glue glue
+#' @importFrom grDevices rgb
+#' @importFrom here here
+#' @importFrom purrr pmap
+#' @importFrom readr read_csv2
 #' @importFrom rlang .data
 #' @importFrom rlang .env
 #' @importFrom rlang arg_match
+#' @importFrom sf st_as_sf
 #' @importFrom stats na.omit
 #' @importFrom tibble tibble
 #' @importFrom tibble tribble
 #' @importFrom tidyr drop_na
+#' @importFrom tidyr pivot_wider
 #' @importFrom tidyselect matches
+#' @importFrom vroom col_character
+#' @importFrom vroom col_integer
+#' @importFrom vroom cols
 ## usethis namespace: end
 NULL
diff --git a/R/utils.R b/R/utils.R
@@ -57,8 +57,7 @@ switch_mode <- function(mode) {
   switch(mode,
     "equal_weight" = "equal_weight_finance",
     "worst_case" = "worst_case_finance",
-    "best_case" = "best_case_finance",
-    "main_activity" = "main_activity"
+    "best_case" = "best_case_finance"
   )
 }
 

diff --git a/README.Rmd b/README.Rmd
@@ -102,4 +102,14 @@ plot_xctr(no_fin) +
   labs(title = "Risk distribution of all products on a portfolio level")
 ```
 
+### 4. Create a German map with risk categories color gradient
+
+```{r}
+map_region_risk(financial, "DE", benchmark = "unit_isic_sec") +
+  labs(title = "German map of high, medium and low propotion of the companies
+  that are found in one region.
+  © EuroGeographics for the administrative boundaries ")
+```
+
+