Skip to content

Commit

Permalink
Create a German Map showing regional risks (#73)
Browse files Browse the repository at this point in the history
* added postcodes into the data

* first trial of creating simple german map without any polygons

* tried europstat

* NUTS2 Level germany

* merged financial data with geometry of map file

* succeded map but have to modify the financial data in order to see something

* modified financial data to see all the risks categories

* added data set and documented + added CC-BY-SA license

* updated code with new financial data set

* updated sankey plot with new financial data

* use snake-case for shapefiles

* modified license and imported package functions

* created gradient color for each combination of low, medium and high risk categories

* fixed function argument

* documented functions

* documented unknown functions

* added import of the new function of the germap_map function

* corrected nuts_de documentation and added german_map function doc

* added a little test

* added test color

* passed all the checks

* updated README file

* added copyrighted to the map

* styled

* styled arguments

* create custom gradient color separatly

* added test to the test custom gradient func

* styled plot sankey and xctr arguments

* replaced germap_map by map_country

* added prepare_geo_data.R function

* added test to prepare geo data and rename map_country

* removed csv and wrote tsv file for nuts_de

* rescaled the labs of the title

* modified test to add risks names to the color lists

* used tidy verse style
  • Loading branch information
lindadelacombaz authored Oct 25, 2023
1 parent 173964b commit e92b65a
Show file tree
Hide file tree
Showing 36 changed files with 9,594 additions and 516 deletions.
9 changes: 8 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,21 @@ Depends:
R (>= 2.10)
Imports:
dplyr,
eurostat,
ggalluvial,
ggplot2,
glue,
grDevices,
here,
purrr,
readr,
rlang,
sf,
stats,
tibble,
tidyr,
tidyselect
tidyselect,
vroom
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
Expand Down
18 changes: 18 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,32 +1,41 @@
# Generated by roxygen2: do not edit by hand

export(map_region_risk)
export(plot_sankey)
export(plot_xctr)
export(plot_xctr_financial)
export(theme_tiltplot)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,inner_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(dplyr,summarize)
importFrom(dplyr,ungroup)
importFrom(eurostat,get_eurostat_geospatial)
importFrom(ggalluvial,StatStratum)
importFrom(ggalluvial,geom_alluvium)
importFrom(ggalluvial,geom_stratum)
importFrom(ggplot2,aes)
importFrom(ggplot2,after_stat)
importFrom(ggplot2,coord_sf)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_text)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_col)
importFrom(ggplot2,geom_label)
importFrom(ggplot2,geom_sf)
importFrom(ggplot2,geom_text)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggplot_build)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_fill_manual)
Expand All @@ -38,11 +47,20 @@ importFrom(ggplot2,theme_minimal)
importFrom(ggplot2,unit)
importFrom(ggplot2,ylim)
importFrom(glue,glue)
importFrom(grDevices,rgb)
importFrom(here,here)
importFrom(purrr,pmap)
importFrom(readr,read_csv2)
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(rlang,arg_match)
importFrom(sf,st_as_sf)
importFrom(stats,na.omit)
importFrom(tibble,tibble)
importFrom(tibble,tribble)
importFrom(tidyr,drop_na)
importFrom(tidyr,pivot_wider)
importFrom(tidyselect,matches)
importFrom(vroom,col_character)
importFrom(vroom,col_integer)
importFrom(vroom,cols)
30 changes: 30 additions & 0 deletions R/custom_gradient_color.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#' Custom Gradient Color
#'
#' This function generates a custom gradient color based on the input values for
#' "high," "medium," and "low.
#'
#' @param high A numeric value representing the intensity of the "high" color
#' (0 to 1).
#' @param medium A numeric value representing the intensity of the "medium"
#' color (0 to 1).
#' @param low A numeric value representing the intensity of the "low" color
#' (0 to 1).
#'
#' @return A character string representing the combined RGB color code.
#' @noRd
#'
#' @examples
#' custom_gradient_color(1, 0.5, 0.2)
custom_gradient_color <- function(high, medium, low) {
# define RGB values for "high," "medium," and "low"
high_color <- red <- c(1, 0, 0)
medium_color <- orange <- c(1, 0.5, 0)
low_color <- green <- c(0, 1, 0)

# interpolate the colors based on proportions : 1 is highest intensity
final_color <- high_color * high + medium_color * medium + low_color * low

final_color <- do.call(rgb, as.list(final_color))

return(final_color)
}
46 changes: 46 additions & 0 deletions R/map_region_risk.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Create a map with the risk color of each region (NUTS3 granularity)
#'
#' @param data A data frame like [financial]
#' @param country_code Country code (ISO 3166 alpha-2) for which the map will be
#' plotted.
#' @param benchmark The mode of benchmark to plot.
#' It can be one of "all", "unit" or "tilt_sec", "unit_tilt_sec", "isic_sec"
#' or "unit_isic_sec". If nothing is chosen, "all" is the default mode.
#'
#' @param finance_weight The mode of financial data to plot (#TODO : fix financial columns).
#' It can be one of "equal_weight", "worst_case" or "best_case". If nothing is
#' chosen, "equal_weight" is the default mode.
#'
#' @return A ggplot2 object representing the country data plot.
#' @export
#'
#' @examples
#' # Plot a German with a "unit" benchmark and equal_weight finance
#' map_region_risk(financial, country_code = "DE", benchmark = "unit")
map_region_risk <- function(data,
# TODO : Plot for other countries
country_code = c("DE"),
benchmark = c(
"all",
"unit",
"tilt_sec",
"unit_tilt_sec",
"isic_sec",
"unit_isic_sec"
),
finance_weight = c("equal_weight", "worst_case", "best_case")) {
prepared_data <- prepare_geo_data(
data,
country_code,
benchmark,
finance_weight
)
shp_1 <- prepared_data[[1]]
aggregated_data <- prepared_data[[2]]

# create map based on financial geo with two layers; financial data and map
ggplot() +
geom_sf(data = aggregated_data, mapping = aes(fill = .data$color)) +
geom_sf(data = shp_1, fill = NA) +
coord_sf()
}
8 changes: 8 additions & 0 deletions R/nuts_de.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#' Correspondence table between German postcodes and NUTS level 3 codes
#'
#' @source "© European Union - GISCO, 2021, postal code point dataset,
#' Licence CC-BY-SA 4.0 available at https://ec.europa.eu/eurostat/web/gisco/geodata/reference-data."
#' @examples
#' nuts_de
#' @keywords internal
"nuts_de"
12 changes: 4 additions & 8 deletions R/plot_sankey.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@
#' with the same highest risk, we assume equal weights again.
#' * "best_case" - similar to the worst-case scenario but just with the
#' lowest-risk category.
#' * "main_activity" - sometimes banks give one sector classification to one
#' company. However, with our data we know that sometimes the products stem from
#' different sectors. Knowing that the bank categorizes the product in one
#' specific sector, we could assume that the bank only finance the product in
#' the sector that it categories the company in.
#'
#' @return A sankey plot of class [ggalluvial].
#' @export
Expand All @@ -26,11 +21,12 @@
#'
#' # Plot with best_case weight
#' plot_sankey(financial, mode = "best_case")
plot_sankey <- function(data, with_company = TRUE, mode = c("equal_weight", "worst_case", "best_case", "main_activity")) {
plot_sankey <- function(data,
with_company = TRUE,
mode = c("equal_weight", "worst_case", "best_case")) {
mode <- arg_match(mode)

crucial <- c(
"main_activity",
"_risk_category",
"equal_weight_finance",
"worst_case_finance",
Expand All @@ -45,7 +41,7 @@ plot_sankey <- function(data, with_company = TRUE, mode = c("equal_weight", "wor
data = data,
aes(
y = .data[[switch_mode(mode)]],
axis1 = .data$kg_id,
axis1 = .data$bank_id,
axis3 = .data$tilt_sector,
axis4 = as_risk_category(.data[[risk_var]]),
fill = as_risk_category(.data[[risk_var]])
Expand Down
9 changes: 5 additions & 4 deletions R/plot_xctr_financial.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @param company_name (optional) The name of the specific company to plot the
#' financial data for. If NULL, the function will plot the financial data for the portfolio.
#' @param mode The mode of financial data to plot.
#' It can be one of "equal_weight", "worst_case", "best_case", or "main_activity".
#' It can be one of "equal_weight", "worst_case" or "best_case".
#'
#' @return A ggplot2 object representing the financial data plot.
#'
Expand All @@ -19,15 +19,16 @@
#' # Example 2: Plot portfolio-level financial data
#' plot_xctr_financial(data = financial, mode = "worst_case")
#'
plot_xctr_financial <- function(data, company_name = NULL, mode = c("equal_weight", "worst_case", "best_case", "main_activity")) {
plot_xctr_financial <- function(data,
company_name = NULL,
mode = c("equal_weight", "worst_case", "best_case")) {
mode <- arg_match(mode)

# TODO: do we want to drop NA's everywhere silently?
data <- data |>
drop_na(-c(.data$equal_weight_finance, .data$worst_case_finance, .data$best_case_finance, .data$main_activity))
drop_na(-c(.data$equal_weight_finance, .data$worst_case_finance, .data$best_case_finance))

crucial <- c(
"main_activity",
"_risk_category",
"equal_weight_finance",
"worst_case_finance",
Expand Down
75 changes: 75 additions & 0 deletions R/prepare_geo_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#' Prepare Geo Data for a Specific Country
#'
#' @inheritParams map_region_risk
#'
#' @return A list containing the following components:
#' - \code{shp_1}: Spatial data for the specified country.
#' - \code{aggregated_data}: Aggregated financial data.
#' @noRd
#'
#' @examples
#' prepare_geo_data(financial_data)
prepare_geo_data <- function(data,
country_code = c("DE"),
benchmark = c(
"all",
"unit",
"tilt_sec",
"unit_tilt_sec",
"isic_sec",
"unit_isic_sec"
),
finance_weight = c("equal_weight", "worst_case", "best_case")) {
benchmark_arg <- arg_match(benchmark)
finance_weight <- arg_match(finance_weight)

crucial <- c(
"_risk_category",
"equal_weight_finance",
"worst_case_finance",
"best_case_finance"
)
data |> check_crucial_names(names_matching(data, crucial))
risk_var <- names_matching(data, "_risk_category")

# get shapefile of European countries
shp_0 <- get_eurostat_geospatial(
resolution = 10,
nuts_level = 3,
year = 2016,
crs = 3035,
make_valid = TRUE
)

# filter for the specified country
shp_1 <- shp_0 |>
filter(.data$CNTR_CODE == country_code) |>
select(geo = "NUTS_ID", "geometry") |>
arrange(.data$geo) |>
st_as_sf()

# merge to have zip codes with NUTS file
shp_1 <- shp_1 |>
inner_join(nuts_de, by = "geo")

# merge shapefile with financial data
financial_geo <- data |>
filter(benchmark == benchmark_arg) |>
left_join(shp_1, by = "postcode") |>
st_as_sf()

# Add the code for data aggregation and color mapping
aggregated_data <- financial_geo |>
group_by(.data$postcode, .data$xctr_risk_category) |>
summarize(count = n()) |>
group_by(.data$postcode) |>
mutate(proportion = .data$count / sum(.data$count)) |>
ungroup()

# apply custom_gradient_color to each row
aggregated_data <- aggregated_data |>
pivot_wider(names_from = "xctr_risk_category", values_from = "proportion", values_fill = 0) |>
mutate(color = pmap(list(.data$high, .data$medium, .data$low), custom_gradient_color))

return(list(shp_1, aggregated_data))
}
Binary file added R/sysdata.rda
Binary file not shown.
17 changes: 17 additions & 0 deletions R/tiltPlot-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,37 @@
"_PACKAGE"

## usethis namespace: start
#' @importFrom dplyr arrange
#' @importFrom dplyr bind_rows
#' @importFrom dplyr case_when
#' @importFrom dplyr filter
#' @importFrom dplyr group_by
#' @importFrom dplyr inner_join
#' @importFrom dplyr left_join
#' @importFrom dplyr mutate
#' @importFrom dplyr n
#' @importFrom dplyr pull
#' @importFrom dplyr select
#' @importFrom dplyr summarise
#' @importFrom dplyr summarize
#' @importFrom dplyr ungroup
#' @importFrom eurostat get_eurostat_geospatial
#' @importFrom ggalluvial geom_alluvium
#' @importFrom ggalluvial geom_stratum
#' @importFrom ggalluvial StatStratum
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 after_stat
#' @importFrom ggplot2 coord_sf
#' @importFrom ggplot2 element_blank
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 facet_wrap
#' @importFrom ggplot2 geom_bar
#' @importFrom ggplot2 geom_col
#' @importFrom ggplot2 geom_label
#' @importFrom ggplot2 geom_sf
#' @importFrom ggplot2 geom_text
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 ggplot_build
#' @importFrom ggplot2 ggtitle
#' @importFrom ggplot2 labs
#' @importFrom ggplot2 scale_fill_manual
Expand All @@ -36,13 +44,22 @@
#' @importFrom ggplot2 unit
#' @importFrom ggplot2 ylim
#' @importFrom glue glue
#' @importFrom grDevices rgb
#' @importFrom here here
#' @importFrom purrr pmap
#' @importFrom readr read_csv2
#' @importFrom rlang .data
#' @importFrom rlang .env
#' @importFrom rlang arg_match
#' @importFrom sf st_as_sf
#' @importFrom stats na.omit
#' @importFrom tibble tibble
#' @importFrom tibble tribble
#' @importFrom tidyr drop_na
#' @importFrom tidyr pivot_wider
#' @importFrom tidyselect matches
#' @importFrom vroom col_character
#' @importFrom vroom col_integer
#' @importFrom vroom cols
## usethis namespace: end
NULL
3 changes: 1 addition & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ switch_mode <- function(mode) {
switch(mode,
"equal_weight" = "equal_weight_finance",
"worst_case" = "worst_case_finance",
"best_case" = "best_case_finance",
"main_activity" = "main_activity"
"best_case" = "best_case_finance"
)
}

Expand Down
10 changes: 10 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,14 @@ plot_xctr(no_fin) +
labs(title = "Risk distribution of all products on a portfolio level")
```

### 4. Create a German map with risk categories color gradient

```{r}
map_region_risk(financial, "DE", benchmark = "unit_isic_sec") +
labs(title = "German map of high, medium and low propotion of the companies
that are found in one region.
© EuroGeographics for the administrative boundaries ")
```



Loading

0 comments on commit e92b65a

Please sign in to comment.