Skip to content

Commit

Permalink
updated synonyms list, added comments and Roxygen skeleton to update_…
Browse files Browse the repository at this point in the history
…name_2_rgn.Rmd
  • Loading branch information
annaramji committed Jul 18, 2024
1 parent 62066a0 commit dd95581
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 13 deletions.
Binary file modified data/georegion_labels.rda
Binary file not shown.
Binary file modified data/georegions.rda
Binary file not shown.
Binary file modified data/rgn_master.rda
Binary file not shown.
Binary file modified data/rgn_synonyms.rda
Binary file not shown.
Binary file modified data/sovregion_labels.rda
Binary file not shown.
Binary file modified data/sovregions.rda
Binary file not shown.
Binary file modified data/split_pops.rda
Binary file not shown.
2 changes: 2 additions & 0 deletions data_raw/rgn_eez_v2013a_synonyms.csv
Original file line number Diff line number Diff line change
Expand Up @@ -301,3 +301,5 @@ NA,Zimbabwe,NA,NA,landlocked
205,myanmar (burma),MM,MMR,ohi_region
127,st. vincent and grenadines,VC,VCT,ohi_region
116,u.s. virgin islands,PR,PRI,ohi_region
209,"China, Macao Special Administrative Region",CN,CHN,ohi_region
209,"China, Hong Kong Special Administrative Region",CN,CHN,ohi_region
44 changes: 41 additions & 3 deletions updating_functions/name_2_rgn/update_name_2_rgn.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ This script can be used to update the synonyms in name_2_rgn as well as confirm

**1.)** Run the code below to update the synonyms list in name_2_rgn. If you need to delete or edit existing synonyms, open \~/data_raw/rgn_eez_v2013a_synonyms.csv and manually edit the file.

```{r}
```{r v2023-updates, eval=FALSE}
library(tidyverse)
load(here::here("data/rgn_master.rda"))
Expand All @@ -27,17 +27,55 @@ new_synonyms <- c("congo - brazzaville",
#region ids, update with the matching region id for the synonym
#put NA for landlocked or disputed
region_ids <- c(100,199, 205, 127, 116)
region_ids <- c(100, 199, 205, 127, 116)
#rgn type (must be ohi_region, landlocked, or disputed)
region_types <- c("ohi_region", "ohi_region", "ohi_region", "ohi_region", "ohi_region")
#run the function to add synonyms
source(here::here("updating_functions/name_2_rgn/update_synonyms.R"))
update_synonyms(new_synonyms, region_ids, region_types)
update_synonyms(synonyms_list = new_synonyms,
region_id_list = region_ids,
rgn_type_list = region_types)
```


v2024 updates:

- "China, Macao Special Administrative Region"
- "China, Hong Kong Special Administrative Region"

- also updated `update_synonyms.R` --> added Roxygen skeleton, comments

```{r v2024-updates, eval=FALSE}
# Attach the tidyverse and load rgn_master.rda
# (update_synonyms() reads rgn_master from the session)
library(tidyverse)
load(here::here("data/rgn_master.rda"))

# Synonyms added for v2024
new_synonyms <- c(
  "China, Macao Special Administrative Region",
  "China, Hong Kong Special Administrative Region"
)

# One OHI region id per synonym, in the same order as new_synonyms
# (use NA for landlocked or disputed entries)
region_ids <- rep(209, length(new_synonyms))

# One region type per synonym: ohi_region, landlocked, or disputed
region_types <- rep("ohi_region", length(new_synonyms))

# Bring update_synonyms() into scope, then add the new entries
source(here::here("updating_functions/name_2_rgn/update_synonyms.R"))
update_synonyms(
  synonyms_list = new_synonyms,
  region_id_list = region_ids,
  rgn_type_list = region_types
)
# synonyms successfully added!
```




**2.)** Rerun data_prepare.R to update the rda files stored in the data folder.

```{r}
Expand Down
37 changes: 27 additions & 10 deletions updating_functions/name_2_rgn/update_synonyms.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,43 @@
old_synonyms <- read_csv(here::here("data_raw/rgn_eez_v2013a_synonyms.csv"))
#' Update Synonyms function for OHI region names
#'
#' @param synonyms_list list of synonyms
#' @param region_id_list list of OHI region IDs you want to associate these synonyms with
#' @param rgn_type_list type of region: 'ohi_region', 'landlocked', or 'disputed'
#'
#' @return Called for its side effect: appends the new synonyms to rgn_eez_v2013a_synonyms.csv
#' @export
#'
#' @examples # see update_name_2_rgn.Rmd

valid_values <- c('ohi_region', 'landlocked', 'disputed')
update_synonyms <- function(synonyms_list, region_id_list, rgn_type_list) {


#make them into a data frame
update_synonyms <- function(synonyms_list, region_id_list, rgn_type_list) {

# source old synonyms
old_synonyms <- read_csv(here::here("data_raw/rgn_eez_v2013a_synonyms.csv"))

# define valid values
valid_values <- c('ohi_region', 'landlocked', 'disputed')

# make them into a data frame
rgn_data <- tibble(rgn_id_2013 = region_id_list, rgn_nam_2013 = synonyms_list,
rgn_typ = rgn_type_list)
rgn_typ = rgn_type_list)


rgn_main <- rgn_master %>% select(-c(rgn_nam_2013, rgn_typ))

#lookup the correct information for each
# lookup the correct information for each
rgn_syn_new <- rgn_data %>%
left_join(rgn_main, by = "rgn_id_2013") %>%
select(rgn_id_2013, rgn_nam_2013, rgn_key_2013,
eez_iso3, rgn_typ) # select the rows we want
eez_iso3, rgn_typ) # select the rows we want
cat("\nConfirm new line(s) are correct:\n")
print(rgn_syn_new)


name_check <- rgn_syn_new %>%
left_join(rgn_master, by ="rgn_id_2013")

# check if region names are correct
cat("\nConfirm these are the correct region names for your synonyms:\n")
print(name_check$rgn_nam_2013.y)

Expand All @@ -31,15 +48,15 @@ update_synonyms <- function(synonyms_list, region_id_list, rgn_type_list) {
if (tolower(confirmation) == "yes") {

final_synonyms <- rbind(old_synonyms, rgn_syn_new)
#check that there are no duplicates
# check that there are no duplicates
final_synonyms_test <- final_synonyms %>%
mutate(rgn_nam_2013 = tolower(rgn_nam_2013)) %>%
mutate(rgn_nam_2013 = stringr::str_remove(rgn_nam_2013, ",")) %>%
mutate(rgn_nam_2013 = stringr::str_remove(rgn_nam_2013, "'")) %>%
mutate(rgn_nam_2013 = stringr::str_remove(rgn_nam_2013, "´")) %>%
mutate(rgn_nam_2013 = stringr::str_remove(rgn_nam_2013, "")) %>%
group_by(rgn_nam_2013) %>%
summarize(n=n()) %>% filter(n >1)
summarize(n = n()) %>% filter(n >1)

invalid_rgn_types <- rgn_data$rgn_typ[rgn_data$rgn_typ %in% c("landlocked", "disputed")]

Expand Down

0 comments on commit dd95581

Please sign in to comment.