-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #202 from OHI-Science/dev
merging into master
- Loading branch information
Showing
37 changed files
with
1,199 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
#' Update layers_eez_base.csv | ||
#' By Peter Menzies | ||
#' | ||
#' Convenient interface for changing directory paths of recently updated layers in layers_eez_base.csv | ||
#' No arguments required - user will be prompted for needed information | ||
#' | ||
#' | ||
#' @keywords ohi | ||
#' @export | ||
|
||
|
||
layers_eez_base_updater <- function() { | ||
|
||
require(here) | ||
require(tidyverse) | ||
|
||
# read in the csv | ||
layers_eez_base <- read_csv(here("metadata_documentation/layers_eez_base.csv"), col_types = cols()) | ||
|
||
# prompt user for current version year (gsub() to remove 'v' if user adds it as well) | ||
message("") | ||
version_year <- paste0("v", gsub("\\D", "", readline(prompt = "enter version year: "))) | ||
|
||
# empty vector that will later contain layer names - used in while loop | ||
possible_layers <- c() | ||
|
||
# loop that ends when a viable goal/subgoal abbr is supplied | ||
while (length(possible_layers) == 0) { | ||
|
||
message("") | ||
goal <- readline(prompt = "enter the goal/subgoal/prs/res abbreviation for the layers you're updating (e.g. 'np', 'hab', or 'cc'): ") %>% | ||
tolower() | ||
|
||
possible_layers <- layers_eez_base$layer[startsWith(layers_eez_base$layer, goal)] | ||
|
||
if (length(possible_layers) == 0) { | ||
message("\nthere are no layers starting with that abbreviation\n") | ||
} | ||
} | ||
|
||
# the component layers of that goal/subgoal are printed below for user's convenience | ||
message("\nthese are the layers associated with that abbreviation:\n") | ||
print(possible_layers) | ||
message("\nif you want to update all of these layers, enter 'all' at the next prompt —") | ||
message("if you are only updating certain ones you can copypaste the layer names above separated by commas\n") | ||
|
||
|
||
# prompt user for layers which have been updated | ||
updated_layers <- str_split(readline(prompt = "enter 'all' or layers separated only by commas: "), ",")[[1]] %>% | ||
str_remove_all(" ") | ||
|
||
if (tolower(updated_layers[1]) == "all") { | ||
updated_layers <- possible_layers | ||
} | ||
|
||
|
||
# loop that executes or repeats if entries don't match any of the layer names in the goal / subgoal | ||
while (length(intersect(updated_layers, possible_layers)) < length(updated_layers)) { | ||
|
||
unknown_layers <- setdiff(updated_layers, possible_layers) | ||
|
||
if (length(unknown_layers) == length(updated_layers)) { | ||
message("\nnone of the layers entered coincide with the chosen abbreviation\n") | ||
} else if (length(unknown_layers) == 1) { | ||
message(paste0("\nthe following entry does not coincide with the chosen abbreviation: \n")) | ||
print(unknown_layers) | ||
message("") | ||
} else { | ||
message("\nthe following entries do not coincide with the chosen abbreviation: \n") | ||
print(unknown_layers) | ||
message("") | ||
} | ||
|
||
updated_layers <- str_split(readline(prompt = "enter 'all' or layers separated only by commas: "), ",")[[1]] %>% | ||
str_remove_all(" ") | ||
|
||
if (tolower(updated_layers[1]) == "all") { | ||
updated_layers <- possible_layers | ||
} | ||
} | ||
|
||
# create df with updated file paths | ||
layers_eez_base_updated <- layers_eez_base %>% | ||
mutate(dir = case_when(layer %in% updated_layers ~ | ||
gsub("v20\\d\\d", version_year, dir), | ||
TRUE ~ dir)) | ||
|
||
# vector of dir names which have changed from the original csv | ||
updated_dirs <- anti_join(layers_eez_base_updated, layers_eez_base, by = c("layer", "dir")) %>% | ||
rename("dir (UPDATED)" = dir) | ||
|
||
# make sure the chosen year and layers elicited changes | ||
if (nrow(updated_dirs) != 0) { | ||
message("\nthe selected 'dir' values will be updated as shown in the data viewer ↑ ") | ||
message("do you want to update layers_eez_base.csv with these changes?\n") | ||
View(updated_dirs) | ||
|
||
# request for permission to overwrite current csv with version containing updated dirs | ||
overwrite <- readline(prompt = "update? ('y' or 'n'): ") | ||
|
||
if (overwrite == "y") { | ||
write.csv(layers_eez_base_updated, here("metadata_documentation/layers_eez_base.csv"), | ||
row.names = FALSE) | ||
message("\nfile has been updated\n") | ||
|
||
} else { | ||
message("\nfile was *NOT* updated\n") | ||
} | ||
|
||
# if no changes were elicited to any file paths, end function with message | ||
} else { | ||
message("\nthe chosen file paths already contain that version year - no updates were made\n") | ||
} | ||
|
||
} | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#' Split Macro-Regions to OHI Regions | ||
#' | ||
#' This function takes datasets containing country records that include multiple OHI regions | ||
#' (macro-regions) and breaks them down into individual OHI regions. It uses population-weighted | ||
#' values to distribute data across the new regions. When a dataset contains both a macro-region | ||
#' and a sub-region, the sub-region's data is calculated by summing the population weight of the | ||
#' macro region and the record for the region. | ||
#' | ||
#' @param m The input dataset containing countries and associated values. | ||
#' @param country_column The column name in the dataset `m` representing the countries. Defaults to "country". | ||
#' @param value_column The column name in the dataset `m` representing the values associated with each country. Defaults to "value". | ||
#' @param duplicate A logical value. If TRUE, the values will not be split between new regions, e.g., when calculating sustainability scores. Defaults to FALSE. | ||
#' | ||
#' @details The function is built to recognize common macro-region names and their corresponding OHI regions. | ||
#' It's imperative for users to be aware that this function might require updates if new macro-regions | ||
#' or changes to OHI regions occur in the future. | ||
#' | ||
#' Population data is used to weight the values for each newly split region. This data must be provided | ||
#' in the `split_pops` data frame (external to this function). If `duplicate` is set to TRUE, values are | ||
#' not divided among regions but duplicated instead. | ||
#' | ||
#' @return A dataset with macro-regions split into individual OHI regions. | ||
#' | ||
#' @examples | ||
#' # This assumes existence of a dataset similar in structure to expected input and `split_pops` | ||
#' # updated_data <- region_split(original_data) | ||
#' | ||
#' @keywords ohi, macro-region, split | ||
#' @export | ||
|
||
split_regions <- function(m, country_column = "country", value_column = "value", duplicate = FALSE) { | ||
|
||
# List of macro-regions to break down | ||
split_details <- list( | ||
`Netherlands Antilles` = c("Bonaire", "Sint Eustatius", "Saba", "Curaçao", "Sint Maarten", "Aruba"), | ||
`Bonaire/S.Eustatius/Saba` = c("Bonaire", "Sint Eustatius", "Saba"), | ||
`Saint Helena/Asc./Trist.` = c("Tristan da Cunha", "Saint Helena", "Ascension"), | ||
`Channel Islands` = c("Guernsey", "Jersey"), | ||
`United States Minor Outlying Island` = c("Wake Island", "Jarvis Island", "Palmyra Atoll", "Howland Island and Baker Island", "Johnston Atoll"), | ||
`French Southern Territories` = c("Glorioso Islands", "Juan de Nova Island", "Bassas da India", "Ile Europa", "Ile Tromelin", "Crozet Islands", "Amsterdam Island and Saint Paul Island", "Kerguelen Islands"), | ||
`Bonaire, Sint Eustatius and Saba` = c("Bonaire", "Sint Eustatius", "Saba"), | ||
`French Southern Terr` = c("Glorioso Islands", "Juan de Nova Island", "Bassas da India", "Ile Europa", "Ile Tromelin", "Crozet Islands", "Amsterdam Island and Saint Paul Island", "Kerguelen Islands"), | ||
`United States Minor Outlying Islands` = c("Wake Island", "Jarvis Island", "Palmyra Atoll", "Howland Island and Baker Island", "Johnston Atoll"), | ||
`Saint Helena, Ascension and Tristan da Cunha` = c("Saint Helena", "Ascension", "Tristan da Cunha"), | ||
`Caribbean Netherlands` = c("Bonaire", "Sint Eustatius", "Saba"), | ||
`Channel Isl. (UK)` = c("Jersey", "Guernsey"), | ||
`Saba and Sint Eustatius (Netherlands)` = c("Saba", "Sint Eustatius"), | ||
`Mozambique Channel Isl. (France)` = c("Juan de Nova Island", "Bassas da India", "Ile Europa") | ||
) | ||
|
||
# Rename data frame columns | ||
m <- m %>% | ||
rename(!!paste0(country_column) := country, !!paste0(value_column) := value) | ||
|
||
# Load population weighting data | ||
population <- split_pops | ||
|
||
# Loop over all macro region names within the country column | ||
for (country_name in names(split_details)) { | ||
|
||
# Check to see if macro country name is present | ||
if (country_name %in% m$country) { | ||
|
||
# Pull the regions associated with the macro country name | ||
regions <- split_details[[country_name]] | ||
|
||
# Calculate total population for the regions | ||
pop_sum <- sum(population$population[population$country %in% regions]) | ||
|
||
# Calculate area weights for the regions | ||
area_weights <- population %>% | ||
filter(country %in% regions) %>% | ||
mutate(weight = ifelse(duplicate, 1, population / pop_sum)) %>% # Conditionally set weight | ||
select(country, weight) %>% | ||
mutate(id = row_number()) | ||
|
||
# Split the data into different regions | ||
m_new <- m %>% | ||
filter(country == country_name) %>% | ||
uncount(length(regions), .id = "id") %>% | ||
left_join(area_weights, by = c("id" = "id")) %>% | ||
mutate(value = value * weight) %>% | ||
mutate(country = country.y) %>% | ||
select(-country.x, -country.y, -id, -weight) | ||
|
||
# Update m to remove the original country and add broken down rows | ||
m <- m %>% | ||
filter(!(country %in% country_name)) %>% | ||
rbind(m_new) | ||
|
||
# Sum duplicate rows if duplicate is False | ||
if (!duplicate) { | ||
grouping_vars <- setdiff(names(m), "value") | ||
|
||
m <- m %>% | ||
group_by(across(all_of(grouping_vars))) %>% | ||
summarize(value = case_when(all(is.na(value)) ~ NA, | ||
TRUE ~ sum(value, na.rm = TRUE))) %>% | ||
ungroup() | ||
|
||
# Remove duplicates if duplicate is True | ||
} else { | ||
|
||
# Remove duplicates | ||
m <- m[!duplicated(m), ] | ||
|
||
} # End duplicate if statement | ||
|
||
} # End country if statement | ||
|
||
} # End for loop | ||
|
||
# Rename columns to match data frame input | ||
m <- m %>% | ||
rename(!!country_column := country, !!value_column := value) | ||
|
||
# Return the new data frame | ||
return(m) | ||
|
||
} # End function |
This file was deleted.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
### Prepare data files for R | ||
### if data needs to be updated, do this in the source files and then resave in ohicore | ||
### eventually this will have its own package ('rohiprep') | ||
|
||
library(devtools) | ||
library(tidyverse) | ||
|
||
rgn_synonyms <- read_csv('data_raw/rgn_eez_v2013a_synonyms.csv') | ||
usethis::use_data(rgn_synonyms, overwrite = TRUE) | ||
|
||
rgn_master <- read_csv('data_raw/eez_rgn_2013main.csv') | ||
usethis::use_data(rgn_master, overwrite = TRUE) | ||
|
||
## The following have not been updated, will do this when I figure out how they are used. | ||
|
||
georegion_labels <- read.csv('data_raw/georegion_labels.csv') | ||
usethis::use_data(georegion_labels, overwrite = TRUE) | ||
|
||
|
||
georegions <- read.csv('data_raw/georegions.csv') | ||
usethis::use_data(georegions, overwrite = TRUE) | ||
|
||
sovregion_labels <- read.csv('data_raw/sovregion_labels.csv') | ||
usethis::use_data(sovregion_labels, overwrite = TRUE) | ||
|
||
sovregions <- read.csv('data_raw/sovregions.csv') | ||
usethis::use_data(sovregions, overwrite = TRUE) | ||
|
||
split_pops <- read_csv('data_raw/split_pops.csv') | ||
usethis::use_data(split_pops, overwrite = TRUE) | ||
|
Oops, something went wrong.