Skip to content

Commit

Permalink
Merge pull request #184 from OHI-Science/dev
Browse files Browse the repository at this point in the history
Merging dev changes into master
  • Loading branch information
Melsteroni authored Dec 8, 2016
2 parents 410cebb + 73a6c0a commit c0bf7e1
Show file tree
Hide file tree
Showing 36 changed files with 756 additions and 442 deletions.
11 changes: 7 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Package: ohicore
Description: A collection of functions for generically calculating the Ocean
Health Index scores as well as individual goals and sub-goals.
Version: 0.1
Date: 2013-09-25
Version: 0.2
Date: 2016-07-28
Title: Ocean Health Index calculation package
Author: Ben Best, Steve Hastings, Darren Hardy
Maintainer: Ben Best <bbest@nceas.ucsb.edu>
Author: Ben Best, Melanie Frazier, Julia Stewart Lowndes, Casey O'Hara, Ning Jiang Mendes, Jamie Afflerbach, Steve Hastings, Darren Hardy
Maintainer: Melanie Frazier <frazier@nceas.ucsb.edu>
License: MIT
LazyData: TRUE
Depends:
Expand Down Expand Up @@ -40,6 +40,9 @@ Collate:
'compare_scores_df.R'
'gapfill_georegions.R'
'name_to_rgn.R'
'name_2_rgn.R'
'collapse_2_rgn.R'
'data.R'
'read_git_csv.R'
'shp_to_geojson.R'
'trace_git_csv_value.R'
Expand Down
20 changes: 2 additions & 18 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ export(Layers)
export(PlotFlower)
export(SelectLayersData)
export(change_plot)
export(collapse_2_rgn)
export(compare_scores_df)
export(gapfill_georegions)
export(mapvalues)
export(name_2_rgn)
export(name_to_rgn)
export(read_git_csv)
export(score.clamp)
Expand All @@ -24,22 +26,4 @@ exportClasses(Conf)
exportClasses(Layers)
import(dplyr)
import(ggplot2)
import(git2r)
import(htmlwidgets)
import(plotly)
import(plyr)
import(reshape2)
import(tidyr)
importFrom(dplyr,arrange)
importFrom(dplyr,desc)
importFrom(dplyr,failwith)
importFrom(dplyr,filter)
importFrom(dplyr,id)
importFrom(dplyr,intersect)
importFrom(dplyr,mutate)
importFrom(dplyr,setdiff)
importFrom(dplyr,setequal)
importFrom(dplyr,summarise)
importFrom(dplyr,summarize)
importFrom(dplyr,union)
importFrom(plyr,rename)
48 changes: 21 additions & 27 deletions R/CalculateAll.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,24 +51,6 @@
#' ## write
#' write.csv(scores, 'scores.csv', na='', row.names=F)
#' }
#' @import reshape2
#' @import plyr
#' @import dplyr
#' @import tidyr
#' @import git2r
#' @importFrom dplyr arrange
#' @importFrom dplyr desc
#' @importFrom dplyr failwith
#' @importFrom dplyr id
#' @importFrom dplyr mutate
#' @importFrom dplyr summarize
#' @importFrom dplyr summarise
#' @importFrom dplyr filter
#' @importFrom dplyr intersect
#' @importFrom dplyr setdiff
#' @importFrom dplyr setequal
#' @importFrom dplyr union
#' @importFrom plyr rename
#'
#' @export
CalculateAll = function(conf, layers){
Expand Down Expand Up @@ -102,6 +84,16 @@ CalculateAll = function(conf, layers){
assign('scores', scores, envir=conf$functions)
if (nrow(subset(scores, goal==g & dimension %in% c('status','trend')))!=0) stop(sprintf('Scores were assigned to goal %s by previous goal function.', g))
scores_g = eval(parse(text=goals_X$preindex_function[i]), envir=conf$functions)

# error if 'status' or 'trend' are missing
if ( !all( c('status', 'trend') %in% unique(scores_g$dimension)) ){
stop(sprintf('Missing "status" or "trend" dimension in %s goal model\n', g))
}
# error if something other than 'status' or 'trend' as dimension
if ( !all(unique(scores_g$dimension) %in% c('status', 'trend')) ){
stop(sprintf('"status" and "trend" should be the only dimensions in %s goal model\n', g))
}

if (nrow(scores_g) > 0){
scores = rbind(scores, scores_g[,c('goal','dimension','region_id','score')])
}
Expand All @@ -112,7 +104,8 @@ CalculateAll = function(conf, layers){
scores = rbind(scores, scores_P)

## Calculate Resilience, all goals
scores = rbind(scores, CalculateResilienceAll(layers, conf))
scores_R = CalculateResilienceAll(layers, conf)
scores = rbind(scores, scores_R)
scores = data.frame(scores)

## Calculate Goal Score and Likely Future, all goals
Expand Down Expand Up @@ -159,7 +152,7 @@ CalculateAll = function(conf, layers){
scores = rbind(scores, scores_G)
}

## Post-Index functions: supragoals
## Post-Index functions: Calculate Status, Trend, Likely Future State and Scores for 'Supragoals'
goals_Y = subset(conf$goals, !is.na(postindex_function))
supragoals = subset(conf$goals, is.na(parent), goal, drop=T); supragoals

Expand All @@ -172,18 +165,19 @@ CalculateAll = function(conf, layers){
scores = eval(parse(text=goals_Y$postindex_function[i]), envir=conf$functions)
}

## Calculate Region Index Scores using goal weights
cat(sprintf('Calculating Index score for each region for supragoals using goal weights...\n'))
## Calculate Overall Index Scores for each region using goal weights
cat(sprintf('Calculating Index Score for each region using goal weights to combine goal scores...\n'))

# calculate weighted-mean Index scores from goal scores and rbind to 'scores' variable
scores =
rbind(scores,
scores %>%
scores %>%

# filter only supragoal scores, merge with supragoal weightings
dplyr::filter(dimension=='score', goal %in% supragoals) %>%
merge(conf$goals %>%
select(goal, weight)) %>%
dplyr::select(goal, weight)) %>%
dplyr::mutate(weight = as.numeric(weight)) %>%

# calculate the weighted mean of supragoals, add goal and dimension column
dplyr::group_by(region_id) %>%
Expand All @@ -193,8 +187,8 @@ CalculateAll = function(conf, layers){
data.frame())


## Calculate Region Likely Future State Scores using goal weights
cat(sprintf('Calculating Likely Future State for each region for supragoals using goal weights...\n'))
## Calculate Overall Index Likely Future State for each region
cat(sprintf('Calculating Index Likely Future State for each region...\n'))

# calculate weighted-mean Likely Future State scores and rbind to 'scores' variable
scores =
Expand All @@ -204,7 +198,7 @@ CalculateAll = function(conf, layers){
# filter only supragoal scores, merge with supragoal weightings
dplyr::filter(dimension=='future', goal %in% supragoals) %>%
merge(conf$goals %>%
select(goal, weight)) %>%
dplyr::select(goal, weight)) %>%

# calculate the weighted mean of supragoals, add goal and dimension column
dplyr::group_by(region_id) %>%
Expand Down
6 changes: 3 additions & 3 deletions R/CalculatePressuresAll.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ CalculatePressuresAll = function(layers, conf){

## further preparation of matrix data for analysis
p_matrix <- p_matrix %>%
left_join(p_categories, by="layer") %>%
dplyr::left_join(p_categories, by="layer") %>%
dplyr::group_by(goal, element, category, subcategory) %>%
dplyr::mutate(max_subcategory = max(m_intensity)) %>%
data.frame()
Expand Down Expand Up @@ -221,7 +221,7 @@ CalculatePressuresAll = function(layers, conf){
scores <- regions_dataframe %>%
dplyr::left_join(calc_pressure, by="region_id") %>%
dplyr::mutate(dimension="pressures") %>%
select(goal, dimension, region_id, score=pressure) %>%
mutate(score = round(score*100, 2))
dplyr::select(goal, dimension, region_id, score=pressure) %>%
dplyr::mutate(score = round(score*100, 2))
return(scores)
}
4 changes: 2 additions & 2 deletions R/CalculateResilienceAll.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ CalculateResilienceAll = function(layers, conf){
scores <- regions_dataframe %>%
dplyr::left_join(calc_resil, by="region_id") %>%
dplyr::mutate(dimension="resilience") %>%
select(goal, dimension, region_id, score=val_num) %>%
mutate(score = round(score*100, 2))
dplyr::select(goal, dimension, region_id, score=val_num) %>%
dplyr::mutate(score = round(score*100, 2))
return(scores)

}
2 changes: 0 additions & 2 deletions R/change_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
#' @param save_csv If TRUE, the difference csv file will be saved.
#' @param save_png If TRUE, a static png of the image will be saved.
#'
#' @import plotly
#' @import htmlwidgets
#'
#' @export
change_plot = function(repo = "ohi-global", scenario="eez2014", commit="previous",
Expand Down
113 changes: 113 additions & 0 deletions R/collapse_2_rgn.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
#' Collapse region
#'
#' Collapse data for duplicate regions.
#'
#' @param df_in dataset
#' @param fld_id
#' @param fld_value field with value, defaults to 'value'
#' @param collapse_fxn function to collapse duplicate regions into one (example: China, Macau, Hong Kong)
#' @param collapse_wts table with weights if weighted mean function is chosen
#'
#' @details This function collapses duplicate regions (example: China, Macau, Hong Kong)
#'
#' @keywords ohi
#' @export
collapse_2_rgn <- function(df_in,
fld_value,
fld_id = 'rgn_id',
collapse_fxn = c('sum','mean','weighted_mean')[1],
collapse_wts = NULL) {
### Expectation: a data frame, with columns for rgn_id, maybe rgn_name, and some value.
### Group by rgn_id and unique fields; summarize value column(s) according to collapse_fxn.
### collapse_csv? collapse_flds_join (not used...?)?
### collapse_wts will be a .csv or a df with rgn_id and another column to be used for weighting.
### What if other variables not captured by "fld_unique"?
### DETAIL. Return a data.frame (vs add_rgn_id which writes to a csv)
### and perform extra checks, including collapsing on duplicates.
### Note: The original fld_name lost because possible to collapse multiple
### countries into a single region.

### check for valid arguments stopifnot(fld_name %in% names(d))
### ? should be warning for these?
stopifnot(fld_id %in% names(df_in))
stopifnot(fld_value %in% names(df_in))
stopifnot(all(fld_keep %in% names(df_in)))
# stopifnot(sum(duplicated(df_in[ , fld_keep])) == 0)

fld_keep_rgn_id <- c(fld_id, setdiff(c(fld_keep), fld_id))

### are there duplicates? create index of duplicated records
i_dupes <- duplicated(df_in[, fld_keep_rgn_id], fromLast = FALSE) |
duplicated(df_in[, fld_keep_rgn_id], fromLast = TRUE)

if (sum(i_dupes) == 0) {
### No duplicates - return dataframe as is.
cat(sprintf('No duplicates found in %s. \n', paste(fld_keep_rgn_id, collapse = ', ')))
df_out <- df_in
} else {
### Duplicates found; collapse using function
cat(sprintf('\nDuplicate values found for %s. \n', paste(fld_keep_rgn_id, collapse = ', ')))
cat(sprintf('Resolving by collapsing %s with collapse_fxn: %s after first removing all NAs from duplicates...\n',
fld_id, collapse_fxn))

fld_dropped <- names(df_in)[!names(df_in) %in% c(fld_value, fld_keep_rgn_id)]
message(sprintf('Dropping variables: %s\n', paste(fld_dropped, collapse = ', ')))
message(' Use argument fld_keep to prevent variables from being dropped.\n')

### create a data.frame of just the duplicated records, for collapsing
df_in_dup <- df_in[i_dupes, ] %>%
dplyr::arrange(rgn_id, rgn_name)
print(df_in_dup)

### set tmp_value to be the value, to protect original value
df_in_dup$tmp_value <- df_in_dup[[fld_value]]

if (collapse_fxn == "sum") {
df_in_collapsed <- df_in_dup %>%
dplyr::filter(!is.na(tmp_value)) %>%
dplyr::group_by_(.dots = as.list(fld_keep_rgn_id)) %>%
dplyr::summarize(tmp_value = sum(tmp_value))
### NOTE: since NAs removed above, no need here... similar below
} else if(collapse_fxn == "mean") {
df_in_collapsed <- df_in_dup %>%
dplyr::filter(!is.na(tmp_value)) %>%
dplyr::group_by_(.dots=as.list(fld_keep_rgn_id)) %>%
dplyr::summarize(tmp_value = mean(tmp_value))
} else if(collapse_fxn == "weighted_mean") {
wts_df <- switch(class(collapse_wts),
'character' = read.csv(collapse_wts, stringsAsFactors = FALSE),
'data.frame' = collapse_wts,
'NULL' = stop('Must set weighting values for weighted mean'),
as.data.frame(collapse_wts))

flds_matched <- intersect(names(wts_df), names(df_in_dup))
fld_collapse <- setdiff(names(wts_df), names(df_in_dup))
stopifnot(length(fld_collapse) == 1)
wts_df['tmp_weight'] <- wts_df[fld_collapse]
wts_df <- wts_df[, c(flds_matched, "tmp_weight")]
df_in_collapsed <- df_in_dup %>%
dplyr::left_join(wts_df, by = flds_matched) %>%
dplyr::filter(!is.na(tmp_value) & !is.na(tmp_weight)) %>%
dplyr::group_by_(.dots = as.list(fld_keep_rgn_id)) %>%
dplyr::summarize(tmp_value = sum(tmp_value * tmp_weight)/sum(tmp_weight))
} else {
stop("collapse_fxn needs to be a string of one of the following: sum, mean, weighted_mean.")
}

### clean up tmp_value field, quick check
df_in_collapsed <- df_in_collapsed %>%
dplyr::rename_(.dots = setNames('tmp_value', fld_value))
head(df_in_collapsed)

df_out <- rbind(df_in[!i_dupes, c(fld_keep_rgn_id, fld_value)],
df_in_collapsed)
}

### limit to same subset of fields for consistent behavior regardless of duplicates presents
df_out <- df_out[, c(fld_keep_rgn_id, fld_value)]

### check to ensure no duplicates remaining in kept fields
stopifnot(duplicated(df_out[, c(fld_keep_rgn_id)]) == 0)

return(as.data.frame(df_out))
}
16 changes: 8 additions & 8 deletions R/compare_scores_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,24 @@ compare_scores_df = function(a_csv, b_csv, r_csv, g_csv){

# merge
d = a %>%
base::merge(
merge(
b,
by=c('goal','dimension','region_id'),
suffixes=c('.a','.b')) %>%
dplyr::rename(rgn_id=region_id) %>%
mutate(
dplyr::mutate(
score.dif = score.a - score.b,
score.na = is.na(score.a)!=is.na(score.b)) %>%
left_join(
rbind_list(
dplyr::left_join(
dplyr::bind_rows(
r %>%
select(rgn_id, rgn_name=label),
dplyr::select(rgn_id, rgn_name=label),
data.frame(rgn_id=0, rgn_name='GLOBAL')),
by='rgn_id') %>%
# filter(abs(score_dif) > 0.01 | score_na == T) %>%
arrange(rgn_id!=0, goal!='Index', dimension!='score', goal, desc(dimension), desc(abs(score.dif)), is.na(score.a), is.na(score.b)) %>%
select(goal, dimension, rgn_id, rgn_name, score.a, score.b, score.dif) %>%
mutate(
dplyr::arrange(rgn_id!=0, goal!='Index', dimension!='score', goal, desc(dimension), desc(abs(score.dif)), is.na(score.a), is.na(score.b)) %>%
dplyr::select(goal, dimension, rgn_id, rgn_name, score.a, score.b, score.dif) %>%
dplyr::mutate(
goal = factor(goal, c('Index', g$goal)),
dimension = factor(dimension, ohi_dimensions),
id = row_number())
Expand Down
Loading

0 comments on commit c0bf7e1

Please sign in to comment.