Skip to content

Commit

Permalink
remove pixel counting method entirely
Browse files Browse the repository at this point in the history
  • Loading branch information
mitchest committed Feb 1, 2018
1 parent 793f6a4 commit b8ce282
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 20 deletions.
63 changes: 55 additions & 8 deletions accuracy_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,21 @@ user_accuracy <- function(conf_mat) {
data.frame(as.list(ret))
}

# mapped area as per Olofsson et al. 2014;
mapped_areas <- function(idx, conf_mat_list, area_tables) {
# pixel counts
mapped_areas <- as.numeric(area_tables[[idx]])
if (length(mapped_areas) != 4) {return(rep(NA,4))}
mapped_areas_p <- mapped_areas / sum(mapped_areas)
# p_ij matrix
if (dim_check(conf_mat_list[[idx]])) {return(rep(NA,4))}
conf_mat_nij <- matrix(as.vector(prop.table(conf_mat_list[[idx]], 1)), nrow = 4, ncol = 4)
conf_mat_pij <- matrix(mapped_areas_p, nrow = 4, ncol = 4) * conf_mat_nij # DANGER - hard coded to 4 classes
# area estimates
areas_pk <- colSums(conf_mat_pij)
area_estimates <- areas_pk * sum(mapped_areas)
area_estimates / sum(area_estimates)
}


# collect metrics ---------------------------------------------------------
Expand Down Expand Up @@ -170,32 +185,64 @@ collect_one_iteration <- function(iter_n, get_this, big_list, data) {
)) %>% mutate(iter_n = iter_n)
}

collect_image_results <- function(this_row, get_this, iter_n){
collect_image_results <- function(this_row, get_this, iter_n, data){
if (get_this$type[this_row] == "boot") {
area_tables <- iter_n[[get_this$scenario[this_row]]][[get_this$method[this_row]]]
conf_mat_list <- lapply(
X = 1:length(iter_n[["boot"]][[1]]),
FUN = get_conf_mat,
iter_n[["boot"]][["test"]],
iter_n[["boot"]][[get_this$tt[this_row]]],
data)
area_estimates <- lapply(
X = 1:length(iter_n[["boot"]][[1]]),
FUN = mapped_areas,
conf_mat_list, area_tables)
} else {
area_tables <- iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][[get_this$method[this_row]]]
conf_mat_list <- lapply(
X = 1:length(iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][[1]]),
FUN = get_conf_mat,
iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][["test"]],
iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][[get_this$tt[this_row]]],
data)
area_estimates <- lapply(
X = 1:length(iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][[1]]),
FUN = mapped_areas,
conf_mat_list, area_tables)
}
prop_tables <- lapply(area_tables, function(x) {x/sum(x)})
data.frame(
Banksia = unlist(lapply(prop_tables, `[[`, 1)),
Eucalypt = unlist(lapply(prop_tables, `[[`, 2)),
Teatree = unlist(lapply(prop_tables, `[[`, 3)),
Wetheath = unlist(lapply(prop_tables, `[[`, 4)),
Banksia = unlist(lapply(area_estimates, `[[`, 1)),
Eucalypt = unlist(lapply(area_estimates, `[[`, 2)),
Teatree = unlist(lapply(area_estimates, `[[`, 3)),
Wetheath = unlist(lapply(area_estimates, `[[`, 4)),
# method info
type = get_this$type[this_row],
method = get_this$method[this_row],
scenario = get_this$scenario[this_row])

# pixel counting method
# prop_tables <- lapply(area_tables, function(x) {x/sum(x)})
# data.frame(
# Banksia = unlist(lapply(prop_tables, `[[`, 1)),
# Eucalypt = unlist(lapply(prop_tables, `[[`, 2)),
# Teatree = unlist(lapply(prop_tables, `[[`, 3)),
# Wetheath = unlist(lapply(prop_tables, `[[`, 4)),
# # method info
# type = get_this$type[this_row],
# method = get_this$method[this_row],
# scenario = get_this$scenario[this_row])
}

collect_image_iteration <- function(iter_n, get_this, big_list) {
collect_image_iteration <- function(iter_n, get_this, big_list, data) {
print(paste0("Collecting iteration ", iter_n))
print(Sys.time())
rbindlist(lapply(
X = 1:nrow(get_this),
FUN = collect_image_results,
get_this,
big_list[[iter_n]]
big_list[[iter_n]],
data
)) %>% mutate(iter_n = iter_n)
}

Expand Down
7 changes: 4 additions & 3 deletions calculate_accuracy.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,16 @@ get_this_image <- data.frame(
rep(names(big_list_image[[1]][["rrcv"]]), each = 2),
rep(names(big_list_image[[1]][["kfold"]]), each = 2)),
method = c(rep(c("image_lda", "image_rf"), 13)),
tt = c(rep(c("image", "image"), 13)),
tt = c(rep(c("test_lda", "test_rf"), 13)),
stringsAsFactors = F)



# calculate stats ---------------------------------------------------------

# for image_preds
image_results <- collect_image_iteration(1, get_this_image, big_list_image)
image_results <- collect_image_iteration(1, get_this_image, big_list_image, survey_points) %>%
na.omit()
saveRDS(image_results, file="image_results.rds")

metric_results <- rbindlist(lapply(
Expand Down Expand Up @@ -145,7 +146,7 @@ fig3dat <- image_results_long %>%
"Teatree" = "Tea Tree Thicket", "Wetheath" = "Wet Heath"),
sample_structure = recode(sample_structure, "class-space" = "class & space"),
value = value * 100) # % for prettier numbers on plot
fig3 <- ggplot(figdat, aes(y = value)) +
fig3 <- ggplot(fig3dat, aes(y = value)) +
geom_violin(aes(x = sample_structure, fill = sample_fraction), scale = "area", draw_quantiles = c(0.05,0.5,0.9), lwd=0.25) +
scale_fill_manual("Resampling design", values = c("#969696", "#d55e00", "#f0e442", "#56b4e9")) +
scale_y_continuous(breaks = pretty_breaks) +
Expand Down
42 changes: 33 additions & 9 deletions mapping_example.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ fit_and_map <- function(niter, data, image_data) {
}

# fit n iterations of the resmapling routine
mapping_runs <- lapply(1:10, fit_and_map, survey_points, image_data)
mapping_runs <- lapply(1:800, fit_and_map, survey_points, image_data)
saveRDS(mapping_runs, file = "A:/1_UNSW/0_data/Dharawal_project/mapping_runs.rds")

mapping_runs <- readRDS("A:/1_UNSW/0_data/Dharawal_project/mapping_runs.rds")
Expand All @@ -97,6 +97,26 @@ med_ci <- function(x) {
round(c(med,ci)*100, 0)
}

# mapped area as per Olofsson et al. 2014;
mapped_areas <- function(idx, conf_mat_list, area_tables) {
# util function
dim_check <- function(x, len = 4) { # DANGER - hard coded to 4 classes, use len = if error matrix is different size
dim(x)[1] != len | dim(x)[2] != len
}
# pixel counts
mapped_areas <- as.numeric(area_tables[[idx]])
if (length(mapped_areas) != 4) {return(rep(NA,4))}
mapped_areas_p <- mapped_areas / sum(mapped_areas)
# p_ij matrix
if (dim_check(conf_mat_list[[idx]])) {return(rep(NA,4))}
conf_mat_nij <- matrix(as.vector(prop.table(conf_mat_list[[idx]], 1)), nrow = 4, ncol = 4)
conf_mat_pij <- matrix(mapped_areas_p, nrow = 4, ncol = 4) * conf_mat_nij # DANGER - hard coded to 4 classes
# area estimates
areas_pk <- colSums(conf_mat_pij)
area_estimates <- areas_pk * sum(mapped_areas)
area_estimates / sum(area_estimates)
}

# get vector of accuracy stats and get mean/intervals
accuracy_distribution <- unlist(lapply(mapping_runs, `[[`, 1))
med_ci(accuracy_distribution)
Expand All @@ -105,12 +125,16 @@ med_ci(accuracy_distribution)
class_areas <- lapply(mapping_runs, `[[`, 2)
class_areas <- lapply(class_areas, table)

class_proportions <- lapply(class_areas, function(x){(x/sum(x))})
conf_mat_list <- lapply(mapping_runs, `[[`, 3)
conf_mat_list <- lapply(conf_mat_list, table)

mapped_areas_list <- lapply(1:length(class_areas), mapped_areas, conf_mat_list, class_areas)

class_proportions <- list(
Banksia = unlist(lapply(class_proportions, `[[`, 1)),
Eucalypt = unlist(lapply(class_proportions, `[[`, 2)),
Teatree = unlist(lapply(class_proportions, `[[`, 3)),
Wetheath = unlist(lapply(class_proportions, `[[`, 4)))
Banksia = unlist(lapply(mapped_areas_list, `[[`, 1)),
Eucalypt = unlist(lapply(mapped_areas_list, `[[`, 2)),
Teatree = unlist(lapply(mapped_areas_list, `[[`, 3)),
Wetheath = unlist(lapply(mapped_areas_list, `[[`, 4)))

lapply(class_proportions, med_ci)

Expand All @@ -122,7 +146,7 @@ lapply(class_proportions, med_ci)
# Olofsson, P., Foody, G. M., Herold, M., Stehman, S. V., Woodcock, C. E., & Wulder, M. A. (2014). Good practices for estimating area and assessing accuracy of land change. Remote Sensing of Environment, 148, 42-57.

### choose an iteration (close to median value?) to do simultaneous intervals on
eg_run <- 5 # run chosen for the simultaneous example
eg_run <- 17 # run chosen for the simultaneous example
test_true <- mapping_runs[[eg_run]][[3]]
conf_mat <- table(test_true$test, test_true$true)
mapped_areas <- table(mapping_runs[[eg_run]][[2]])
Expand Down Expand Up @@ -165,7 +189,7 @@ sampled_accuracies <- function(test_true) {
sample_frac(1, replace = T) # this is the bootstrap
conf_mat <- table(data$test, data$true)
list(perc_agr = sum(diag(conf_mat)) / sum(conf_mat),
user = diag(conf_mat) / rowSums(conf_mat))
user = diag(conf_mat) / rowSums(conf_mat))
}

singlerun_resample <- replicate(n = 1000,
Expand Down Expand Up @@ -200,7 +224,7 @@ singlerun_area_95 <- (unlist(lapply(singlerun_error_df, quantile, 0.975)) -
# full resample
med_ci(accuracy_distribution)
# bootstrap single run
med_ci(singlerun_oa)
med_ci(singlerun_oa); sd(singlerun_oa) * 1.96
# simultaneous single run
round(c(estimate=oa, `2.5%`=oa-oa_95, `97.5%`=oa+oa_95)*100)
round(c(oa, oa_95)*100)
Expand Down

0 comments on commit b8ce282

Please sign in to comment.