remove pixel counting method entirely

mitchest · Feb 1, 2018 · b8ce282 · b8ce282
1 parent 793f6a4
commit b8ce282
Show file tree

Hide file tree

Showing 3 changed files with 92 additions and 20 deletions.
diff --git a/accuracy_functions.R b/accuracy_functions.R
@@ -78,6 +78,21 @@ user_accuracy <- function(conf_mat) {
   data.frame(as.list(ret))
 }
 
+# mapped area as per Olofsson et al. 2014;
+mapped_areas <- function(idx, conf_mat_list, area_tables) {
+  # pixel counts
+  mapped_areas <- as.numeric(area_tables[[idx]])
+  if (length(mapped_areas) != 4) {return(rep(NA,4))}
+  mapped_areas_p <- mapped_areas / sum(mapped_areas)
+  # p_ij matrix
+  if (dim_check(conf_mat_list[[idx]])) {return(rep(NA,4))}
+  conf_mat_nij <- matrix(as.vector(prop.table(conf_mat_list[[idx]], 1)), nrow = 4, ncol = 4)
+  conf_mat_pij <- matrix(mapped_areas_p, nrow = 4, ncol = 4) * conf_mat_nij # DANGER - hard coded to 4 classes
+  # area estimates
+  areas_pk <- colSums(conf_mat_pij)
+  area_estimates <- areas_pk * sum(mapped_areas)
+  area_estimates / sum(area_estimates)
+}
 
 
 # collect metrics ---------------------------------------------------------
@@ -170,32 +185,64 @@ collect_one_iteration <- function(iter_n, get_this, big_list, data) {
   )) %>% mutate(iter_n = iter_n)
 }
 
-collect_image_results <- function(this_row, get_this, iter_n){
+collect_image_results <- function(this_row, get_this, iter_n, data){
   if (get_this$type[this_row] == "boot") {
     area_tables <- iter_n[[get_this$scenario[this_row]]][[get_this$method[this_row]]]
+    conf_mat_list <- lapply(
+      X = 1:length(iter_n[["boot"]][[1]]),
+      FUN = get_conf_mat,
+      iter_n[["boot"]][["test"]],
+      iter_n[["boot"]][[get_this$tt[this_row]]],
+      data)
+    area_estimates <- lapply(
+      X = 1:length(iter_n[["boot"]][[1]]),
+      FUN = mapped_areas,
+      conf_mat_list, area_tables)
   } else {
     area_tables <- iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][[get_this$method[this_row]]]
+    conf_mat_list <- lapply(
+      X = 1:length(iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][[1]]),
+      FUN = get_conf_mat,
+      iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][["test"]],
+      iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][[get_this$tt[this_row]]],
+      data)
+    area_estimates <- lapply(
+      X = 1:length(iter_n[[get_this$scenario[this_row]]][[get_this$type[this_row]]][[1]]),
+      FUN = mapped_areas,
+      conf_mat_list, area_tables)
   }
-  prop_tables <- lapply(area_tables, function(x) {x/sum(x)})
   data.frame(
-    Banksia = unlist(lapply(prop_tables, `[[`, 1)),
-    Eucalypt = unlist(lapply(prop_tables, `[[`, 2)),
-    Teatree = unlist(lapply(prop_tables, `[[`, 3)),
-    Wetheath = unlist(lapply(prop_tables, `[[`, 4)),
+    Banksia = unlist(lapply(area_estimates, `[[`, 1)),
+    Eucalypt = unlist(lapply(area_estimates, `[[`, 2)),
+    Teatree = unlist(lapply(area_estimates, `[[`, 3)),
+    Wetheath = unlist(lapply(area_estimates, `[[`, 4)),
     # method info
     type = get_this$type[this_row],
     method = get_this$method[this_row],
     scenario = get_this$scenario[this_row])
+
+  # pixel counting method
+  # prop_tables <- lapply(area_tables, function(x) {x/sum(x)})
+  # data.frame(
+  #   Banksia = unlist(lapply(prop_tables, `[[`, 1)),
+  #   Eucalypt = unlist(lapply(prop_tables, `[[`, 2)),
+  #   Teatree = unlist(lapply(prop_tables, `[[`, 3)),
+  #   Wetheath = unlist(lapply(prop_tables, `[[`, 4)),
+  #   # method info
+  #   type = get_this$type[this_row],
+  #   method = get_this$method[this_row],
+  #   scenario = get_this$scenario[this_row])
 }
 
-collect_image_iteration <- function(iter_n, get_this, big_list) {
+collect_image_iteration <- function(iter_n, get_this, big_list, data) {
   print(paste0("Collecting iteration ", iter_n))
   print(Sys.time())
   rbindlist(lapply(
     X = 1:nrow(get_this),
     FUN = collect_image_results,
     get_this,
-    big_list[[iter_n]]
+    big_list[[iter_n]],
+    data
   )) %>% mutate(iter_n = iter_n)
 }
 

diff --git a/calculate_accuracy.R b/calculate_accuracy.R
@@ -60,15 +60,16 @@ get_this_image <- data.frame(
            rep(names(big_list_image[[1]][["rrcv"]]), each = 2),
            rep(names(big_list_image[[1]][["kfold"]]), each = 2)),
   method = c(rep(c("image_lda", "image_rf"), 13)),
-  tt = c(rep(c("image", "image"), 13)),
+  tt = c(rep(c("test_lda", "test_rf"), 13)),
   stringsAsFactors = F)
 
 
 
 # calculate stats ---------------------------------------------------------
 
 # for image_preds
-image_results <- collect_image_iteration(1, get_this_image, big_list_image)
+image_results <- collect_image_iteration(1, get_this_image, big_list_image, survey_points) %>%
+  na.omit()
 saveRDS(image_results, file="image_results.rds")
 
 metric_results <- rbindlist(lapply(
@@ -145,7 +146,7 @@ fig3dat <- image_results_long %>%
                                 "Teatree" = "Tea Tree Thicket", "Wetheath" = "Wet Heath"),
          sample_structure = recode(sample_structure, "class-space" = "class & space"),
          value = value * 100) # % for prettier numbers on plot
-fig3 <- ggplot(figdat, aes(y = value)) +
+fig3 <- ggplot(fig3dat, aes(y = value)) +
   geom_violin(aes(x = sample_structure, fill = sample_fraction), scale = "area", draw_quantiles = c(0.05,0.5,0.9), lwd=0.25) +
   scale_fill_manual("Resampling design", values = c("#969696", "#d55e00", "#f0e442", "#56b4e9")) +
   scale_y_continuous(breaks = pretty_breaks) +

diff --git a/mapping_example.R b/mapping_example.R
@@ -83,7 +83,7 @@ fit_and_map <- function(niter, data, image_data) {
 }
 
 # fit n iterations of the resmapling routine
-mapping_runs <- lapply(1:10, fit_and_map, survey_points, image_data)
+mapping_runs <- lapply(1:800, fit_and_map, survey_points, image_data)
 saveRDS(mapping_runs, file = "A:/1_UNSW/0_data/Dharawal_project/mapping_runs.rds")
 
 mapping_runs <- readRDS("A:/1_UNSW/0_data/Dharawal_project/mapping_runs.rds")
@@ -97,6 +97,26 @@ med_ci <- function(x) {
   round(c(med,ci)*100, 0)
 }
 
+# mapped area as per Olofsson et al. 2014;
+mapped_areas <- function(idx, conf_mat_list, area_tables) {
+  # util function
+  dim_check <- function(x, len = 4) { # DANGER - hard coded to 4 classes, use len = if error matrix is different size
+    dim(x)[1] != len | dim(x)[2] != len
+  }
+  # pixel counts
+  mapped_areas <- as.numeric(area_tables[[idx]])
+  if (length(mapped_areas) != 4) {return(rep(NA,4))}
+  mapped_areas_p <- mapped_areas / sum(mapped_areas)
+  # p_ij matrix
+  if (dim_check(conf_mat_list[[idx]])) {return(rep(NA,4))}
+  conf_mat_nij <- matrix(as.vector(prop.table(conf_mat_list[[idx]], 1)), nrow = 4, ncol = 4)
+  conf_mat_pij <- matrix(mapped_areas_p, nrow = 4, ncol = 4) * conf_mat_nij # DANGER - hard coded to 4 classes
+  # area estimates
+  areas_pk <- colSums(conf_mat_pij)
+  area_estimates <- areas_pk * sum(mapped_areas)
+  area_estimates / sum(area_estimates)
+}
+
 # get vector of accuracy stats and get mean/intervals
 accuracy_distribution <- unlist(lapply(mapping_runs, `[[`, 1))
 med_ci(accuracy_distribution)
@@ -105,12 +125,16 @@ med_ci(accuracy_distribution)
 class_areas <- lapply(mapping_runs, `[[`, 2)
 class_areas <- lapply(class_areas, table)
 
-class_proportions <- lapply(class_areas, function(x){(x/sum(x))})
+conf_mat_list <- lapply(mapping_runs, `[[`, 3)
+conf_mat_list <- lapply(conf_mat_list, table)
+
+mapped_areas_list <- lapply(1:length(class_areas), mapped_areas, conf_mat_list, class_areas)
+
 class_proportions <- list(
-  Banksia = unlist(lapply(class_proportions, `[[`, 1)),
-  Eucalypt = unlist(lapply(class_proportions, `[[`, 2)),
-  Teatree = unlist(lapply(class_proportions, `[[`, 3)),
-  Wetheath = unlist(lapply(class_proportions, `[[`, 4)))
+  Banksia = unlist(lapply(mapped_areas_list, `[[`, 1)),
+  Eucalypt = unlist(lapply(mapped_areas_list, `[[`, 2)),
+  Teatree = unlist(lapply(mapped_areas_list, `[[`, 3)),
+  Wetheath = unlist(lapply(mapped_areas_list, `[[`, 4)))
 
 lapply(class_proportions, med_ci)
 
@@ -122,7 +146,7 @@ lapply(class_proportions, med_ci)
 # Olofsson, P., Foody, G. M., Herold, M., Stehman, S. V., Woodcock, C. E., & Wulder, M. A. (2014). Good practices for estimating area and assessing accuracy of land change. Remote Sensing of Environment, 148, 42-57.
 
 ### choose an iteration (close to median value?) to do simultaneous intervals on
-eg_run <- 5 # run chosen for the simultaneous example
+eg_run <- 17 # run chosen for the simultaneous example
 test_true <- mapping_runs[[eg_run]][[3]]
 conf_mat <- table(test_true$test, test_true$true)
 mapped_areas <- table(mapping_runs[[eg_run]][[2]])
@@ -165,7 +189,7 @@ sampled_accuracies <- function(test_true) {
     sample_frac(1, replace = T) # this is the bootstrap
   conf_mat <- table(data$test, data$true)
   list(perc_agr = sum(diag(conf_mat)) / sum(conf_mat),
-             user = diag(conf_mat) / rowSums(conf_mat))
+       user = diag(conf_mat) / rowSums(conf_mat))
 }
 
 singlerun_resample <- replicate(n = 1000,
@@ -200,7 +224,7 @@ singlerun_area_95 <- (unlist(lapply(singlerun_error_df, quantile, 0.975)) -
 # full resample
 med_ci(accuracy_distribution)
 # bootstrap single run
-med_ci(singlerun_oa)
+med_ci(singlerun_oa); sd(singlerun_oa) * 1.96
 # simultaneous single run
 round(c(estimate=oa, `2.5%`=oa-oa_95, `97.5%`=oa+oa_95)*100)
 round(c(oa, oa_95)*100)