From f7e23498ae915eada7d7e8334d09023e978c0bd3 Mon Sep 17 00:00:00 2001
From: andrewgryan <andrew.ryan@metoffice.gov.uk>
Date: Tue, 24 Jan 2023 16:20:21 +0000
Subject: [PATCH 1/7] decouple output directory from call to save

---
 Code/CaseStudy2/1d_DataPrep_PM25.R | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R
index 4a76640..bd5cb7c 100644
--- a/Code/CaseStudy2/1d_DataPrep_PM25.R
+++ b/Code/CaseStudy2/1d_DataPrep_PM25.R
@@ -5,6 +5,9 @@
 library(here)
 source(here("Code", "CaseStudy2", "0_Source.R"))
 
+# Directories
+output_dir <- "Data/CaseStudy2/Processed/PM25"
+
 # Loading shapefiles 
 load("Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData")
 
@@ -175,7 +178,7 @@ for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1)
 }
 
 # Save cams
-save(pm25_cams, file = "Data/CaseStudy2/Processed/PM25/pm25_cams.RData")
+save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData"))
 
 ##############################################
 ### Preparing PM data from ground monitors ###
@@ -432,7 +435,7 @@ rm(tmp1, tmp2, tmp3, aurn_dat, gm_dat, ltn_dat, mcr_msoa,
    stations_aurn_dat, stations_gm_dat, stations_ltn_dat)
 
 # Save aurn data
-save(pm25_gm, file = "Data/CaseStudy2/Processed/PM25/pm25_gm.RData")
+save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData"))
 
 ###################################
 ### Preparing PM data from EMEP ###
@@ -574,11 +577,4 @@ for (i in as.character(seq(as.Date('2021-01-01'), as.Date('2021-04-30'), by = 1)
 }
 
 # Save aurn data
-save(pm25_emep, file = "Data/CaseStudy2/Processed/PM25/pm25_emep.RData")
-
-
-
-
-
-
-
+save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData"))

From 0a57a6c1b8234ca2524ebf56ffcbdefe5ccedf5f Mon Sep 17 00:00:00 2001
From: andrewgryan <andrew.ryan@metoffice.gov.uk>
Date: Tue, 24 Jan 2023 16:47:17 +0000
Subject: [PATCH 2/7] extract main program and add --output-dir flag

---
 Code/CaseStudy2/1d_DataPrep_PM25.R | 1131 ++++++++++++++--------------
 Code/CaseStudy2/cli.R              |   13 +-
 2 files changed, 577 insertions(+), 567 deletions(-)

diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R
index bd5cb7c..374f7a1 100644
--- a/Code/CaseStudy2/1d_DataPrep_PM25.R
+++ b/Code/CaseStudy2/1d_DataPrep_PM25.R
@@ -5,576 +5,579 @@
 library(here)
 source(here("Code", "CaseStudy2", "0_Source.R"))
 
-# Directories
-output_dir <- "Data/CaseStudy2/Processed/PM25"
-
-# Loading shapefiles 
-load("Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData")
-
-##########################################
-### Preparing PM data from CAMS-Europe ###
-##########################################
-################### The following code takes a while to run, so population estimates  ###################
-################### were run separately and saved to a raster (found underneath)      ###################
-# # Empty raster
-# r <- raster(xmn = -10,
-#             xmx = 3,
-#             ymn = 49,
-#             ymx = 62,
-#             res = 0.1)
-# 
-# # Adding unique  ID
-# r[] <- 1:(dim(r)[1]*dim(r)[2])
-# 
-# # Empty dataset to append to
-# weights <- NULL
-# 
-# # Extracting values
-# a1 <- raster::extract(r, # Grid unique IDs
-#                       uk_full, # Shapefiles
-#                       weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap
-#                       small = TRUE) # Small areas in comparison to the raster
-# 
-# # Setting to missing if not in UK
-# r[!(r[] %in% a1[[1]])] <- NA
-# 
-# # file names
-# files <- c('Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20201201-20210531.nc',
-#            'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20210601-20211130.nc',
-#            'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20211201-20220430.nc')
-# 
-# # files start dates
-# start_date <- c(as.Date('2020-12-01'),
-#                 as.Date('2021-06-01'),
-#                 as.Date('2021-12-01'))
-# 
-# # Loop for each file
-# for (i in 1:length(files)){
-#   # Opening raster
-#   ncin <- raster(files[i],
-#                  band = 1,
-#                  verbose = FALSE,
-#                  stopIfNotEqualSpaced = FALSE)
-#   # Getting the number of  days
-#   N_days <- floor(nbands(ncin)/24)
-#   # Dates
-#   Dates <- start_date[i] + (1:N_days) - 1
-#   # Loop for each day in the year
-#   for (j in 1:N_days){
-#     # Getting date
-#     date <- Dates[j]
-#     # Looping for each hour in the day
-#     for (k in (24*(j-1)+1):(24*j)){
-#       # Opening raster
-#       ncin <- raster(files[i],
-#                      band = k,
-#                      verbose = FALSE,
-#                      stopIfNotEqualSpaced = FALSE)
-#       # # Cropping for the UK
-#       # ncin <- crop(ncin, extent(-10, 3, 49, 62))
-#       extent(ncin) <- c(-10, 3, 49, 62)
-#       # Setting to missing if not in  UK
-#       ncin[is.na(r[])] <- NA
-#       # Saving raster
-#       writeRaster(ncin,
-#                   filename = paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''),
-#                   overwrite = TRUE)
-#       # else {keep <- keep + ncin}
-#       print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = ''))
-#     }
-#   }
-# }
-#########################################################################################################
-#########################################################################################################
-
-# Empty raster
-r0 <- raster(xmn = -2.8,
-             xmx = -1.8,
-             ymn = 53.2,
-             ymx = 53.7,
-             res = 0.1)
-
-# Adding unique  ID
-r0[] <- 1:(dim(r0)[1]*dim(r0)[2])
-
-# Subsetting Greater Manchester shapefiles
-mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale',
-                                                    'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan'))
-
-# Converting to long lat
-mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs"))
-
-# Extracting values
-a1 <- raster::extract(r0, # Grid unique IDs
-                      mcr_msoa, # Shapefiles
-                      weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap
-                      small = TRUE) # Small areas in comparison to the raster
-
-# Empty dataset to append to
-Weights_msoa <- NULL
-
-# Loop for each area
-for (i in 1:length(a1)){
-  # Converting Weights_msoa to dataframes
-  tmp <- as.data.frame(a1[[i]])
-  # Removing NAs
-  tmp <- subset(tmp, !is.na(value))
-  # Reweighting the Weights_msoa after zeroes removed
-  tmp$weight <- tmp$weight/sum(tmp$weight)
-  # New Region Name
-  tmp$area_id <- mcr_msoa@data$area_id[i]
-  # Creating new dataset
-  if (i == 1) {Weights_msoa <- tmp}
-  else {Weights_msoa <- rbind(Weights_msoa, tmp)}
-  # Removing unecessary data
-  rm(tmp)
-}
-
-# Altering column names
-names(Weights_msoa)[1] <- c('IDGRID')
-
-# Merging to skeleton dataset
-pm25_cams <- NULL
-
-# Loop for each date
-for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1))){
-  # Loop for each time
-  for (j in 0:23){
-    # Reading in PM25 from CAMS
-    r <- raster(paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
-    # Renaming raster
-    names(r) <- 'pm25'
-    # Creating aggregated estimates of PM25 by MSOA
-    tmp1 <- r %>%
-      # Converting raster to dataframe
-      crop(r0) %>%
-      stack(r0) %>%
-      rasterToPoints() %>%
-      as.data.frame()%>%
-      # Renaming columns
-      dplyr::select(IDGRID = layer, pm25) %>%
-      # Merging on weights to aggregate
-      right_join(Weights_msoa,
-                 by = 'IDGRID') %>%
-      # Aggregating grid to
-      ddply(.(area_id),
-            summarize,
-            pm25_cams_agg = weighted.mean(pm25, weight))
-    # Extracting PM2.5 values at centroids
-    tmp1$pm25_cams_cent<- raster::extract(r, mcr_msoa@data[,c('cent_long', 'cent_lat')])
-    # Adding on date and hour
-    tmp1 <- tmp1 %>%
-      mutate(hour = j,
-             date = i) %>%
-      # Outputting datasets
-      dplyr::select(area_id, date, hour, pm25_cams_cent, pm25_cams_agg)
-    # Appending together
-    pm25_cams <- rbind(pm25_cams, tmp1)
-    # Removing uncessary datasets
-    rm(tmp1, tmp2)    
-    # Printing index
-    print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
+# TODO: Remove dependence of calling directory location
+main <- function(output_dir) {
+  print(output_dir)
+  stop("DEBUG")
+
+  # Loading shapefiles 
+  load("Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData")
+
+  ##########################################
+  ### Preparing PM data from CAMS-Europe ###
+  ##########################################
+  ################### The following code takes a while to run, so population estimates  ###################
+  ################### were run separately and saved to a raster (found underneath)      ###################
+  # # Empty raster
+  # r <- raster(xmn = -10,
+  #             xmx = 3,
+  #             ymn = 49,
+  #             ymx = 62,
+  #             res = 0.1)
+  # 
+  # # Adding unique  ID
+  # r[] <- 1:(dim(r)[1]*dim(r)[2])
+  # 
+  # # Empty dataset to append to
+  # weights <- NULL
+  # 
+  # # Extracting values
+  # a1 <- raster::extract(r, # Grid unique IDs
+  #                       uk_full, # Shapefiles
+  #                       weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap
+  #                       small = TRUE) # Small areas in comparison to the raster
+  # 
+  # # Setting to missing if not in UK
+  # r[!(r[] %in% a1[[1]])] <- NA
+  # 
+  # # file names
+  # files <- c('Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20201201-20210531.nc',
+  #            'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20210601-20211130.nc',
+  #            'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20211201-20220430.nc')
+  # 
+  # # files start dates
+  # start_date <- c(as.Date('2020-12-01'),
+  #                 as.Date('2021-06-01'),
+  #                 as.Date('2021-12-01'))
+  # 
+  # # Loop for each file
+  # for (i in 1:length(files)){
+  #   # Opening raster
+  #   ncin <- raster(files[i],
+  #                  band = 1,
+  #                  verbose = FALSE,
+  #                  stopIfNotEqualSpaced = FALSE)
+  #   # Getting the number of  days
+  #   N_days <- floor(nbands(ncin)/24)
+  #   # Dates
+  #   Dates <- start_date[i] + (1:N_days) - 1
+  #   # Loop for each day in the year
+  #   for (j in 1:N_days){
+  #     # Getting date
+  #     date <- Dates[j]
+  #     # Looping for each hour in the day
+  #     for (k in (24*(j-1)+1):(24*j)){
+  #       # Opening raster
+  #       ncin <- raster(files[i],
+  #                      band = k,
+  #                      verbose = FALSE,
+  #                      stopIfNotEqualSpaced = FALSE)
+  #       # # Cropping for the UK
+  #       # ncin <- crop(ncin, extent(-10, 3, 49, 62))
+  #       extent(ncin) <- c(-10, 3, 49, 62)
+  #       # Setting to missing if not in  UK
+  #       ncin[is.na(r[])] <- NA
+  #       # Saving raster
+  #       writeRaster(ncin,
+  #                   filename = paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''),
+  #                   overwrite = TRUE)
+  #       # else {keep <- keep + ncin}
+  #       print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = ''))
+  #     }
+  #   }
+  # }
+  #########################################################################################################
+  #########################################################################################################
+
+  # Empty raster
+  r0 <- raster(xmn = -2.8,
+               xmx = -1.8,
+               ymn = 53.2,
+               ymx = 53.7,
+               res = 0.1)
+
+  # Adding unique  ID
+  r0[] <- 1:(dim(r0)[1]*dim(r0)[2])
+
+  # Subsetting Greater Manchester shapefiles
+  mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale',
+                                                      'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan'))
+
+  # Converting to long lat
+  mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs"))
+
+  # Extracting values
+  a1 <- raster::extract(r0, # Grid unique IDs
+                        mcr_msoa, # Shapefiles
+                        weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap
+                        small = TRUE) # Small areas in comparison to the raster
+
+  # Empty dataset to append to
+  Weights_msoa <- NULL
+
+  # Loop for each area
+  for (i in 1:length(a1)){
+    # Converting Weights_msoa to dataframes
+    tmp <- as.data.frame(a1[[i]])
+    # Removing NAs
+    tmp <- subset(tmp, !is.na(value))
+    # Reweighting the Weights_msoa after zeroes removed
+    tmp$weight <- tmp$weight/sum(tmp$weight)
+    # New Region Name
+    tmp$area_id <- mcr_msoa@data$area_id[i]
+    # Creating new dataset
+    if (i == 1) {Weights_msoa <- tmp}
+    else {Weights_msoa <- rbind(Weights_msoa, tmp)}
+    # Removing unecessary data
+    rm(tmp)
   }
-}
 
-# Save cams
-save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData"))
-
-##############################################
-### Preparing PM data from ground monitors ###
-##############################################
-# Subsetting Greater Manchester shapefiles
-mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale',
-                                                    'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan'))
-
-# Converting to long lat
-mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs"))
-
-# Reading in LTN network data
-tmp1 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/BroomLane_AQ_hourly.csv')
-tmp2 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/CromwellAQ_hourly.csv')
-tmp3 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/DelamereRoad_AQ_hourly.csv')
-tmp4 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/GrangethorpeAQ_2b_hourly.csv')
-tmp5 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/ManorRad_AQ_hourly.csv')
-tmp6 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/SladeLane_AQ_hourly.csv')
-
-# Adding Longitude
-tmp1$latitude <- 53.441161
-tmp2$latitude <- 53.445053
-tmp3$latitude <- 53.442702
-tmp4$latitude <- 53.436213
-tmp5$latitude <- 53.447703
-tmp6$latitude <- 53.448551
-
-# Adding Latitude
-tmp1$longitude <- -2.185467
-tmp2$longitude <- -2.188442
-tmp3$longitude <- -2.188183
-tmp4$longitude <- -2.203991
-tmp5$longitude <- -2.183646
-tmp6$longitude <- -2.197556
-
-# Getting site label
-tmp1$site <- 'BroomLane'
-tmp2$site <- 'Cromwell'
-tmp3$site <- 'DelamereRoad'
-tmp4$site <- 'Grangethorpe'
-tmp5$site <- 'ManorRoad'
-tmp6$site <- 'SladeLane'
-
-# Adding site label
-tmp1$code <- 'LTN-BLN'
-tmp2$code <- 'LTN-CRM'
-tmp3$code <- 'LTN-DRD'
-tmp4$code <- 'LTN-GRA'
-tmp5$code <- 'LTN-MRD'
-tmp6$code <- 'LTN-SLN'
-
-# Adding site label
-tmp1$date <- as.Date(substr(tmp1$ds, 1, 10))
-tmp2$date <- as.Date(substr(tmp2$ds, 1, 10))
-tmp3$date <- as.Date(substr(tmp3$ds, 1, 10))
-tmp4$date <- as.Date(substr(tmp4$ds, 1, 10))
-tmp5$date <- as.Date(substr(tmp5$ds, 1, 10))
-tmp6$date <- as.Date(substr(tmp6$ds, 1, 10))
-
-# Adding site label
-tmp1$hour <- as.numeric(substr(tmp1$ds, 12, 13))
-tmp2$hour <- as.numeric(substr(tmp2$ds, 12, 13))
-tmp3$hour <- as.numeric(substr(tmp3$ds, 12, 13))
-tmp4$hour <- as.numeric(substr(tmp4$ds, 12, 13))
-tmp5$hour <- as.numeric(substr(tmp5$ds, 12, 13))
-tmp6$hour <- as.numeric(substr(tmp6$ds, 12, 13))
-
-# Loading in stations data
-stations_ltn_dat <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) %>%
-  dplyr::select(site, code, longitude, latitude) %>%
-  unique()
-
-# Pulling all data together
-ltn_dat <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) %>%
-  dplyr::select(date, pm2.5 = PM2.5 , site, code, hour)%>%
-  dplyr::mutate(pm2.5 = ifelse(pm2.5 <= 0, NA, pm2.5))
-
-# Removing unecessary datasets
-rm(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6)
-
-# List of Stations
-stations_aurn_dat <- importMeta(source = "aurn", all = TRUE) %>%
-  filter(variable == 'PM2.5' &
-           (as.Date(end_date, format = '%Y-%m-%d') >= as.Date('2020-12-01') |
-              end_date == 'ongoing') &
-           longitude > (mcr_msoa@bbox[1,1] - 0.5) &
-           longitude < (mcr_msoa@bbox[1,2] + 0.5) &
-           latitude > (mcr_msoa@bbox[2,1] - 0.5) &
-           latitude < (mcr_msoa@bbox[2,2] + 0.5)) %>%
-  dplyr::select(code, site, latitude, longitude) %>%
-  unique() %>%
-  filter(!(site %in% c('Glazebury', 'Liverpool Speke')))
-
-# Downloading pollutant/weather data
-aurn_dat <- importAURN(site = stations_aurn_dat$code,
-                       year = 2020:2021,
-                       pollutant = c('pm2.5'),
-                       meta = FALSE,
-                       data_type = 'hourly',
-                       verbose = FALSE) %>%
-  dplyr::mutate(pm2.5 = ifelse(pm2.5 <= 0, NA, pm2.5))
-
-# Getting date and hour
-aurn_dat$hour <- as.numeric(substr(aurn_dat$date, 12, 13))
-aurn_dat$date <- as.Date(substr(aurn_dat$date, 1, 10))
-
-# Appenidng both together
-gm_dat <- rbind(aurn_dat, ltn_dat)
-stations_gm_dat <- rbind(stations_aurn_dat, stations_ltn_dat)
-
-# Getting closest station for each MSOA
-A1 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
-                       stations_aurn_dat[,c('longitude', 'latitude')]), 1, which.min)
-A2 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
-                        stations_ltn_dat[,c('longitude', 'latitude')]), 1, which.min)
-A3 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
-                        stations_gm_dat[,c('longitude', 'latitude')]), 1, which.min)
-
-# Getting data from nearest station (AURN)
-tmp1 <- data.frame(area_id = mcr_msoa$area_id,
-                   code_aurn = stations_aurn_dat$code[A1]) %>%
-  left_join(aurn_dat %>%
-              dplyr::select(code_aurn = code, date, hour, pm25_aurn = pm2.5),
-            by = 'code_aurn') %>%
-  dplyr::select(area_id, date, hour, pm25_aurn, code_aurn)
-
-# Getting data from nearest station (LTN)
-tmp2 <- data.frame(area_id = mcr_msoa$area_id,
-                   code_ltn = stations_ltn_dat$code[A2]) %>%
-  left_join(ltn_dat %>%
-              dplyr::select(code_ltn = code, date, hour, pm25_ltn = pm2.5),
-            by = 'code_ltn') %>%
-  dplyr::select(area_id, date, hour, pm25_ltn, code_ltn)
-
-# Getting data from nearest station (Both)
-tmp3 <- data.frame(area_id = mcr_msoa$area_id,
-                   code_gm = stations_gm_dat$code[A3]) %>%
-  left_join(gm_dat %>%
-              dplyr::select(code_gm = code, date, hour, pm25_gm = pm2.5),
-            by = 'code_gm') %>%
-  dplyr::select(area_id, date, hour, pm25_gm, code_gm)
-
-# Merging together
-pm25_gm <- expand.grid(area_id = mcr_msoa$area_id,
-                       date =seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1),
-                       hour = 0:23)%>%
-  left_join(tmp1,
-            by = c('area_id', 'date', 'hour')) %>%
-  left_join(tmp2,
-            by = c('area_id', 'date', 'hour')) %>%
-  left_join(tmp3,
-            by = c('area_id', 'date', 'hour'))
-
-# Removing unecessary datasets
-rm(tmp1, tmp2, tmp3)
-
-# Getting stations with non-missing data
-aurn_dat <- aurn_dat %>%
-  left_join(stations_aurn_dat %>%
-              dplyr::select(code, longitude, latitude),
-            by = "code")
-# Getting stations with non-missing data
-ltn_dat <- ltn_dat %>%
-  left_join(stations_ltn_dat %>%
-              dplyr::select(code, longitude, latitude),
-            by = "code")
-# Getting stations with non-missing data
-gm_dat <- gm_dat %>%
-  left_join(stations_gm_dat %>%
-              dplyr::select(code, longitude, latitude),
-            by = "code")
-
-# Empty dataset for data from nearest *non-missing* station
-tmp1 <- NULL
-tmp2 <- NULL
-tmp3 <- NULL
-
-# Loop for each date
-for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1))){
-  # Loop for each time
-  for (j in 0:23){
-    # Getting stations with non-missing data
-    test1 <- aurn_dat %>%
-      dplyr::filter(date == i & hour == j & !is.na(pm2.5))
-    # Getting stations with non-missing data
-    test2 <- ltn_dat %>%
-      dplyr::filter(date == i & hour == j & !is.na(pm2.5))
-    # Getting stations with non-missing data
-    test3 <- gm_dat %>%
-      dplyr::filter(date == i & hour == j & !is.na(pm2.5))
-    # Getting closest non-missing station for each MSOA
-    A1 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
-                            test1[,c('longitude', 'latitude')]),
-                1, which.min)
-    # Getting closest non-missing station for each MSOA
-    A2 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
-                            test2[,c('longitude', 'latitude')]),
-                1, which.min)
-    # Getting closest non-missing station for each MSOA
-    A3 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
-                            test3[,c('longitude', 'latitude')]),
-                1, which.min)
-    # Getting data from nearest station (AURN)
-    if (length(A1) > 0){
-      test1 <- data.frame(area_id = mcr_msoa$area_id,
-                          code_aurn_near = test1$code[A1]) %>%
-        left_join(test1 %>%
-                    dplyr::select(code_aurn_near = code, date, hour, pm25_aurn_near = pm2.5),
-                  by = 'code_aurn_near') %>%
-        dplyr::select(area_id, date, hour, pm25_aurn_near, code_aurn_near)
-      # Appending
-      tmp1 <- rbind(tmp1, test1)
+  # Altering column names
+  names(Weights_msoa)[1] <- c('IDGRID')
+
+  # Merging to skeleton dataset
+  pm25_cams <- NULL
+
+  # Loop for each date
+  for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1))){
+    # Loop for each time
+    for (j in 0:23){
+      # Reading in PM25 from CAMS
+      r <- raster(paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
+      # Renaming raster
+      names(r) <- 'pm25'
+      # Creating aggregated estimates of PM25 by MSOA
+      tmp1 <- r %>%
+        # Converting raster to dataframe
+        crop(r0) %>%
+        stack(r0) %>%
+        rasterToPoints() %>%
+        as.data.frame()%>%
+        # Renaming columns
+        dplyr::select(IDGRID = layer, pm25) %>%
+        # Merging on weights to aggregate
+        right_join(Weights_msoa,
+                   by = 'IDGRID') %>%
+        # Aggregating grid to
+        ddply(.(area_id),
+              summarize,
+              pm25_cams_agg = weighted.mean(pm25, weight))
+      # Extracting PM2.5 values at centroids
+      tmp1$pm25_cams_cent<- raster::extract(r, mcr_msoa@data[,c('cent_long', 'cent_lat')])
+      # Adding on date and hour
+      tmp1 <- tmp1 %>%
+        mutate(hour = j,
+               date = i) %>%
+        # Outputting datasets
+        dplyr::select(area_id, date, hour, pm25_cams_cent, pm25_cams_agg)
+      # Appending together
+      pm25_cams <- rbind(pm25_cams, tmp1)
+      # Removing uncessary datasets
+      rm(tmp1, tmp2)    
+      # Printing index
+      print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
     }
-    # Getting data from nearest station (LTN)
-    if (length(A2) > 0){
-      test2 <- data.frame(area_id = mcr_msoa$area_id,
-                          code_ltn_near = test2$code[A2]) %>%
-        left_join(test2 %>%
-                    dplyr::select(code_ltn_near = code, date, hour, pm25_ltn_near = pm2.5),
-                  by = 'code_ltn_near') %>%
-        dplyr::select(area_id, date, hour, pm25_ltn_near, code_ltn_near)
-      # Appending
-      tmp2 <- rbind(tmp2, test2)
-    }
-    if (length(A3) > 0){
-      # Getting data from nearest station (Both)
-      test3 <- data.frame(area_id = mcr_msoa$area_id,
-                          code_gm_near = test3$code[A3]) %>%
-        left_join(test3 %>%
-                    dplyr::select(code_gm_near = code, date, hour, pm25_gm_near = pm2.5),
-                  by = 'code_gm_near') %>%
-        dplyr::select(area_id, date, hour, pm25_gm_near, code_gm_near)
-      # Appending
-      tmp3 <- rbind(tmp3, test3)
+  }
+
+  # Save cams
+  save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData"))
+
+  ##############################################
+  ### Preparing PM data from ground monitors ###
+  ##############################################
+  # Subsetting Greater Manchester shapefiles
+  mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale',
+                                                      'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan'))
+
+  # Converting to long lat
+  mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs"))
+
+  # Reading in LTN network data
+  tmp1 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/BroomLane_AQ_hourly.csv')
+  tmp2 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/CromwellAQ_hourly.csv')
+  tmp3 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/DelamereRoad_AQ_hourly.csv')
+  tmp4 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/GrangethorpeAQ_2b_hourly.csv')
+  tmp5 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/ManorRad_AQ_hourly.csv')
+  tmp6 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/SladeLane_AQ_hourly.csv')
+
+  # Adding Longitude
+  tmp1$latitude <- 53.441161
+  tmp2$latitude <- 53.445053
+  tmp3$latitude <- 53.442702
+  tmp4$latitude <- 53.436213
+  tmp5$latitude <- 53.447703
+  tmp6$latitude <- 53.448551
+
+  # Adding Latitude
+  tmp1$longitude <- -2.185467
+  tmp2$longitude <- -2.188442
+  tmp3$longitude <- -2.188183
+  tmp4$longitude <- -2.203991
+  tmp5$longitude <- -2.183646
+  tmp6$longitude <- -2.197556
+
+  # Getting site label
+  tmp1$site <- 'BroomLane'
+  tmp2$site <- 'Cromwell'
+  tmp3$site <- 'DelamereRoad'
+  tmp4$site <- 'Grangethorpe'
+  tmp5$site <- 'ManorRoad'
+  tmp6$site <- 'SladeLane'
+
+  # Adding site label
+  tmp1$code <- 'LTN-BLN'
+  tmp2$code <- 'LTN-CRM'
+  tmp3$code <- 'LTN-DRD'
+  tmp4$code <- 'LTN-GRA'
+  tmp5$code <- 'LTN-MRD'
+  tmp6$code <- 'LTN-SLN'
+
+  # Adding site label
+  tmp1$date <- as.Date(substr(tmp1$ds, 1, 10))
+  tmp2$date <- as.Date(substr(tmp2$ds, 1, 10))
+  tmp3$date <- as.Date(substr(tmp3$ds, 1, 10))
+  tmp4$date <- as.Date(substr(tmp4$ds, 1, 10))
+  tmp5$date <- as.Date(substr(tmp5$ds, 1, 10))
+  tmp6$date <- as.Date(substr(tmp6$ds, 1, 10))
+
+  # Adding site label
+  tmp1$hour <- as.numeric(substr(tmp1$ds, 12, 13))
+  tmp2$hour <- as.numeric(substr(tmp2$ds, 12, 13))
+  tmp3$hour <- as.numeric(substr(tmp3$ds, 12, 13))
+  tmp4$hour <- as.numeric(substr(tmp4$ds, 12, 13))
+  tmp5$hour <- as.numeric(substr(tmp5$ds, 12, 13))
+  tmp6$hour <- as.numeric(substr(tmp6$ds, 12, 13))
+
+  # Loading in stations data
+  stations_ltn_dat <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) %>%
+    dplyr::select(site, code, longitude, latitude) %>%
+    unique()
+
+  # Pulling all data together
+  ltn_dat <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) %>%
+    dplyr::select(date, pm2.5 = PM2.5 , site, code, hour)%>%
+    dplyr::mutate(pm2.5 = ifelse(pm2.5 <= 0, NA, pm2.5))
+
+  # Removing unecessary datasets
+  rm(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6)
+
+  # List of Stations
+  stations_aurn_dat <- importMeta(source = "aurn", all = TRUE) %>%
+    filter(variable == 'PM2.5' &
+             (as.Date(end_date, format = '%Y-%m-%d') >= as.Date('2020-12-01') |
+                end_date == 'ongoing') &
+             longitude > (mcr_msoa@bbox[1,1] - 0.5) &
+             longitude < (mcr_msoa@bbox[1,2] + 0.5) &
+             latitude > (mcr_msoa@bbox[2,1] - 0.5) &
+             latitude < (mcr_msoa@bbox[2,2] + 0.5)) %>%
+    dplyr::select(code, site, latitude, longitude) %>%
+    unique() %>%
+    filter(!(site %in% c('Glazebury', 'Liverpool Speke')))
+
+  # Downloading pollutant/weather data
+  aurn_dat <- importAURN(site = stations_aurn_dat$code,
+                         year = 2020:2021,
+                         pollutant = c('pm2.5'),
+                         meta = FALSE,
+                         data_type = 'hourly',
+                         verbose = FALSE) %>%
+    dplyr::mutate(pm2.5 = ifelse(pm2.5 <= 0, NA, pm2.5))
+
+  # Getting date and hour
+  aurn_dat$hour <- as.numeric(substr(aurn_dat$date, 12, 13))
+  aurn_dat$date <- as.Date(substr(aurn_dat$date, 1, 10))
+
+  # Appenidng both together
+  gm_dat <- rbind(aurn_dat, ltn_dat)
+  stations_gm_dat <- rbind(stations_aurn_dat, stations_ltn_dat)
+
+  # Getting closest station for each MSOA
+  A1 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
+                         stations_aurn_dat[,c('longitude', 'latitude')]), 1, which.min)
+  A2 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
+                          stations_ltn_dat[,c('longitude', 'latitude')]), 1, which.min)
+  A3 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
+                          stations_gm_dat[,c('longitude', 'latitude')]), 1, which.min)
+
+  # Getting data from nearest station (AURN)
+  tmp1 <- data.frame(area_id = mcr_msoa$area_id,
+                     code_aurn = stations_aurn_dat$code[A1]) %>%
+    left_join(aurn_dat %>%
+                dplyr::select(code_aurn = code, date, hour, pm25_aurn = pm2.5),
+              by = 'code_aurn') %>%
+    dplyr::select(area_id, date, hour, pm25_aurn, code_aurn)
+
+  # Getting data from nearest station (LTN)
+  tmp2 <- data.frame(area_id = mcr_msoa$area_id,
+                     code_ltn = stations_ltn_dat$code[A2]) %>%
+    left_join(ltn_dat %>%
+                dplyr::select(code_ltn = code, date, hour, pm25_ltn = pm2.5),
+              by = 'code_ltn') %>%
+    dplyr::select(area_id, date, hour, pm25_ltn, code_ltn)
+
+  # Getting data from nearest station (Both)
+  tmp3 <- data.frame(area_id = mcr_msoa$area_id,
+                     code_gm = stations_gm_dat$code[A3]) %>%
+    left_join(gm_dat %>%
+                dplyr::select(code_gm = code, date, hour, pm25_gm = pm2.5),
+              by = 'code_gm') %>%
+    dplyr::select(area_id, date, hour, pm25_gm, code_gm)
+
+  # Merging together
+  pm25_gm <- expand.grid(area_id = mcr_msoa$area_id,
+                         date =seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1),
+                         hour = 0:23)%>%
+    left_join(tmp1,
+              by = c('area_id', 'date', 'hour')) %>%
+    left_join(tmp2,
+              by = c('area_id', 'date', 'hour')) %>%
+    left_join(tmp3,
+              by = c('area_id', 'date', 'hour'))
+
+  # Removing unecessary datasets
+  rm(tmp1, tmp2, tmp3)
+
+  # Getting stations with non-missing data
+  aurn_dat <- aurn_dat %>%
+    left_join(stations_aurn_dat %>%
+                dplyr::select(code, longitude, latitude),
+              by = "code")
+  # Getting stations with non-missing data
+  ltn_dat <- ltn_dat %>%
+    left_join(stations_ltn_dat %>%
+                dplyr::select(code, longitude, latitude),
+              by = "code")
+  # Getting stations with non-missing data
+  gm_dat <- gm_dat %>%
+    left_join(stations_gm_dat %>%
+                dplyr::select(code, longitude, latitude),
+              by = "code")
+
+  # Empty dataset for data from nearest *non-missing* station
+  tmp1 <- NULL
+  tmp2 <- NULL
+  tmp3 <- NULL
+
+  # Loop for each date
+  for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1))){
+    # Loop for each time
+    for (j in 0:23){
+      # Getting stations with non-missing data
+      test1 <- aurn_dat %>%
+        dplyr::filter(date == i & hour == j & !is.na(pm2.5))
+      # Getting stations with non-missing data
+      test2 <- ltn_dat %>%
+        dplyr::filter(date == i & hour == j & !is.na(pm2.5))
+      # Getting stations with non-missing data
+      test3 <- gm_dat %>%
+        dplyr::filter(date == i & hour == j & !is.na(pm2.5))
+      # Getting closest non-missing station for each MSOA
+      A1 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
+                              test1[,c('longitude', 'latitude')]),
+                  1, which.min)
+      # Getting closest non-missing station for each MSOA
+      A2 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
+                              test2[,c('longitude', 'latitude')]),
+                  1, which.min)
+      # Getting closest non-missing station for each MSOA
+      A3 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')],
+                              test3[,c('longitude', 'latitude')]),
+                  1, which.min)
+      # Getting data from nearest station (AURN)
+      if (length(A1) > 0){
+        test1 <- data.frame(area_id = mcr_msoa$area_id,
+                            code_aurn_near = test1$code[A1]) %>%
+          left_join(test1 %>%
+                      dplyr::select(code_aurn_near = code, date, hour, pm25_aurn_near = pm2.5),
+                    by = 'code_aurn_near') %>%
+          dplyr::select(area_id, date, hour, pm25_aurn_near, code_aurn_near)
+        # Appending
+        tmp1 <- rbind(tmp1, test1)
+      }
+      # Getting data from nearest station (LTN)
+      if (length(A2) > 0){
+        test2 <- data.frame(area_id = mcr_msoa$area_id,
+                            code_ltn_near = test2$code[A2]) %>%
+          left_join(test2 %>%
+                      dplyr::select(code_ltn_near = code, date, hour, pm25_ltn_near = pm2.5),
+                    by = 'code_ltn_near') %>%
+          dplyr::select(area_id, date, hour, pm25_ltn_near, code_ltn_near)
+        # Appending
+        tmp2 <- rbind(tmp2, test2)
+      }
+      if (length(A3) > 0){
+        # Getting data from nearest station (Both)
+        test3 <- data.frame(area_id = mcr_msoa$area_id,
+                            code_gm_near = test3$code[A3]) %>%
+          left_join(test3 %>%
+                      dplyr::select(code_gm_near = code, date, hour, pm25_gm_near = pm2.5),
+                    by = 'code_gm_near') %>%
+          dplyr::select(area_id, date, hour, pm25_gm_near, code_gm_near)
+        # Appending
+        tmp3 <- rbind(tmp3, test3)
+      }
+      # Removing uncessary datasets
+      rm(test1, test2, test3)
+      # Printing index
+      print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
     }
-    # Removing uncessary datasets
-    rm(test1, test2, test3)
-    # Printing index
-    print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
   }
-}
 
-# Merging together
-pm25_gm <- pm25_gm %>%
-  left_join(tmp1,
-            by = c('area_id', 'date', 'hour')) %>%
-  left_join(tmp2,
-            by = c('area_id', 'date', 'hour')) %>%
-  left_join(tmp3,
-            by = c('area_id', 'date', 'hour'))
-
-# Removing unecessary files
-rm(tmp1, tmp2, tmp3, aurn_dat, gm_dat, ltn_dat, mcr_msoa,
-   stations_aurn_dat, stations_gm_dat, stations_ltn_dat)
-
-# Save aurn data
-save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData"))
-
-###################################
-### Preparing PM data from EMEP ###
-###################################
-################### The following code takes a while to run, so population estimates  ###################
-################### were run separately and saved to a raster (found underneath)      ###################
-# # file names
-# files <- c("Data/CaseStudy2/Raw/PM25/EMEP/pm25_janfeb2021_regrid.nc",
-#            "Data/CaseStudy2/Raw/PM25/EMEP/pm25_marapr2021_regrid.nc")
-# 
-# # files start dates
-# start_date <- c(as.Date('2021-01-01'),
-#                 as.Date('2021-03-01'))
-# 
-# # Loop for each file
-# for (i in 1:length(files)){
-#   # Opening raster to get number of days
-#   ncin <- raster(files[i],
-#                  band = 1,
-#                  verbose = FALSE,
-#                  stopIfNotEqualSpaced = FALSE)
-#   # Getting the number of  days
-#   N_days <- floor(nbands(ncin)/24)
-#   # Dates
-#   Dates <- start_date[i] + (1:N_days) - 1
-#   # Loop for each day in the year
-#   for (j in 1:N_days){
-#     # Getting date
-#     date <- Dates[j]
-#     # Looping for each hour in the day
-#     for (k in (24*(j-1)+1):(24*j)){
-#       # Opening raster
-#       ncin <- raster(files[i],
-#                      band = k,
-#                      verbose = FALSE,
-#                      stopIfNotEqualSpaced = FALSE)
-#       # Cropping for the UK
-#       ncin <- crop(ncin, extent(-2.755, -1.895, 53.325, 53.695))
-#       # Saving raster
-#       writeRaster(ncin,
-#                   filename = paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''),
-#                   overwrite = TRUE)
-#       # else {keep <- keep + ncin}
-#       print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = ''))
-#     }
-#   }
-# }
-#########################################################################################################
-#########################################################################################################
-
-# Empty raster
-r0 <- raster(xmn = -2.755,
-             xmx = -1.895,
-             ymn = 53.325,
-             ymx = 53.695,
-             res = 0.01)
-
-# Adding unique  ID
-r0[] <- 1:(dim(r0)[1]*dim(r0)[2])
-
-# Subsetting Greater Manchester shapefiles
-mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale',
-                                                    'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan'))
-
-# Converting to long lat
-mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs"))
-
-# Extracting values
-a1 <- raster::extract(r0, # Grid unique IDs
-                      mcr_msoa, # Shapefiles
-                      weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap
-                      small = TRUE) # Small areas in comparison to the raster
-
-# Empty dataset to append to
-Weights_msoa <- NULL
-
-# Loop for each area
-for (i in 1:length(a1)){
-  # Converting Weights_msoa to dataframes
-  tmp <- as.data.frame(a1[[i]])
-  # Removing NAs
-  tmp <- subset(tmp, !is.na(value))
-  # Reweighting the Weights_msoa after zeroes removed
-  tmp$weight <- tmp$weight/sum(tmp$weight)
-  # New Region Name
-  tmp$area_id <- mcr_msoa@data$area_id[i]
-  # Creating new dataset
-  if (i == 1) {Weights_msoa <- tmp}
-  else {Weights_msoa <- rbind(Weights_msoa, tmp)}
-  # Removing unecessary data
-  rm(tmp)
-}
+  # Merging together
+  pm25_gm <- pm25_gm %>%
+    left_join(tmp1,
+              by = c('area_id', 'date', 'hour')) %>%
+    left_join(tmp2,
+              by = c('area_id', 'date', 'hour')) %>%
+    left_join(tmp3,
+              by = c('area_id', 'date', 'hour'))
+
+  # Removing unecessary files
+  rm(tmp1, tmp2, tmp3, aurn_dat, gm_dat, ltn_dat, mcr_msoa,
+     stations_aurn_dat, stations_gm_dat, stations_ltn_dat)
+
+  # Save aurn data
+  save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData"))
+
+  ###################################
+  ### Preparing PM data from EMEP ###
+  ###################################
+  ################### The following code takes a while to run, so population estimates  ###################
+  ################### were run separately and saved to a raster (found underneath)      ###################
+  # # file names
+  # files <- c("Data/CaseStudy2/Raw/PM25/EMEP/pm25_janfeb2021_regrid.nc",
+  #            "Data/CaseStudy2/Raw/PM25/EMEP/pm25_marapr2021_regrid.nc")
+  # 
+  # # files start dates
+  # start_date <- c(as.Date('2021-01-01'),
+  #                 as.Date('2021-03-01'))
+  # 
+  # # Loop for each file
+  # for (i in 1:length(files)){
+  #   # Opening raster to get number of days
+  #   ncin <- raster(files[i],
+  #                  band = 1,
+  #                  verbose = FALSE,
+  #                  stopIfNotEqualSpaced = FALSE)
+  #   # Getting the number of  days
+  #   N_days <- floor(nbands(ncin)/24)
+  #   # Dates
+  #   Dates <- start_date[i] + (1:N_days) - 1
+  #   # Loop for each day in the year
+  #   for (j in 1:N_days){
+  #     # Getting date
+  #     date <- Dates[j]
+  #     # Looping for each hour in the day
+  #     for (k in (24*(j-1)+1):(24*j)){
+  #       # Opening raster
+  #       ncin <- raster(files[i],
+  #                      band = k,
+  #                      verbose = FALSE,
+  #                      stopIfNotEqualSpaced = FALSE)
+  #       # Cropping for the UK
+  #       ncin <- crop(ncin, extent(-2.755, -1.895, 53.325, 53.695))
+  #       # Saving raster
+  #       writeRaster(ncin,
+  #                   filename = paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''),
+  #                   overwrite = TRUE)
+  #       # else {keep <- keep + ncin}
+  #       print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = ''))
+  #     }
+  #   }
+  # }
+  #########################################################################################################
+  #########################################################################################################
+
+  # Empty raster
+  r0 <- raster(xmn = -2.755,
+               xmx = -1.895,
+               ymn = 53.325,
+               ymx = 53.695,
+               res = 0.01)
+
+  # Adding unique  ID
+  r0[] <- 1:(dim(r0)[1]*dim(r0)[2])
+
+  # Subsetting Greater Manchester shapefiles
+  mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale',
+                                                      'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan'))
+
+  # Converting to long lat
+  mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs"))
+
+  # Extracting values
+  a1 <- raster::extract(r0, # Grid unique IDs
+                        mcr_msoa, # Shapefiles
+                        weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap
+                        small = TRUE) # Small areas in comparison to the raster
+
+  # Empty dataset to append to
+  Weights_msoa <- NULL
+
+  # Loop for each area
+  for (i in 1:length(a1)){
+    # Converting Weights_msoa to dataframes
+    tmp <- as.data.frame(a1[[i]])
+    # Removing NAs
+    tmp <- subset(tmp, !is.na(value))
+    # Reweighting the Weights_msoa after zeroes removed
+    tmp$weight <- tmp$weight/sum(tmp$weight)
+    # New Region Name
+    tmp$area_id <- mcr_msoa@data$area_id[i]
+    # Creating new dataset
+    if (i == 1) {Weights_msoa <- tmp}
+    else {Weights_msoa <- rbind(Weights_msoa, tmp)}
+    # Removing unecessary data
+    rm(tmp)
+  }
 
-# Altering column names
-names(Weights_msoa)[1] <- c('IDGRID')
-
-# Merging to skeleton dataset
-pm25_emep <- NULL
-
-# Loop for each date
-for (i in as.character(seq(as.Date('2021-01-01'), as.Date('2021-04-30'), by = 1))){
-  # Loop for each time
-  for (j in 0:23){
-    # Reading in PM25 from CAMS
-    r <- raster(paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
-    # Renaming raster
-    names(r) <- 'pm25'
-    # Creating aggregated estimates of PM25 by MSOA
-    tmp1 <- r %>%
-      # Converting raster to dataframe
-      crop(r0) %>%
-      stack(r0) %>%
-      rasterToPoints() %>%
-      as.data.frame()%>%
-      # Renaming columns
-      dplyr::select(IDGRID = layer, pm25) %>%
-      # Merging on weights to aggregate
-      right_join(Weights_msoa,
-                 by = 'IDGRID') %>%
-      # Aggregating grid to
-      ddply(.(area_id),
-            summarize,
-            pm25_cams_agg = weighted.mean(pm25, weight))
-    # Extracting PM2.5 values at centroids
-    tmp1$pm25_cams_cent<- raster::extract(r, mcr_msoa@data[,c('cent_long', 'cent_lat')])
-    # Adding on date and hour
-    tmp1 <- tmp1 %>%
-      mutate(hour = j,
-             date = i) %>%
-      # Outputting datasets
-      dplyr::select(area_id, date, hour, pm25_cams_cent, pm25_cams_agg)
-    # Appending together
-    pm25_emep <- rbind(pm25_emep, tmp1)
-    # Removing uncessary datasets
-    rm(tmp1)
-    # Printing index
-    print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
+  # Altering column names
+  names(Weights_msoa)[1] <- c('IDGRID')
+
+  # Merging to skeleton dataset
+  pm25_emep <- NULL
+
+  # Loop for each date
+  for (i in as.character(seq(as.Date('2021-01-01'), as.Date('2021-04-30'), by = 1))){
+    # Loop for each time
+    for (j in 0:23){
+      # Reading in PM25 from CAMS
+      r <- raster(paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
+      # Renaming raster
+      names(r) <- 'pm25'
+      # Creating aggregated estimates of PM25 by MSOA
+      tmp1 <- r %>%
+        # Converting raster to dataframe
+        crop(r0) %>%
+        stack(r0) %>%
+        rasterToPoints() %>%
+        as.data.frame()%>%
+        # Renaming columns
+        dplyr::select(IDGRID = layer, pm25) %>%
+        # Merging on weights to aggregate
+        right_join(Weights_msoa,
+                   by = 'IDGRID') %>%
+        # Aggregating grid to
+        ddply(.(area_id),
+              summarize,
+              pm25_cams_agg = weighted.mean(pm25, weight))
+      # Extracting PM2.5 values at centroids
+      tmp1$pm25_cams_cent<- raster::extract(r, mcr_msoa@data[,c('cent_long', 'cent_lat')])
+      # Adding on date and hour
+      tmp1 <- tmp1 %>%
+        mutate(hour = j,
+               date = i) %>%
+        # Outputting datasets
+        dplyr::select(area_id, date, hour, pm25_cams_cent, pm25_cams_agg)
+      # Appending together
+      pm25_emep <- rbind(pm25_emep, tmp1)
+      # Removing uncessary datasets
+      rm(tmp1)
+      # Printing index
+      print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
+    }
   }
-}
 
-# Save aurn data
-save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData"))
+  # Save aurn data
+  save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData"))
+}
diff --git a/Code/CaseStudy2/cli.R b/Code/CaseStudy2/cli.R
index ecd041a..3810464 100644
--- a/Code/CaseStudy2/cli.R
+++ b/Code/CaseStudy2/cli.R
@@ -8,7 +8,11 @@ cli <- function() {
                 help = "DIMEX processing step"),
     make_option(c("--prefix"),
                 default = "~/Dropbox/Github/SPFFinalReport",
-                help = "top-level directory containing Code/ and Data/")
+                help = "top-level directory containing Code/ and Data/"),
+    make_option(c("--output-dir"),
+                dest="output_dir",
+                default = "Data/Processed/PM25",
+                help = "Output directory")
   )) %>%
   parse_args %>%
   validate_args
@@ -51,7 +55,7 @@ script_name <- function(step) {
 }
 
 # Main program
-main <- function() {
+main_wrapper <- function() {
   opts <- cli()
 
   # Working directory
@@ -59,7 +63,10 @@ main <- function() {
 
   # Run particular step
   source(here("Code", "CaseStudy2", script_name(opts$step)))
+
+  # Run the loaded main program
+  main(opts$output_dir)
   
 }
 
-main()
\ No newline at end of file
+main_wrapper()
\ No newline at end of file

From ec414581f52f61b5e6e2aa8ff520655affd76726 Mon Sep 17 00:00:00 2001
From: andrewgryan <andrew.ryan@metoffice.gov.uk>
Date: Tue, 24 Jan 2023 16:51:42 +0000
Subject: [PATCH 3/7] pass prefix directory to main program

---
 Code/CaseStudy2/1d_DataPrep_PM25.R | 8 ++++----
 Code/CaseStudy2/cli.R              | 6 ++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R
index 374f7a1..af993b1 100644
--- a/Code/CaseStudy2/1d_DataPrep_PM25.R
+++ b/Code/CaseStudy2/1d_DataPrep_PM25.R
@@ -6,12 +6,12 @@ library(here)
 source(here("Code", "CaseStudy2", "0_Source.R"))
 
 # TODO: Remove dependence of calling directory location
-main <- function(output_dir) {
-  print(output_dir)
-  stop("DEBUG")
+main <- function(output_dir, prefix_dir) {
 
   # Loading shapefiles 
-  load("Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData")
+  load(file.path(prefix_dir, "Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData"))
+
+  stop("DEBUG")
 
   ##########################################
   ### Preparing PM data from CAMS-Europe ###
diff --git a/Code/CaseStudy2/cli.R b/Code/CaseStudy2/cli.R
index 3810464..841e686 100644
--- a/Code/CaseStudy2/cli.R
+++ b/Code/CaseStudy2/cli.R
@@ -58,14 +58,12 @@ script_name <- function(step) {
 main_wrapper <- function() {
   opts <- cli()
 
-  # Working directory
-  setwd(opts$prefix)
-
   # Run particular step
   source(here("Code", "CaseStudy2", script_name(opts$step)))
 
   # Run the loaded main program
-  main(opts$output_dir)
+  main(output_dir = opts$output_dir,
+       prefix_dir = opts$prefix)
   
 }
 

From eaa5f23f5188563f5d31d5f667f60ffa104ad51a Mon Sep 17 00:00:00 2001
From: andrewgryan <andrew.ryan@metoffice.gov.uk>
Date: Tue, 24 Jan 2023 17:05:27 +0000
Subject: [PATCH 4/7] make directory paths absolute and dry

---
 Code/CaseStudy2/1d_DataPrep_PM25.R | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R
index af993b1..7e1fe2e 100644
--- a/Code/CaseStudy2/1d_DataPrep_PM25.R
+++ b/Code/CaseStudy2/1d_DataPrep_PM25.R
@@ -8,8 +8,11 @@ source(here("Code", "CaseStudy2", "0_Source.R"))
 # TODO: Remove dependence of calling directory location
 main <- function(output_dir, prefix_dir) {
 
+  processed_dir = file.path(prefix_dir, "Data/CaseStudy2/Processed")
+  ground_monitors_dir = file.path(prefix_dir, "Data/CaseStudy2/Raw/PM25/GroundMonitors")
+
   # Loading shapefiles 
-  load(file.path(prefix_dir, "Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData"))
+  load(file.path(processed_dir, "Shapefiles/shapefiles.RData"))
 
   stop("DEBUG")
 
@@ -143,7 +146,8 @@ main <- function(output_dir, prefix_dir) {
     # Loop for each time
     for (j in 0:23){
       # Reading in PM25 from CAMS
-      r <- raster(paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
+      file_name <- paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')
+      r <- raster(file.path(processed_dir, 'PM25/CAMS-Europe', file_name))
       # Renaming raster
       names(r) <- 'pm25'
       # Creating aggregated estimates of PM25 by MSOA
@@ -193,12 +197,12 @@ main <- function(output_dir, prefix_dir) {
   mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs"))
 
   # Reading in LTN network data
-  tmp1 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/BroomLane_AQ_hourly.csv')
-  tmp2 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/CromwellAQ_hourly.csv')
-  tmp3 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/DelamereRoad_AQ_hourly.csv')
-  tmp4 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/GrangethorpeAQ_2b_hourly.csv')
-  tmp5 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/ManorRad_AQ_hourly.csv')
-  tmp6 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/SladeLane_AQ_hourly.csv')
+  tmp1 <- read.csv(file.path(ground_monitors_dir, 'BroomLane_AQ_hourly.csv'))
+  tmp2 <- read.csv(file.path(ground_monitors_dir, 'CromwellAQ_hourly.csv'))
+  tmp3 <- read.csv(file.path(ground_monitors_dir, 'DelamereRoad_AQ_hourly.csv'))
+  tmp4 <- read.csv(file.path(ground_monitors_dir, 'GrangethorpeAQ_2b_hourly.csv'))
+  tmp5 <- read.csv(file.path(ground_monitors_dir, 'ManorRad_AQ_hourly.csv'))
+  tmp6 <- read.csv(file.path(ground_monitors_dir, 'SladeLane_AQ_hourly.csv'))
 
   # Adding Longitude
   tmp1$latitude <- 53.441161
@@ -542,7 +546,8 @@ main <- function(output_dir, prefix_dir) {
     # Loop for each time
     for (j in 0:23){
       # Reading in PM25 from CAMS
-      r <- raster(paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = ''))
+      file_name <- paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')
+      r <- raster(file.path(processed_dir, 'PM25/EMEP', file_name))
       # Renaming raster
       names(r) <- 'pm25'
       # Creating aggregated estimates of PM25 by MSOA

From eb050cb064ce123374a51ab42f24bb03b86affe2 Mon Sep 17 00:00:00 2001
From: andrewgryan <andrew.ryan@metoffice.gov.uk>
Date: Tue, 24 Jan 2023 17:10:18 +0000
Subject: [PATCH 5/7] undo changes to commented legacy code

---
 Code/CaseStudy2/1d_DataPrep_PM25.R | 153 +++++++++++++++--------------
 1 file changed, 77 insertions(+), 76 deletions(-)

diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R
index 7e1fe2e..6e099dc 100644
--- a/Code/CaseStudy2/1d_DataPrep_PM25.R
+++ b/Code/CaseStudy2/1d_DataPrep_PM25.R
@@ -5,6 +5,83 @@
 library(here)
 source(here("Code", "CaseStudy2", "0_Source.R"))
 
+##########################################
+### Preparing PM data from CAMS-Europe ###
+##########################################
+################### The following code takes a while to run, so population estimates  ###################
+################### were run separately and saved to a raster (found underneath)      ###################
+# # Empty raster
+# r <- raster(xmn = -10,
+#             xmx = 3,
+#             ymn = 49,
+#             ymx = 62,
+#             res = 0.1)
+# 
+# # Adding unique  ID
+# r[] <- 1:(dim(r)[1]*dim(r)[2])
+# 
+# # Empty dataset to append to
+# weights <- NULL
+# 
+# # Extracting values
+# a1 <- raster::extract(r, # Grid unique IDs
+#                       uk_full, # Shapefiles
+#                       weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap
+#                       small = TRUE) # Small areas in comparison to the raster
+# 
+# # Setting to missing if not in UK
+# r[!(r[] %in% a1[[1]])] <- NA
+# 
+# # file names
+# files <- c('Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20201201-20210531.nc',
+#            'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20210601-20211130.nc',
+#            'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20211201-20220430.nc')
+# 
+# # files start dates
+# start_date <- c(as.Date('2020-12-01'),
+#                 as.Date('2021-06-01'),
+#                 as.Date('2021-12-01'))
+# 
+# # Loop for each file
+# for (i in 1:length(files)){
+#   # Opening raster
+#   ncin <- raster(files[i],
+#                  band = 1,
+#                  verbose = FALSE,
+#                  stopIfNotEqualSpaced = FALSE)
+#   # Getting the number of  days
+#   N_days <- floor(nbands(ncin)/24)
+#   # Dates
+#   Dates <- start_date[i] + (1:N_days) - 1
+#   # Loop for each day in the year
+#   for (j in 1:N_days){
+#     # Getting date
+#     date <- Dates[j]
+#     # Looping for each hour in the day
+#     for (k in (24*(j-1)+1):(24*j)){
+#       # Opening raster
+#       ncin <- raster(files[i],
+#                      band = k,
+#                      verbose = FALSE,
+#                      stopIfNotEqualSpaced = FALSE)
+#       # # Cropping for the UK
+#       # ncin <- crop(ncin, extent(-10, 3, 49, 62))
+#       extent(ncin) <- c(-10, 3, 49, 62)
+#       # Setting to missing if not in  UK
+#       ncin[is.na(r[])] <- NA
+#       # Saving raster
+#       writeRaster(ncin,
+#                   filename = paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''),
+#                   overwrite = TRUE)
+#       # else {keep <- keep + ncin}
+#       print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = ''))
+#     }
+#   }
+# }
+#########################################################################################################
+#########################################################################################################
+
+  
 # TODO: Remove dependence of calling directory location
 main <- function(output_dir, prefix_dir) {
 
@@ -16,82 +93,6 @@ main <- function(output_dir, prefix_dir) {
 
   stop("DEBUG")
 
-  ##########################################
-  ### Preparing PM data from CAMS-Europe ###
-  ##########################################
-  ################### The following code takes a while to run, so population estimates  ###################
-  ################### were run separately and saved to a raster (found underneath)      ###################
-  # # Empty raster
-  # r <- raster(xmn = -10,
-  #             xmx = 3,
-  #             ymn = 49,
-  #             ymx = 62,
-  #             res = 0.1)
-  # 
-  # # Adding unique  ID
-  # r[] <- 1:(dim(r)[1]*dim(r)[2])
-  # 
-  # # Empty dataset to append to
-  # weights <- NULL
-  # 
-  # # Extracting values
-  # a1 <- raster::extract(r, # Grid unique IDs
-  #                       uk_full, # Shapefiles
-  #                       weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap
-  #                       small = TRUE) # Small areas in comparison to the raster
-  # 
-  # # Setting to missing if not in UK
-  # r[!(r[] %in% a1[[1]])] <- NA
-  # 
-  # # file names
-  # files <- c('Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20201201-20210531.nc',
-  #            'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20210601-20211130.nc',
-  #            'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20211201-20220430.nc')
-  # 
-  # # files start dates
-  # start_date <- c(as.Date('2020-12-01'),
-  #                 as.Date('2021-06-01'),
-  #                 as.Date('2021-12-01'))
-  # 
-  # # Loop for each file
-  # for (i in 1:length(files)){
-  #   # Opening raster
-  #   ncin <- raster(files[i],
-  #                  band = 1,
-  #                  verbose = FALSE,
-  #                  stopIfNotEqualSpaced = FALSE)
-  #   # Getting the number of  days
-  #   N_days <- floor(nbands(ncin)/24)
-  #   # Dates
-  #   Dates <- start_date[i] + (1:N_days) - 1
-  #   # Loop for each day in the year
-  #   for (j in 1:N_days){
-  #     # Getting date
-  #     date <- Dates[j]
-  #     # Looping for each hour in the day
-  #     for (k in (24*(j-1)+1):(24*j)){
-  #       # Opening raster
-  #       ncin <- raster(files[i],
-  #                      band = k,
-  #                      verbose = FALSE,
-  #                      stopIfNotEqualSpaced = FALSE)
-  #       # # Cropping for the UK
-  #       # ncin <- crop(ncin, extent(-10, 3, 49, 62))
-  #       extent(ncin) <- c(-10, 3, 49, 62)
-  #       # Setting to missing if not in  UK
-  #       ncin[is.na(r[])] <- NA
-  #       # Saving raster
-  #       writeRaster(ncin,
-  #                   filename = paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''),
-  #                   overwrite = TRUE)
-  #       # else {keep <- keep + ncin}
-  #       print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = ''))
-  #     }
-  #   }
-  # }
-  #########################################################################################################
-  #########################################################################################################
-
   # Empty raster
   r0 <- raster(xmn = -2.8,
                xmx = -1.8,

From dd83e9950ac9be333b7f5264ad6ab5ac453faf14 Mon Sep 17 00:00:00 2001
From: andrewgryan <andrew.ryan@metoffice.gov.uk>
Date: Tue, 24 Jan 2023 17:23:19 +0000
Subject: [PATCH 6/7] begin to partition preparation into separate functions

---
 Code/CaseStudy2/1d_DataPrep_PM25.R | 109 ++++++++++++++++-------------
 1 file changed, 61 insertions(+), 48 deletions(-)

diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R
index 6e099dc..b74f721 100644
--- a/Code/CaseStudy2/1d_DataPrep_PM25.R
+++ b/Code/CaseStudy2/1d_DataPrep_PM25.R
@@ -93,6 +93,13 @@ main <- function(output_dir, prefix_dir) {
 
   stop("DEBUG")
 
+  # TODO: allow user to select particular data source
+  prepare_cams()
+  prepare_aurn()
+  prepare_emep()
+}
+
+prepare_cams <- function() {
   # Empty raster
   r0 <- raster(xmn = -2.8,
                xmx = -1.8,
@@ -186,6 +193,10 @@ main <- function(output_dir, prefix_dir) {
 
   # Save cams
   save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData"))
+  
+}
+
+prepare_aurn <- function() {
 
   ##############################################
   ### Preparing PM data from ground monitors ###
@@ -443,56 +454,58 @@ main <- function(output_dir, prefix_dir) {
 
   # Save aurn data
   save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData"))
+}
 
-  ###################################
-  ### Preparing PM data from EMEP ###
-  ###################################
-  ################### The following code takes a while to run, so population estimates  ###################
-  ################### were run separately and saved to a raster (found underneath)      ###################
-  # # file names
-  # files <- c("Data/CaseStudy2/Raw/PM25/EMEP/pm25_janfeb2021_regrid.nc",
-  #            "Data/CaseStudy2/Raw/PM25/EMEP/pm25_marapr2021_regrid.nc")
-  # 
-  # # files start dates
-  # start_date <- c(as.Date('2021-01-01'),
-  #                 as.Date('2021-03-01'))
-  # 
-  # # Loop for each file
-  # for (i in 1:length(files)){
-  #   # Opening raster to get number of days
-  #   ncin <- raster(files[i],
-  #                  band = 1,
-  #                  verbose = FALSE,
-  #                  stopIfNotEqualSpaced = FALSE)
-  #   # Getting the number of  days
-  #   N_days <- floor(nbands(ncin)/24)
-  #   # Dates
-  #   Dates <- start_date[i] + (1:N_days) - 1
-  #   # Loop for each day in the year
-  #   for (j in 1:N_days){
-  #     # Getting date
-  #     date <- Dates[j]
-  #     # Looping for each hour in the day
-  #     for (k in (24*(j-1)+1):(24*j)){
-  #       # Opening raster
-  #       ncin <- raster(files[i],
-  #                      band = k,
-  #                      verbose = FALSE,
-  #                      stopIfNotEqualSpaced = FALSE)
-  #       # Cropping for the UK
-  #       ncin <- crop(ncin, extent(-2.755, -1.895, 53.325, 53.695))
-  #       # Saving raster
-  #       writeRaster(ncin,
-  #                   filename = paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''),
-  #                   overwrite = TRUE)
-  #       # else {keep <- keep + ncin}
-  #       print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = ''))
-  #     }
-  #   }
-  # }
-  #########################################################################################################
-  #########################################################################################################
+###################################
+### Preparing PM data from EMEP ###
+###################################
+################### The following code takes a while to run, so population estimates  ###################
+################### were run separately and saved to a raster (found underneath)      ###################
+# # file names
+# files <- c("Data/CaseStudy2/Raw/PM25/EMEP/pm25_janfeb2021_regrid.nc",
+#            "Data/CaseStudy2/Raw/PM25/EMEP/pm25_marapr2021_regrid.nc")
+# 
+# # files start dates
+# start_date <- c(as.Date('2021-01-01'),
+#                 as.Date('2021-03-01'))
+# 
+# # Loop for each file
+# for (i in 1:length(files)){
+#   # Opening raster to get number of days
+#   ncin <- raster(files[i],
+#                  band = 1,
+#                  verbose = FALSE,
+#                  stopIfNotEqualSpaced = FALSE)
+#   # Getting the number of  days
+#   N_days <- floor(nbands(ncin)/24)
+#   # Dates
+#   Dates <- start_date[i] + (1:N_days) - 1
+#   # Loop for each day in the year
+#   for (j in 1:N_days){
+#     # Getting date
+#     date <- Dates[j]
+#     # Looping for each hour in the day
+#     for (k in (24*(j-1)+1):(24*j)){
+#       # Opening raster
+#       ncin <- raster(files[i],
+#                      band = k,
+#                      verbose = FALSE,
+#                      stopIfNotEqualSpaced = FALSE)
+#       # Cropping for the UK
+#       ncin <- crop(ncin, extent(-2.755, -1.895, 53.325, 53.695))
+#       # Saving raster
+#       writeRaster(ncin,
+#                   filename = paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''),
+#                   overwrite = TRUE)
+#       # else {keep <- keep + ncin}
+#       print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = ''))
+#     }
+#   }
+# }
+#########################################################################################################
+#########################################################################################################
 
+prepare_emep <- function(ew_msoa, mcr_msoa, output_dir, processed_dir) {
   # Empty raster
   r0 <- raster(xmn = -2.755,
                xmx = -1.895,

From 8f2da805f9540636b43a7000dbb18c73ab7c4c2b Mon Sep 17 00:00:00 2001
From: andrewgryan <andrew.ryan@metoffice.gov.uk>
Date: Thu, 26 Jan 2023 09:28:36 +0000
Subject: [PATCH 7/7] wip

---
 Code/CaseStudy2/1d_DataPrep_PM25.R | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R
index b74f721..f39aeee 100644
--- a/Code/CaseStudy2/1d_DataPrep_PM25.R
+++ b/Code/CaseStudy2/1d_DataPrep_PM25.R
@@ -94,9 +94,18 @@ main <- function(output_dir, prefix_dir) {
   stop("DEBUG")
 
   # TODO: allow user to select particular data source
-  prepare_cams()
-  prepare_aurn()
-  prepare_emep()
+
+  # Save cams
+  pm25_cams <- prepare_cams()
+  save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData"))
+
+  # Save aurn data
+  pm25_gm <- prepare_aurn()
+  save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData"))
+
+  # Save aurn data
+  pm25_emep <- prepare_emep()
+  save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData"))
 }
 
 prepare_cams <- function() {
@@ -191,9 +200,7 @@ prepare_cams <- function() {
     }
   }
 
-  # Save cams
-  save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData"))
-  
+  pm25_cams  
 }
 
 prepare_aurn <- function() {
@@ -452,8 +459,7 @@ prepare_aurn <- function() {
   rm(tmp1, tmp2, tmp3, aurn_dat, gm_dat, ltn_dat, mcr_msoa,
      stations_aurn_dat, stations_gm_dat, stations_ltn_dat)
 
-  # Save aurn data
-  save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData"))
+  pm25_gm
 }
 
 ###################################
@@ -505,7 +511,7 @@ prepare_aurn <- function() {
 #########################################################################################################
 #########################################################################################################
 
-prepare_emep <- function(ew_msoa, mcr_msoa, output_dir, processed_dir) {
+prepare_emep <- function() {
   # Empty raster
   r0 <- raster(xmn = -2.755,
                xmx = -1.895,
@@ -597,6 +603,5 @@ prepare_emep <- function(ew_msoa, mcr_msoa, output_dir, processed_dir) {
     }
   }
 
-  # Save aurn data
-  save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData"))
+  pm25_emep
 }