From f7e23498ae915eada7d7e8334d09023e978c0bd3 Mon Sep 17 00:00:00 2001 From: andrewgryan Date: Tue, 24 Jan 2023 16:20:21 +0000 Subject: [PATCH 1/7] decouple output directory from call to save --- Code/CaseStudy2/1d_DataPrep_PM25.R | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R index 4a76640..bd5cb7c 100644 --- a/Code/CaseStudy2/1d_DataPrep_PM25.R +++ b/Code/CaseStudy2/1d_DataPrep_PM25.R @@ -5,6 +5,9 @@ library(here) source(here("Code", "CaseStudy2", "0_Source.R")) +# Directories +output_dir <- "Data/CaseStudy2/Processed/PM25" + # Loading shapefiles load("Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData") @@ -175,7 +178,7 @@ for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1) } # Save cams -save(pm25_cams, file = "Data/CaseStudy2/Processed/PM25/pm25_cams.RData") +save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData")) ############################################## ### Preparing PM data from ground monitors ### @@ -432,7 +435,7 @@ rm(tmp1, tmp2, tmp3, aurn_dat, gm_dat, ltn_dat, mcr_msoa, stations_aurn_dat, stations_gm_dat, stations_ltn_dat) # Save aurn data -save(pm25_gm, file = "Data/CaseStudy2/Processed/PM25/pm25_gm.RData") +save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData")) ################################### ### Preparing PM data from EMEP ### @@ -574,11 +577,4 @@ for (i in as.character(seq(as.Date('2021-01-01'), as.Date('2021-04-30'), by = 1) } # Save aurn data -save(pm25_emep, file = "Data/CaseStudy2/Processed/PM25/pm25_emep.RData") - - - - - - - +save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData")) From 0a57a6c1b8234ca2524ebf56ffcbdefe5ccedf5f Mon Sep 17 00:00:00 2001 From: andrewgryan Date: Tue, 24 Jan 2023 16:47:17 +0000 Subject: [PATCH 2/7] extract main program and add --output-dir flag --- Code/CaseStudy2/1d_DataPrep_PM25.R | 1131 ++++++++++++++-------------- Code/CaseStudy2/cli.R | 13 +- 2 files changed, 577 insertions(+), 567 deletions(-) diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R index bd5cb7c..374f7a1 100644 --- a/Code/CaseStudy2/1d_DataPrep_PM25.R +++ b/Code/CaseStudy2/1d_DataPrep_PM25.R @@ -5,576 +5,579 @@ library(here) source(here("Code", "CaseStudy2", "0_Source.R")) -# Directories -output_dir <- "Data/CaseStudy2/Processed/PM25" - -# Loading shapefiles -load("Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData") - -########################################## -### Preparing PM data from CAMS-Europe ### -########################################## -################### The following code takes a while to run, so population estimates ################### -################### were run separately and saved to a raster (found underneath) ################### -# # Empty raster -# r <- raster(xmn = -10, -# xmx = 3, -# ymn = 49, -# ymx = 62, -# res = 0.1) -# -# # Adding unique ID -# r[] <- 1:(dim(r)[1]*dim(r)[2]) -# -# # Empty dataset to append to -# weights <- NULL -# -# # Extracting values -# a1 <- raster::extract(r, # Grid unique IDs -# uk_full, # Shapefiles -# weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap -# small = TRUE) # Small areas in comparison to the raster -# -# # Setting to missing if not in UK -# r[!(r[] %in% a1[[1]])] <- NA -# -# # file names -# files <- c('Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20201201-20210531.nc', -# 'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20210601-20211130.nc', -# 'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20211201-20220430.nc') -# -# # files start dates -# start_date <- c(as.Date('2020-12-01'), -# as.Date('2021-06-01'), -# as.Date('2021-12-01')) -# -# # Loop for each file -# for (i in 1:length(files)){ -# # Opening raster -# ncin <- raster(files[i], -# band = 1, -# verbose = FALSE, -# stopIfNotEqualSpaced = FALSE) -# # Getting the number of days -# N_days <- floor(nbands(ncin)/24) -# # Dates -# Dates <- start_date[i] + (1:N_days) - 1 -# # Loop for each day in the year -# for (j in 1:N_days){ -# # Getting date -# date <- Dates[j] -# # Looping for each hour in the day -# for (k in (24*(j-1)+1):(24*j)){ -# # Opening raster -# ncin <- raster(files[i], -# band = k, -# verbose = FALSE, -# stopIfNotEqualSpaced = FALSE) -# # # Cropping for the UK -# # ncin <- crop(ncin, extent(-10, 3, 49, 62)) -# extent(ncin) <- c(-10, 3, 49, 62) -# # Setting to missing if not in UK -# ncin[is.na(r[])] <- NA -# # Saving raster -# writeRaster(ncin, -# filename = paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''), -# overwrite = TRUE) -# # else {keep <- keep + ncin} -# print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = '')) -# } -# } -# } -######################################################################################################### -######################################################################################################### - -# Empty raster -r0 <- raster(xmn = -2.8, - xmx = -1.8, - ymn = 53.2, - ymx = 53.7, - res = 0.1) - -# Adding unique ID -r0[] <- 1:(dim(r0)[1]*dim(r0)[2]) - -# Subsetting Greater Manchester shapefiles -mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale', - 'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan')) - -# Converting to long lat -mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs")) - -# Extracting values -a1 <- raster::extract(r0, # Grid unique IDs - mcr_msoa, # Shapefiles - weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap - small = TRUE) # Small areas in comparison to the raster - -# Empty dataset to append to -Weights_msoa <- NULL - -# Loop for each area -for (i in 1:length(a1)){ - # Converting Weights_msoa to dataframes - tmp <- as.data.frame(a1[[i]]) - # Removing NAs - tmp <- subset(tmp, !is.na(value)) - # Reweighting the Weights_msoa after zeroes removed - tmp$weight <- tmp$weight/sum(tmp$weight) - # New Region Name - tmp$area_id <- mcr_msoa@data$area_id[i] - # Creating new dataset - if (i == 1) {Weights_msoa <- tmp} - else {Weights_msoa <- rbind(Weights_msoa, tmp)} - # Removing unecessary data - rm(tmp) -} - -# Altering column names -names(Weights_msoa)[1] <- c('IDGRID') - -# Merging to skeleton dataset -pm25_cams <- NULL - -# Loop for each date -for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1))){ - # Loop for each time - for (j in 0:23){ - # Reading in PM25 from CAMS - r <- raster(paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) - # Renaming raster - names(r) <- 'pm25' - # Creating aggregated estimates of PM25 by MSOA - tmp1 <- r %>% - # Converting raster to dataframe - crop(r0) %>% - stack(r0) %>% - rasterToPoints() %>% - as.data.frame()%>% - # Renaming columns - dplyr::select(IDGRID = layer, pm25) %>% - # Merging on weights to aggregate - right_join(Weights_msoa, - by = 'IDGRID') %>% - # Aggregating grid to - ddply(.(area_id), - summarize, - pm25_cams_agg = weighted.mean(pm25, weight)) - # Extracting PM2.5 values at centroids - tmp1$pm25_cams_cent<- raster::extract(r, mcr_msoa@data[,c('cent_long', 'cent_lat')]) - # Adding on date and hour - tmp1 <- tmp1 %>% - mutate(hour = j, - date = i) %>% - # Outputting datasets - dplyr::select(area_id, date, hour, pm25_cams_cent, pm25_cams_agg) - # Appending together - pm25_cams <- rbind(pm25_cams, tmp1) - # Removing uncessary datasets - rm(tmp1, tmp2) - # Printing index - print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) +# TODO: Remove dependence of calling directory location +main <- function(output_dir) { + print(output_dir) + stop("DEBUG") + + # Loading shapefiles + load("Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData") + + ########################################## + ### Preparing PM data from CAMS-Europe ### + ########################################## + ################### The following code takes a while to run, so population estimates ################### + ################### were run separately and saved to a raster (found underneath) ################### + # # Empty raster + # r <- raster(xmn = -10, + # xmx = 3, + # ymn = 49, + # ymx = 62, + # res = 0.1) + # + # # Adding unique ID + # r[] <- 1:(dim(r)[1]*dim(r)[2]) + # + # # Empty dataset to append to + # weights <- NULL + # + # # Extracting values + # a1 <- raster::extract(r, # Grid unique IDs + # uk_full, # Shapefiles + # weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap + # small = TRUE) # Small areas in comparison to the raster + # + # # Setting to missing if not in UK + # r[!(r[] %in% a1[[1]])] <- NA + # + # # file names + # files <- c('Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20201201-20210531.nc', + # 'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20210601-20211130.nc', + # 'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20211201-20220430.nc') + # + # # files start dates + # start_date <- c(as.Date('2020-12-01'), + # as.Date('2021-06-01'), + # as.Date('2021-12-01')) + # + # # Loop for each file + # for (i in 1:length(files)){ + # # Opening raster + # ncin <- raster(files[i], + # band = 1, + # verbose = FALSE, + # stopIfNotEqualSpaced = FALSE) + # # Getting the number of days + # N_days <- floor(nbands(ncin)/24) + # # Dates + # Dates <- start_date[i] + (1:N_days) - 1 + # # Loop for each day in the year + # for (j in 1:N_days){ + # # Getting date + # date <- Dates[j] + # # Looping for each hour in the day + # for (k in (24*(j-1)+1):(24*j)){ + # # Opening raster + # ncin <- raster(files[i], + # band = k, + # verbose = FALSE, + # stopIfNotEqualSpaced = FALSE) + # # # Cropping for the UK + # # ncin <- crop(ncin, extent(-10, 3, 49, 62)) + # extent(ncin) <- c(-10, 3, 49, 62) + # # Setting to missing if not in UK + # ncin[is.na(r[])] <- NA + # # Saving raster + # writeRaster(ncin, + # filename = paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''), + # overwrite = TRUE) + # # else {keep <- keep + ncin} + # print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = '')) + # } + # } + # } + ######################################################################################################### + ######################################################################################################### + + # Empty raster + r0 <- raster(xmn = -2.8, + xmx = -1.8, + ymn = 53.2, + ymx = 53.7, + res = 0.1) + + # Adding unique ID + r0[] <- 1:(dim(r0)[1]*dim(r0)[2]) + + # Subsetting Greater Manchester shapefiles + mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale', + 'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan')) + + # Converting to long lat + mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs")) + + # Extracting values + a1 <- raster::extract(r0, # Grid unique IDs + mcr_msoa, # Shapefiles + weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap + small = TRUE) # Small areas in comparison to the raster + + # Empty dataset to append to + Weights_msoa <- NULL + + # Loop for each area + for (i in 1:length(a1)){ + # Converting Weights_msoa to dataframes + tmp <- as.data.frame(a1[[i]]) + # Removing NAs + tmp <- subset(tmp, !is.na(value)) + # Reweighting the Weights_msoa after zeroes removed + tmp$weight <- tmp$weight/sum(tmp$weight) + # New Region Name + tmp$area_id <- mcr_msoa@data$area_id[i] + # Creating new dataset + if (i == 1) {Weights_msoa <- tmp} + else {Weights_msoa <- rbind(Weights_msoa, tmp)} + # Removing unecessary data + rm(tmp) } -} -# Save cams -save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData")) - -############################################## -### Preparing PM data from ground monitors ### -############################################## -# Subsetting Greater Manchester shapefiles -mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale', - 'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan')) - -# Converting to long lat -mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs")) - -# Reading in LTN network data -tmp1 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/BroomLane_AQ_hourly.csv') -tmp2 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/CromwellAQ_hourly.csv') -tmp3 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/DelamereRoad_AQ_hourly.csv') -tmp4 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/GrangethorpeAQ_2b_hourly.csv') -tmp5 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/ManorRad_AQ_hourly.csv') -tmp6 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/SladeLane_AQ_hourly.csv') - -# Adding Longitude -tmp1$latitude <- 53.441161 -tmp2$latitude <- 53.445053 -tmp3$latitude <- 53.442702 -tmp4$latitude <- 53.436213 -tmp5$latitude <- 53.447703 -tmp6$latitude <- 53.448551 - -# Adding Latitude -tmp1$longitude <- -2.185467 -tmp2$longitude <- -2.188442 -tmp3$longitude <- -2.188183 -tmp4$longitude <- -2.203991 -tmp5$longitude <- -2.183646 -tmp6$longitude <- -2.197556 - -# Getting site label -tmp1$site <- 'BroomLane' -tmp2$site <- 'Cromwell' -tmp3$site <- 'DelamereRoad' -tmp4$site <- 'Grangethorpe' -tmp5$site <- 'ManorRoad' -tmp6$site <- 'SladeLane' - -# Adding site label -tmp1$code <- 'LTN-BLN' -tmp2$code <- 'LTN-CRM' -tmp3$code <- 'LTN-DRD' -tmp4$code <- 'LTN-GRA' -tmp5$code <- 'LTN-MRD' -tmp6$code <- 'LTN-SLN' - -# Adding site label -tmp1$date <- as.Date(substr(tmp1$ds, 1, 10)) -tmp2$date <- as.Date(substr(tmp2$ds, 1, 10)) -tmp3$date <- as.Date(substr(tmp3$ds, 1, 10)) -tmp4$date <- as.Date(substr(tmp4$ds, 1, 10)) -tmp5$date <- as.Date(substr(tmp5$ds, 1, 10)) -tmp6$date <- as.Date(substr(tmp6$ds, 1, 10)) - -# Adding site label -tmp1$hour <- as.numeric(substr(tmp1$ds, 12, 13)) -tmp2$hour <- as.numeric(substr(tmp2$ds, 12, 13)) -tmp3$hour <- as.numeric(substr(tmp3$ds, 12, 13)) -tmp4$hour <- as.numeric(substr(tmp4$ds, 12, 13)) -tmp5$hour <- as.numeric(substr(tmp5$ds, 12, 13)) -tmp6$hour <- as.numeric(substr(tmp6$ds, 12, 13)) - -# Loading in stations data -stations_ltn_dat <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) %>% - dplyr::select(site, code, longitude, latitude) %>% - unique() - -# Pulling all data together -ltn_dat <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) %>% - dplyr::select(date, pm2.5 = PM2.5 , site, code, hour)%>% - dplyr::mutate(pm2.5 = ifelse(pm2.5 <= 0, NA, pm2.5)) - -# Removing unecessary datasets -rm(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) - -# List of Stations -stations_aurn_dat <- importMeta(source = "aurn", all = TRUE) %>% - filter(variable == 'PM2.5' & - (as.Date(end_date, format = '%Y-%m-%d') >= as.Date('2020-12-01') | - end_date == 'ongoing') & - longitude > (mcr_msoa@bbox[1,1] - 0.5) & - longitude < (mcr_msoa@bbox[1,2] + 0.5) & - latitude > (mcr_msoa@bbox[2,1] - 0.5) & - latitude < (mcr_msoa@bbox[2,2] + 0.5)) %>% - dplyr::select(code, site, latitude, longitude) %>% - unique() %>% - filter(!(site %in% c('Glazebury', 'Liverpool Speke'))) - -# Downloading pollutant/weather data -aurn_dat <- importAURN(site = stations_aurn_dat$code, - year = 2020:2021, - pollutant = c('pm2.5'), - meta = FALSE, - data_type = 'hourly', - verbose = FALSE) %>% - dplyr::mutate(pm2.5 = ifelse(pm2.5 <= 0, NA, pm2.5)) - -# Getting date and hour -aurn_dat$hour <- as.numeric(substr(aurn_dat$date, 12, 13)) -aurn_dat$date <- as.Date(substr(aurn_dat$date, 1, 10)) - -# Appenidng both together -gm_dat <- rbind(aurn_dat, ltn_dat) -stations_gm_dat <- rbind(stations_aurn_dat, stations_ltn_dat) - -# Getting closest station for each MSOA -A1 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], - stations_aurn_dat[,c('longitude', 'latitude')]), 1, which.min) -A2 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], - stations_ltn_dat[,c('longitude', 'latitude')]), 1, which.min) -A3 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], - stations_gm_dat[,c('longitude', 'latitude')]), 1, which.min) - -# Getting data from nearest station (AURN) -tmp1 <- data.frame(area_id = mcr_msoa$area_id, - code_aurn = stations_aurn_dat$code[A1]) %>% - left_join(aurn_dat %>% - dplyr::select(code_aurn = code, date, hour, pm25_aurn = pm2.5), - by = 'code_aurn') %>% - dplyr::select(area_id, date, hour, pm25_aurn, code_aurn) - -# Getting data from nearest station (LTN) -tmp2 <- data.frame(area_id = mcr_msoa$area_id, - code_ltn = stations_ltn_dat$code[A2]) %>% - left_join(ltn_dat %>% - dplyr::select(code_ltn = code, date, hour, pm25_ltn = pm2.5), - by = 'code_ltn') %>% - dplyr::select(area_id, date, hour, pm25_ltn, code_ltn) - -# Getting data from nearest station (Both) -tmp3 <- data.frame(area_id = mcr_msoa$area_id, - code_gm = stations_gm_dat$code[A3]) %>% - left_join(gm_dat %>% - dplyr::select(code_gm = code, date, hour, pm25_gm = pm2.5), - by = 'code_gm') %>% - dplyr::select(area_id, date, hour, pm25_gm, code_gm) - -# Merging together -pm25_gm <- expand.grid(area_id = mcr_msoa$area_id, - date =seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1), - hour = 0:23)%>% - left_join(tmp1, - by = c('area_id', 'date', 'hour')) %>% - left_join(tmp2, - by = c('area_id', 'date', 'hour')) %>% - left_join(tmp3, - by = c('area_id', 'date', 'hour')) - -# Removing unecessary datasets -rm(tmp1, tmp2, tmp3) - -# Getting stations with non-missing data -aurn_dat <- aurn_dat %>% - left_join(stations_aurn_dat %>% - dplyr::select(code, longitude, latitude), - by = "code") -# Getting stations with non-missing data -ltn_dat <- ltn_dat %>% - left_join(stations_ltn_dat %>% - dplyr::select(code, longitude, latitude), - by = "code") -# Getting stations with non-missing data -gm_dat <- gm_dat %>% - left_join(stations_gm_dat %>% - dplyr::select(code, longitude, latitude), - by = "code") - -# Empty dataset for data from nearest *non-missing* station -tmp1 <- NULL -tmp2 <- NULL -tmp3 <- NULL - -# Loop for each date -for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1))){ - # Loop for each time - for (j in 0:23){ - # Getting stations with non-missing data - test1 <- aurn_dat %>% - dplyr::filter(date == i & hour == j & !is.na(pm2.5)) - # Getting stations with non-missing data - test2 <- ltn_dat %>% - dplyr::filter(date == i & hour == j & !is.na(pm2.5)) - # Getting stations with non-missing data - test3 <- gm_dat %>% - dplyr::filter(date == i & hour == j & !is.na(pm2.5)) - # Getting closest non-missing station for each MSOA - A1 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], - test1[,c('longitude', 'latitude')]), - 1, which.min) - # Getting closest non-missing station for each MSOA - A2 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], - test2[,c('longitude', 'latitude')]), - 1, which.min) - # Getting closest non-missing station for each MSOA - A3 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], - test3[,c('longitude', 'latitude')]), - 1, which.min) - # Getting data from nearest station (AURN) - if (length(A1) > 0){ - test1 <- data.frame(area_id = mcr_msoa$area_id, - code_aurn_near = test1$code[A1]) %>% - left_join(test1 %>% - dplyr::select(code_aurn_near = code, date, hour, pm25_aurn_near = pm2.5), - by = 'code_aurn_near') %>% - dplyr::select(area_id, date, hour, pm25_aurn_near, code_aurn_near) - # Appending - tmp1 <- rbind(tmp1, test1) + # Altering column names + names(Weights_msoa)[1] <- c('IDGRID') + + # Merging to skeleton dataset + pm25_cams <- NULL + + # Loop for each date + for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1))){ + # Loop for each time + for (j in 0:23){ + # Reading in PM25 from CAMS + r <- raster(paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) + # Renaming raster + names(r) <- 'pm25' + # Creating aggregated estimates of PM25 by MSOA + tmp1 <- r %>% + # Converting raster to dataframe + crop(r0) %>% + stack(r0) %>% + rasterToPoints() %>% + as.data.frame()%>% + # Renaming columns + dplyr::select(IDGRID = layer, pm25) %>% + # Merging on weights to aggregate + right_join(Weights_msoa, + by = 'IDGRID') %>% + # Aggregating grid to + ddply(.(area_id), + summarize, + pm25_cams_agg = weighted.mean(pm25, weight)) + # Extracting PM2.5 values at centroids + tmp1$pm25_cams_cent<- raster::extract(r, mcr_msoa@data[,c('cent_long', 'cent_lat')]) + # Adding on date and hour + tmp1 <- tmp1 %>% + mutate(hour = j, + date = i) %>% + # Outputting datasets + dplyr::select(area_id, date, hour, pm25_cams_cent, pm25_cams_agg) + # Appending together + pm25_cams <- rbind(pm25_cams, tmp1) + # Removing uncessary datasets + rm(tmp1, tmp2) + # Printing index + print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) } - # Getting data from nearest station (LTN) - if (length(A2) > 0){ - test2 <- data.frame(area_id = mcr_msoa$area_id, - code_ltn_near = test2$code[A2]) %>% - left_join(test2 %>% - dplyr::select(code_ltn_near = code, date, hour, pm25_ltn_near = pm2.5), - by = 'code_ltn_near') %>% - dplyr::select(area_id, date, hour, pm25_ltn_near, code_ltn_near) - # Appending - tmp2 <- rbind(tmp2, test2) - } - if (length(A3) > 0){ - # Getting data from nearest station (Both) - test3 <- data.frame(area_id = mcr_msoa$area_id, - code_gm_near = test3$code[A3]) %>% - left_join(test3 %>% - dplyr::select(code_gm_near = code, date, hour, pm25_gm_near = pm2.5), - by = 'code_gm_near') %>% - dplyr::select(area_id, date, hour, pm25_gm_near, code_gm_near) - # Appending - tmp3 <- rbind(tmp3, test3) + } + + # Save cams + save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData")) + + ############################################## + ### Preparing PM data from ground monitors ### + ############################################## + # Subsetting Greater Manchester shapefiles + mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale', + 'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan')) + + # Converting to long lat + mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs")) + + # Reading in LTN network data + tmp1 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/BroomLane_AQ_hourly.csv') + tmp2 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/CromwellAQ_hourly.csv') + tmp3 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/DelamereRoad_AQ_hourly.csv') + tmp4 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/GrangethorpeAQ_2b_hourly.csv') + tmp5 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/ManorRad_AQ_hourly.csv') + tmp6 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/SladeLane_AQ_hourly.csv') + + # Adding Longitude + tmp1$latitude <- 53.441161 + tmp2$latitude <- 53.445053 + tmp3$latitude <- 53.442702 + tmp4$latitude <- 53.436213 + tmp5$latitude <- 53.447703 + tmp6$latitude <- 53.448551 + + # Adding Latitude + tmp1$longitude <- -2.185467 + tmp2$longitude <- -2.188442 + tmp3$longitude <- -2.188183 + tmp4$longitude <- -2.203991 + tmp5$longitude <- -2.183646 + tmp6$longitude <- -2.197556 + + # Getting site label + tmp1$site <- 'BroomLane' + tmp2$site <- 'Cromwell' + tmp3$site <- 'DelamereRoad' + tmp4$site <- 'Grangethorpe' + tmp5$site <- 'ManorRoad' + tmp6$site <- 'SladeLane' + + # Adding site label + tmp1$code <- 'LTN-BLN' + tmp2$code <- 'LTN-CRM' + tmp3$code <- 'LTN-DRD' + tmp4$code <- 'LTN-GRA' + tmp5$code <- 'LTN-MRD' + tmp6$code <- 'LTN-SLN' + + # Adding site label + tmp1$date <- as.Date(substr(tmp1$ds, 1, 10)) + tmp2$date <- as.Date(substr(tmp2$ds, 1, 10)) + tmp3$date <- as.Date(substr(tmp3$ds, 1, 10)) + tmp4$date <- as.Date(substr(tmp4$ds, 1, 10)) + tmp5$date <- as.Date(substr(tmp5$ds, 1, 10)) + tmp6$date <- as.Date(substr(tmp6$ds, 1, 10)) + + # Adding site label + tmp1$hour <- as.numeric(substr(tmp1$ds, 12, 13)) + tmp2$hour <- as.numeric(substr(tmp2$ds, 12, 13)) + tmp3$hour <- as.numeric(substr(tmp3$ds, 12, 13)) + tmp4$hour <- as.numeric(substr(tmp4$ds, 12, 13)) + tmp5$hour <- as.numeric(substr(tmp5$ds, 12, 13)) + tmp6$hour <- as.numeric(substr(tmp6$ds, 12, 13)) + + # Loading in stations data + stations_ltn_dat <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) %>% + dplyr::select(site, code, longitude, latitude) %>% + unique() + + # Pulling all data together + ltn_dat <- rbind(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) %>% + dplyr::select(date, pm2.5 = PM2.5 , site, code, hour)%>% + dplyr::mutate(pm2.5 = ifelse(pm2.5 <= 0, NA, pm2.5)) + + # Removing unecessary datasets + rm(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6) + + # List of Stations + stations_aurn_dat <- importMeta(source = "aurn", all = TRUE) %>% + filter(variable == 'PM2.5' & + (as.Date(end_date, format = '%Y-%m-%d') >= as.Date('2020-12-01') | + end_date == 'ongoing') & + longitude > (mcr_msoa@bbox[1,1] - 0.5) & + longitude < (mcr_msoa@bbox[1,2] + 0.5) & + latitude > (mcr_msoa@bbox[2,1] - 0.5) & + latitude < (mcr_msoa@bbox[2,2] + 0.5)) %>% + dplyr::select(code, site, latitude, longitude) %>% + unique() %>% + filter(!(site %in% c('Glazebury', 'Liverpool Speke'))) + + # Downloading pollutant/weather data + aurn_dat <- importAURN(site = stations_aurn_dat$code, + year = 2020:2021, + pollutant = c('pm2.5'), + meta = FALSE, + data_type = 'hourly', + verbose = FALSE) %>% + dplyr::mutate(pm2.5 = ifelse(pm2.5 <= 0, NA, pm2.5)) + + # Getting date and hour + aurn_dat$hour <- as.numeric(substr(aurn_dat$date, 12, 13)) + aurn_dat$date <- as.Date(substr(aurn_dat$date, 1, 10)) + + # Appenidng both together + gm_dat <- rbind(aurn_dat, ltn_dat) + stations_gm_dat <- rbind(stations_aurn_dat, stations_ltn_dat) + + # Getting closest station for each MSOA + A1 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], + stations_aurn_dat[,c('longitude', 'latitude')]), 1, which.min) + A2 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], + stations_ltn_dat[,c('longitude', 'latitude')]), 1, which.min) + A3 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], + stations_gm_dat[,c('longitude', 'latitude')]), 1, which.min) + + # Getting data from nearest station (AURN) + tmp1 <- data.frame(area_id = mcr_msoa$area_id, + code_aurn = stations_aurn_dat$code[A1]) %>% + left_join(aurn_dat %>% + dplyr::select(code_aurn = code, date, hour, pm25_aurn = pm2.5), + by = 'code_aurn') %>% + dplyr::select(area_id, date, hour, pm25_aurn, code_aurn) + + # Getting data from nearest station (LTN) + tmp2 <- data.frame(area_id = mcr_msoa$area_id, + code_ltn = stations_ltn_dat$code[A2]) %>% + left_join(ltn_dat %>% + dplyr::select(code_ltn = code, date, hour, pm25_ltn = pm2.5), + by = 'code_ltn') %>% + dplyr::select(area_id, date, hour, pm25_ltn, code_ltn) + + # Getting data from nearest station (Both) + tmp3 <- data.frame(area_id = mcr_msoa$area_id, + code_gm = stations_gm_dat$code[A3]) %>% + left_join(gm_dat %>% + dplyr::select(code_gm = code, date, hour, pm25_gm = pm2.5), + by = 'code_gm') %>% + dplyr::select(area_id, date, hour, pm25_gm, code_gm) + + # Merging together + pm25_gm <- expand.grid(area_id = mcr_msoa$area_id, + date =seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1), + hour = 0:23)%>% + left_join(tmp1, + by = c('area_id', 'date', 'hour')) %>% + left_join(tmp2, + by = c('area_id', 'date', 'hour')) %>% + left_join(tmp3, + by = c('area_id', 'date', 'hour')) + + # Removing unecessary datasets + rm(tmp1, tmp2, tmp3) + + # Getting stations with non-missing data + aurn_dat <- aurn_dat %>% + left_join(stations_aurn_dat %>% + dplyr::select(code, longitude, latitude), + by = "code") + # Getting stations with non-missing data + ltn_dat <- ltn_dat %>% + left_join(stations_ltn_dat %>% + dplyr::select(code, longitude, latitude), + by = "code") + # Getting stations with non-missing data + gm_dat <- gm_dat %>% + left_join(stations_gm_dat %>% + dplyr::select(code, longitude, latitude), + by = "code") + + # Empty dataset for data from nearest *non-missing* station + tmp1 <- NULL + tmp2 <- NULL + tmp3 <- NULL + + # Loop for each date + for (i in as.character(seq(as.Date('2020-12-01'), as.Date('2021-12-31'), by = 1))){ + # Loop for each time + for (j in 0:23){ + # Getting stations with non-missing data + test1 <- aurn_dat %>% + dplyr::filter(date == i & hour == j & !is.na(pm2.5)) + # Getting stations with non-missing data + test2 <- ltn_dat %>% + dplyr::filter(date == i & hour == j & !is.na(pm2.5)) + # Getting stations with non-missing data + test3 <- gm_dat %>% + dplyr::filter(date == i & hour == j & !is.na(pm2.5)) + # Getting closest non-missing station for each MSOA + A1 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], + test1[,c('longitude', 'latitude')]), + 1, which.min) + # Getting closest non-missing station for each MSOA + A2 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], + test2[,c('longitude', 'latitude')]), + 1, which.min) + # Getting closest non-missing station for each MSOA + A3 <- apply(proxy::dist(mcr_msoa@data[,c('cent_long', 'cent_lat')], + test3[,c('longitude', 'latitude')]), + 1, which.min) + # Getting data from nearest station (AURN) + if (length(A1) > 0){ + test1 <- data.frame(area_id = mcr_msoa$area_id, + code_aurn_near = test1$code[A1]) %>% + left_join(test1 %>% + dplyr::select(code_aurn_near = code, date, hour, pm25_aurn_near = pm2.5), + by = 'code_aurn_near') %>% + dplyr::select(area_id, date, hour, pm25_aurn_near, code_aurn_near) + # Appending + tmp1 <- rbind(tmp1, test1) + } + # Getting data from nearest station (LTN) + if (length(A2) > 0){ + test2 <- data.frame(area_id = mcr_msoa$area_id, + code_ltn_near = test2$code[A2]) %>% + left_join(test2 %>% + dplyr::select(code_ltn_near = code, date, hour, pm25_ltn_near = pm2.5), + by = 'code_ltn_near') %>% + dplyr::select(area_id, date, hour, pm25_ltn_near, code_ltn_near) + # Appending + tmp2 <- rbind(tmp2, test2) + } + if (length(A3) > 0){ + # Getting data from nearest station (Both) + test3 <- data.frame(area_id = mcr_msoa$area_id, + code_gm_near = test3$code[A3]) %>% + left_join(test3 %>% + dplyr::select(code_gm_near = code, date, hour, pm25_gm_near = pm2.5), + by = 'code_gm_near') %>% + dplyr::select(area_id, date, hour, pm25_gm_near, code_gm_near) + # Appending + tmp3 <- rbind(tmp3, test3) + } + # Removing uncessary datasets + rm(test1, test2, test3) + # Printing index + print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) } - # Removing uncessary datasets - rm(test1, test2, test3) - # Printing index - print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) } -} -# Merging together -pm25_gm <- pm25_gm %>% - left_join(tmp1, - by = c('area_id', 'date', 'hour')) %>% - left_join(tmp2, - by = c('area_id', 'date', 'hour')) %>% - left_join(tmp3, - by = c('area_id', 'date', 'hour')) - -# Removing unecessary files -rm(tmp1, tmp2, tmp3, aurn_dat, gm_dat, ltn_dat, mcr_msoa, - stations_aurn_dat, stations_gm_dat, stations_ltn_dat) - -# Save aurn data -save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData")) - -################################### -### Preparing PM data from EMEP ### -################################### -################### The following code takes a while to run, so population estimates ################### -################### were run separately and saved to a raster (found underneath) ################### -# # file names -# files <- c("Data/CaseStudy2/Raw/PM25/EMEP/pm25_janfeb2021_regrid.nc", -# "Data/CaseStudy2/Raw/PM25/EMEP/pm25_marapr2021_regrid.nc") -# -# # files start dates -# start_date <- c(as.Date('2021-01-01'), -# as.Date('2021-03-01')) -# -# # Loop for each file -# for (i in 1:length(files)){ -# # Opening raster to get number of days -# ncin <- raster(files[i], -# band = 1, -# verbose = FALSE, -# stopIfNotEqualSpaced = FALSE) -# # Getting the number of days -# N_days <- floor(nbands(ncin)/24) -# # Dates -# Dates <- start_date[i] + (1:N_days) - 1 -# # Loop for each day in the year -# for (j in 1:N_days){ -# # Getting date -# date <- Dates[j] -# # Looping for each hour in the day -# for (k in (24*(j-1)+1):(24*j)){ -# # Opening raster -# ncin <- raster(files[i], -# band = k, -# verbose = FALSE, -# stopIfNotEqualSpaced = FALSE) -# # Cropping for the UK -# ncin <- crop(ncin, extent(-2.755, -1.895, 53.325, 53.695)) -# # Saving raster -# writeRaster(ncin, -# filename = paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''), -# overwrite = TRUE) -# # else {keep <- keep + ncin} -# print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = '')) -# } -# } -# } -######################################################################################################### -######################################################################################################### - -# Empty raster -r0 <- raster(xmn = -2.755, - xmx = -1.895, - ymn = 53.325, - ymx = 53.695, - res = 0.01) - -# Adding unique ID -r0[] <- 1:(dim(r0)[1]*dim(r0)[2]) - -# Subsetting Greater Manchester shapefiles -mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale', - 'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan')) - -# Converting to long lat -mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs")) - -# Extracting values -a1 <- raster::extract(r0, # Grid unique IDs - mcr_msoa, # Shapefiles - weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap - small = TRUE) # Small areas in comparison to the raster - -# Empty dataset to append to -Weights_msoa <- NULL - -# Loop for each area -for (i in 1:length(a1)){ - # Converting Weights_msoa to dataframes - tmp <- as.data.frame(a1[[i]]) - # Removing NAs - tmp <- subset(tmp, !is.na(value)) - # Reweighting the Weights_msoa after zeroes removed - tmp$weight <- tmp$weight/sum(tmp$weight) - # New Region Name - tmp$area_id <- mcr_msoa@data$area_id[i] - # Creating new dataset - if (i == 1) {Weights_msoa <- tmp} - else {Weights_msoa <- rbind(Weights_msoa, tmp)} - # Removing unecessary data - rm(tmp) -} + # Merging together + pm25_gm <- pm25_gm %>% + left_join(tmp1, + by = c('area_id', 'date', 'hour')) %>% + left_join(tmp2, + by = c('area_id', 'date', 'hour')) %>% + left_join(tmp3, + by = c('area_id', 'date', 'hour')) + + # Removing unecessary files + rm(tmp1, tmp2, tmp3, aurn_dat, gm_dat, ltn_dat, mcr_msoa, + stations_aurn_dat, stations_gm_dat, stations_ltn_dat) + + # Save aurn data + save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData")) + + ################################### + ### Preparing PM data from EMEP ### + ################################### + ################### The following code takes a while to run, so population estimates ################### + ################### were run separately and saved to a raster (found underneath) ################### + # # file names + # files <- c("Data/CaseStudy2/Raw/PM25/EMEP/pm25_janfeb2021_regrid.nc", + # "Data/CaseStudy2/Raw/PM25/EMEP/pm25_marapr2021_regrid.nc") + # + # # files start dates + # start_date <- c(as.Date('2021-01-01'), + # as.Date('2021-03-01')) + # + # # Loop for each file + # for (i in 1:length(files)){ + # # Opening raster to get number of days + # ncin <- raster(files[i], + # band = 1, + # verbose = FALSE, + # stopIfNotEqualSpaced = FALSE) + # # Getting the number of days + # N_days <- floor(nbands(ncin)/24) + # # Dates + # Dates <- start_date[i] + (1:N_days) - 1 + # # Loop for each day in the year + # for (j in 1:N_days){ + # # Getting date + # date <- Dates[j] + # # Looping for each hour in the day + # for (k in (24*(j-1)+1):(24*j)){ + # # Opening raster + # ncin <- raster(files[i], + # band = k, + # verbose = FALSE, + # stopIfNotEqualSpaced = FALSE) + # # Cropping for the UK + # ncin <- crop(ncin, extent(-2.755, -1.895, 53.325, 53.695)) + # # Saving raster + # writeRaster(ncin, + # filename = paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''), + # overwrite = TRUE) + # # else {keep <- keep + ncin} + # print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = '')) + # } + # } + # } + ######################################################################################################### + ######################################################################################################### + + # Empty raster + r0 <- raster(xmn = -2.755, + xmx = -1.895, + ymn = 53.325, + ymx = 53.695, + res = 0.01) + + # Adding unique ID + r0[] <- 1:(dim(r0)[1]*dim(r0)[2]) + + # Subsetting Greater Manchester shapefiles + mcr_msoa <- subset(ew_msoa, parent_area_name %in% c('Bolton', 'Bury', 'Manchester', 'Oldham', 'Rochdale', + 'Salford', 'Stockport', 'Tameside', 'Trafford', 'Wigan')) + + # Converting to long lat + mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs")) + + # Extracting values + a1 <- raster::extract(r0, # Grid unique IDs + mcr_msoa, # Shapefiles + weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap + small = TRUE) # Small areas in comparison to the raster + + # Empty dataset to append to + Weights_msoa <- NULL + + # Loop for each area + for (i in 1:length(a1)){ + # Converting Weights_msoa to dataframes + tmp <- as.data.frame(a1[[i]]) + # Removing NAs + tmp <- subset(tmp, !is.na(value)) + # Reweighting the Weights_msoa after zeroes removed + tmp$weight <- tmp$weight/sum(tmp$weight) + # New Region Name + tmp$area_id <- mcr_msoa@data$area_id[i] + # Creating new dataset + if (i == 1) {Weights_msoa <- tmp} + else {Weights_msoa <- rbind(Weights_msoa, tmp)} + # Removing unecessary data + rm(tmp) + } -# Altering column names -names(Weights_msoa)[1] <- c('IDGRID') - -# Merging to skeleton dataset -pm25_emep <- NULL - -# Loop for each date -for (i in as.character(seq(as.Date('2021-01-01'), as.Date('2021-04-30'), by = 1))){ - # Loop for each time - for (j in 0:23){ - # Reading in PM25 from CAMS - r <- raster(paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) - # Renaming raster - names(r) <- 'pm25' - # Creating aggregated estimates of PM25 by MSOA - tmp1 <- r %>% - # Converting raster to dataframe - crop(r0) %>% - stack(r0) %>% - rasterToPoints() %>% - as.data.frame()%>% - # Renaming columns - dplyr::select(IDGRID = layer, pm25) %>% - # Merging on weights to aggregate - right_join(Weights_msoa, - by = 'IDGRID') %>% - # Aggregating grid to - ddply(.(area_id), - summarize, - pm25_cams_agg = weighted.mean(pm25, weight)) - # Extracting PM2.5 values at centroids - tmp1$pm25_cams_cent<- raster::extract(r, mcr_msoa@data[,c('cent_long', 'cent_lat')]) - # Adding on date and hour - tmp1 <- tmp1 %>% - mutate(hour = j, - date = i) %>% - # Outputting datasets - dplyr::select(area_id, date, hour, pm25_cams_cent, pm25_cams_agg) - # Appending together - pm25_emep <- rbind(pm25_emep, tmp1) - # Removing uncessary datasets - rm(tmp1) - # Printing index - print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) + # Altering column names + names(Weights_msoa)[1] <- c('IDGRID') + + # Merging to skeleton dataset + pm25_emep <- NULL + + # Loop for each date + for (i in as.character(seq(as.Date('2021-01-01'), as.Date('2021-04-30'), by = 1))){ + # Loop for each time + for (j in 0:23){ + # Reading in PM25 from CAMS + r <- raster(paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) + # Renaming raster + names(r) <- 'pm25' + # Creating aggregated estimates of PM25 by MSOA + tmp1 <- r %>% + # Converting raster to dataframe + crop(r0) %>% + stack(r0) %>% + rasterToPoints() %>% + as.data.frame()%>% + # Renaming columns + dplyr::select(IDGRID = layer, pm25) %>% + # Merging on weights to aggregate + right_join(Weights_msoa, + by = 'IDGRID') %>% + # Aggregating grid to + ddply(.(area_id), + summarize, + pm25_cams_agg = weighted.mean(pm25, weight)) + # Extracting PM2.5 values at centroids + tmp1$pm25_cams_cent<- raster::extract(r, mcr_msoa@data[,c('cent_long', 'cent_lat')]) + # Adding on date and hour + tmp1 <- tmp1 %>% + mutate(hour = j, + date = i) %>% + # Outputting datasets + dplyr::select(area_id, date, hour, pm25_cams_cent, pm25_cams_agg) + # Appending together + pm25_emep <- rbind(pm25_emep, tmp1) + # Removing uncessary datasets + rm(tmp1) + # Printing index + print(paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) + } } -} -# Save aurn data -save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData")) + # Save aurn data + save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData")) +} diff --git a/Code/CaseStudy2/cli.R b/Code/CaseStudy2/cli.R index ecd041a..3810464 100644 --- a/Code/CaseStudy2/cli.R +++ b/Code/CaseStudy2/cli.R @@ -8,7 +8,11 @@ cli <- function() { help = "DIMEX processing step"), make_option(c("--prefix"), default = "~/Dropbox/Github/SPFFinalReport", - help = "top-level directory containing Code/ and Data/") + help = "top-level directory containing Code/ and Data/"), + make_option(c("--output-dir"), + dest="output_dir", + default = "Data/Processed/PM25", + help = "Output directory") )) %>% parse_args %>% validate_args @@ -51,7 +55,7 @@ script_name <- function(step) { } # Main program -main <- function() { +main_wrapper <- function() { opts <- cli() # Working directory @@ -59,7 +63,10 @@ main <- function() { # Run particular step source(here("Code", "CaseStudy2", script_name(opts$step))) + + # Run the loaded main program + main(opts$output_dir) } -main() \ No newline at end of file +main_wrapper() \ No newline at end of file From ec414581f52f61b5e6e2aa8ff520655affd76726 Mon Sep 17 00:00:00 2001 From: andrewgryan Date: Tue, 24 Jan 2023 16:51:42 +0000 Subject: [PATCH 3/7] pass prefix directory to main program --- Code/CaseStudy2/1d_DataPrep_PM25.R | 8 ++++---- Code/CaseStudy2/cli.R | 6 ++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R index 374f7a1..af993b1 100644 --- a/Code/CaseStudy2/1d_DataPrep_PM25.R +++ b/Code/CaseStudy2/1d_DataPrep_PM25.R @@ -6,12 +6,12 @@ library(here) source(here("Code", "CaseStudy2", "0_Source.R")) # TODO: Remove dependence of calling directory location -main <- function(output_dir) { - print(output_dir) - stop("DEBUG") +main <- function(output_dir, prefix_dir) { # Loading shapefiles - load("Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData") + load(file.path(prefix_dir, "Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData")) + + stop("DEBUG") ########################################## ### Preparing PM data from CAMS-Europe ### diff --git a/Code/CaseStudy2/cli.R b/Code/CaseStudy2/cli.R index 3810464..841e686 100644 --- a/Code/CaseStudy2/cli.R +++ b/Code/CaseStudy2/cli.R @@ -58,14 +58,12 @@ script_name <- function(step) { main_wrapper <- function() { opts <- cli() - # Working directory - setwd(opts$prefix) - # Run particular step source(here("Code", "CaseStudy2", script_name(opts$step))) # Run the loaded main program - main(opts$output_dir) + main(output_dir = opts$output_dir, + prefix_dir = opts$prefix) } From eaa5f23f5188563f5d31d5f667f60ffa104ad51a Mon Sep 17 00:00:00 2001 From: andrewgryan Date: Tue, 24 Jan 2023 17:05:27 +0000 Subject: [PATCH 4/7] make directory paths absolute and dry --- Code/CaseStudy2/1d_DataPrep_PM25.R | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R index af993b1..7e1fe2e 100644 --- a/Code/CaseStudy2/1d_DataPrep_PM25.R +++ b/Code/CaseStudy2/1d_DataPrep_PM25.R @@ -8,8 +8,11 @@ source(here("Code", "CaseStudy2", "0_Source.R")) # TODO: Remove dependence of calling directory location main <- function(output_dir, prefix_dir) { + processed_dir = file.path(prefix_dir, "Data/CaseStudy2/Processed") + ground_monitors_dir = file.path(prefix_dir, "Data/CaseStudy2/Raw/PM25/GroundMonitors") + # Loading shapefiles - load(file.path(prefix_dir, "Data/CaseStudy2/Processed/Shapefiles/shapefiles.RData")) + load(file.path(processed_dir, "Shapefiles/shapefiles.RData")) stop("DEBUG") @@ -143,7 +146,8 @@ main <- function(output_dir, prefix_dir) { # Loop for each time for (j in 0:23){ # Reading in PM25 from CAMS - r <- raster(paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) + file_name <- paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '') + r <- raster(file.path(processed_dir, 'PM25/CAMS-Europe', file_name)) # Renaming raster names(r) <- 'pm25' # Creating aggregated estimates of PM25 by MSOA @@ -193,12 +197,12 @@ main <- function(output_dir, prefix_dir) { mcr_msoa <- spTransform(mcr_msoa, CRS("+proj=longlat +datum=WGS84 +no_defs")) # Reading in LTN network data - tmp1 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/BroomLane_AQ_hourly.csv') - tmp2 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/CromwellAQ_hourly.csv') - tmp3 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/DelamereRoad_AQ_hourly.csv') - tmp4 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/GrangethorpeAQ_2b_hourly.csv') - tmp5 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/ManorRad_AQ_hourly.csv') - tmp6 <- read.csv('Data/CaseStudy2/Raw/PM25/GroundMonitors/SladeLane_AQ_hourly.csv') + tmp1 <- read.csv(file.path(ground_monitors_dir, 'BroomLane_AQ_hourly.csv')) + tmp2 <- read.csv(file.path(ground_monitors_dir, 'CromwellAQ_hourly.csv')) + tmp3 <- read.csv(file.path(ground_monitors_dir, 'DelamereRoad_AQ_hourly.csv')) + tmp4 <- read.csv(file.path(ground_monitors_dir, 'GrangethorpeAQ_2b_hourly.csv')) + tmp5 <- read.csv(file.path(ground_monitors_dir, 'ManorRad_AQ_hourly.csv')) + tmp6 <- read.csv(file.path(ground_monitors_dir, 'SladeLane_AQ_hourly.csv')) # Adding Longitude tmp1$latitude <- 53.441161 @@ -542,7 +546,8 @@ main <- function(output_dir, prefix_dir) { # Loop for each time for (j in 0:23){ # Reading in PM25 from CAMS - r <- raster(paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '')) + file_name <- paste('PM25_', i, '-', sprintf("%02d", j), "00.tif", sep = '') + r <- raster(file.path(processed_dir, 'PM25/EMEP', file_name)) # Renaming raster names(r) <- 'pm25' # Creating aggregated estimates of PM25 by MSOA From eb050cb064ce123374a51ab42f24bb03b86affe2 Mon Sep 17 00:00:00 2001 From: andrewgryan Date: Tue, 24 Jan 2023 17:10:18 +0000 Subject: [PATCH 5/7] undo changes to commented legacy code --- Code/CaseStudy2/1d_DataPrep_PM25.R | 153 +++++++++++++++-------------- 1 file changed, 77 insertions(+), 76 deletions(-) diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R index 7e1fe2e..6e099dc 100644 --- a/Code/CaseStudy2/1d_DataPrep_PM25.R +++ b/Code/CaseStudy2/1d_DataPrep_PM25.R @@ -5,6 +5,83 @@ library(here) source(here("Code", "CaseStudy2", "0_Source.R")) +########################################## +### Preparing PM data from CAMS-Europe ### +########################################## +################### The following code takes a while to run, so population estimates ################### +################### were run separately and saved to a raster (found underneath) ################### +# # Empty raster +# r <- raster(xmn = -10, +# xmx = 3, +# ymn = 49, +# ymx = 62, +# res = 0.1) +# +# # Adding unique ID +# r[] <- 1:(dim(r)[1]*dim(r)[2]) +# +# # Empty dataset to append to +# weights <- NULL +# +# # Extracting values +# a1 <- raster::extract(r, # Grid unique IDs +# uk_full, # Shapefiles +# weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap +# small = TRUE) # Small areas in comparison to the raster +# +# # Setting to missing if not in UK +# r[!(r[] %in% a1[[1]])] <- NA +# +# # file names +# files <- c('Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20201201-20210531.nc', +# 'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20210601-20211130.nc', +# 'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20211201-20220430.nc') +# +# # files start dates +# start_date <- c(as.Date('2020-12-01'), +# as.Date('2021-06-01'), +# as.Date('2021-12-01')) +# +# # Loop for each file +# for (i in 1:length(files)){ +# # Opening raster +# ncin <- raster(files[i], +# band = 1, +# verbose = FALSE, +# stopIfNotEqualSpaced = FALSE) +# # Getting the number of days +# N_days <- floor(nbands(ncin)/24) +# # Dates +# Dates <- start_date[i] + (1:N_days) - 1 +# # Loop for each day in the year +# for (j in 1:N_days){ +# # Getting date +# date <- Dates[j] +# # Looping for each hour in the day +# for (k in (24*(j-1)+1):(24*j)){ +# # Opening raster +# ncin <- raster(files[i], +# band = k, +# verbose = FALSE, +# stopIfNotEqualSpaced = FALSE) +# # # Cropping for the UK +# # ncin <- crop(ncin, extent(-10, 3, 49, 62)) +# extent(ncin) <- c(-10, 3, 49, 62) +# # Setting to missing if not in UK +# ncin[is.na(r[])] <- NA +# # Saving raster +# writeRaster(ncin, +# filename = paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''), +# overwrite = TRUE) +# # else {keep <- keep + ncin} +# print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = '')) +# } +# } +# } +######################################################################################################### +######################################################################################################### + + # TODO: Remove dependence of calling directory location main <- function(output_dir, prefix_dir) { @@ -16,82 +93,6 @@ main <- function(output_dir, prefix_dir) { stop("DEBUG") - ########################################## - ### Preparing PM data from CAMS-Europe ### - ########################################## - ################### The following code takes a while to run, so population estimates ################### - ################### were run separately and saved to a raster (found underneath) ################### - # # Empty raster - # r <- raster(xmn = -10, - # xmx = 3, - # ymn = 49, - # ymx = 62, - # res = 0.1) - # - # # Adding unique ID - # r[] <- 1:(dim(r)[1]*dim(r)[2]) - # - # # Empty dataset to append to - # weights <- NULL - # - # # Extracting values - # a1 <- raster::extract(r, # Grid unique IDs - # uk_full, # Shapefiles - # weight = TRUE, # Give us Weights of the cells so we can do a weighted average of the cells we overlap - # small = TRUE) # Small areas in comparison to the raster - # - # # Setting to missing if not in UK - # r[!(r[] %in% a1[[1]])] <- NA - # - # # file names - # files <- c('Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20201201-20210531.nc', - # 'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20210601-20211130.nc', - # 'Data/CaseStudy2/Raw/PM25/CAMS-Europe/CAMSEurope_20211201-20220430.nc') - # - # # files start dates - # start_date <- c(as.Date('2020-12-01'), - # as.Date('2021-06-01'), - # as.Date('2021-12-01')) - # - # # Loop for each file - # for (i in 1:length(files)){ - # # Opening raster - # ncin <- raster(files[i], - # band = 1, - # verbose = FALSE, - # stopIfNotEqualSpaced = FALSE) - # # Getting the number of days - # N_days <- floor(nbands(ncin)/24) - # # Dates - # Dates <- start_date[i] + (1:N_days) - 1 - # # Loop for each day in the year - # for (j in 1:N_days){ - # # Getting date - # date <- Dates[j] - # # Looping for each hour in the day - # for (k in (24*(j-1)+1):(24*j)){ - # # Opening raster - # ncin <- raster(files[i], - # band = k, - # verbose = FALSE, - # stopIfNotEqualSpaced = FALSE) - # # # Cropping for the UK - # # ncin <- crop(ncin, extent(-10, 3, 49, 62)) - # extent(ncin) <- c(-10, 3, 49, 62) - # # Setting to missing if not in UK - # ncin[is.na(r[])] <- NA - # # Saving raster - # writeRaster(ncin, - # filename = paste('Data/CaseStudy2/Processed/PM25/CAMS-Europe/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''), - # overwrite = TRUE) - # # else {keep <- keep + ncin} - # print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = '')) - # } - # } - # } - ######################################################################################################### - ######################################################################################################### - # Empty raster r0 <- raster(xmn = -2.8, xmx = -1.8, From dd83e9950ac9be333b7f5264ad6ab5ac453faf14 Mon Sep 17 00:00:00 2001 From: andrewgryan Date: Tue, 24 Jan 2023 17:23:19 +0000 Subject: [PATCH 6/7] begin to partition preparation into separate functions --- Code/CaseStudy2/1d_DataPrep_PM25.R | 109 ++++++++++++++++------------- 1 file changed, 61 insertions(+), 48 deletions(-) diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R index 6e099dc..b74f721 100644 --- a/Code/CaseStudy2/1d_DataPrep_PM25.R +++ b/Code/CaseStudy2/1d_DataPrep_PM25.R @@ -93,6 +93,13 @@ main <- function(output_dir, prefix_dir) { stop("DEBUG") + # TODO: allow user to select particular data source + prepare_cams() + prepare_aurn() + prepare_emep() +} + +prepare_cams <- function() { # Empty raster r0 <- raster(xmn = -2.8, xmx = -1.8, @@ -186,6 +193,10 @@ main <- function(output_dir, prefix_dir) { # Save cams save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData")) + +} + +prepare_aurn <- function() { ############################################## ### Preparing PM data from ground monitors ### @@ -443,56 +454,58 @@ main <- function(output_dir, prefix_dir) { # Save aurn data save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData")) +} - ################################### - ### Preparing PM data from EMEP ### - ################################### - ################### The following code takes a while to run, so population estimates ################### - ################### were run separately and saved to a raster (found underneath) ################### - # # file names - # files <- c("Data/CaseStudy2/Raw/PM25/EMEP/pm25_janfeb2021_regrid.nc", - # "Data/CaseStudy2/Raw/PM25/EMEP/pm25_marapr2021_regrid.nc") - # - # # files start dates - # start_date <- c(as.Date('2021-01-01'), - # as.Date('2021-03-01')) - # - # # Loop for each file - # for (i in 1:length(files)){ - # # Opening raster to get number of days - # ncin <- raster(files[i], - # band = 1, - # verbose = FALSE, - # stopIfNotEqualSpaced = FALSE) - # # Getting the number of days - # N_days <- floor(nbands(ncin)/24) - # # Dates - # Dates <- start_date[i] + (1:N_days) - 1 - # # Loop for each day in the year - # for (j in 1:N_days){ - # # Getting date - # date <- Dates[j] - # # Looping for each hour in the day - # for (k in (24*(j-1)+1):(24*j)){ - # # Opening raster - # ncin <- raster(files[i], - # band = k, - # verbose = FALSE, - # stopIfNotEqualSpaced = FALSE) - # # Cropping for the UK - # ncin <- crop(ncin, extent(-2.755, -1.895, 53.325, 53.695)) - # # Saving raster - # writeRaster(ncin, - # filename = paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''), - # overwrite = TRUE) - # # else {keep <- keep + ncin} - # print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = '')) - # } - # } - # } - ######################################################################################################### - ######################################################################################################### +################################### +### Preparing PM data from EMEP ### +################################### +################### The following code takes a while to run, so population estimates ################### +################### were run separately and saved to a raster (found underneath) ################### +# # file names +# files <- c("Data/CaseStudy2/Raw/PM25/EMEP/pm25_janfeb2021_regrid.nc", +# "Data/CaseStudy2/Raw/PM25/EMEP/pm25_marapr2021_regrid.nc") +# +# # files start dates +# start_date <- c(as.Date('2021-01-01'), +# as.Date('2021-03-01')) +# +# # Loop for each file +# for (i in 1:length(files)){ +# # Opening raster to get number of days +# ncin <- raster(files[i], +# band = 1, +# verbose = FALSE, +# stopIfNotEqualSpaced = FALSE) +# # Getting the number of days +# N_days <- floor(nbands(ncin)/24) +# # Dates +# Dates <- start_date[i] + (1:N_days) - 1 +# # Loop for each day in the year +# for (j in 1:N_days){ +# # Getting date +# date <- Dates[j] +# # Looping for each hour in the day +# for (k in (24*(j-1)+1):(24*j)){ +# # Opening raster +# ncin <- raster(files[i], +# band = k, +# verbose = FALSE, +# stopIfNotEqualSpaced = FALSE) +# # Cropping for the UK +# ncin <- crop(ncin, extent(-2.755, -1.895, 53.325, 53.695)) +# # Saving raster +# writeRaster(ncin, +# filename = paste('Data/CaseStudy2/Processed/PM25/EMEP/PM25_', date, '-', sprintf("%02d", k %% 24), "00.tif", sep = ''), +# overwrite = TRUE) +# # else {keep <- keep + ncin} +# print(paste(date, '-', sprintf("%02d", (k - 1) %% 24), "00", sep = '')) +# } +# } +# } +######################################################################################################### +######################################################################################################### +prepare_emep <- function(ew_msoa, mcr_msoa, output_dir, processed_dir) { # Empty raster r0 <- raster(xmn = -2.755, xmx = -1.895, From 8f2da805f9540636b43a7000dbb18c73ab7c4c2b Mon Sep 17 00:00:00 2001 From: andrewgryan Date: Thu, 26 Jan 2023 09:28:36 +0000 Subject: [PATCH 7/7] wip --- Code/CaseStudy2/1d_DataPrep_PM25.R | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/Code/CaseStudy2/1d_DataPrep_PM25.R b/Code/CaseStudy2/1d_DataPrep_PM25.R index b74f721..f39aeee 100644 --- a/Code/CaseStudy2/1d_DataPrep_PM25.R +++ b/Code/CaseStudy2/1d_DataPrep_PM25.R @@ -94,9 +94,18 @@ main <- function(output_dir, prefix_dir) { stop("DEBUG") # TODO: allow user to select particular data source - prepare_cams() - prepare_aurn() - prepare_emep() + + # Save cams + pm25_cams <- prepare_cams() + save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData")) + + # Save aurn data + pm25_gm <- prepare_aurn() + save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData")) + + # Save aurn data + pm25_emep <- prepare_emep() + save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData")) } prepare_cams <- function() { @@ -191,9 +200,7 @@ prepare_cams <- function() { } } - # Save cams - save(pm25_cams, file = file.path(output_dir, "pm25_cams.RData")) - + pm25_cams } prepare_aurn <- function() { @@ -452,8 +459,7 @@ prepare_aurn <- function() { rm(tmp1, tmp2, tmp3, aurn_dat, gm_dat, ltn_dat, mcr_msoa, stations_aurn_dat, stations_gm_dat, stations_ltn_dat) - # Save aurn data - save(pm25_gm, file = file.path(output_dir, "pm25_gm.RData")) + pm25_gm } ################################### @@ -505,7 +511,7 @@ prepare_aurn <- function() { ######################################################################################################### ######################################################################################################### -prepare_emep <- function(ew_msoa, mcr_msoa, output_dir, processed_dir) { +prepare_emep <- function() { # Empty raster r0 <- raster(xmn = -2.755, xmx = -1.895, @@ -597,6 +603,5 @@ prepare_emep <- function(ew_msoa, mcr_msoa, output_dir, processed_dir) { } } - # Save aurn data - save(pm25_emep, file = file.path(output_dir, "pm25_emep.RData")) + pm25_emep }