diff --git a/NAMESPACE b/NAMESPACE index 6e1274b..fe48ca3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,8 @@ # Generated by roxygen2: do not edit by hand export(eq_build_url) -export(eq_data) +export(eq_data_bulletin) +export(eq_data_summary) export(eq_get_bulletin) export(eq_get_bulletin_urls) export(eq_get_bulletin_urls_) diff --git a/R/eq_build.R b/R/eq_build.R index 531e556..fef192d 100644 --- a/R/eq_build.R +++ b/R/eq_build.R @@ -1,7 +1,7 @@ #' -#' Build URLs for specific earthquake bulletins +#' Build URLs for specific PHIVOLCS earthquake bulletin summaries #' -#' @param .url Base URL for PHIVOLCS earthquake bulletins. +#' @param .url Base URL for PHIVOLCS earthquake bulletin summaries. #' @param .year A vector for year (in YYYY format) for which earthquake #' bulletins are to be retrieved. The earliest year that can be specified is #' 2018. If set to NULL (default), all years starting from 2018 to present diff --git a/R/eq_data.R b/R/eq_data.R index c219b3c..e5f54dc 100644 --- a/R/eq_data.R +++ b/R/eq_data.R @@ -1,6 +1,7 @@ #' #' Retrieve earthquake information data from PHIVOLCS bulletins #' +#' @param .url Base URL for PHIVOLCS earthquake bulletins. #' @param .year A vector for year (in YYYY format) for which earthquake #' bulletins are to be retrieved. The earliest year that can be specified is #' 2018. If set to NULL (default), all years starting from 2018 to present @@ -14,21 +15,33 @@ #' (default), table of earthquake information for current year and current #' month is retrieved. Otherwise, all months for all possible years are #' retrieved. -#' @param simplify Logical. Should output be simplified into a data.frame or -#' tibble? Default is TRUE. Otherwise, a list of processed tibbles of -#' earthquake monitoring data. #' #' @returns A tibble of processed earthquake data. #' #' @examples -#' eq_data() +#' eq_data_summary() #' +#' @rdname eq_data #' @export #' -eq_data <- function(.year = NULL, .month = NULL, - latest = TRUE, simplify = TRUE) { +eq_data_summary <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/", + .year = NULL, .month = NULL, latest = TRUE) { ## Retrieve data tables and process ---- - eq_get_table(.year = .year, .month = .month, latest = latest) |> - eq_process_table(simplify = simplify) + eq_get_table(.url = .url, .year = .year, .month = .month, latest = latest) |> + eq_process_table(simplify = TRUE) +} + +#' +#' @rdname eq_data +#' @export +#' + +eq_data_bulletin <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/", + .year = NULL, .month = NULL, latest = TRUE) { + eq_get_bulletin_urls( + .url = .url, .year = .year, .month = .month, latest = latest + ) |> + eq_get_bulletins() |> + eq_process_bulletins() } diff --git a/R/eq_get_bulletin_urls.R b/R/eq_get_bulletin_urls.R index 52cd09b..b311efc 100644 --- a/R/eq_get_bulletin_urls.R +++ b/R/eq_get_bulletin_urls.R @@ -68,22 +68,28 @@ eq_get_bulletin_urls_ <- function(.url) { httr::config(ssl_verifypeer = 0L) |> httr::set_config() + ## Initiate HTML session ---- + .session <- rvest::session(.url) + ## Retrieve links ---- if (.year == 2018 & .month %in% month.name[seq_len(5)]) { - rvest::session(.url) |> - rvest::html_elements(css = "tr td .auto-style49 a") |> + .session |> + rvest::html_elements(css = "tr td a") |> rvest::html_attr(name = "href") |> (\(x) - { - file.path( - "https:/", - stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3], - stringr::str_remove_all(string = x, pattern = "^../../") - ) - } + { + x <- x[stringr::str_detect(string = x, pattern = "Earthquake_Information")] + file.path( + "https:/", + stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3], + stringr::str_remove_all( + string = x, pattern = "^../../|^../../../../" + ) + ) + } )() } else { - rvest::session(.url) |> + .session |> rvest::html_elements(css = ".auto-style91 a") |> rvest::html_attr(name = "href") |> (\(x) @@ -92,7 +98,7 @@ eq_get_bulletin_urls_ <- function(.url) { "https:/", stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3], stringr::str_remove_all( - string = x, pattern = "^../../|\\\\..\\\\..\\\\" + string = x, pattern = "\\.\\./|\\\\..\\\\..\\\\" ) |> stringr::str_replace_all(pattern = "\\\\", replacement = "/") ) diff --git a/R/eq_get_bulletins.R b/R/eq_get_bulletins.R index 022fa0a..ec64778 100644 --- a/R/eq_get_bulletins.R +++ b/R/eq_get_bulletins.R @@ -15,34 +15,41 @@ #' eq_get_bulletin <- function(.url) { - rvest::session(url = .url) |> - rvest::html_table() |> - (\(x) x[[1]])() |> - (\(x) - { - tibble::tibble( - date_time = x[2, 4], - bulletin_number = stringr::str_extract( - string = x[1, 1], pattern = "[0-9]{1,}" - ), - depth = x[2, 8], - magnitude = x[2, 12], - location = x[2, 6], - origin = x[2, 10], - reported_intensity = x[9, 3], - expect_damage = x[11, 4], - expect_aftershocks = x[11, 6], - date_time_issued = x[11, 8], - prepared_by = x[11, 10] - ) |> - dplyr::mutate( - dplyr::across( - .cols = dplyr::everything(), - .fns = ~simplify_vectors(.x) + ## Check URL ---- + url_error <- httr::http_error(.url) + + if (url_error) { + NULL + } else { + rvest::session(url = .url) |> + rvest::html_table() |> + (\(x) x[[1]])() |> + (\(x) + { + tibble::tibble( + date_time = x[2, 4], + bulletin_number = stringr::str_extract( + string = x[1, 1], pattern = "[0-9]{1,}" + ), + depth = x[2, 8], + magnitude = x[2, 12], + location = x[2, 6], + origin = x[2, 10], + reported_intensity = x[9, 3], + expect_damage = x[11, 4], + expect_aftershocks = x[11, 6], + date_time_issued = x[11, 8], + prepared_by = x[11, 10] + ) |> + dplyr::mutate( + dplyr::across( + .cols = dplyr::everything(), + .fns = ~simplify_vectors(.x) + ) ) - ) - } - )() + } + )() + } } #' diff --git a/R/eq_process_bulletins.R b/R/eq_process_bulletins.R index c5ea6a7..852ac07 100644 --- a/R/eq_process_bulletins.R +++ b/R/eq_process_bulletins.R @@ -26,8 +26,12 @@ eq_process_bulletins <- function(eq_df) { longitude = get_longitude(.data$location), latitude = get_latitude(.data$location), depth = as.integer(.data$depth), + magnitude_type = stringr::str_extract_all( + string = .data$magnitude, pattern = "[A-Za-z]{1,}", simplify = TRUE + ) |> + simplify_vectors(), magnitude = stringr::str_remove_all( - string = .data$magnitude, pattern = "Ms " + string = .data$magnitude, pattern = "[^0-9.-]" ) |> as.numeric(), location = get_location(.data$location), @@ -43,7 +47,8 @@ eq_process_bulletins <- function(eq_df) { ) |> dplyr::select( .data$date_time, .data$bulletin_number, .data$longitude, .data$latitude, - .data$depth, .data$magnitude, .data$reported_intensity, .data$location, + .data$depth, .data$magnitude, .data$magnitude_type, + .data$reported_intensity, .data$location, .data$origin, .data$expect_damage, .data$expect_aftershocks, .data$date_time_issued, .data$prepared_by ) diff --git a/R/utils.R b/R/utils.R index 5a0814a..e600817 100644 --- a/R/utils.R +++ b/R/utils.R @@ -33,3 +33,34 @@ get_location <- function(.location) { simplify_vectors <- function(x) { c(x) |> unname() |> unlist() } + + +## Retrieve HTML information from different CSS tags ---- + +# get_links <- function(.session) { +# c( +# .session |> +# rvest::html_elements(css = "tr td .auto-style37 a") |> +# rvest::html_attr(name = "href"), +# .session |> +# rvest::html_elements(css = "tr td .auto-style62 a") |> +# rvest::html_attr(name = "href"), +# .session |> +# rvest::html_elements(css = "tr td .auto-style57 a") |> +# rvest::html_attr(name = "href") +# .session |> +# rvest::html_elements(css = "tr td .auto-style37 a") |> +# rvest::html_attr(name = "href") +# .session |> +# rvest::html_elements(css = "tr td .auto-style12 a") |> +# rvest::html_attr(name = "href") +# .session |> +# rvest::html_elements(css = "tr td a") |> +# rvest::html_attr(name = "href") |> +# (\(x) x[stringr::str_detect(x, pattern = "Earthquake_Information")])() +# +# .session |> +# rvest::html_elements(css = "tr td .auto-style49 a") |> +# rvest::html_attr(name = "href") +# ) +# } diff --git a/man/eq_build_url.Rd b/man/eq_build_url.Rd index d49ddee..ae81ce1 100644 --- a/man/eq_build_url.Rd +++ b/man/eq_build_url.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/eq_build.R \name{eq_build_url} \alias{eq_build_url} -\title{Build URLs for specific earthquake bulletins} +\title{Build URLs for specific PHIVOLCS earthquake bulletin summaries} \usage{ eq_build_url( .url = "https://earthquake.phivolcs.dost.gov.ph/", @@ -11,7 +11,7 @@ eq_build_url( ) } \arguments{ -\item{.url}{Base URL for PHIVOLCS earthquake bulletins.} +\item{.url}{Base URL for PHIVOLCS earthquake bulletin summaries.} \item{.year}{A vector for year (in YYYY format) for which earthquake bulletins are to be retrieved. The earliest year that can be specified is @@ -27,7 +27,7 @@ all months are used.} A vector of URLs based on given \code{.year} and \code{.month} } \description{ -Build URLs for specific earthquake bulletins +Build URLs for specific PHIVOLCS earthquake bulletin summaries } \examples{ eq_build_url() diff --git a/man/eq_data.Rd b/man/eq_data.Rd index 3042638..cdd2898 100644 --- a/man/eq_data.Rd +++ b/man/eq_data.Rd @@ -1,12 +1,27 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/eq_data.R -\name{eq_data} -\alias{eq_data} +\name{eq_data_summary} +\alias{eq_data_summary} +\alias{eq_data_bulletin} \title{Retrieve earthquake information data from PHIVOLCS bulletins} \usage{ -eq_data(.year = NULL, .month = NULL, latest = TRUE, simplify = TRUE) +eq_data_summary( + .url = "https://earthquake.phivolcs.dost.gov.ph/", + .year = NULL, + .month = NULL, + latest = TRUE +) + +eq_data_bulletin( + .url = "https://earthquake.phivolcs.dost.gov.ph/", + .year = NULL, + .month = NULL, + latest = TRUE +) } \arguments{ +\item{.url}{Base URL for PHIVOLCS earthquake bulletins.} + \item{.year}{A vector for year (in YYYY format) for which earthquake bulletins are to be retrieved. The earliest year that can be specified is 2018. If set to NULL (default), all years starting from 2018 to present @@ -22,10 +37,6 @@ retrieved? Only evaluated if \code{.year = NULL} and \code{.month = NULL}. If TR (default), table of earthquake information for current year and current month is retrieved. Otherwise, all months for all possible years are retrieved.} - -\item{simplify}{Logical. Should output be simplified into a data.frame or -tibble? Default is TRUE. Otherwise, a list of processed tibbles of -earthquake monitoring data.} } \value{ A tibble of processed earthquake data. @@ -34,6 +45,6 @@ A tibble of processed earthquake data. Retrieve earthquake information data from PHIVOLCS bulletins } \examples{ -eq_data() +eq_data_summary() } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 4fc0c31..6d57ed9 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -62,5 +62,6 @@ reference: - title: Data contents: - - eq_data + - eq_data_summary + - eq_data_bulletin diff --git a/tests/testthat/test-eq_data.R b/tests/testthat/test-eq_data.R index 43a833e..89da2eb 100644 --- a/tests/testthat/test-eq_data.R +++ b/tests/testthat/test-eq_data.R @@ -1,5 +1,12 @@ # Tests for eq_data function --------------------------------------------------- test_that("eq_data function output is appropriate", { - expect_s3_class(eq_data(), "tbl") + expect_s3_class(eq_data_summary(), "tbl") }) + +#eq_df <- eq_data_bulletin(.year = 2018, .month = "January") +#eq_df <- eq_data_bulletin(.year = 2019, .month = "January") + +# test_that("eq_data function output is appropriate", { +# expect_s3_class(eq_df, "tbl") +# }) diff --git a/tests/testthat/test-eq_get_bulletin_urls.R b/tests/testthat/test-eq_get_bulletin_urls.R index 385f00b..2a1ec32 100644 --- a/tests/testthat/test-eq_get_bulletin_urls.R +++ b/tests/testthat/test-eq_get_bulletin_urls.R @@ -3,7 +3,7 @@ url_list <- eq_get_bulletin_urls() test_that("get_links function outputs are as expected", { - expect_vector(url_list, ptype = character(), size = nrow(eq_data())) + expect_vector(url_list, ptype = character(), size = nrow(eq_data_summary())) }) url_list <- eq_get_bulletin_urls(latest = FALSE) diff --git a/tests/testthat/test-eq_process_bulletins.R b/tests/testthat/test-eq_process_bulletins.R index 3113c66..5c31d7f 100644 --- a/tests/testthat/test-eq_process_bulletins.R +++ b/tests/testthat/test-eq_process_bulletins.R @@ -7,5 +7,5 @@ bulletin_df <- eq_get_bulletins(urls[1:10]) |> test_that("eq_process_bulletins output is as expected", { expect_s3_class(bulletin_df, "tbl") expect_equal(nrow(bulletin_df), 10) - expect_equal(ncol(bulletin_df), 13) + expect_equal(ncol(bulletin_df), 14) })