Skip to content

Commit

Permalink
Merge pull request #45 from panukatan/dev
Browse files Browse the repository at this point in the history
create functions for getting bulletins; fix #42
  • Loading branch information
ernestguevarra authored Aug 19, 2024
2 parents 361da9f + 4322bce commit 180061b
Show file tree
Hide file tree
Showing 17 changed files with 406 additions and 138 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ Imports:
httr,
rlang,
rvest,
stringr
stringr,
tibble
Suggests:
spelling,
testthat (>= 3.0.0)
Expand Down
11 changes: 9 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

export(eq_build_url)
export(eq_data)
export(eq_get_links)
export(eq_get_links_)
export(eq_get_bulletin)
export(eq_get_bulletin_urls)
export(eq_get_bulletin_urls_)
export(eq_get_bulletins)
export(eq_get_table)
export(eq_process_bulletins)
export(eq_process_table)
importFrom(dplyr,across)
importFrom(dplyr,bind_rows)
importFrom(dplyr,everything)
importFrom(dplyr,mutate)
Expand All @@ -17,6 +21,9 @@ importFrom(rlang,.data)
importFrom(rvest,html_table)
importFrom(rvest,session)
importFrom(stringr,str_detect)
importFrom(stringr,str_remove)
importFrom(stringr,str_remove_all)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_to_title)
importFrom(tibble,tibble)
105 changes: 0 additions & 105 deletions R/eq_get.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#'
#' @examples
#' eq_get_table()
#' eq_get_links()
#'
#' @rdname eq_get
#' @export
Expand Down Expand Up @@ -71,107 +70,3 @@ eq_get_table <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
}
)
}

#'
#' @rdname eq_get
#' @export
#'

eq_get_links_ <- function(.url) {
## Detect year and month from URL ----
.year <- stringr::str_extract(string = .url, pattern = "[0-9]{4}") |>
as.integer()
.month <- stringr::str_extract(
string = .url, pattern = paste(month.name, collapse = "|")
)

## Quiet down error on SSL ----
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Retrieve links ----
if (.year == 2018 & .month %in% month.name[seq_len(5)]) {
rvest::session(.url) |>
rvest::html_elements(css = "tr td .auto-style49 a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(string = x, pattern = "^../../")
)
}
)()
} else {
rvest::session(.url) |>
rvest::html_elements(css = ".auto-style91 a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(
string = x, pattern = "^../../|\\\\..\\\\..\\\\"
) |>
stringr::str_replace_all(pattern = "\\\\", replacement = "/")
)
}
)()
}
}

#'
#' @rdname eq_get
#' @export
#'

eq_get_links <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
.year = NULL, .month = NULL, latest = TRUE) {
## Build URLs ----
if (is.null(.year) & is.null(.month)) {
if (latest) {
urls <- .url
} else {
urls <- eq_build_url(.url = .url, .year = .year, .month = .month)
}
} else {
urls <- eq_build_url(.url = .url, .year = .year, .month = .month)
}

## Quiet down error on SSL ----
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Retrieve and structure data ----
lapply(
X = urls,
FUN = eq_get_links_
) |>
unlist()

# url_list_names <- urls |>
# (\(x)
# {
# paste(
# stringr::str_extract(
# string = x, pattern = paste(month.name, collapse = "|")
# ),
# stringr::str_extract(
# string = x, pattern = "[0-9]{4}"
# )
# )
# }
# )() |>
# (\(x)ifelse(x == "NA NA", format(Sys.Date(), format = "%B %Y"), x))()
#
# names(url_list) <- url_list_names
#
# url_list
}

#'
#' @rdname eq_get
#' @export
#'
102 changes: 102 additions & 0 deletions R/eq_get_bulletin_urls.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#'
#' Get PHIVOLCS earthquake information bulletins
#'
#' @param .url Base URL for PHIVOLCS earthquake bulletins.
#' @param .year A vector for year (in YYYY format) for which earthquake
#' bulletins are to be retrieved. The earliest year that can be specified is
#' 2018. If set to NULL (default), all years starting from 2018 to present
#' year are used.
#' @param .month A vector for month for which earthquake bulletins are
#' to be retrieved. This can be set as either an integer index (1 for January)
#' or abbreviation (Jan for January) for full name. If set to NULL (default),
#' all months are used.
#' @param latest Logical. Should the latest table of earthquake information be
#' retrieved? Only evaluated if `.year = NULL` and `.month = NULL`. If TRUE
#' (default), table of earthquake information for current year and current
#' month is retrieved. Otherwise, all months for all possible years are
#' retrieved.
#'
#' @returns A character vector of URLs for PHIVOLCS earthquake information
#' bulletins.
#'
#' @examples
#' eq_get_bulletin_urls()
#'
#' @rdname eq_get_bulletin_urls
#' @export
#'

eq_get_bulletin_urls <- function(.url = "https://earthquake.phivolcs.dost.gov.ph/",
.year = NULL, .month = NULL, latest = TRUE) {
## Build URLs ----
if (is.null(.year) & is.null(.month)) {
if (latest) {
urls <- .url
} else {
urls <- eq_build_url(.url = .url, .year = .year, .month = .month)
}
} else {
urls <- eq_build_url(.url = .url, .year = .year, .month = .month)
}

## Quiet down error on SSL ----
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Retrieve URLs ----
lapply(
X = urls,
FUN = eq_get_bulletin_urls_
) |>
unlist()
}

#'
#' @rdname eq_get_bulletin_urls
#' @export
#'

eq_get_bulletin_urls_ <- function(.url) {
## Detect year and month from URL ----
.year <- stringr::str_extract(string = .url, pattern = "[0-9]{4}") |>
as.integer()
.month <- stringr::str_extract(
string = .url, pattern = paste(month.name, collapse = "|")
)

## Quiet down error on SSL ----
httr::config(ssl_verifypeer = 0L) |>
httr::set_config()

## Retrieve links ----
if (.year == 2018 & .month %in% month.name[seq_len(5)]) {
rvest::session(.url) |>
rvest::html_elements(css = "tr td .auto-style49 a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(string = x, pattern = "^../../")
)
}
)()
} else {
rvest::session(.url) |>
rvest::html_elements(css = ".auto-style91 a") |>
rvest::html_attr(name = "href") |>
(\(x)
{
file.path(
"https:/",
stringr::str_split_fixed(.url, pattern = "/", n = 4)[ , 3],
stringr::str_remove_all(
string = x, pattern = "^../../|\\\\..\\\\..\\\\"
) |>
stringr::str_replace_all(pattern = "\\\\", replacement = "/")
)
}
)()
}
}
59 changes: 59 additions & 0 deletions R/eq_get_bulletins.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#'
#' Retrieve raw information from PHIVOLCS earthquake information bulletins
#'
#' @param .url A character value or vector of values for PHIVOLCS earthquake
#' information bulletins
#'
#' @returns A tibble of earthquake information from PHIVOLCS bulletins
#'
#' @examples
#' urls <- eq_get_bulletin_urls()
#' eq_get_bulletins(urls[1:3])
#'
#' @rdname eq_get_bulletin
#' @export
#'

eq_get_bulletin <- function(.url) {
rvest::session(url = .url) |>
rvest::html_table() |>
(\(x) x[[1]])() |>
(\(x)
{
tibble::tibble(
date_time = x[2, 4],
bulletin_number = stringr::str_extract(
string = x[1, 1], pattern = "[0-9]{1,}"
),
depth = x[2, 8],
magnitude = x[2, 12],
location = x[2, 6],
origin = x[2, 10],
reported_intensity = x[9, 3],
expect_damage = x[11, 4],
expect_aftershocks = x[11, 6],
date_time_issued = x[11, 8],
prepared_by = x[11, 10]
) |>
dplyr::mutate(
dplyr::across(
.cols = dplyr::everything(),
.fns = ~simplify_vectors(.x)
)
)
}
)()
}

#'
#' @rdname eq_get_bulletin
#' @export
#'

eq_get_bulletins <- function(.url) {
lapply(
X = .url,
FUN = eq_get_bulletin
) |>
dplyr::bind_rows()
}
50 changes: 50 additions & 0 deletions R/eq_process_bulletins.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#'
#' Process earthquake information from PHIVOLCS bulletins
#'
#' @param eq_df A tibble of earthquake information from PHIVOLCS bulletins
#' retrieved using `eq_get_bulletins()`.
#'
#' @returns A tibble of processed earthquake information from PHIVOLCS
#' bulletins.
#'
#' @examples
#' urls <- eq_get_bulletin_urls()
#' eq_get_bulletins(urls[1:3]) |>
#' eq_process_bulletins()
#'
#' @rdname eq_process_bulletin
#' @export
#'

eq_process_bulletins <- function(eq_df) {
eq_df |>
dplyr::mutate(
date_time = strptime(
.data$date_time, format = "%d %B %Y - %I:%M:%S %p", tz = "PST"
),
bulletin_number = as.integer(.data$bulletin_number),
longitude = get_longitude(.data$location),
latitude = get_latitude(.data$location),
depth = as.integer(.data$depth),
magnitude = stringr::str_remove_all(
string = .data$magnitude, pattern = "Ms "
) |>
as.numeric(),
location = get_location(.data$location),
origin = tolower(.data$origin),
reported_intensity = ifelse(
.data$reported_intensity == "", NA_character_, .data$reported_intensity
),
expect_damage = tolower(.data$expect_damage),
expect_aftershocks = tolower(.data$expect_aftershocks),
date_time_issued = strptime(
.data$date_time_issued, format = "%d %B %Y - %I:%M %p", tz = "PST"
)
) |>
dplyr::select(
.data$date_time, .data$bulletin_number, .data$longitude, .data$latitude,
.data$depth, .data$magnitude, .data$reported_intensity, .data$location,
.data$origin, .data$expect_damage, .data$expect_aftershocks,
.data$date_time_issued, .data$prepared_by
)
}
3 changes: 3 additions & 0 deletions R/lindol-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@
#' @importFrom httr config
#' @importFrom rlang .data
#' @importFrom dplyr mutate relocate rename_with bind_rows select everything
#' across
#' @importFrom stringr str_to_title str_remove_all str_replace_all str_detect
#' str_remove str_replace
#' @importFrom rvest session html_table
#' @importFrom tibble tibble
#'
"_PACKAGE"
Loading

0 comments on commit 180061b

Please sign in to comment.