diff --git a/.gitignore b/.gitignore index d04995b..0d16011 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ vignettes/*.R vignettes/*.html concept-abbre.csv dev/ +.lintr diff --git a/R/oa_fetch.R b/R/oa_fetch.R index 40ceaec..f71a78b 100644 --- a/R/oa_fetch.R +++ b/R/oa_fetch.R @@ -98,9 +98,9 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten if (!is.null(options$sample) && (options$sample > per_page)) { paging <- "page" - } else if (!is.null(options$page)){ + } else if (!is.null(options$page)) { paging <- "page" - } else if (is.null(paging)){ + } else if (is.null(paging)) { paging <- "cursor" } @@ -171,6 +171,24 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten #' See https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging. #' @param pages Integer vector. #' The range of pages to return. If NULL, return all pages. +#' @param output_pages_to Character. +#' If NULL, the individual pages will be downloaded and processed in memory. +#' If not NULL, the individual pages +#' downloaded will be saved in the directory specified by `output_pages_to`. +#' The directory will be created if it does not exist. +#' **The function will overwrite existing files in the directory without +#' warning!** +#' Defaults to NULL. +#' @param pages_save_function Function. +#' The function which will be used to save the individual pages if +#' `output_pages_to` is set. This function has to take at least two arguments: +#' - the object to save (which will be the page returned in the same format +#' as returned by the function `oa_request()`) +#' - the file name where to save it to (which is +#' `file.path(output_pages_to, paste0("page_", i, ".rds"))`). +#' This function can be used for example to save the results in a database or +#' a different format than `.rds`. +#' Defaults to `saveRDS`. #' @param count_only Logical. #' If TRUE, the function returns only the number of item matching the query. 
#' Defaults to FALSE. @@ -181,7 +199,9 @@ oa_fetch <- function(entity = if (is.null(identifier)) NULL else id_type(shorten #' @param verbose Logical. #' If TRUE, print information about the querying process. Defaults to TRUE. #' -#' @return a data.frame or a list of bibliographic records. +#' @return a data.frame or a list of bibliographic records. If `output_pages_to` is +#' not NULL a character vector containing the names of the saved pages +#' is returned. #' #' For more extensive information about OpenAlex API, please visit: #' @@ -312,6 +332,8 @@ oa_request <- function(query_url, per_page = 200, paging = "cursor", pages = NULL, + output_pages_to = NULL, + pages_save_function = saveRDS, count_only = FALSE, mailto = oa_email(), api_key = oa_apikey(), @@ -379,6 +401,15 @@ oa_request <- function(query_url, # Setting items per page query_ls[["per-page"]] <- per_page + # Setup output_pages_to if not NULL + if (!is.null(output_pages_to)) { + output_pages_to <- normalizePath(output_pages_to, mustWork = FALSE) + if (!dir.exists(output_pages_to)) { + dir.create(output_pages_to) + } + result <- character(n_pages) + } + # Activation of cursor pagination data <- vector("list", length = n_pages) res <- NULL @@ -388,10 +419,27 @@ oa_request <- function(query_url, next_page <- get_next_page(paging, i, res) query_ls[[paging]] <- next_page res <- api_request(query_url, ua, query = query_ls) - if (!is.null(res$results)) data[[i]] <- res$results + next_page <- get_next_page(paging, i + 1, res) + if (!is.null(output_pages_to)) { + fn <- file.path(output_pages_to, paste0("page_", i, ".rds")) + pages_save_function( + unlist( + res$results, + recursive = FALSE + ), + file.path(output_pages_to, paste0("page_", i, ".rds")) + ) + result[[i]] <- fn + } else { + if (!is.null(res$results)) data[[i]] <- res$results + } } - unlist(data, recursive = FALSE) + if (is.null(output_pages_to)) { + return(unlist(data, recursive = FALSE)) + } else { + return(result) + } } get_next_page <- 
function(paging, i, res = NULL) { diff --git a/man/oa_request.Rd b/man/oa_request.Rd index 3a7c5fc..0ad1b06 100644 --- a/man/oa_request.Rd +++ b/man/oa_request.Rd @@ -9,6 +9,8 @@ oa_request( per_page = 200, paging = "cursor", pages = NULL, + output_pages_to = NULL, + pages_save_function = saveRDS, count_only = FALSE, mailto = oa_email(), api_key = oa_apikey(), @@ -34,6 +36,26 @@ See https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/paging.} \item{pages}{Integer vector. The range of pages to return. If NULL, return all pages.} +\item{output_pages_to}{Character. +If NULL, the individual pages will be downloaded and processed in memory. +If not NULL, the individual pages +downloaded will be saved in the directory specified by `output_pages_to`. +The directory will be created if it does not exist. +**The function will overwrite existing files in the directory without +warning!** +Defaults to NULL.} + +\item{pages_save_function}{Function. +The function which will be used to save the individual pages if +`output_pages_to` is set. This function has to take at least two arguments: + - the object to save (which will be the page returned in the same format + as returned by the function `oa_request()`) + - the file name where to save it to (which is + `file.path(output_pages_to, paste0("page_", i, ".rds"))`). +This function can be used for example to save the results in a database or +a different format than `.rds`. +Defaults to `saveRDS`.} + \item{count_only}{Logical. If TRUE, the function returns only the number of item matching the query. Defaults to FALSE.} @@ -48,7 +70,9 @@ Your OpenAlex Premium API key, if available.} If TRUE, print information about the querying process. Defaults to TRUE.} } \value{ -a data.frame or a list of bibliographic records. +a data.frame or a list of bibliographic records. If `output_pages_to` is +not NULL a character vector containing the names of the saved pages +is returned. 
For more extensive information about OpenAlex API, please visit: