Skip to content

Commit

Permalink
Add api_paginate()
Browse files Browse the repository at this point in the history
  • Loading branch information
mgirlich committed Aug 15, 2023
1 parent 75de903 commit edaacf6
Showing 1 changed file with 297 additions and 0 deletions.
297 changes: 297 additions & 0 deletions R/paginate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,297 @@
api_paginate <- function(req,
limit,
results_field,
f_n_pages,
f_next_page,
max_pages = 100L,
progress = TRUE) {
check_request(req)
check_param(limit)
check_string(results_field, allow_null = TRUE)
check_function(f_n_pages)
check_function(f_next_page)
check_number_whole(max_pages)
check_bool(progress)

f_data <- if (is.null(results_field)) {
function(resp, body_parsed) {
body_parsed
}
} else {
function(resp, body_parsed) {
purrr::pluck(body_parsed, results_field)
}
}

if (is_query_param(limit)) {
req <- req_url_query(req, "{limit$name}" := limit$value)
} else if (is_body_param(limit)) {
data <- req$body$data %||% set_names(list())
data <- purrr::assign_in(data, limit$path, limit$value)
req <- req_body_json(req, data)
}

resp1 <- req_perform(req)
body_parsed <- resp_body_json(resp1)

n_pages <- min(f_n_pages(resp1, body_parsed, limit$value), max_pages)

out <- vector("list", length = n_pages)
out[[1]] <- f_data(resp1, body_parsed)

cli::cli_progress_bar(
"Paginate",
total = n_pages,
format = "{cli::pb_spin} Page {cli::pb_current}/{cli::pb_total} | ETA: {cli::pb_eta}",
current = 1L
)

for (page in seq2(2, n_pages)) {
req <- f_next_page(req, resp, body_parsed)
if (is.null(req)) {
page <- page - 1L
break
}

resp_page <- req_perform(req)

body_parsed <- resp_body_json(resp_page)
out[[page]] <- f_data(resp, body_parsed)

cli::cli_progress_update()
}
cli::cli_progress_done()

# remove unused end of `out` in case the pagination loop exits before all
# `max_pages` is reached
if (page < n_pages) {
out <- out[seq2(1, page)]
}

out
}

api_paginate_next_url <- function(req,
next_field,
results_field = NULL,
limit = NULL,
total_field = NULL,
max_pages = 100L,
progress = TRUE) {
check_string(next_field)
check_param(limit, allow_null = TRUE)
check_string(total_field, allow_null = TRUE)
check_number_whole(max_pages)
check_bool(progress)

f_n_pages <- if (is.null(total_field)) {
function(resp, body_parsed, limit_value) {
NULL
}
} else {
function(resp, body_parsed, limit_value) {
total <- body_parsed[[total_field]]
if (is.null(total)) {
return(NULL)
}

ceiling(total / limit_value)
}
}

f_next_page <- function(req, resp, body_parsed) {
next_url <- body_parsed[[next_field]]

if (is.null(next_url)) {
return(NULL)
}

req_url(req, next_url)
}

api_paginate(
req,
limit,
results_field = results_field,
f_n_pages = f_n_pages,
f_next_page = f_next_page,
max_pages = max_pages,
progress = progress
)
}

api_paginate_offset <- function(req,
offset,
results_field = NULL,
limit = NULL,
total_field = NULL,
max_pages = 100L,
progress = TRUE) {
check_param(offset, allow_null_value = TRUE)
check_param(limit, allow_null = TRUE)
check_string(total_field, allow_null = TRUE)
check_number_whole(max_pages)
check_bool(progress)

f_n_pages <- if (is.null(total_field)) {
function(resp, body_parsed, limit_value) {
NULL
}
} else {
function(resp, body_parsed, limit_value) {
total <- body_parsed[[total_field]]
if (is.null(total)) {
return(NULL)
}

ceiling(total / limit_value)
}
}

cur_offset <- 0L
env <- current_env()
if (is_query_param(offset)) {
f_next_page <- function(req, resp, body_parsed) {
cur_offset <- get("cur_offset", envir = env)
cur_offset <- cur_offset + offset$value
assign("cur_offset", cur_offset, envir = env)
req_url_query(req, "{offset$name}" := cur_offset)
}
} else if (is_body_param(offset)) {
# TODO?
}

api_paginate(
req,
limit,
results_field = results_field,
f_n_pages = f_n_pages,
f_next_page = f_next_page,
max_pages = max_pages,
progress = progress
)
}

api_paginate_next_token <- function(req,
limit = NULL,
token_field,
next_token_field = "nextToken",
total_field = NULL,
results_field = "results",
max_pages = 100L,
progress = TRUE) {
check_string(next_token_field)
check_string(total_field, allow_null = TRUE)
check_number_whole(max_pages)
check_bool(progress)

f_n_pages <- if (is.null(total_field)) {
function(resp, body_parsed, limit_value) {
NULL
}
} else {
function(resp, body_parsed, limit_value) {
total <- body_parsed[[total_field]]
if (is.null(total)) {
return(NULL)
}

ceiling(total / limit_value)
}
}

f_next_page <- function(req, resp, body_parsed) {
next_token <- body_parsed[[next_token_field]]

if (is.null(next_token)) {
return(NULL)
}

if (is_body_param(token_field)) {
req$body$data[[token_field]] <- jsonlite::unbox(next_token)
} else if (is_query_param(token_field)) {
req_url_query(req, "{token_field$name}" := next_token)
}
req
}

api_paginate(
req,
limit,
results_field = results_field,
f_n_pages = f_n_pages,
f_next_page = f_next_page,
max_pages = max_pages,
progress = progress
)
}

in_query <- function(name,
value = NULL,
error_call = caller_env()) {
check_string(name, call = error_call)
out <- list(value = value, name = name)
class(out) <- c("httr2_query_param", "httr2_param")
out
}

in_header <- function(name,
value = NULL,
error_call = caller_env()) {
check_string(name, call = error_call)
out <- list(value = value, name = name)
class(out) <- c("httr2_header_param", "httr2_param")
out
}

in_body <- function(path,
value = NULL,
error_call = caller_env()) {
# TODO check path
out <- list(value = value, path = path)
class(out) <- c("httr2_body_param", "httr2_param")
out
}

check_param <- function(x,
...,
allow_null = FALSE,
allow_null_value = FALSE,
arg = caller_arg(x),
call = caller_env()) {
if (!missing(x)) {
if (is_param(x)) {
if (is.null(x$value) && !allow_null_value) {
abort("{.arg value} must not be `NULL`.", call = call)
}
return(invisible(NULL))
}
if (allow_null && is_null(x)) {
return(invisible(NULL))
}
}

stop_input_type(
x,
"an httr2 parameter object",
allow_null = FALSE,
arg = arg,
call = call
)
}

is_param <- function(x) {
inherits(x, "httr2_param")
}

is_query_param <- function(x) {
inherits(x, "httr2_query_param")
}

is_body_param <- function(x) {
inherits(x, "httr2_body_param")
}

is_header_param <- function(x) {
inherits(x, "httr2_header_param")
}

0 comments on commit edaacf6

Please sign in to comment.