From a8bcf9cc7c2fd61a22e2a45b937333ad53923ca3 Mon Sep 17 00:00:00 2001 From: olivroy Date: Wed, 2 Oct 2024 17:59:11 -0400 Subject: [PATCH 1/2] Use less dplyr in the codebase in favor of vctrs for speed + add a workaround to disable column resolve when it is not necessary to speed up `cols_nanoplot()` --- R/cols_add.R | 16 +++++----- R/dt_summary.R | 24 ++++++-------- R/extract.R | 3 +- R/format_data.R | 4 +-- R/helpers.R | 6 +++- R/render_as_i_html.R | 11 ++++--- R/resolver.R | 12 +++++-- R/tab_info.R | 4 +-- R/tab_options.R | 2 +- R/utils.R | 3 +- R/utils_plots.R | 8 ++++- R/utils_render_grid.R | 8 ++--- R/utils_render_html.R | 4 +-- R/utils_render_latex.R | 8 ++--- R/utils_render_xml.R | 36 ++++++++++----------- R/z_utils_render_footnotes.R | 2 +- tests/testthat/test-tab_spanner.R | 52 +++++++++++++++++++------------ 17 files changed, 115 insertions(+), 88 deletions(-) diff --git a/R/cols_add.R b/R/cols_add.R index 93fafe5eb..950958889 100644 --- a/R/cols_add.R +++ b/R/cols_add.R @@ -427,7 +427,7 @@ cols_add <- function( if (is.null(resolved_column_before) && is.null(resolved_column_after)) { updated_data_tbl <- - dplyr::bind_cols( + vctrs::vec_cbind( data_tbl, data_tbl_new_cols ) @@ -445,16 +445,16 @@ cols_add <- function( if (before_colnum <= 1) { # put new column first updated_data_tbl <- - dplyr::bind_cols( + vctrs::vec_cbind( data_tbl_new_cols, data_tbl ) } else { updated_data_tbl <- - dplyr::bind_cols( - dplyr::select(data_tbl, 1:(dplyr::all_of(before_colnum) - 1)), + vctrs::vec_cbind( + data_tbl[1:(before_colnum - 1)], data_tbl_new_cols, - dplyr::select(data_tbl, dplyr::all_of(before_colnum):ncol(data_tbl)) + data_tbl[before_colnum:ncol(data_tbl)] ) } @@ -474,15 +474,15 @@ cols_add <- function( updated_data_tbl <- dplyr::bind_cols( - dplyr::select(data_tbl, 1:dplyr::all_of(after_colnum)), + data_tbl[1:(after_colnum)], data_tbl_new_cols, - dplyr::select(data_tbl, (after_colnum + 1):ncol(data_tbl)) + data_tbl[(after_colnum + 1):ncol(data_tbl)] ) after_colnum <- which(boxh_df[["var"]] == resolved_column_after) updated_boxh_df <- - dplyr::bind_rows( + vctrs::vec_rbind( boxh_df[1:after_colnum, ], boxh_df_new_cols, boxh_df[(after_colnum + 1):nrow(boxh_df), ] diff --git a/R/dt_summary.R b/R/dt_summary.R index ac284e536..a9434dae9 100644 --- a/R/dt_summary.R +++ b/R/dt_summary.R @@ -272,15 +272,12 @@ dt_summary_build <- function(data, context) { )] <- NA_real_ summary_dfs_data <- - dplyr::select( - summary_dfs_data, - dplyr::all_of(c( - group_id_col_private, - row_id_col_private, - rowname_col_private, - colnames(body) - )) - ) + summary_dfs_data[c( + group_id_col_private, + row_id_col_private, + rowname_col_private, + colnames(body) + )] # # Format with formatting formulae @@ -297,11 +294,8 @@ dt_summary_build <- function(data, context) { summary_dfs_display_gt[["_data"]][is.na(summary_dfs_display_gt[["_data"]])] <- NA - summary_dfs_display_gt[["_stub_df"]] <- - dplyr::mutate( - summary_dfs_display_gt[["_stub_df"]], - row_id = gsub("__[0-9]*", "", row_id) - ) + summary_dfs_display_gt[["_stub_df"]]$row_id <- + gsub("__[0-9]*", "", summary_dfs_display_gt[["_stub_df"]]$row_id) for (k in seq_along(fmt_exprs)) { @@ -414,7 +408,7 @@ dt_summary_build <- function(data, context) { labels_processed <- unlist(lapply(labels, FUN = process_text, context = context)) for (i in seq_len(nrow(summary_dfs_display))) { - summary_dfs_display[i, ][["::rowname::"]] <- + summary_dfs_display[i, ][[rowname_col_private]] <- unname(labels_processed[names(labels_processed) == summary_dfs_display[i, ][["::row_id::"]]]) } diff --git a/R/extract.R b/R/extract.R index 926beac97..77d7a6a93 100644 --- a/R/extract.R +++ b/R/extract.R @@ -396,7 +396,8 @@ assemble_body_extract <- function( } if (!is.null(rowname_col)) { - names(out_df)[names(out_df) == rowname_col] <- "::rowname::" + # ::rowname:: + names(out_df)[names(out_df) == rowname_col] <- rowname_col_private } } diff --git a/R/format_data.R b/R/format_data.R index 273daec6d..b507abb2c 100644 --- a/R/format_data.R +++ b/R/format_data.R @@ -4953,12 +4953,12 @@ format_bins_by_context <- function(x, sep, fmt, context) { # Format the LHS and RHS values val_tbl <- - dplyr::tibble( + vctrs::data_frame( left = as.numeric(x_str_lhs), right = as.numeric(x_str_rhs) ) - val_tbl_gt <- gt(val_tbl) + val_tbl_gt <- gt(val_tbl, groupname_col = NULL) # Ensure that the expression (a RHS formula) is made a closure format_fn <- rlang::as_closure(fmt) diff --git a/R/helpers.R b/R/helpers.R index 43d6c9574..036d5a293 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -705,7 +705,11 @@ unit_conversion <- function(from, to) { } row_conversion <- - dplyr::filter(conversion_factors, from == {{ from }}, to == {{ to }}) + vctrs::vec_slice( + conversion_factors, + conversion_factors$from == from & + conversion_factors$to == to + ) # In the case where units are valid and available in the internal dataset, # they may be across categories; such pairings do not allow for a conversion diff --git a/R/render_as_i_html.R b/R/render_as_i_html.R index b7ac1b362..56c7b8de7 100644 --- a/R/render_as_i_html.R +++ b/R/render_as_i_html.R @@ -412,9 +412,9 @@ render_as_ihtml <- function(data, id) { col_defs <- c(col_defs, group_col_defs, row_name_col_def) styles_tbl <- dt_styles_get(data = data) - body_styles_tbl <- dplyr::filter(styles_tbl, locname %in% c("data", "stub")) + body_styles_tbl <- vctrs::vec_slice(styles_tbl, styles_tbl$locname %in% c("data", "stub")) body_styles_tbl <- dplyr::arrange(body_styles_tbl, colnum, rownum) - body_styles_tbl <- dplyr::select(body_styles_tbl, colname, rownum, html_style) + body_styles_tbl <- dplyr::select(body_styles_tbl, "colname", "rownum", "html_style") # Generate styling rule per combination of `colname` and # `rownum` in `body_styles_tbl` @@ -426,7 +426,7 @@ render_as_ihtml <- function(data, id) { colname <- body_styles_tbl[x, ][["colname"]] rownum <- body_styles_tbl[x, ][["rownum"]] html_style <- body_styles_tbl[x, ][["html_style"]] - html_style <- unlist(strsplit(html_style, "; ")) + html_style <- unlist(strsplit(html_style, "; ", fixed = TRUE)) html_style <- gsub("(-)\\s*(.)", "\\U\\2", html_style, perl = TRUE) html_style <- gsub("(:)\\s*(.*)", ": '\\2'", html_style, perl = TRUE) html_style <- paste(html_style, collapse = ", ") @@ -537,7 +537,10 @@ render_as_ihtml <- function(data, id) { if (has_tab_spanners) { hidden_columns <- dt_boxhead_get_var_by_type(data = data, type = "hidden") - col_groups <- dplyr::filter(dt_spanners_get(data = data), spanner_level == 1) + spanners_df <- dt_spanners_get(data = data) + col_groups <- vctrs::vec_slice( + spanners_df, spanners_df$spanner_level == 1 + ) for (i in seq_len(nrow(col_groups))) { diff --git a/R/resolver.R b/R/resolver.R index ab7e1c314..18f6ff842 100644 --- a/R/resolver.R +++ b/R/resolver.R @@ -181,8 +181,10 @@ resolve_cells_column_spanners <- function( } # filter for levels - spanners <- spanners %>% dplyr::filter(spanner_level %in% levels) - + spanners <- vctrs::vec_slice( + spanners, + spanners$spanner_level %in% levels + ) } # # Resolution of spanners as column spanner names # @@ -256,7 +258,11 @@ resolve_cols_c <- function( ) { null_means <- rlang::arg_match(null_means) - + + if (identical(Sys.getenv("gt_avoid_resolve"), "true")) { + ret <- names(dt_data_get(data)) + return(ret) + } names( resolve_cols_i( expr = {{ expr }}, diff --git a/R/tab_info.R b/R/tab_info.R index bd988670c..450d8dc6c 100644 --- a/R/tab_info.R +++ b/R/tab_info.R @@ -197,13 +197,13 @@ tab_info <- function(data) { groups_rows <- dt_row_groups_get(data = data) - row_groups <- dplyr::select(stub_df, id = group_id, label = group_label) + row_groups <- dplyr::select(stub_df, id = "group_id", label = "group_label") row_groups <- dplyr::filter(row_groups, dplyr::row_number() == 1, .by = "id") row_groups <- dplyr::mutate(row_groups, i = which(groups_rows %in% id)) row_groups$type <- NA_character_ row_groups$label <- unlist(row_groups$label) row_groups$location <- "Row Groups" - row_groups <- dplyr::select(row_groups, id, i, label, type, location) + row_groups <- row_groups[c("id", "i", "label", "type", "location")] } else { row_groups <- empty_tbl diff --git a/R/tab_options.R b/R/tab_options.R index 95055a58e..d1306e3c5 100644 --- a/R/tab_options.R +++ b/R/tab_options.R @@ -880,7 +880,7 @@ tab_options <- function( new_df <- dplyr::left_join( new_df, - dplyr::select(opts_df, parameter, type), + dplyr::select(opts_df, "parameter", "type"), by = "parameter" ) new_df$value <- mapply( diff --git a/R/utils.R b/R/utils.R index 21a26b1e3..d960f624d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -2384,7 +2384,8 @@ column_classes_are_valid <- function(data, columns, valid_classes, call = rlang: ) table_data <- dt_data_get(data = data) - table_data <- dplyr::select(table_data, dplyr::all_of(resolved)) + # select all resolved columns + table_data <- table_data[resolved] all( vapply( diff --git a/R/utils_plots.R b/R/utils_plots.R index be5a5e3b3..702cfafad 100644 --- a/R/utils_plots.R +++ b/R/utils_plots.R @@ -1020,6 +1020,12 @@ generate_nanoplot <- function( bar_tags <- paste(bar_strings, collapse = "\n") } + # Speed up nanoplots number formatting rendering by avoid + # calling resolve_cols_i() too much. + # To be used with caution, but setting this envvar + # for + withr::local_envvar(c("gt_avoid_resolve" = "true")) + if (plot_type == "bar" && single_horizontal_bar) { # This type of display assumes there is only a single `y` value and there @@ -2204,7 +2210,7 @@ format_number_compactly <- function( } # Format value accordingly - + if (!is.null(currency)) { if (abs(val) >= 1e15) { diff --git a/R/utils_render_grid.R b/R/utils_render_grid.R index af811484c..b03326866 100644 --- a/R/utils_render_grid.R +++ b/R/utils_render_grid.R @@ -104,7 +104,7 @@ create_heading_component_g <- function(data) { title_styles <- NA_character_ if ("title" %in% styles_tbl$locname) { - title_style_rows <- dplyr::filter(styles_tbl, locname == "title") + title_style_rows <- vctrs::vec_slice(styles_tbl, styles_tbl$locname == "title") if (nrow(title_style_rows) > 0) { title_styles <- title_style_rows$html_style } @@ -112,7 +112,7 @@ create_heading_component_g <- function(data) { subtitle_styles <- NA_character_ if (subtitle_defined && "subtitle" %in% styles_tbl$locname) { - subtitle_style_rows <- dplyr::filter(styles_tbl, locname == "subtitle") + subtitle_style_rows <- vctrs::vec_slice(styles_tbl, styles_tbl$locname == "subtitle") if (nrow(subtitle_style_rows) > 0) { subtitle_styles <- subtitle_style_rows$html_style } @@ -762,7 +762,7 @@ create_source_notes_component_g <- function(data) { style <- NA if ("source_notes" %in% styles_tbl$locname) { - source_notes_style <- dplyr::filter(styles_tbl, locname == "source_notes") + source_notes_style <- vctrs::vec_slice(styles_tbl, styles_tbl$locname == "source_notes") if (nrow(source_notes_style)) { style <- source_notes_style$html_style } @@ -804,7 +804,7 @@ create_footnotes_component_g <- function(data) { style <- NA if ("footnotes" %in% styles_tbl$locname) { - footnotes_style <- dplyr::filter(styles_tbl, locname == "footnotes") + footnotes_style <- vctrs::vec_slice(styles_tbl, styles_tbl$locname == "footnotes") if (nrow(footnotes_style) > 0) { style <- footnotes_style$html_style } diff --git a/R/utils_render_html.R b/R/utils_render_html.R index 571608ed9..a5ddc5790 100644 --- a/R/utils_render_html.R +++ b/R/utils_render_html.R @@ -270,13 +270,13 @@ get_table_defs <- function(data) { if ("stub" %in% widths[["type"]]) { stub_idx <- which(widths$type == "stub") othr_idx <- base::setdiff(seq_len(nrow(widths)), stub_idx) - widths <- dplyr::slice(widths, stub_idx, othr_idx) + widths <- vctrs::vec_slice(widths, c(stub_idx, othr_idx)) } if ("row_group" %in% widths[["type"]] && row_group_as_column) { row_group_idx <- which(widths$type == "row_group") othr_idx <- base::setdiff(seq_len(nrow(widths)), row_group_idx) - widths <- dplyr::slice(widths, row_group_idx, othr_idx) + widths <- vctrs::vec_slice(widths, c(row_group_idx, othr_idx)) } widths <- widths[seq_len(nrow(widths)), "column_width", drop = TRUE] diff --git a/R/utils_render_latex.R b/R/utils_render_latex.R index 6759c5664..932c5f356 100644 --- a/R/utils_render_latex.R +++ b/R/utils_render_latex.R @@ -165,13 +165,13 @@ create_table_start_l <- function(data, colwidth_df) { if ("stub" %in% colwidth_df_visible[["type"]]) { stub_idx <- which(colwidth_df_visible$type == "stub") othr_idx <- base::setdiff(seq_len(nrow(colwidth_df_visible)), stub_idx) - colwidth_df_visible <- dplyr::slice(colwidth_df_visible, stub_idx, othr_idx) + colwidth_df_visible <- vctrs::vec_slice(colwidth_df_visible, c(stub_idx, othr_idx)) } if ("row_group" %in% colwidth_df_visible[["type"]]) { row_group_idx <- which(colwidth_df_visible$type == "row_group") othr_idx <- base::setdiff(seq_len(nrow(colwidth_df_visible)), row_group_idx) - colwidth_df_visible <- dplyr::slice(colwidth_df_visible, row_group_idx, othr_idx) + colwidth_df_visible <- vctrs::vec_slice(colwidth_df_visible, c(row_group_idx, othr_idx)) } # Determine if there are any footnotes or source notes; if any, @@ -454,7 +454,7 @@ create_columns_component_l <- function(data, colwidth_df) { styles_stubhead <- consolidate_cell_styles_l( - dplyr::filter(styles_tbl, locname == "stubhead") + vctrs::vec_slice(styles_tbl, styles_tbl$locname == "stubhead") ) headings_vars <- prepend_vec(headings_vars, "::stub") @@ -948,7 +948,7 @@ summary_rows_for_group_l <- function( styles_df <- dt_styles_get(data) styles_df <- styles_df[styles_df$locname == loc_type & styles_df$grpname == group_id, , drop = FALSE] # set colname to ::rowname:: if colname is present and colnum = 0 - styles_df$colname[is.na(styles_df$colname) & styles_df$colnum == 0] <- "::rowname::" + styles_df$colname[is.na(styles_df$colname) & styles_df$colnum == 0] <- rowname_col_private styles_summary <- styles_df[styles_df$colname == col_name, , drop = FALSE] diff --git a/R/utils_render_xml.R b/R/utils_render_xml.R index 5a61daa7d..cdf76a3e5 100644 --- a/R/utils_render_xml.R +++ b/R/utils_render_xml.R @@ -1629,11 +1629,11 @@ create_columns_component_xml <- function( for (i in seq_len(length(headings_vars) - stub_available)) { cell_style <- - dplyr::filter( + vctrs::vec_slice( styles_tbl, - locname %in% c("columns_columns"), - rownum == -1, - colnum == i + styles_tbl$locname %in% c("columns_columns") & + styles_tbl$rownum == -1 & + styles_tbl$colnum == i ) cell_style <- cell_style$styles[1][[1]] @@ -1780,10 +1780,10 @@ create_columns_component_xml <- function( if (colspans[i] > 0) { cell_style <- - dplyr::filter( + vctrs::vec_slice( styles_tbl, - locname %in% c("columns_groups"), - grpname %in% spanner_row_ids[i] + styles_tbl$locname %in% c("columns_groups") & + styles_tbl$grpname %in% spanner_row_ids[i] ) cell_style <- cell_style$styles[1][[1]] @@ -1869,7 +1869,7 @@ create_body_component_xml <- function( # Get the column alignments for the data columns (this # doesn't include the stub alignment) - col_alignment <- boxh[boxh$type == "default", ][["column_align"]] + col_alignment <- vctrs::vec_slice(boxh$column_align, boxh$type == "default") # Determine whether the stub is available through analysis # of the `stub_components` @@ -1944,10 +1944,10 @@ create_body_component_xml <- function( ][[1]] cell_style <- - dplyr::filter( + vctrs::vec_slice( styles_tbl, - locname == "row_groups", - rownum == (i - 0.1) + styles_tbl$locname == "row_groups" & + styles_tbl$rownum == (i - 0.1) ) cell_style <- cell_style$styles[1][[1]] @@ -1998,11 +1998,11 @@ create_body_component_xml <- function( style_col_idx <- ifelse(stub_available, y - 1, y) cell_style <- - dplyr::filter( + vctrs::vec_slice( styles_tbl, - locname %in% c("data","stub"), - rownum == i, - colnum == style_col_idx + styles_tbl$locname %in% c("data","stub") & + styles_tbl$rownum == i & + styles_tbl$colnum == style_col_idx ) cell_style <- cell_style$styles[1][[1]] @@ -2110,10 +2110,10 @@ create_body_component_xml <- function( grand_summary_col %in% names(list_of_summaries$summary_df_display_list)) { summary_styles <- - dplyr::filter( + vctrs::vec_slice( styles_tbl, - locname %in% "grand_summary_cells", - grpname %in% c("::GRAND_SUMMARY") + styles_tbl$locname %in% "grand_summary_cells" & + styles_tbl$grpname %in% c("::GRAND_SUMMARY") ) grand_summary_section <- diff --git a/R/z_utils_render_footnotes.R b/R/z_utils_render_footnotes.R index 782b510eb..4f63aa4b2 100644 --- a/R/z_utils_render_footnotes.R +++ b/R/z_utils_render_footnotes.R @@ -746,7 +746,7 @@ apply_footnotes_to_summary <- function(data, context = "html") { # make sure rownames are recognized to add footnote marks # to cells_stub_grand_summary() / cells_stub_summary() #1832 # dplyr::coalesce() - footnotes_tbl$colname[is.na(footnotes_tbl$colname)] <- "::rowname::" + footnotes_tbl$colname[is.na(footnotes_tbl$colname)] <- rowname_col_private summary_df_list <- list_of_summaries$summary_df_display_list if ("summary_cells" %in% footnotes_tbl$locname) { diff --git a/tests/testthat/test-tab_spanner.R b/tests/testthat/test-tab_spanner.R index a6524b1a0..88aee3ea0 100644 --- a/tests/testthat/test-tab_spanner.R +++ b/tests/testthat/test-tab_spanner.R @@ -9,7 +9,7 @@ reactive_table_to_json <- function(reactable_obj){ htmltools::as.tags() %>% htmltools::doRenderTags() %>% stringr::str_match(pattern = '