Skip to content

Commit

Permalink
Merge pull request #1756 from rstudio/nanoplot-boxplot-enhance
Browse files Browse the repository at this point in the history
Correctly render outliers of nanoplot-based boxplots
  • Loading branch information
rich-iannone authored Jul 8, 2024
2 parents 3284691 + d8c904f commit f9954e9
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 24 deletions.
2 changes: 1 addition & 1 deletion R/modify_columns.R
Original file line number Diff line number Diff line change
Expand Up @@ -2755,7 +2755,7 @@ cols_add <- function(
#' sales for a selection of days. By converting the string-time 24-hour-clock
#' time values to the number of seconds elapsed in a day, we get continuous
#' values that can be incorporated into each box plot. And, by supplying a
#' function to the `y_val_fmt_fn` argument within `nanoplot_options()`, we can
#' function to the `y_val_fmt_fn` argument within [nanoplot_options()], we can
#' transform the integer seconds values back to clock times for display on
#' hover.
#'
Expand Down
69 changes: 48 additions & 21 deletions R/utils_plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -1211,26 +1211,40 @@ generate_nanoplot <- function(
box_thickness <- data_point_radius[1] * 6

# Calculate statistics for boxplot
stat_p05 <- unname(stats::quantile(y_vals, probs = 0.05, na.rm = TRUE))
stat_q_1 <- unname(stats::quantile(y_vals, probs = 0.25, na.rm = TRUE))
stat_med <- unname(stats::quantile(y_vals, probs = 0.50, na.rm = TRUE))
stat_q_3 <- unname(stats::quantile(y_vals, probs = 0.75, na.rm = TRUE))
stat_p95 <- unname(stats::quantile(y_vals, probs = 0.95, na.rm = TRUE))
stat_iqr <- stats::IQR(y_vals, na.rm = TRUE)

if (length(y_vals) > 25) {
low_outliers <- y_vals[y_vals < stat_q_1 - (1.5 * stat_iqr)]
high_outliers <- y_vals[y_vals > stat_q_3 + (1.5 * stat_iqr)]

stat_min_excl_low_outliers <-
min(base::setdiff(y_vals, low_outliers), na.rm = TRUE)

stat_max_excl_high_outliers <-
max(base::setdiff(y_vals, high_outliers), na.rm = TRUE)

plot_only_outliers <- length(y_vals) >= 20

if (plot_only_outliers) {

# Plot only outliers since the number of data values is sufficiently high
y_vals_plot <- y_vals[y_vals < stat_p05 | y_vals > stat_p95]
y_vals_plot <- c(low_outliers, high_outliers)

data_point_radius <- 4
data_point_stroke_width <- 2
data_point_stroke_color <- adjust_luminance(data_bar_stroke_color[1], steps = 0.75)
data_point_fill_color <- adjust_luminance(data_point_stroke_color[1], steps = 1.75)

data_point_stroke_color <-
adjust_luminance(data_bar_stroke_color[1], steps = 0.75)

data_point_fill_color <-
adjust_luminance(data_point_stroke_color[1], steps = 1.75)

} else {

# Plot all data values but diminish the visibility of the data points
# as the number approaches 25
# as the number approaches 20
y_vals_plot <- y_vals

if (length(y_vals) < 10) {
Expand All @@ -1241,7 +1255,9 @@ generate_nanoplot <- function(
data_point_stroke_width <- 2
}

data_point_stroke_color <- adjust_luminance("black", steps = length(y_vals) / 25)
data_point_stroke_color <-
adjust_luminance("black", steps = length(y_vals) / 25)

data_point_fill_color <- "transparent"
}

Expand All @@ -1251,42 +1267,53 @@ generate_nanoplot <- function(
vals = y_vals,
all_vals = all_y_vals,
y_vals_plot = y_vals_plot,
stat_low = stat_p05,
stat_min = stat_min_excl_low_outliers,
stat_qlow = stat_q_1,
stat_med = stat_med,
stat_qup = stat_q_3,
stat_high = stat_p95
stat_max = stat_max_excl_high_outliers
)

y_proportions <- y_proportions_list[["vals"]]
y_proportions_plot <- y_proportions_list[["y_vals_plot"]]
y_stat_p05 <- y_proportions_list[["stat_low"]]
y_stat_min <- y_proportions_list[["stat_min"]]
y_stat_q_1 <- y_proportions_list[["stat_qlow"]]
y_stat_med <- y_proportions_list[["stat_med"]]
y_stat_q_3 <- y_proportions_list[["stat_qup"]]
y_stat_p95 <- y_proportions_list[["stat_high"]]
y_stat_max <- y_proportions_list[["stat_max"]]

# Calculate boxplot x values
fence_start <- y_stat_p05 * data_x_width
fence_start <- y_stat_min * data_x_width
box_start <- y_stat_q_1 * data_x_width
median_x <- y_stat_med * data_x_width
box_end <- y_stat_q_3 * data_x_width
fence_end <- y_stat_p95 * data_x_width
fence_end <- y_stat_max * data_x_width
box_width <- (y_stat_q_3 - y_stat_q_1) * data_x_width

# Establish positions for plottable x and y values
plotted_x_vals <- y_proportions_plot * data_x_width

if (length(y_vals) == 1) {

plotted_y_vals <- bottom_y / 2

} else {
plotted_y_vals <- jitter(rep(bottom_y / 2, length(plotted_x_vals)), factor = 10)

if (plot_only_outliers) {

plotted_y_vals <- rep(bottom_y / 2, length(plotted_x_vals))

} else {

plotted_y_vals <-
jitter(rep(bottom_y / 2, length(plotted_x_vals)), factor = 10)
}
}

# Format numbers compactly
stat_p05_value <-
stat_min_value <-
format_number_compactly(
val = stat_p05,
val = stat_min_excl_low_outliers,
currency = currency,
fn = y_val_fmt_fn
)
Expand All @@ -1308,9 +1335,9 @@ generate_nanoplot <- function(
currency = currency,
fn = y_val_fmt_fn
)
stat_p95_value <-
stat_max_value <-
format_number_compactly(
val = stat_p95,
val = stat_max_excl_high_outliers,
currency = currency,
fn = y_val_fmt_fn
)
Expand Down Expand Up @@ -1358,7 +1385,7 @@ generate_nanoplot <- function(
"font-size=\"30px\" ",
"text-anchor=\"end\"",
">",
stat_p05_value,
stat_min_value,
"</text>",
"<text ",
"x=\"", box_start - 6, "\" ",
Expand Down Expand Up @@ -1396,7 +1423,7 @@ generate_nanoplot <- function(
"stroke=\"transparent\" ",
"font-size=\"30px\"",
">",
stat_p95_value,
stat_max_value,
"</text>"
)
}
Expand Down
Binary file modified images/man_cols_nanoplot_7.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion man/cols_nanoplot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion man/info_date_style.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 comments on commit f9954e9

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.