From 88c89cc9a7e088b63b1e5d34d233bf3a5d315961 Mon Sep 17 00:00:00 2001 From: Hubert Baniecki <32574004+hbaniecki@users.noreply.github.com> Date: Tue, 10 Dec 2019 00:47:09 +0100 Subject: [PATCH] v0.4.2 (#78) * v0.4.2 * update message * Update .travis.yml * Update .travis.yml --- .travis.yml | 24 ++- DESCRIPTION | 5 +- NEWS.md | 12 +- R/aggregate_profiles.R | 28 ++- R/plotD3_aggregated_profiles.R | 45 +---- R/plotD3_ceteris_paribus.R | 30 ++- R/plot_ceteris_paribus.R | 33 ++-- docs/404.html | 6 +- docs/articles/index.html | 6 +- docs/articles/vignette_describe.html | 150 +++++++------- .../figure-html/unnamed-chunk-10-1.png | Bin 62424 -> 11775 bytes .../figure-html/unnamed-chunk-11-1.png | Bin 53202 -> 16264 bytes .../figure-html/unnamed-chunk-3-1.png | Bin 55593 -> 14054 bytes .../figure-html/unnamed-chunk-5-1.png | Bin 52679 -> 13899 bytes .../figure-html/unnamed-chunk-9-1.png | Bin 52794 -> 15665 bytes docs/articles/vignette_titanic.html | 184 +++++++++--------- .../figure-html/unnamed-chunk-10-1.png | Bin 390012 -> 67717 bytes .../figure-html/unnamed-chunk-4-1.png | Bin 63219 -> 16679 bytes .../figure-html/unnamed-chunk-5-1.png | Bin 80690 -> 20044 bytes .../figure-html/unnamed-chunk-6-1.png | Bin 81736 -> 20094 bytes .../figure-html/unnamed-chunk-7-1.png | Bin 78100 -> 19242 bytes .../figure-html/unnamed-chunk-8-1.png | Bin 84467 -> 19428 bytes .../figure-html/unnamed-chunk-9-1.png | Bin 102820 -> 23078 bytes docs/authors.html | 10 +- docs/index.html | 13 +- docs/news/index.html | 124 +++++++----- docs/pkgdown.yml | 2 +- docs/reference/accumulated_dependency-1.png | Bin 67812 -> 18520 bytes docs/reference/accumulated_dependency-2.png | Bin 94975 -> 21872 bytes docs/reference/accumulated_dependency.html | 63 ++++-- docs/reference/aggregate_profiles-1.png | Bin 92468 -> 20514 bytes docs/reference/aggregate_profiles-2.png | Bin 298855 -> 40923 bytes docs/reference/aggregate_profiles-3.png | Bin 62808 -> 19881 bytes docs/reference/aggregate_profiles-4.png | Bin 64457 -> 17779 bytes docs/reference/aggregate_profiles-5.png | Bin 62156 -> 19542 bytes docs/reference/aggregate_profiles-6.png | Bin 64233 -> 16935 bytes docs/reference/aggregate_profiles.html | 34 +++- docs/reference/calculate_oscillations.html | 20 +- .../reference/calculate_variable_profile.html | 39 +++- docs/reference/calculate_variable_split.html | 25 ++- docs/reference/ceteris_paribus-1.png | Bin 65025 -> 17318 bytes docs/reference/ceteris_paribus-2.png | Bin 195882 -> 30307 bytes docs/reference/ceteris_paribus.html | 47 +++-- docs/reference/ceteris_paribus_2d-1.png | Bin 117909 -> 28515 bytes docs/reference/ceteris_paribus_2d-2.png | Bin 80033 -> 29738 bytes docs/reference/ceteris_paribus_2d.html | 46 +++-- docs/reference/cluster_profiles-1.png | Bin 61159 -> 16527 bytes docs/reference/cluster_profiles-2.png | Bin 111635 -> 22388 bytes docs/reference/cluster_profiles-3.png | Bin 236117 -> 40643 bytes docs/reference/cluster_profiles.html | 36 +++- docs/reference/conditional_dependency-1.png | Bin 69083 -> 18622 bytes docs/reference/conditional_dependency-2.png | Bin 94760 -> 21943 bytes docs/reference/conditional_dependency.html | 59 ++++-- docs/reference/describe-1.png | Bin 94103 -> 25563 bytes docs/reference/describe-2.png | Bin 54155 -> 15021 bytes docs/reference/describe.html | 45 +++-- docs/reference/feature_importance-1.png | Bin 56233 -> 15310 bytes docs/reference/feature_importance-2.png | Bin 53613 -> 15822 bytes docs/reference/feature_importance-3.png | Bin 69987 -> 17317 bytes docs/reference/feature_importance-4.png | Bin 60172 -> 17836 bytes docs/reference/feature_importance-5.png | Bin 59149 -> 18209 bytes docs/reference/feature_importance-6.png | Bin 76626 -> 18342 bytes docs/reference/feature_importance-7.png | Bin 59961 -> 15904 bytes docs/reference/feature_importance-8.png | Bin 55454 -> 15617 bytes docs/reference/feature_importance-9.png | Bin 75734 -> 17589 bytes docs/reference/feature_importance.html | 73 ++++--- docs/reference/index.html | 6 +- docs/reference/partial_dependency-1.png | Bin 65208 -> 18717 bytes docs/reference/partial_dependency-2.png | Bin 93708 -> 21884 bytes docs/reference/partial_dependency.html | 58 ++++-- .../plot.aggregated_profiles_explainer-1.png | Bin 101368 -> 23648 bytes .../plot.aggregated_profiles_explainer-2.png | Bin 70977 -> 17319 bytes .../plot.aggregated_profiles_explainer-3.png | Bin 91317 -> 20402 bytes .../plot.aggregated_profiles_explainer-4.png | Bin 324986 -> 43715 bytes .../plot.aggregated_profiles_explainer.html | 39 ++-- .../plot.ceteris_paribus_2d_explainer-1.png | Bin 164195 -> 65526 bytes .../plot.ceteris_paribus_2d_explainer-2.png | Bin 133298 -> 56250 bytes .../plot.ceteris_paribus_2d_explainer-3.png | Bin 163277 -> 64995 bytes .../plot.ceteris_paribus_2d_explainer-4.png | Bin 90685 -> 21713 bytes .../plot.ceteris_paribus_2d_explainer-5.png | Bin 163214 -> 69655 bytes .../plot.ceteris_paribus_2d_explainer.html | 32 ++- .../plot.ceteris_paribus_explainer-1.png | Bin 64273 -> 17634 bytes .../plot.ceteris_paribus_explainer-2.png | Bin 321140 -> 43611 bytes .../plot.ceteris_paribus_explainer-3.png | Bin 82255 -> 20076 bytes .../plot.ceteris_paribus_explainer-4.png | Bin 67539 -> 16573 bytes .../plot.ceteris_paribus_explainer-5.png | Bin 60563 -> 15320 bytes .../plot.ceteris_paribus_explainer-6.png | Bin 83869 -> 20522 bytes .../plot.ceteris_paribus_explainer.html | 35 +++- .../plot.ceteris_paribus_oscillations-1.png | Bin 113744 -> 22239 bytes .../plot.ceteris_paribus_oscillations-2.png | Bin 68123 -> 16108 bytes .../plot.ceteris_paribus_oscillations.html | 20 +- .../plot.feature_importance_explainer-1.png | Bin 54962 -> 15106 bytes .../plot.feature_importance_explainer-2.png | Bin 59403 -> 17388 bytes .../plot.feature_importance_explainer-3.png | Bin 59375 -> 15668 bytes .../plot.feature_importance_explainer-4.png | Bin 56285 -> 12433 bytes .../plot.feature_importance_explainer-5.png | Bin 76641 -> 18630 bytes .../plot.feature_importance_explainer.html | 37 ++-- .../reference/plotD3_aggregated_profiles.html | 52 ++--- docs/reference/plotD3_ceteris_paribus.html | 36 +++- docs/reference/plotD3_feature_importance.html | 42 ++-- .../print.aggregated_profiles_explainer.html | 18 +- .../print.ceteris_paribus_explainer.html | 14 +- docs/reference/select_neighbours.html | 28 ++- docs/reference/select_sample.html | 18 +- docs/reference/show_aggregated_profiles-1.png | Bin 125953 -> 21498 bytes docs/reference/show_aggregated_profiles-2.png | Bin 332606 -> 44614 bytes docs/reference/show_aggregated_profiles.html | 30 ++- docs/reference/show_observations-1.png | Bin 250639 -> 45476 bytes docs/reference/show_observations.html | 27 ++- docs/reference/show_profiles-1.png | Bin 105170 -> 19596 bytes docs/reference/show_profiles-2.png | Bin 341252 -> 45433 bytes docs/reference/show_profiles.html | 26 ++- docs/reference/show_residuals-1.png | Bin 164520 -> 24083 bytes docs/reference/show_residuals-2.png | Bin 157075 -> 23825 bytes docs/reference/show_residuals.html | 26 ++- docs/reference/show_rugs-1.png | Bin 249010 -> 44863 bytes docs/reference/show_rugs.html | 28 ++- man/plotD3_aggregated_profiles.Rd | 8 +- tests/testthat/test_plotD3.R | 2 +- 119 files changed, 1094 insertions(+), 647 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9cc6c05d..51732c25 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,19 @@ sudo: false cache: packages dist: trusty +r: + - release + - devel + +os: + - linux + - osx + +matrix: + exclude: + - os: osx + r: devel + env: global: - R_CHECK_ARGS="--no-build-vignettes --no-manual --timings --run-donttest" @@ -10,14 +23,19 @@ env: notifications: email: false +#before_install: +# - sudo apt-get install --yes udunits-bin libproj-dev libgeos-dev libgdal-dev libgdal1-dev libudunits2-dev before_install: - - sudo apt-get install --yes udunits-bin libproj-dev libgeos-dev libgdal-dev libgdal1-dev libudunits2-dev - + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install llvm && + export PATH="/usr/local/opt/llvm/bin:$PATH" && + export LDFLAGS="-L/usr/local/opt/llvm/lib" && + export CFLAGS="-I/usr/local/opt/llvm/include"; fi + r_packages: - ggplot2 - covr -r_github_packages: modelOriented/DALEX +#r_github_packages: modelOriented/DALEX after_success: - Rscript -e 'library(covr); codecov()' diff --git a/DESCRIPTION b/DESCRIPTION index 0bbdbd24..b8868fbd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,13 +1,12 @@ Package: ingredients Title: Effects and Importances of Model Ingredients -Version: 0.4.1 +Version: 0.4.2 Authors@R: c(person("Przemyslaw", "Biecek", email = "przemyslaw.biecek@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8423-1823")), person("Hubert", "Baniecki", role = "aut", comment = c(ORCID = "0000-0001-6661-5364")), - person("Adam", "Izdebski", role = "aut"), - person("Katarzyna", "Pekala", role = "aut")) + person("Adam", "Izdebski", role = "aut")) Description: Collection of tools for assessment of feature importance and feature effects. Key functions are: feature_importance() for assessment of global level feature importance, diff --git a/NEWS.md b/NEWS.md index a2212179..e2053e44 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +ingredients 0.4.2 +--------------------------------------------------------------- +* change `variable_type` and `variables` arguments usage in the +`aggregate_profiles`, `plot.ceteris_paribus` and `plotD3.ceteris_paribus` +* remove `variable_type` argument from `plotD3.aggregated_profiles` +(now the same as in `plot.aggregated_profiles`) +* Kasia Pekala is removed from the contributors as `aspect_importance` is moved to `DALEXtra` +([See v0.3.12 changelog](https://modeloriented.github.io/ingredients/news/index.html#ingredients-0-3-12)) +* added Travis-CI for OSX + ingredients 0.4.1 --------------------------------------------------------------- * fixed rounding problem in the describe function ([#76](https://github.com/ModelOriented/ingredients/issues/76)) @@ -31,7 +41,7 @@ cluster_profiles(), plot() and others, as requested in #15 ingredients 0.3.8 ---------------------------------------------------------------- -* Natural language description generated with `describe()` function for `ceteris_paribus()`, `feature_importance()` and `aggregated_profiles()` explanations. +* Natural language description generated with `describe()` function for `ceteris_paribus()`, `feature_importance()` and `aggregate_profiles()` explanations. ingredients 0.3.7 diff --git a/R/aggregate_profiles.R b/R/aggregate_profiles.R index 16f2adf1..32c1d8bd 100644 --- a/R/aggregate_profiles.R +++ b/R/aggregate_profiles.R @@ -104,16 +104,38 @@ aggregate_profiles <- function(x, ..., if (length(all_variables_intersect) == 0) stop(paste0("parameter variables do not overlap with ", paste(all_variables, collapse = ", "))) all_variables <- all_variables_intersect } + # only numerical or only factors? is_numeric <- sapply(all_profiles[, all_variables, drop = FALSE], is.numeric) + if (variable_type == "numerical") { vnames <- names(which(is_numeric)) - if (length(vnames) == 0) stop("There are no numerical variables") all_profiles$`_x_` <- 0 + + # there are no numerical variables + if (length(vnames) == 0) { + # change to categorical + variable_type <- "categorical" + all_profiles$`_x_` <- "" + # send message + message("'variable_type' changed to 'categorical' due to lack of numerical variables.") + # take all + vnames <- all_variables + } else if (!is.null(variables) && length(vnames) != length(variables)) { + message("Non-numerical variables (from the 'variables' argument) are rejected.") + } } else { vnames <- names(which(!is_numeric)) - if (length(vnames) == 0) stop("There are no non-numerical variables") all_profiles$`_x_` <- "" + + # there are variables selected + if (!is.null(variables)) { + # take all + vnames <- all_variables + } else if (length(vnames) == 0) { + # there were no variables selected and there are no categorical variables + stop("There are no non-numerical variables.") + } } # select only suitable variables @@ -132,7 +154,7 @@ aggregate_profiles <- function(x, ..., if (variable_type == "categorical") { all_profiles$`_x_` <- as.character(apply(all_profiles, 1, function(x) x[x["_vname_"]])) } - + if (!is.null(groups) && ! groups %in% colnames(all_profiles)) { stop("groups parameter is not a name of any column") } diff --git a/R/plotD3_aggregated_profiles.R b/R/plotD3_aggregated_profiles.R index 172df334..6ee63e9b 100644 --- a/R/plotD3_aggregated_profiles.R +++ b/R/plotD3_aggregated_profiles.R @@ -12,8 +12,6 @@ #' @param color a character. Set line/bar color #' @param size a numeric. Set width of lines #' @param alpha a numeric between \code{0} and \code{1}. Opacity of lines -#' @param variable_type a character. If "numerical" then only numerical variables will be plotted. -#' If "categorical" then only categorical variables will be plotted. #' @param facet_ncol number of columns for the \code{\link[ggplot2]{facet_wrap}} #' @param scale_plot a logical. If \code{TRUE}, the height of plot scales with window size. By default it's \code{FALSE} #' @param variables if not \code{NULL} then only \code{variables} will be presented @@ -49,25 +47,22 @@ #' pdp_rf_a <- aggregate_profiles(cp_rf, type = "accumulated", variable_type = "numerical") #' pdp_rf_a$`_label_` <- "RF_accumulated" #' -#' plotD3(pdp_rf_p, pdp_rf_c, pdp_rf_a, variable_type = "numerical", scale_plot = TRUE) +#' plotD3(pdp_rf_p, pdp_rf_c, pdp_rf_a, scale_plot = TRUE) #' #' pdp <- aggregate_profiles(cp_rf, type = "partial", variable_type = "categorical") #' pdp$`_label_` <- "RF_partial" #' -#' plotD3(pdp, variables = c("gender","class"), variable_type = "categorical", label_margin = 70) +#' plotD3(pdp, variables = c("gender","class"), label_margin = 70) #' #' @export #' @rdname plotD3_aggregated_profiles plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, color = "#46bac2", - variable_type = "numerical", facet_ncol = 2, scale_plot = FALSE, variables = NULL, chart_title = "Aggregated Profiles", label_margin = 60) { - check_variable_type(variable_type) - # if there is more explainers, they should be merged into a single data frame dfl <- c(list(x), list(...)) aggregated_profiles <- do.call(rbind, dfl) @@ -77,35 +72,13 @@ plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, if (!is.null(variables)) { all_variables <- intersect(all_variables, variables) if (length(all_variables) == 0) stop(paste0("variables do not overlap with ", paste(all_variables, collapse = ", "))) - } - hl <- split(aggregated_profiles, f = as.character(aggregated_profiles$`_vname_`), drop = FALSE)[all_variables] - - # only numerical or only factor? - is_numeric <- unlist(lapply(hl, function(x){ - is.numeric(x$`_x_`) - })) - - if (variable_type == "numerical") { - vnames <- names(which(is_numeric)) - - if (length(vnames) == 0) { - # but `variables` are selected, then change to factor - if (length(variables) > 0) { - variable_type <- "categorical" - vnames <- variables - } else { - stop("There are no numerical variables") - } - } - } else { - vnames <- names(which(!is_numeric)) - # there are no numerical features - if (length(vnames) == 0) stop("There are no non-numerical variables") + aggregated_profiles <- aggregated_profiles[aggregated_profiles$`_vname_` %in% all_variables, ] } + is_x_numeric <- is.numeric(aggregated_profiles$`_x_`) + # prepare profiles data - aggregated_profiles <- aggregated_profiles[aggregated_profiles$`_vname_` %in% vnames, ] aggregated_profiles$`_vname_` <- droplevels(aggregated_profiles$`_vname_`) rownames(aggregated_profiles) <- NULL @@ -118,7 +91,7 @@ plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, min_max_list <- ymean <- label_names <- NULL # line plot or bar plot? - if (variable_type == "numerical") { + if (is_x_numeric) { aggregated_profiles_list <- lapply(aggregated_profiles_list, function(x){ ret <- x[, c('_x_', "_yhat_", "_vname_", "_label_")] colnames(ret) <- c("xhat", "yhat", "vname", "label") @@ -150,12 +123,12 @@ plotD3.aggregated_profiles_explainer <- function(x, ..., size = 2, alpha = 1, ymean <- round(attr(x, "mean_prediction"),3) } - options <- list(variableNames = as.list(vnames), - n = length(vnames), c = length(list(...)) + 1, + options <- list(variableNames = as.list(all_variables), + n = length(all_variables), c = length(list(...)) + 1, yMax = ymax + ymargin, yMin = ymin - ymargin, yMean = ymean, labelNames = label_names, size = size, alpha = alpha, color = color, - onlyNumerical = variable_type == "numerical", + onlyNumerical = is_x_numeric, facetNcol = facet_ncol, scalePlot = scale_plot, chartTitle = chart_title, labelMargin = label_margin) diff --git a/R/plotD3_ceteris_paribus.R b/R/plotD3_ceteris_paribus.R index b778c41f..06bf1217 100644 --- a/R/plotD3_ceteris_paribus.R +++ b/R/plotD3_ceteris_paribus.R @@ -85,20 +85,32 @@ plotD3.ceteris_paribus_explainer <- function(x, ..., size = 2, alpha = 1, if (variable_type == "numerical") { vnames <- names(which(is_numeric)) + all_profiles$`_x_` <- 0 + # there are no numerical variables if (length(vnames) == 0) { - # but `variables` are selected, then change to factor - if (length(variables) > 0) { - variable_type <- "categorical" - vnames <- variables - } else { - stop("There are no numerical variables") - } + # change to categorical + variable_type <- "categorical" + all_profiles$`_x_` <- "" + # send message + message("'variable_type' changed to 'categorical' due to lack of numerical variables.") + # take all + vnames <- all_variables + } else if (!is.null(variables) && length(vnames) != length(variables)) { + message("Non-numerical variables (from the 'variables' argument) are rejected.") } } else { vnames <- names(which(!is_numeric)) - # there are no numerical features - if (length(vnames) == 0) stop("There are no non-numerical variables") + all_profiles$`_x_` <- "" + + # there are variables selected + if (!is.null(variables)) { + # take all + vnames <- all_variables + } else if (length(vnames) == 0) { + # there were no variables selected and there are no categorical variables + stop("There are no non-numerical variables.") + } } # prepare clean observations data for tooltips diff --git a/R/plot_ceteris_paribus.R b/R/plot_ceteris_paribus.R index eeda8efb..b585b147 100644 --- a/R/plot_ceteris_paribus.R +++ b/R/plot_ceteris_paribus.R @@ -102,29 +102,38 @@ plot.ceteris_paribus_explainer <- function(x, ..., } # is color a variable or literal? is_color_a_variable <- color %in% c(all_variables, "_label_", "_vname_", "_ids_") + # only numerical or only factors? is_numeric <- sapply(all_profiles[, all_variables, drop = FALSE], is.numeric) + if (variable_type == "numerical") { vnames <- names(which(is_numeric)) all_profiles$`_x_` <- 0 + # there are no numerical variables if (length(vnames) == 0) { - # but `variables` are selected, then change to factor - if (length(variables) > 0) { - variable_type <- "categorical" - vnames <- variables - all_profiles$`_x_` <- "" - } else { - stop("There are no numerical variables") - } + # change to categorical + variable_type <- "categorical" + all_profiles$`_x_` <- "" + # send message + message("'variable_type' changed to 'categorical' due to lack of numerical variables.") + # take all + vnames <- all_variables + } else if (!is.null(variables) && length(vnames) != length(variables)) { + message("Non-numerical variables (from the 'variables' argument) are rejected.") } - } else { vnames <- names(which(!is_numeric)) - # there are no numerical features - if (length(vnames) == 0) stop("There are no non-numerical variables") - all_profiles$`_x_` <- "" + + # there are variables selected + if (!is.null(variables)) { + # take all + vnames <- all_variables + } else if (length(vnames) == 0) { + # there were no variables selected and there are no categorical variables + stop("There are no non-numerical variables.") + } } # how to plot profiles diff --git a/docs/404.html b/docs/404.html index 71ab905e..eec5693e 100644 --- a/docs/404.html +++ b/docs/404.html @@ -36,12 +36,12 @@ + - @@ -80,7 +80,7 @@
part of the DrWhy.AI developed by the MI^2 DataLab - 0.4 + 0.4.2
@@ -146,7 +146,7 @@

Page not found (404)