diff --git a/API_url_tracker.Rmd b/API_url_tracker.Rmd index 1db37351..458f3f1c 100644 --- a/API_url_tracker.Rmd +++ b/API_url_tracker.Rmd @@ -7,15 +7,7 @@ output: html_document ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) - -# install experience dashboard if not already install -if ("experiencesdashboard" %in% installed.packages()) { - if ("remotes" %in% installed.packages()) install.packages("remotes", repos = "http://cran.r-project.org") - remotes::install_github("CDU-data-science-team/experiencesdashboard", upgrade = "never", quiet = TRUE) -} - library(experiencesdashboard) -library(dplyr) ``` # Last rendered at `r round(Sys.time())` @@ -40,7 +32,7 @@ pending_jobs <- dplyr::tbl( "api_jobs" ) ) |> - filter(status == "submitted") |> + dplyr::filter(status == "submitted") |> dplyr::collect() ``` diff --git a/NAMESPACE b/NAMESPACE index 764fecff..e46a42d8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(api_pred) export(calculate_table) export(clean_dataframe) export(demographic_distribution) +export(drop_na_for_col) export(get_api_pred_url) export(get_pred_from_url) export(html_decoder) diff --git a/R/app_server.R b/R/app_server.R index f7cb917a..019d63f6 100644 --- a/R/app_server.R +++ b/R/app_server.R @@ -50,7 +50,6 @@ app_server <- function(input, output, session) { latest_time <- api_jobs$latest_time wait_time <- api_jobs$estimated_wait_time - if (!is.null(latest_time) & data_exists) { # filter out all the unfinished rows(api job time is same as last_upload_date when doing data upload) db_data <- db_data %>% @@ -275,9 +274,10 @@ app_server <- function(input, output, session) { dplyr::arrange(date) } - # TRANSFORM THE SENTIMENT COLUMN + # Transform the sentiment column return_data <- return_data %>% - transform_sentiment() + transform_sentiment() %>% + drop_na_for_col(c('category', 'super_category', 'sentiment')) # also return a dataset with unique individuals unique_data <- return_data %>% diff --git a/R/app_ui.R b/R/app_ui.R index 9acd1865..5325d8b9 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -7,9 +7,6 @@ #' @noRd app_ui <- function(request) { tagList( - - # Leave this function for adding external resources - golem_add_external_resources(), # List the first level UI elements here dashboardPage( dashboardHeader( @@ -37,12 +34,9 @@ app_ui <- function(request) { ), dashboardBody( ### Changing theme ---- - fresh::use_theme(nhs_shiny_theme()), # <-- use fresh object theme to style the whole dashboard - - #### Add css file for buttons ---- - includeCSS(system.file("app/www/", "button-style.css", - package = "experiencesdashboard" - )), + fresh::use_theme(nhs_shiny_theme()), # use fresh object theme to style the whole dashboard + # Leave this function for adding external resources + golem_add_external_resources(), HTML(' purrr::array_tree() @@ -15,7 +20,7 @@ get_api_pred_url <- function(data, api_key) { r <- httr::POST( endpoint, body = json_data, - query = list(code = api_key), + query = list(code = api_key, target = target), encode = "json" ) @@ -62,6 +67,34 @@ get_pred_from_url <- function(api_url) { } } +#' convert the API prediction into a format that is suitable for the database +#' +#' @param prediction dataframe, prediction from the pxtextmining api with column "comment_id","labels" and "sentiment" +#' +#' @return dataframe, transform prediction data +#' @noRd +transform_prediction_for_database <- function(prediction) { + stopifnot('"comment_id","labels" and "sentiment" columns are required' = all(c("comment_id", "labels", "sentiment") %in% 
names(prediction))) + + prediction |> + dplyr::rename( + category = labels + ) |> + dplyr::mutate(dplyr::across(c(comment_id, sentiment), as.integer)) |> + # assign the super categories + tidyr::unnest(category) |> # Unnest the category column into rows and columns + dplyr::mutate(super_category = assign_highlevel_categories(category)) |> # assign super categories + dplyr::group_by(comment_id) %>% + dplyr::summarise( + across(-tidyselect::all_of(c("category", "super_category")), unique), + across(c(category, super_category), list) # duplicate super category value is preserved. This will allow easy manipulation later. for ex. see get_tidy_filter_data() + ) |> + dplyr::ungroup() |> + # convert the category and super category column to raw json before loading into the database + dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, jsonlite::toJSON))) |> + dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, charToRaw))) +} + #' Track the API job table. If prediction is done, it writes it to the main table and delete the job from the api job table #' #' @param job an instance of the api job table @@ -91,20 +124,22 @@ track_api_job <- function(job, conn, write_db = TRUE) { if (is.data.frame(prediction)) { cat("Job", job_id, "is done \n") - prediction <- prediction |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.integer)) - # update the job status as complete (Prediction has been returned) DBI::dbExecute(conn, paste("UPDATE api_jobs SET status='completed' WHERE job_id =", job_id)) if (!write_db) { # update the job status as uploaded (successfully write prediction to main table) DBI::dbExecute(conn, paste("UPDATE api_jobs SET status='uploaded' WHERE job_id =", job_id)) - + return(prediction) } + prediction <- prediction |> + transform_prediction_for_database() + # update the main table + cat("Updating database with prediction \n") + dplyr::rows_update( dplyr::tbl(conn, trust_id), prediction, @@ -192,8 +227,8 @@ api_question_code <- function(value) { #' @param pool database connection #' @param trust_id string, the trust id #' @param schedule_time integer, number of mins used in the Rmarkdown schedule that tracks the api job table. -#' default is 15mins. its only needed to guess how long users might have to wait for the prediction to complete -#' +#' default is 15mins. 
it's only needed to guess how long users might have to wait for the prediction to complete +#' #' @return list of latest_time and estimated_wait_time (in mins) #' @noRd check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), schedule_time = 15) { @@ -207,7 +242,7 @@ check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), sched dplyr::filter( trust_id == !!trust_id, status %in% c("submitted", "completed") - ) + ) if (data |> dplyr::tally() |> @@ -215,7 +250,7 @@ check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), sched return(list("latest_time" = NULL, "estimated_wait_time" = 0)) } else { data <- data |> - dplyr::filter(date == max(date, na.rm = TRUE)) + dplyr::filter(date == max(date, na.rm = TRUE)) } latest_time <- data |> @@ -236,5 +271,7 @@ check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), sched TRUE ~ 120 ) + estimated_wait_time <- if (estimated_wait_time > 0) estimated_wait_time else 10 + list("latest_time" = latest_time, "estimated_wait_time" = estimated_wait_time) } diff --git a/R/golem_utils_server.R b/R/golem_utils_server.R index fdf5d35d..001ab602 100644 --- a/R/golem_utils_server.R +++ b/R/golem_utils_server.R @@ -343,7 +343,7 @@ assign_highlevel_categories <- function(sub_cats) { hl_cat <- framework %>% dplyr::filter(`Sub-category` == v) %>% dplyr::pull(Category) - return(if (length(hl_cat) != 0) hl_cat else "Other Category") + return(if (length(hl_cat) != 0) hl_cat else "Unknown Category") }, simplify = TRUE, USE.NAMES = FALSE ) @@ -420,4 +420,30 @@ transform_sentiment <- function(data, sentiment_column = 'sentiment') { dplyr::mutate( sentiment = factor(!!rlang::sym(sentiment_column), levels = c("Positive", "Neutral/Mixed", "Negative")) ) -} \ No newline at end of file +} + +#' Find rows containing missing values in all specified +#' column while keeping rows where any contains values +#' +#' @param df A data frame +#' @param vars list of strings containing the columns +#' @param negate logical, whether to return the rows where the NA check is true or otherwise +#' default is `TRUE` +#' +#' @return data frame +#' @export +drop_na_for_col <- function(df, vars, negate = TRUE) { + + diff <- setdiff(vars, names(df)) + stopifnot("Some column doesn't exist in data" = length(diff) == 0) + + if (negate) { + return( + df %>% + dplyr::filter(rowSums(is.na(dplyr::select(., dplyr::all_of(vars)))) != length(vars)) + ) + } + + df %>% + dplyr::filter(rowSums(is.na(dplyr::select(., dplyr::all_of(vars)))) == length(vars)) +} diff --git a/R/mod_data_management.R b/R/mod_data_management.R index 11042940..fade330e 100644 --- a/R/mod_data_management.R +++ b/R/mod_data_management.R @@ -471,7 +471,7 @@ mod_data_management_server <- function(id, db_conn, filter_data, data_exists, us showModal(modalDialog( title = strong("Success!"), HTML(paste( - h5(paste(nrow(raw_df), "records successfully imported. The new data is not ready yet, the dashboard is busy predicting the sentiment score.")), + h5(paste(nrow(raw_df), "records successfully imported. 
The dashboard is still processing data to predict category and sentiment")), h4(strong(em(paste("Please check back or refresh your browser in about", wait_time, "mins to access the new data")))) )), easyClose = FALSE diff --git a/R/mod_documentation_page.R b/R/mod_documentation_page.R index a9ae5778..11516422 100644 --- a/R/mod_documentation_page.R +++ b/R/mod_documentation_page.R @@ -8,28 +8,41 @@ mod_documentation_page_ui <- function(id) { ns <- NS(id) tagList( - br(), - p("This dashboard uses a machine learning tool (pxtextmining API) to assign one or more sub-categories to free text comments. - The categories and subcategories developed in the Qualitative Data Categorisation (QDC) framework are used. - The visualisations and interactivity in this dashboard have been chosen to help users to engage with the comments and - not just quantify the data."), - p("The QDC framework is an evidence-based work that has been carefully designed. it has multiple categories, each with its - own set of sub-categories. The category groups similar topics together in a meaningful way to help users navigate the framework - more easily. The sub-categories are the actual topics that better reflect the underlying data. - A high-level visual of the categories and sub-categories is displayed below:"), - img(src = "www/framework_v5.png", width = "100%"), - hr(), - p("To see detailed description of the sub-categories, please expand the categories below"), + h3(strong("Introduction to the Data Categorisation Framework"), style = "color : #005EB8;"), + p("This dashboard utilizes the pxtextmining API, a machine learning tool, to assign one or more subcategories to free-text comments + based on the Qualitative Data Categorization (QDC) framework. The QDC framework is an evidence-based work that has been designed + with several categories, each with its own set of subcategories. The categories group similar topics together to make it easier + for users to navigate the framework, while the subcategories reflect the actual topics that better represent the underlying data."), + p("The dashboard's visualizations and intuitive interactivity are thoughtfully created to help users effectively engage with the + comments and not merely quantify the data. Below is a high-level visual of the categories and subcategories:"), + img(src = "www/framework_MVP_version.jpeg", width = "100%"), + rep_br(2), + p("To see detailed description of the sub-categories, kindly click on the category to expand it."), DT::DTOutput(ns("framework_table")), - hr(), tagList( - p("To get further detail about the data categorisation framework and the dashboard + h4(strong("Making best use of the qualitative comments"), style = "color : #005EB8;"), + HTML(paste0( + "This dashboard should be used to facilitate initial exploration of your qualitative data, before + drawing fuller insight from the underlying qualitative comments. Before using the dashboard, you + should read the good practice guidance on the documentation page: ", + a(strong("Good practice guidance."), + href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/dashboard/dashboard3.html", + target = "_blank" + ), + "This includes important + information, tips, and advice to help you maximise your use of the categorised qualitative comments, + whilst avoiding the risks around relying on the quantification of qualitative data." 
+ )), + rep_br(2), + HTML(paste( + "To get further detail about the data categorisation framework and the dashboard including some illustrative examples for each of the sub-categories. - Please see the"), - a(p("Patient Experience - QDC documentation Page"), - href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/framework/framework3.html", - target = "_blank" - ) + Please see the", + a(strong("Patient Experience - QDC documentation Page."), + href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/framework/framework3.html", + target = "_blank" + ) + )) ), ) } @@ -42,7 +55,6 @@ mod_documentation_page_server <- function(id) { # table output$framework_table <- DT::renderDT({ - # JaveScript code to collapse the table callback_js <- DT::JS( "table.on('click', 'tr.dtrg-group', function () {", diff --git a/R/tidy_data.R b/R/tidy_data.R index 69609a3e..0732aebd 100644 --- a/R/tidy_data.R +++ b/R/tidy_data.R @@ -12,10 +12,9 @@ tidy_all_trusts <- function(data) { dplyr::filter( !is.na(comment_txt), !is.null(comment_txt), - comment_txt != "NA", - comment_txt != "NULL", - comment_txt != "", - comment_txt != " ", + !comment_txt %in% c("Did not answer", "NULL", + "#NAME?", "NA", "N/A", "", " "), + nchar(comment_txt) > 1, hidden == 0 ) } else { diff --git a/R/tidy_upload.R b/R/tidy_upload.R index b2be4b39..288cf18a 100644 --- a/R/tidy_upload.R +++ b/R/tidy_upload.R @@ -75,13 +75,13 @@ clean_dataframe <- function(data, comment_column) { data %>% dplyr::mutate( dplyr::across( - where(is.character), + where(is.character), \(.x) stringr::str_replace_all(.x, "[^[:alnum:][:punct:]]+", " ") # remove non-graphical characters from all character columns, ‘⁠[:graph: is not reliable⁠’ - ), + ), dplyr::across( where(is.character), ~ dplyr::case_when( grepl("^[?]+$", .) ~ NA_character_, # remove multiple question marks - . %in% c("NULL", "#NAME?", "NA", "N/A", "", " ") ~ NA_character_, + . %in% c("NULL", "#NAME?", "NA", "N/A", "", " ") ~ NA_character_, TRUE ~ . ) ) @@ -106,9 +106,9 @@ clean_dataframe <- function(data, comment_column) { upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { # throw error if for any reason the trust_id is not same a trust name stopifnot("trust_id should be same as trust_name" = get_golem_config("trust_name") == trust_id) - - last_upload_date = Sys.time() # to track when the data upload started. to be used in the api job table and the main table - + + last_upload_date <- Sys.time() # to track when the data upload started. to be used in the api job table and the main table + # reformat and clean the uploaded data ---- required_cols <- c( "date", "pt_id", "location_1", "location_2", "location_3", @@ -134,9 +134,9 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { # parse the date (if it hasn't been parsed) and confirm if it's well parsed (the assumption here is that, data older than year 2000 won't be uploaded). 
data <- parse_date(data) stopifnot("Start year should reasonably be after year 2000" = lubridate::year(min(data$date)) > 2000) - + db_tidy <- data %>% - dplyr::arrange(date) %>% + dplyr::arrange(date) %>% dplyr::mutate(pt_id = seq.int(max_ptid + 1, max_ptid + nrow(.))) %>% # to uniquely identify individual responder tidyr::pivot_longer( cols = dplyr::starts_with("question"), @@ -146,8 +146,8 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { dplyr::select(dplyr::any_of(required_cols)) %>% clean_dataframe("comment_text") %>% dplyr::mutate( - comment_id = seq.int(max_id + 1, max_id + nrow(.)) - ) # to uniquely identify individual comment + comment_id = seq.int(max_id + 1, max_id + nrow(.)) # to uniquely identify individual comment + ) # do trust specific data cleaning ---- if (trust_id == "trust_GOSH") db_tidy <- db_tidy %>% tidy_trust_gosh() @@ -155,8 +155,7 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { if (trust_id == "trust_NTH") db_tidy <- db_tidy %>% tidy_trust_nth() # call API for predictions ---- - - # prepare the data for the API + ## prepare the data for the API ---- tidy_data <- db_tidy |> dplyr::mutate(question_type = comment_type) |> dplyr::mutate( @@ -181,15 +180,15 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { dplyr::filter(question_type == api_question_code(get_golem_config("comment_1"))) } - ## sentiment prediction ---- - cat("Making sentiment predictions for", nrow(db_tidy), "comments from pxtextming API \n") + ## get prediction url ---- + cat("Making sentiment and label predictions for", nrow(db_tidy), "comments from pxtextming API \n") api_result <- get_api_pred_url(tidy_data, Sys.getenv("API_key")) + ## update api job table ---- # get the maximum job id from the api job table max_job_id <- DBI::dbGetQuery(conn, paste0("SELECT MAX(job_id) FROM api_jobs"))$`MAX(job_id)` max_job_id <- if (is.na(max_job_id)) 0 else max_job_id - # update api job table ---- job_table <- dplyr::tibble( job_id = max_job_id + 1, date = last_upload_date, @@ -202,45 +201,23 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { DBI::dbWriteTable(conn, "api_jobs", job_table, append = TRUE) - ## label prediction ---- - cat("\nMaking label predictions for", nrow(db_tidy), "comments from pxtextming API \n") - preds <- batch_predict(tidy_data) %>% - dplyr::mutate(comment_id = as.integer(comment_id)) - - cat("Done with API call. Remenber to get sentiment prediction from URL... \n") + cat("Done with API call. Remember to get prediction from URL... 
\n") - # rename the columns to make the data compatible with old data format currently in use + # final data tidy ---- + cat("Doing final data tidy \n") final_df <- db_tidy %>% - dplyr::left_join(preds, by = c("comment_id", "comment_text")) %>% + # rename the columns to make the data compatible with old data format currently in use dplyr::rename( - fft = fft_score, category = labels, + fft = fft_score, comment_txt = comment_text ) %>% dplyr::mutate( - hidden = 0 + hidden = 0, + comment_type = stringr::str_replace_all(.data$comment_type, "question", "comment"), + last_upload_date = last_upload_date # update the last upload date column with todays date ) - cat("Doing final data tidy \n") - final_df <- final_df %>% - dplyr::mutate( - comment_type = stringr::str_replace_all(.data$comment_type, "question", "comment") - ) %>% - # update the last upload date column with todays date - dplyr::mutate(last_upload_date = last_upload_date) %>% - # assign the super categories - tidyr::unnest(category) %>% # Unnest the category column into rows and columns - dplyr::mutate(super_category = assign_highlevel_categories(category)) %>% # assign super categories - dplyr::group_by(comment_id, comment_type) %>% - dplyr::summarise( - across(-tidyselect::all_of(c("category", "super_category")), unique), - across(c(category, super_category), list) # duplicate super category value is preserved. This will allow easy manipulation later. for ex. see get_tidy_filter_data() - ) %>% - dplyr::ungroup() %>% - # convert the category and super category column to raw json before loading into the database - dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, jsonlite::toJSON))) %>% - dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, charToRaw))) - - # Do a final check on the data before loading to db + # Do a final check on the data before loading to db ---- # throw error if comment_id is not unique stopifnot("values in 'comment ID' should be unique" = final_df$comment_id %>% duplicated() %>% sum() == 0) stopifnot("comment_id column should not be empty" = all(!is.na(final_df$comment_id) & final_df$comment_id != "")) diff --git a/data-raw/framework.R b/data-raw/framework.R index c60ebdff..7ac3fd7e 100644 --- a/data-raw/framework.R +++ b/data-raw/framework.R @@ -1,5 +1,10 @@ ## code to prepare `framework` dataset goes here -framework <- readxl::read_excel(here::here(app_sys(), "app/www", "FFT-QDC Framework v5 - 20230428.xlsx"), +framework <- readxl::read_excel( + here::here( + app_sys(), + "app/www", + "FFT-QDC_Framework_MVP_version_20230925.xlsx" + ), sheet = 2 ) %>% dplyr::arrange(Category, `Sub-category`) %>% diff --git a/data/framework.rda b/data/framework.rda index 0e408a7b..bd1647ce 100644 Binary files a/data/framework.rda and b/data/framework.rda differ diff --git a/inst/app/www/FFT-QDC Framework v5 - 20230428.xlsx b/inst/app/www/FFT-QDC Framework v5 - 20230428.xlsx deleted file mode 100644 index 7e307a4a..00000000 Binary files a/inst/app/www/FFT-QDC Framework v5 - 20230428.xlsx and /dev/null differ diff --git a/inst/app/www/FFT-QDC_Framework_MVP_version_20230925.xlsx b/inst/app/www/FFT-QDC_Framework_MVP_version_20230925.xlsx new file mode 100644 index 00000000..d017039d Binary files /dev/null and b/inst/app/www/FFT-QDC_Framework_MVP_version_20230925.xlsx differ diff --git a/inst/app/www/crit-table.css b/inst/app/www/crit-table.css deleted file mode 100644 index 12c0eea1..00000000 --- a/inst/app/www/crit-table.css +++ /dev/null @@ -1,52 +0,0 @@ -.tag { - display: inline-block; - padding: 2px 
12px; - border-radius: 15px; - font-weight: 600; - font-size: 12px; -} - -.crit_-5 { - background: #A50026; -} - -.crit_-4 { - background: #D73027; -} - -.crit_-3 { - background: #F46D43; -} - -.crit_-2 { - background: #FDAE61; -} - -.crit_-1 { - background: #FEE08B; -} - -.crit_0 { - background: #FFFFBF; -} - -.crit_1 { - background: #D9EF8B; -} - -.crit_2 { - background: #A6D96A; -} - -.crit_3 { - background: #66BD63; -} - -.crit_4 { - background: #1A9850; -} - -.crit_5 { - background: #006837; -} - diff --git a/inst/app/www/custom.css b/inst/app/www/custom.css deleted file mode 100644 index e69de29b..00000000 diff --git a/inst/app/www/framework_MVP_version.jpeg b/inst/app/www/framework_MVP_version.jpeg new file mode 100644 index 00000000..4b33715a Binary files /dev/null and b/inst/app/www/framework_MVP_version.jpeg differ diff --git a/inst/app/www/framework_v5.png b/inst/app/www/framework_v5.png deleted file mode 100644 index fdcccf66..00000000 Binary files a/inst/app/www/framework_v5.png and /dev/null differ diff --git a/inst/app/www/github_link.html b/inst/app/www/github_link.html deleted file mode 100644 index 8e1cf38e..00000000 --- a/inst/app/www/github_link.html +++ /dev/null @@ -1 +0,0 @@ -'' diff --git a/man/drop_na_for_col.Rd b/man/drop_na_for_col.Rd new file mode 100644 index 00000000..3a8bd469 --- /dev/null +++ b/man/drop_na_for_col.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/golem_utils_server.R +\name{drop_na_for_col} +\alias{drop_na_for_col} +\title{Find rows containing missing values in all specified +column while keeping rows where any contains values} +\usage{ +drop_na_for_col(df, vars, negate = TRUE) +} +\arguments{ +\item{df}{A data frame} + +\item{vars}{list of strings containing the columns} + +\item{negate}{logical, whether to return the rows where the NA check is true or otherwise +default is `TRUE`} +} +\value{ +data frame +} +\description{ +Find rows containing missing values in all specified +column while keeping rows where any contains values +} diff --git a/man/get_api_pred_url.Rd b/man/get_api_pred_url.Rd index 0e409d5e..bbc201f5 100644 --- a/man/get_api_pred_url.Rd +++ b/man/get_api_pred_url.Rd @@ -4,12 +4,15 @@ \alias{get_api_pred_url} \title{Get the prediction URL or the data from the `pxtextmining API`} \usage{ -get_api_pred_url(data, api_key) +get_api_pred_url(data, api_key, target = "ms") } \arguments{ \item{data}{Dataframe with column `comment_id`, `comment_text` and `question_type`} \item{api_key}{api key to access the api} + +\item{target}{to determine the type of prediction, the options are `m` for multilabel, +`s` for sentiment or `ms` for both. 
default to `ms`} } \value{ a dataframe or a url to get the data diff --git a/renv.lock b/renv.lock index 878f528b..9de1b28f 100644 --- a/renv.lock +++ b/renv.lock @@ -575,9 +575,9 @@ }, "datamods": { "Package": "datamods", - "Version": "1.4.1", + "Version": "1.4.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "data.table", @@ -595,13 +595,13 @@ "tools", "writexl" ], - "Hash": "bfe71f2780b426763072e059457f7650" + "Hash": "4966dd81eb82568e17834efec656a9db" }, "dbplyr": { "Package": "dbplyr", - "Version": "2.3.3", + "Version": "2.3.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "DBI", "R", @@ -623,7 +623,7 @@ "vctrs", "withr" ], - "Hash": "d6fd1b1440c1cacc6623aaa4e9fe352b" + "Hash": "63534894354af6b2587b7aa518a5193a" }, "desc": { "Package": "desc", @@ -766,15 +766,15 @@ }, "experiencesdashboard": { "Package": "experiencesdashboard", - "Version": "0.7.0", + "Version": "0.8.1", "Source": "GitHub", "Remotes": "nhs-r-community/NHSRtheme", "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteRepo": "experiencesdashboard", "RemoteUsername": "CDU-data-science-team", - "RemoteRef": "HEAD", - "RemoteSha": "d66c471fa48cbfeaa38c620b75de73dea0995645", + "RemoteRef": "185-update-framework", + "RemoteSha": "d4c79c21ad34158029d28de30b19d4c28518ca57", "Requirements": [ "ComplexUpset", "DBI", @@ -814,7 +814,7 @@ "writexl", "xml2" ], - "Hash": "6fd034f3941b2fd3ede91240d233bc2c" + "Hash": "fafe49bf06ef9aa3408c8e35c5d8f32d" }, "fansi": { "Package": "fansi", @@ -1293,16 +1293,16 @@ }, "lubridate": { "Package": "lubridate", - "Version": "1.9.2", + "Version": "1.9.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "generics", "methods", "timechange" ], - "Hash": "e25f18436e3efd42c7c590a1c4c15390" + "Hash": "680ad542fbcf801442c83a6ac5a2126c" }, "magrittr": { "Package": "magrittr", @@ -2650,16 +2650,16 @@ }, "withr": { "Package": "withr", - "Version": "2.5.0", + "Version": "2.5.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "grDevices", "graphics", "stats" ], - "Hash": "c0e49a9760983e81e55cdd9be92e7182" + "Hash": "d77c6f74be05c33164e33fbc85540cae" }, "writexl": { "Package": "writexl", diff --git a/rsconnect/documents/API_url_tracker.Rmd/connect.strategyunitwm.nhs.uk/oluwasegun.apejoye/api_url_tracker.dcf b/rsconnect/documents/API_url_tracker.Rmd/connect.strategyunitwm.nhs.uk/oluwasegun.apejoye/api_url_tracker.dcf new file mode 100644 index 00000000..9a39bf91 --- /dev/null +++ b/rsconnect/documents/API_url_tracker.Rmd/connect.strategyunitwm.nhs.uk/oluwasegun.apejoye/api_url_tracker.dcf @@ -0,0 +1,12 @@ +name: api_url_tracker +title: API_url_tracker +username: oluwasegun.apejoye +account: oluwasegun.apejoye +server: connect.strategyunitwm.nhs.uk +hostUrl: https://connect.strategyunitwm.nhs.uk/__api__ +appId: 149 +bundleId: 915 +url: https://connect.strategyunitwm.nhs.uk/api_tracker/ +version: 1 +asMultiple: FALSE +asStatic: FALSE diff --git a/tests/p2_db_data_template.rds b/tests/p2_db_data_template.rds index c1fd5e2c..0b8a6dac 100644 Binary files a/tests/p2_db_data_template.rds and b/tests/p2_db_data_template.rds differ diff --git a/tests/prediction.rds b/tests/prediction.rds new file mode 100644 index 00000000..4df8a2b9 Binary files /dev/null and b/tests/prediction.rds differ diff --git a/tests/testthat/_snaps/app_ui.md b/tests/testthat/_snaps/app_ui.md index d418d487..2c9be26d 100644 --- 
a/tests/testthat/_snaps/app_ui.md +++ b/tests/testthat/_snaps/app_ui.md @@ -53,17 +53,6 @@
-     This dashboard uses a machine learning tool (pxtextmining API) to assign one or more sub-categories to free text comments.
-     The categories and subcategories developed in the Qualitative Data Categorisation (QDC) framework are used.
-     The visualisations and interactivity in this dashboard have been chosen to help users to engage with the comments and
-     not just quantify the data.
-     The QDC framework is an evidence-based work that has been carefully designed. it has multiple categories, each with its
-     own set of sub-categories. The category groups similar topics together in a meaningful way to help users navigate the framework
-     more easily. The sub-categories are the actual topics that better reflect the underlying data.
-     A high-level visual of the categories and sub-categories is displayed below:
-     To see detailed description of the sub-categories, please expand the categories below
+     Introduction to the Data Categorisation Framework
+     This dashboard utilizes the pxtextmining API, a machine learning tool, to assign one or more subcategories to free-text comments
+     based on the Qualitative Data Categorization (QDC) framework. The QDC framework is an evidence-based work that has been designed
+     with several categories, each with its own set of subcategories. The categories group similar topics together to make it easier
+     for users to navigate the framework, while the subcategories reflect the actual topics that better represent the underlying data.
+     The dashboard's visualizations and intuitive interactivity are thoughtfully created to help users effectively engage with the
+     comments and not merely quantify the data. Below is a high-level visual of the categories and subcategories:
+     To see detailed description of the sub-categories, kindly click on the category to expand it.
-     To get further detail about the data categorisation framework and the dashboard
+     Making best use of the qualitative comments
+     This dashboard should be used to facilitate initial exploration of your qualitative data, before
+     drawing fuller insight from the underlying qualitative comments. Before using the dashboard, you
+     should read the good practice guidance on the documentation page:
+     Good practice guidance.
+     This includes important
+     information, tips, and advice to help you maximise your use of the categorised qualitative comments,
+     whilst avoiding the risks around relying on the quantification of qualitative data.
+     To get further detail about the data categorisation framework and the dashboard
+     including some illustrative examples for each of the sub-categories.
-     Please see the
-     Patient Experience - QDC documentation Page
+     Please see the
+ Patient Experience - QDC documentation Page. # mod_trend_overlap_ui works diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 455df4df..207516c5 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -12,9 +12,7 @@ tidy_trust_data <- readr::read_csv(here::here("tests/test_data.csv"), phase_2_upload_data <- readRDS(here::here("tests/phase_2_test_template.rds")) # read data that mimic the database data -phase_2_db_data <- readRDS(here::here("tests/p2_db_data_template.rds")) %>% - dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, rawToChar))) %>% - dplyr::mutate(sentiment = sample(1:5, nrow(.), replace = T)) +phase_2_db_data <- readRDS(here::here("tests/p2_db_data_template.rds")) unique_data <- phase_2_db_data %>% dplyr::distinct(pt_id, .keep_all = TRUE) diff --git a/tests/testthat/test-all-modules.R b/tests/testthat/test-all-modules.R index af7882b9..70a543de 100644 --- a/tests/testthat/test-all-modules.R +++ b/tests/testthat/test-all-modules.R @@ -414,8 +414,8 @@ test_that("mod_summary_record_server works correctly", { expect_no_error(output$current_individualBox) # assert all global variable are expected values after a call to output$dynamic_summary_record - expect_equal(global$n_responses, 1981) - expect_equal(global$n_individuals, 1000) + expect_equal(global$n_responses, 1900) + expect_equal(global$n_individuals, 959) expect_equal(global$current_responses, 10) expect_equal(global$current_individuals, 5) }) @@ -734,8 +734,8 @@ test_that("mod_overlap_1_server works correctly when given some inputs", { expect_no_error(output$dynamic_select_category_ui) # the upset plot contents are working - expect_equal(nrow(upset_data()), 120) - expect_equal(length(all_categories()), 26) + expect_equal(nrow(upset_data()), 110) + expect_equal(length(all_categories()), 15) expect_equal(length(filtered_categories()), 3) expect_no_error(output$category_upset) @@ -787,4 +787,4 @@ test_that("module server works well if passed data is empty", { # show expected result expect_true(grepl("No data to show", session$returned)) }) -}) +}) \ No newline at end of file diff --git a/tests/testthat/test-database-data.R b/tests/testthat/test-database-data.R index 0beea112..93635c7a 100644 --- a/tests/testthat/test-database-data.R +++ b/tests/testthat/test-database-data.R @@ -20,8 +20,7 @@ test_that("Global Databse Data pass all checks", { add_results(report) # the single row per category data - phase_2_db_data %>% - get_tidy_filter_data(TRUE) %>% + single_labeled_filter_data %>% data.validator::validate(name = "Verifying Single row filter_data") %>% data.validator::validate_if(has_all_names( "date", "location_1", @@ -31,7 +30,7 @@ test_that("Global Databse Data pass all checks", { validate_rows(col_concat, is_uniq, comment_id, category, description = "comment_id and category combination is unique") %>% validate_if(!is.na(comment_id) & comment_id != "", description = "comment_id column is not empty") %>% validate_if(!is.na(pt_id) & pt_id != "", description = "pat_id column is not empty") %>% - validate_if(is.character(category), description = "category column is a list") %>% + validate_if(is.character(category), description = "category column is a character") %>% validate_if(inherits(date, "Date"), description = "date column is in date format") %>% validate_if(lubridate::year(min(date)) > 2015, description = "Start Date is after 2015") %>% add_results(report) diff --git a/tests/testthat/test-fct_api_pred.R b/tests/testthat/test-fct_api_pred.R index 0eb46ec0..da97dd12 100644 --- 
a/tests/testthat/test-fct_api_pred.R +++ b/tests/testthat/test-fct_api_pred.R @@ -1,16 +1,3 @@ -test_that("API and Framework are in sync", { - single_filter_data <- phase_2_db_data %>% - dplyr::mutate(across(category, ~ purrr::map(.x, jsonlite::fromJSON))) %>% # unserialise the category data from json into list - tidyr::unnest(category) %>% # Unnest the category column into rows and columns - dplyr::mutate(super_category = assign_highlevel_categories(category)) - - not_in_framewk <- single_filter_data %>% - filter(super_category == "Other Category") %>% - pull(category) %>% - unique() - expect_true(all(not_in_framewk == c("Labelling not possible", "Admission"))) -}) - test_that("api_pred and batch_predict is working...", { text_data <- data.frame( comment_id = c(1, 2, 3), @@ -21,7 +8,7 @@ test_that("api_pred and batch_predict is working...", { question_type = c("what_good", "could_improve", "nonspecific") ) - expect_error(api_pred(text_data |> + expect_no_error(api_pred(text_data |> select(-question_type))) expect_no_error(api_pred(text_data)) @@ -30,10 +17,51 @@ test_that("api_pred and batch_predict is working...", { batch_predict() expect_equal(nrow(preds), 3) - expect_true(all(c("comment_id", "comment_text", "labels") %in% names(preds))) + expect_true(all(c("comment_id", "labels") == names(preds))) expect_equal(sum(is.na(preds$labels)), 0) }) +test_that("assign_highlevel_categories function is working and API vs Framework are in sync", { + + withr::local_envvar("R_CONFIG_ACTIVE" = "phase_2_demo") + + text_data <- phase_2_db_data %>% + head(100) %>% + dplyr::mutate(comment_text = comment_txt, + question_type = comment_type) %>% + dplyr::select(comment_id, comment_text, question_type) %>% + dplyr::mutate( + question_type = stringr::str_replace_all( + question_type, "comment_1", + api_question_code(get_golem_config("comment_1")) + ) + ) |> + dplyr::mutate( + question_type = stringr::str_replace_all( + question_type, "comment_2", + api_question_code(get_golem_config("comment_2")) + ) + ) + + preds <- text_data |> + batch_predict() + + # assign the super category + preds <- preds %>% + rename(category = labels) %>% + tidyr::unnest(category) %>% + dplyr::mutate(super_category = assign_highlevel_categories(category)) + + not_in_framewk <- preds %>% + filter(super_category == "Unknown Category") %>% + pull(category) %>% + unique() + + # all assigned labels must be in framework + expect_true(all(preds$category %in% framework$`Sub-category`)) + expect_true(length(not_in_framewk) == 0) +}) + test_that("api_question_code works", { expect_equal(api_question_code("What did we do well"), "what_good") expect_equal(api_question_code("What could be improved"), "could_improve") @@ -63,12 +91,16 @@ test_that("get_api_pred_url works and return expected result", { expect_equal(get_api_pred_url(df, "api_key"), "data") - # throw expected error - stub(get_api_pred_url, "httr::POST", list(message = "failed call")) + # throw expected error - when api call fails + stub(get_api_pred_url, "httr::POST", list(status_code = 404, message = "failed call")) stub(get_api_pred_url, "httr::http_status", identity) stub(get_api_pred_url, "httr::status_code", identity) expect_error(get_api_pred_url(df, "api_key"), "failed call") + + # throw expected error - when wrong parameter is supplied + expect_error(get_api_pred_url(df, "api_key", "o"), + "target must be one of 'ms', 'm' or 's'") }) test_that("get_pred_from_url works and return expected result", { @@ -93,8 +125,22 @@ test_that("get_pred_from_url works and return expected 
result", { expect_error(get_pred_from_url(df), "can't reach server") }) +test_that("transform_prediction_for_database works and return expected result", { + prediction <- readRDS(here::here("tests/prediction.rds")) + + result <- transform_prediction_for_database(prediction) + + expect_true(nrow(result) == nrow(prediction)) + expect_true(result$comment_id |> duplicated() |> sum() == 0) + expect_true(inherits(result$super_category, "list")) + expect_true(inherits(result$category, "list")) + + # throw expected error + expect_error(transform_prediction_for_database(select(prediction, -sentiment))) +}) + test_that("track_api_job correctly handles completed job", { - test_pred <- data.frame(comment_id = 1:5, prediction = 1:5) + test_pred <- readRDS(here::here("tests/prediction.rds")) # Create a mock for the `get_pred_from_url` function stub(track_api_job, "get_pred_from_url", test_pred) # return a test prediction dataframe @@ -118,13 +164,18 @@ test_that("track_api_job correctly handles completed job", { # Create a new mock for the database connection (DBI::dbExecute) m2 <- mock(TRUE, cycle = TRUE) stub(track_api_job, "DBI::dbExecute", m2) # return a success status + m3 <- mock(TRUE) + stub(track_api_job, "dplyr::rows_update", m3) # mock dplyr::rows_update + stub(track_api_job, "dplyr::tbl", identical) + # Call the function with the mocks - Check it completes track_api_job(test_job, conn = NULL, write_db = TRUE) |> expect_output("Job 1 prediction has been successfully written to database") - # expect DBI::dbExecute is called twice - expect_called(m2, 2) + + expect_called(m2, 2) # expect DBI::dbExecute is called twice + expect_called(m3, 1) # expect dplyr::rows_update is called once }) test_that("track_api_job correctly handles pending job", { @@ -165,45 +216,47 @@ test_that("track_api_job correctly handles failed job", { test_that("check_api_job works and return expected result", { - api_table <- data.frame( job_id = 1, url = "url", date = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), - no_comments = 5000, + no_comments = 5000, trist_id = "phase_2_demo", email = NA, user = NA, status = "uploaded" ) - + # mock lubridate::now to return a particular time (20mins from the job table time) stub(check_api_job, "lubridate::now", as.POSIXct("2023-09-15 12:20:00", tz = "UTC")) - + # no pending job - status uploaded # mock database connection dplyr::tbl to return job table with status 'completed' stub(check_api_job, "dplyr::tbl", api_table) - result1 <- check_api_job('pool', 'phase_2_demo', schedule_time = 10) + result1 <- check_api_job("pool", "phase_2_demo", schedule_time = 10) expect_equal(result1, list("latest_time" = NULL, "estimated_wait_time" = 0)) - - + + # still busy if job status is submitted # mock database connection dplyr::tbl to return job table with status 'submitted' stub(check_api_job, "dplyr::tbl", mutate(api_table, status = "submitted")) - - result2 <- check_api_job('pool', 'phase_2_demo', schedule_time = 10) - - expect_equal(result2, list("latest_time" = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), - "estimated_wait_time" = 20)) - - + + result2 <- check_api_job("pool", "phase_2_demo", schedule_time = 10) + + expect_equal(result2, list( + "latest_time" = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), + "estimated_wait_time" = 20 + )) + + # still busy if job status is completed # mock database connection dplyr::tbl to return job table with status 'completed' stub(check_api_job, "dplyr::tbl", mutate(api_table, status = "submitted")) - - result3 <- check_api_job('pool', 'phase_2_demo', 
schedule_time = 10) - - expect_equal(result3, list("latest_time" = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), - "estimated_wait_time" = 20)) - -}) \ No newline at end of file + + result3 <- check_api_job("pool", "phase_2_demo", schedule_time = 10) + + expect_equal(result3, list( + "latest_time" = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), + "estimated_wait_time" = 20 + )) +}) diff --git a/tests/testthat/test-general_helpers.R b/tests/testthat/test-general_helpers.R index 16019cc8..3e6d5d76 100644 --- a/tests/testthat/test-general_helpers.R +++ b/tests/testthat/test-general_helpers.R @@ -194,4 +194,20 @@ test_that("transform_sentiment works and return expected result", { expect_equal(as.character(result$sentiment), c("Negative", "Positive", NA, NA)) expect_equal(nrow(df), nrow(result)) -}) \ No newline at end of file +}) + +test_that("drop_na_for_col works and return expected result", { + + df <- data.frame(id = 1:4, x = c(1, 2, NA, NA), y = c("a", NA, "b", NA), z = c("a", NA, "b", NA)) + vars <- c('x', 'y', 'z') + vars2 = c('z', 'y') + + result1 <- drop_na_for_col(df, vars) + result2 <- drop_na_for_col(df, vars2) + result3 <- drop_na_for_col(df, vars2, F) + + expect_identical(result1, filter(df, id != 4)) + expect_identical(result2, filter(df, id %in% c(1,3))) + expect_identical(result3, filter(df, id %in% c(2,4))) + expect_identical(names(df), names(result1), names(result2), names(result3)) +}) diff --git a/tests/testthat/test-tidy_data.R b/tests/testthat/test-tidy_data.R index bcdedfe8..732c9b4f 100644 --- a/tests/testthat/test-tidy_data.R +++ b/tests/testthat/test-tidy_data.R @@ -1,7 +1,7 @@ test_that("Data tidies in all trusts", { new_data <- tidy_trust_data |> - mutate(comment_txt = sample(c(unique(comment_txt), "", NA, " ", 'NA', 'NULL', NA), + mutate(comment_txt = sample(c(unique(comment_txt), "", NA, " ", 'NA', 'NULL', NA, "A"), nrow(tidy_trust_data))) db_data <- new_data %>% @@ -12,6 +12,11 @@ test_that("Data tidies in all trusts", { expect_true(all(text != '')) expect_true(all(text != ' ')) expect_true(all(!is.na(text))) + + db_data |> + mutate(no_char = nchar(comment_txt)) |> + filter(no_char <= 1) |> + nrow() |> expect_equal(0) testthat::expect_gt(nrow(db_data), 0) }) diff --git a/tests/testthat/test-tidy_upload.R b/tests/testthat/test-tidy_upload.R index 82e8d3fb..8abbe6e3 100644 --- a/tests/testthat/test-tidy_upload.R +++ b/tests/testthat/test-tidy_upload.R @@ -133,13 +133,11 @@ test_that("uploaded data works", { conn = NULL, trust_id = get_golem_config("trust_name"), user = "test user", - write_db = F + write_db = FALSE ) expect_called(m, 1) expect_true(inherits(test_upload, "data.frame")) - expect_true(inherits(test_upload$super_category, "list")) - expect_true(inherits(test_upload$category, "list")) expect_no_error( upload_data( @@ -147,7 +145,7 @@ test_that("uploaded data works", { conn = NULL, trust_id = get_golem_config("trust_name"), user = "test user", - write_db = T + write_db = TRUE ) ) })
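
Below is an illustrative sketch (not part of the changeset) of how the newly exported `drop_na_for_col()` helper behaves. The column names mirror its use in `R/app_server.R`, and the expected results follow the new unit test; the `df` object is a made-up example.

```r
# Sketch only: assumes the package is installed with the new export available.
library(experiencesdashboard)

df <- data.frame(
  id             = 1:3,
  category       = c("Staff", NA, NA),
  super_category = c("Staff", NA, NA),
  sentiment      = c(1L, 2L, NA)
)

# Default (negate = TRUE): keep rows where at least one of the listed columns has a value.
drop_na_for_col(df, c("category", "super_category", "sentiment"))
#> rows 1 and 2 are kept; row 3 (all three columns NA) is dropped

# negate = FALSE: return only the rows where all of the listed columns are NA.
drop_na_for_col(df, c("category", "super_category", "sentiment"), negate = FALSE)
#> only row 3 is returned
```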
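
For context, a hedged sketch of the asynchronous prediction flow this changeset moves to. The function names, the `target` argument, and the `API_key` environment variable come from `R/fct_api_pred.R` and `R/tidy_upload.R`; the example comments are made up, and the sketch assumes the API replies with a callback URL rather than returning the data directly.

```r
# Sketch only: assumes a reachable pxtextmining API and a valid key in API_key.
library(experiencesdashboard)

tidy_data <- data.frame(
  comment_id    = 1:2,
  comment_text  = c("Staff were kind and helpful", "Parking was difficult"),
  question_type = c("what_good", "could_improve")
)

# Submit the comments; target = "ms" (the default) requests both multilabel and
# sentiment predictions in one call.
pred_url <- get_api_pred_url(tidy_data, Sys.getenv("API_key"), target = "ms")

# Later (e.g. from the scheduled API_url_tracker.Rmd), poll the URL and, once a
# data frame comes back, reshape it for the main database table.
# transform_prediction_for_database() is internal (@noRd), hence the ::: here.
prediction <- get_pred_from_url(pred_url)
if (is.data.frame(prediction)) {
  db_ready <- experiencesdashboard:::transform_prediction_for_database(prediction)
}
```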
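
Finally, a small sketch of the stricter comment filter now applied in `tidy_all_trusts()`. The data frame is a hypothetical minimal example containing only the columns the new predicates touch.

```r
# Sketch only: replicates the updated comment-text checks from R/tidy_data.R.
library(dplyr)

comments <- data.frame(
  comment_txt = c("Great care", "Did not answer", "#NAME?", "N/A", "A", "OK"),
  hidden      = 0
)

comments |>
  filter(
    !is.na(comment_txt),
    !comment_txt %in% c("Did not answer", "NULL", "#NAME?", "NA", "N/A", "", " "),
    nchar(comment_txt) > 1,
    hidden == 0
  )
#> keeps "Great care" and "OK"; placeholder and single-character comments are dropped
```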