Skip to content

Commit

Permalink
Merge pull request #189 from CDU-data-science-team/185-update-framework
Browse files Browse the repository at this point in the history
185 update framework
  • Loading branch information
asegun-cod authored Oct 12, 2023
2 parents d144a20 + 63007ac commit cb8d1ac
Show file tree
Hide file tree
Showing 33 changed files with 347 additions and 252 deletions.
10 changes: 1 addition & 9 deletions API_url_tracker.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,7 @@ output: html_document

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
# install experience dashboard if not already install
if ("experiencesdashboard" %in% installed.packages()) {
if ("remotes" %in% installed.packages()) install.packages("remotes", repos = "http://cran.r-project.org")
remotes::install_github("CDU-data-science-team/experiencesdashboard", upgrade = "never", quiet = TRUE)
}
library(experiencesdashboard)
library(dplyr)
```

# Last rendered at `r round(Sys.time())`
Expand All @@ -40,7 +32,7 @@ pending_jobs <- dplyr::tbl(
"api_jobs"
)
) |>
filter(status == "submitted") |>
dplyr::filter(status == "submitted") |>
dplyr::collect()
```

Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(api_pred)
export(calculate_table)
export(clean_dataframe)
export(demographic_distribution)
export(drop_na_for_col)
export(get_api_pred_url)
export(get_pred_from_url)
export(html_decoder)
Expand Down
6 changes: 3 additions & 3 deletions R/app_server.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ app_server <- function(input, output, session) {
latest_time <- api_jobs$latest_time
wait_time <- api_jobs$estimated_wait_time


if (!is.null(latest_time) & data_exists) {
# filter out all the unfinished rows(api job time is same as last_upload_date when doing data upload)
db_data <- db_data %>%
Expand Down Expand Up @@ -275,9 +274,10 @@ app_server <- function(input, output, session) {
dplyr::arrange(date)
}

# TRANSFORM THE SENTIMENT COLUMN
# Transform the sentiment column
return_data <- return_data %>%
transform_sentiment()
transform_sentiment() %>%
drop_na_for_col(c('category', 'super_category', 'sentiment'))

# also return a dataset with unique individuals
unique_data <- return_data %>%
Expand Down
13 changes: 3 additions & 10 deletions R/app_ui.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
#' @noRd
app_ui <- function(request) {
tagList(

# Leave this function for adding external resources
golem_add_external_resources(),
# List the first level UI elements here
dashboardPage(
dashboardHeader(
Expand Down Expand Up @@ -37,12 +34,9 @@ app_ui <- function(request) {
),
dashboardBody(
### Changing theme ----
fresh::use_theme(nhs_shiny_theme()), # <-- use fresh object theme to style the whole dashboard

#### Add css file for buttons ----
includeCSS(system.file("app/www/", "button-style.css",
package = "experiencesdashboard"
)),
fresh::use_theme(nhs_shiny_theme()), # use fresh object theme to style the whole dashboard
# Leave this function for adding external resources
golem_add_external_resources(),
HTML('<a href="https://github.com/CDU-data-science-team/experiencesdashboard"
class="github-corner" aria-label="View source on GitHub"><svg width="80"
height="80" viewBox="0 0 250 250" style="fill:#64CEAA; color:#fff; position:
Expand All @@ -69,7 +63,6 @@ app_ui <- function(request) {
tabItems(
tabItem(
tabName = "experiences-user",
# h1("Service User Experiences"),
mod_patient_experience_ui("patient_experience_ui_1")
)
)
Expand Down
57 changes: 47 additions & 10 deletions R/fct_api_pred.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,24 @@
#'
#' @param data Dataframe with column `comment_id`, `comment_text` and `question_type`
#' @param api_key api key to access the api
#' @param target to determine the type of prediction, the options are `m` for multilabel,
#' `s` for sentiment or `ms` for both. default to `ms`
#' @return a dataframe or a url to get the data
#' @export
get_api_pred_url <- function(data, api_key) {
get_api_pred_url <- function(data, api_key, target = "ms") {
endpoint <- "https://pxtextmining-docker-api.azurewebsites.net/api/StartContainerInstance"

# validate the target argument
stopifnot("target must be one of 'ms', 'm' or 's'" = target %in% c("ms", "m", "s"))

# convert the dataframe to nested list
json_data <- data |>
purrr::array_tree()

r <- httr::POST(
endpoint,
body = json_data,
query = list(code = api_key),
query = list(code = api_key, target = target),
encode = "json"
)

Expand Down Expand Up @@ -62,6 +67,34 @@ get_pred_from_url <- function(api_url) {
}
}

#' convert the API prediction into a format that is suitable for the database
#'
#' @param prediction dataframe, prediction from the pxtextmining api with column "comment_id","labels" and "sentiment"
#'
#' @return dataframe, transform prediction data
#' @noRd
transform_prediction_for_database <- function(prediction) {
stopifnot('"comment_id","labels" and "sentiment" columns are required' = all(c("comment_id", "labels", "sentiment") %in% names(prediction)))

prediction |>
dplyr::rename(
category = labels
) |>
dplyr::mutate(dplyr::across(c(comment_id, sentiment), as.integer)) |>
# assign the super categories
tidyr::unnest(category) |> # Unnest the category column into rows and columns
dplyr::mutate(super_category = assign_highlevel_categories(category)) |> # assign super categories
dplyr::group_by(comment_id) %>%
dplyr::summarise(
across(-tidyselect::all_of(c("category", "super_category")), unique),
across(c(category, super_category), list) # duplicate super category value is preserved. This will allow easy manipulation later. for ex. see get_tidy_filter_data()
) |>
dplyr::ungroup() |>
# convert the category and super category column to raw json before loading into the database
dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, jsonlite::toJSON))) |>
dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, charToRaw)))
}

#' Track the API job table. If prediction is done, it writes it to the main table and delete the job from the api job table
#'
#' @param job an instance of the api job table
Expand Down Expand Up @@ -91,20 +124,22 @@ track_api_job <- function(job, conn, write_db = TRUE) {
if (is.data.frame(prediction)) {
cat("Job", job_id, "is done \n")

prediction <- prediction |>
dplyr::mutate(dplyr::across(dplyr::everything(), as.integer))

# update the job status as complete (Prediction has been returned)
DBI::dbExecute(conn, paste("UPDATE api_jobs SET status='completed' WHERE job_id =", job_id))

if (!write_db) {
# update the job status as uploaded (successfully write prediction to main table)
DBI::dbExecute(conn, paste("UPDATE api_jobs SET status='uploaded' WHERE job_id =", job_id))

return(prediction)
}

prediction <- prediction |>
transform_prediction_for_database()

# update the main table
cat("Updating database with prediction \n")

dplyr::rows_update(
dplyr::tbl(conn, trust_id),
prediction,
Expand Down Expand Up @@ -192,8 +227,8 @@ api_question_code <- function(value) {
#' @param pool database connection
#' @param trust_id string, the trust id
#' @param schedule_time integer, number of mins used in the Rmarkdown schedule that tracks the api job table.
#' default is 15mins. its only needed to guess how long users might have to wait for the prediction to complete
#'
#' default is 15mins. it's only needed to guess how long users might have to wait for the prediction to complete
#'
#' @return list of latest_time and estimated_wait_time (in mins)
#' @noRd
check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), schedule_time = 15) {
Expand All @@ -207,15 +242,15 @@ check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), sched
dplyr::filter(
trust_id == !!trust_id,
status %in% c("submitted", "completed")
)
)

if (data |>
dplyr::tally() |>
dplyr::pull() < 1) {
return(list("latest_time" = NULL, "estimated_wait_time" = 0))
} else {
data <- data |>
dplyr::filter(date == max(date, na.rm = TRUE))
dplyr::filter(date == max(date, na.rm = TRUE))
}

latest_time <- data |>
Expand All @@ -236,5 +271,7 @@ check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), sched
TRUE ~ 120
)

estimated_wait_time <- if (estimated_wait_time > 0) estimated_wait_time else 10

list("latest_time" = latest_time, "estimated_wait_time" = estimated_wait_time)
}
30 changes: 28 additions & 2 deletions R/golem_utils_server.R
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ assign_highlevel_categories <- function(sub_cats) {
hl_cat <- framework %>%
dplyr::filter(`Sub-category` == v) %>%
dplyr::pull(Category)
return(if (length(hl_cat) != 0) hl_cat else "Other Category")
return(if (length(hl_cat) != 0) hl_cat else "Unknown Category")
},
simplify = TRUE, USE.NAMES = FALSE
)
Expand Down Expand Up @@ -420,4 +420,30 @@ transform_sentiment <- function(data, sentiment_column = 'sentiment') {
dplyr::mutate(
sentiment = factor(!!rlang::sym(sentiment_column), levels = c("Positive", "Neutral/Mixed", "Negative"))
)
}
}

#' Find rows containing missing values in all specified
#' column while keeping rows where any contains values
#'
#' @param df A data frame
#' @param vars list of strings containing the columns
#' @param negate logical, whether to return the rows where the NA check is true or otherwise
#' default is `TRUE`
#'
#' @return data frame
#' @export
drop_na_for_col <- function(df, vars, negate = TRUE) {

diff <- setdiff(vars, names(df))
stopifnot("Some column doesn't exist in data" = length(diff) == 0)

if (negate) {
return(
df %>%
dplyr::filter(rowSums(is.na(dplyr::select(., dplyr::all_of(vars)))) != length(vars))
)
}

df %>%
dplyr::filter(rowSums(is.na(dplyr::select(., dplyr::all_of(vars)))) == length(vars))
}
2 changes: 1 addition & 1 deletion R/mod_data_management.R
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ mod_data_management_server <- function(id, db_conn, filter_data, data_exists, us
showModal(modalDialog(
title = strong("Success!"),
HTML(paste(
h5(paste(nrow(raw_df), "records successfully imported. The new data is not ready yet, the dashboard is busy predicting the sentiment score.")),
h5(paste(nrow(raw_df), "records successfully imported. The dashboard is still processing data to predict category and sentiment")),
h4(strong(em(paste("Please check back or refresh your browser in about", wait_time, "mins to access the new data"))))
)),
easyClose = FALSE
Expand Down
52 changes: 32 additions & 20 deletions R/mod_documentation_page.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,41 @@
mod_documentation_page_ui <- function(id) {
ns <- NS(id)
tagList(
br(),
p("This dashboard uses a machine learning tool (pxtextmining API) to assign one or more sub-categories to free text comments.
The categories and subcategories developed in the Qualitative Data Categorisation (QDC) framework are used.
The visualisations and interactivity in this dashboard have been chosen to help users to engage with the comments and
not just quantify the data."),
p("The QDC framework is an evidence-based work that has been carefully designed. it has multiple categories, each with its
own set of sub-categories. The category groups similar topics together in a meaningful way to help users navigate the framework
more easily. The sub-categories are the actual topics that better reflect the underlying data.
A high-level visual of the categories and sub-categories is displayed below:"),
img(src = "www/framework_v5.png", width = "100%"),
hr(),
p("To see detailed description of the sub-categories, please expand the categories below"),
h3(strong("Introduction to the Data Categorisation Framework"), style = "color : #005EB8;"),
p("This dashboard utilizes the pxtextmining API, a machine learning tool, to assign one or more subcategories to free-text comments
based on the Qualitative Data Categorization (QDC) framework. The QDC framework is an evidence-based work that has been designed
with several categories, each with its own set of subcategories. The categories group similar topics together to make it easier
for users to navigate the framework, while the subcategories reflect the actual topics that better represent the underlying data."),
p("The dashboard's visualizations and intuitive interactivity are thoughtfully created to help users effectively engage with the
comments and not merely quantify the data. Below is a high-level visual of the categories and subcategories:"),
img(src = "www/framework_MVP_version.jpeg", width = "100%"),
rep_br(2),
p("To see detailed description of the sub-categories, kindly click on the category to expand it."),
DT::DTOutput(ns("framework_table")),
hr(),
tagList(
p("To get further detail about the data categorisation framework and the dashboard
h4(strong("Making best use of the qualitative comments"), style = "color : #005EB8;"),
HTML(paste0(
"This dashboard should be used to facilitate initial exploration of your qualitative data, before
drawing fuller insight from the underlying qualitative comments. Before using the dashboard, you
should read the good practice guidance on the documentation page: ",
a(strong("Good practice guidance."),
href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/dashboard/dashboard3.html",
target = "_blank"
),
"This includes important
information, tips, and advice to help you maximise your use of the categorised qualitative comments,
whilst avoiding the risks around relying on the quantification of qualitative data."
)),
rep_br(2),
HTML(paste(
"To get further detail about the data categorisation framework and the dashboard
including some illustrative examples for each of the sub-categories.
Please see the"),
a(p("Patient Experience - QDC documentation Page"),
href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/framework/framework3.html",
target = "_blank"
)
Please see the",
a(strong("Patient Experience - QDC documentation Page."),
href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/framework/framework3.html",
target = "_blank"
)
))
),
)
}
Expand All @@ -42,7 +55,6 @@ mod_documentation_page_server <- function(id) {

# table
output$framework_table <- DT::renderDT({

# JaveScript code to collapse the table
callback_js <- DT::JS(
"table.on('click', 'tr.dtrg-group', function () {",
Expand Down
7 changes: 3 additions & 4 deletions R/tidy_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@ tidy_all_trusts <- function(data) {
dplyr::filter(
!is.na(comment_txt),
!is.null(comment_txt),
comment_txt != "NA",
comment_txt != "NULL",
comment_txt != "",
comment_txt != " ",
!comment_txt %in% c("Did not answer", "NULL",
"#NAME?", "NA", "N/A", "", " "),
nchar(comment_txt) > 1,
hidden == 0
)
} else {
Expand Down
Loading

0 comments on commit cb8d1ac

Please sign in to comment.