diff --git a/API_url_tracker.Rmd b/API_url_tracker.Rmd index 1db37351..458f3f1c 100644 --- a/API_url_tracker.Rmd +++ b/API_url_tracker.Rmd @@ -7,15 +7,7 @@ output: html_document ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) - -# install experience dashboard if not already install -if ("experiencesdashboard" %in% installed.packages()) { - if ("remotes" %in% installed.packages()) install.packages("remotes", repos = "http://cran.r-project.org") - remotes::install_github("CDU-data-science-team/experiencesdashboard", upgrade = "never", quiet = TRUE) -} - library(experiencesdashboard) -library(dplyr) ``` # Last rendered at `r round(Sys.time())` @@ -40,7 +32,7 @@ pending_jobs <- dplyr::tbl( "api_jobs" ) ) |> - filter(status == "submitted") |> + dplyr::filter(status == "submitted") |> dplyr::collect() ``` diff --git a/NAMESPACE b/NAMESPACE index 764fecff..e46a42d8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(api_pred) export(calculate_table) export(clean_dataframe) export(demographic_distribution) +export(drop_na_for_col) export(get_api_pred_url) export(get_pred_from_url) export(html_decoder) diff --git a/R/app_server.R b/R/app_server.R index f7cb917a..019d63f6 100644 --- a/R/app_server.R +++ b/R/app_server.R @@ -50,7 +50,6 @@ app_server <- function(input, output, session) { latest_time <- api_jobs$latest_time wait_time <- api_jobs$estimated_wait_time - if (!is.null(latest_time) & data_exists) { # filter out all the unfinished rows(api job time is same as last_upload_date when doing data upload) db_data <- db_data %>% @@ -275,9 +274,10 @@ app_server <- function(input, output, session) { dplyr::arrange(date) } - # TRANSFORM THE SENTIMENT COLUMN + # Transform the sentiment column return_data <- return_data %>% - transform_sentiment() + transform_sentiment() %>% + drop_na_for_col(c('category', 'super_category', 'sentiment')) # also return a dataset with unique individuals unique_data <- return_data %>% diff --git a/R/app_ui.R b/R/app_ui.R index 9acd1865..5325d8b9 100644 --- a/R/app_ui.R +++ b/R/app_ui.R @@ -7,9 +7,6 @@ #' @noRd app_ui <- function(request) { tagList( - - # Leave this function for adding external resources - golem_add_external_resources(), # List the first level UI elements here dashboardPage( dashboardHeader( @@ -37,12 +34,9 @@ app_ui <- function(request) { ), dashboardBody( ### Changing theme ---- - fresh::use_theme(nhs_shiny_theme()), # <-- use fresh object theme to style the whole dashboard - - #### Add css file for buttons ---- - includeCSS(system.file("app/www/", "button-style.css", - package = "experiencesdashboard" - )), + fresh::use_theme(nhs_shiny_theme()), # use fresh object theme to style the whole dashboard + # Leave this function for adding external resources + golem_add_external_resources(), HTML(' purrr::array_tree() @@ -15,7 +20,7 @@ get_api_pred_url <- function(data, api_key) { r <- httr::POST( endpoint, body = json_data, - query = list(code = api_key), + query = list(code = api_key, target = target), encode = "json" ) @@ -62,6 +67,34 @@ get_pred_from_url <- function(api_url) { } } +#' convert the API prediction into a format that is suitable for the database +#' +#' @param prediction dataframe, prediction from the pxtextmining api with column "comment_id","labels" and "sentiment" +#' +#' @return dataframe, transform prediction data +#' @noRd +transform_prediction_for_database <- function(prediction) { + stopifnot('"comment_id","labels" and "sentiment" columns are required' = all(c("comment_id", "labels", "sentiment") %in% 
names(prediction))) + + prediction |> + dplyr::rename( + category = labels + ) |> + dplyr::mutate(dplyr::across(c(comment_id, sentiment), as.integer)) |> + # assign the super categories + tidyr::unnest(category) |> # Unnest the category column into rows and columns + dplyr::mutate(super_category = assign_highlevel_categories(category)) |> # assign super categories + dplyr::group_by(comment_id) %>% + dplyr::summarise( + across(-tidyselect::all_of(c("category", "super_category")), unique), + across(c(category, super_category), list) # duplicate super category value is preserved. This will allow easy manipulation later. for ex. see get_tidy_filter_data() + ) |> + dplyr::ungroup() |> + # convert the category and super category column to raw json before loading into the database + dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, jsonlite::toJSON))) |> + dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, charToRaw))) +} + #' Track the API job table. If prediction is done, it writes it to the main table and delete the job from the api job table #' #' @param job an instance of the api job table @@ -91,20 +124,22 @@ track_api_job <- function(job, conn, write_db = TRUE) { if (is.data.frame(prediction)) { cat("Job", job_id, "is done \n") - prediction <- prediction |> - dplyr::mutate(dplyr::across(dplyr::everything(), as.integer)) - # update the job status as complete (Prediction has been returned) DBI::dbExecute(conn, paste("UPDATE api_jobs SET status='completed' WHERE job_id =", job_id)) if (!write_db) { # update the job status as uploaded (successfully write prediction to main table) DBI::dbExecute(conn, paste("UPDATE api_jobs SET status='uploaded' WHERE job_id =", job_id)) - + return(prediction) } + prediction <- prediction |> + transform_prediction_for_database() + # update the main table + cat("Updating database with prediction \n") + dplyr::rows_update( dplyr::tbl(conn, trust_id), prediction, @@ -192,8 +227,8 @@ api_question_code <- function(value) { #' @param pool database connection #' @param trust_id string, the trust id #' @param schedule_time integer, number of mins used in the Rmarkdown schedule that tracks the api job table. -#' default is 15mins. its only needed to guess how long users might have to wait for the prediction to complete -#' +#' default is 15mins. 
it's only needed to guess how long users might have to wait for the prediction to complete +#' #' @return list of latest_time and estimated_wait_time (in mins) #' @noRd check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), schedule_time = 15) { @@ -207,7 +242,7 @@ check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), sched dplyr::filter( trust_id == !!trust_id, status %in% c("submitted", "completed") - ) + ) if (data |> dplyr::tally() |> @@ -215,7 +250,7 @@ check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), sched return(list("latest_time" = NULL, "estimated_wait_time" = 0)) } else { data <- data |> - dplyr::filter(date == max(date, na.rm = TRUE)) + dplyr::filter(date == max(date, na.rm = TRUE)) } latest_time <- data |> @@ -236,5 +271,7 @@ check_api_job <- function(pool, trust_id = get_golem_config("trust_name"), sched TRUE ~ 120 ) + estimated_wait_time <- if (estimated_wait_time > 0) estimated_wait_time else 10 + list("latest_time" = latest_time, "estimated_wait_time" = estimated_wait_time) } diff --git a/R/golem_utils_server.R b/R/golem_utils_server.R index fdf5d35d..001ab602 100644 --- a/R/golem_utils_server.R +++ b/R/golem_utils_server.R @@ -343,7 +343,7 @@ assign_highlevel_categories <- function(sub_cats) { hl_cat <- framework %>% dplyr::filter(`Sub-category` == v) %>% dplyr::pull(Category) - return(if (length(hl_cat) != 0) hl_cat else "Other Category") + return(if (length(hl_cat) != 0) hl_cat else "Unknown Category") }, simplify = TRUE, USE.NAMES = FALSE ) @@ -420,4 +420,30 @@ transform_sentiment <- function(data, sentiment_column = 'sentiment') { dplyr::mutate( sentiment = factor(!!rlang::sym(sentiment_column), levels = c("Positive", "Neutral/Mixed", "Negative")) ) -} \ No newline at end of file +} + +#' Find rows containing missing values in all specified +#' column while keeping rows where any contains values +#' +#' @param df A data frame +#' @param vars list of strings containing the columns +#' @param negate logical, whether to return the rows where the NA check is true or otherwise +#' default is `TRUE` +#' +#' @return data frame +#' @export +drop_na_for_col <- function(df, vars, negate = TRUE) { + + diff <- setdiff(vars, names(df)) + stopifnot("Some column doesn't exist in data" = length(diff) == 0) + + if (negate) { + return( + df %>% + dplyr::filter(rowSums(is.na(dplyr::select(., dplyr::all_of(vars)))) != length(vars)) + ) + } + + df %>% + dplyr::filter(rowSums(is.na(dplyr::select(., dplyr::all_of(vars)))) == length(vars)) +} diff --git a/R/mod_data_management.R b/R/mod_data_management.R index 11042940..fade330e 100644 --- a/R/mod_data_management.R +++ b/R/mod_data_management.R @@ -471,7 +471,7 @@ mod_data_management_server <- function(id, db_conn, filter_data, data_exists, us showModal(modalDialog( title = strong("Success!"), HTML(paste( - h5(paste(nrow(raw_df), "records successfully imported. The new data is not ready yet, the dashboard is busy predicting the sentiment score.")), + h5(paste(nrow(raw_df), "records successfully imported. 
The dashboard is still processing data to predict category and sentiment")), h4(strong(em(paste("Please check back or refresh your browser in about", wait_time, "mins to access the new data")))) )), easyClose = FALSE diff --git a/R/mod_documentation_page.R b/R/mod_documentation_page.R index a9ae5778..11516422 100644 --- a/R/mod_documentation_page.R +++ b/R/mod_documentation_page.R @@ -8,28 +8,41 @@ mod_documentation_page_ui <- function(id) { ns <- NS(id) tagList( - br(), - p("This dashboard uses a machine learning tool (pxtextmining API) to assign one or more sub-categories to free text comments. - The categories and subcategories developed in the Qualitative Data Categorisation (QDC) framework are used. - The visualisations and interactivity in this dashboard have been chosen to help users to engage with the comments and - not just quantify the data."), - p("The QDC framework is an evidence-based work that has been carefully designed. it has multiple categories, each with its - own set of sub-categories. The category groups similar topics together in a meaningful way to help users navigate the framework - more easily. The sub-categories are the actual topics that better reflect the underlying data. - A high-level visual of the categories and sub-categories is displayed below:"), - img(src = "www/framework_v5.png", width = "100%"), - hr(), - p("To see detailed description of the sub-categories, please expand the categories below"), + h3(strong("Introduction to the Data Categorisation Framework"), style = "color : #005EB8;"), + p("This dashboard utilizes the pxtextmining API, a machine learning tool, to assign one or more subcategories to free-text comments + based on the Qualitative Data Categorization (QDC) framework. The QDC framework is an evidence-based work that has been designed + with several categories, each with its own set of subcategories. The categories group similar topics together to make it easier + for users to navigate the framework, while the subcategories reflect the actual topics that better represent the underlying data."), + p("The dashboard's visualizations and intuitive interactivity are thoughtfully created to help users effectively engage with the + comments and not merely quantify the data. Below is a high-level visual of the categories and subcategories:"), + img(src = "www/framework_MVP_version.jpeg", width = "100%"), + rep_br(2), + p("To see detailed description of the sub-categories, kindly click on the category to expand it."), DT::DTOutput(ns("framework_table")), - hr(), tagList( - p("To get further detail about the data categorisation framework and the dashboard + h4(strong("Making best use of the qualitative comments"), style = "color : #005EB8;"), + HTML(paste0( + "This dashboard should be used to facilitate initial exploration of your qualitative data, before + drawing fuller insight from the underlying qualitative comments. Before using the dashboard, you + should read the good practice guidance on the documentation page: ", + a(strong("Good practice guidance."), + href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/dashboard/dashboard3.html", + target = "_blank" + ), + "This includes important + information, tips, and advice to help you maximise your use of the categorised qualitative comments, + whilst avoiding the risks around relying on the quantification of qualitative data." 
+ )), + rep_br(2), + HTML(paste( + "To get further detail about the data categorisation framework and the dashboard including some illustrative examples for each of the sub-categories. - Please see the"), - a(p("Patient Experience - QDC documentation Page"), - href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/framework/framework3.html", - target = "_blank" - ) + Please see the", + a(strong("Patient Experience - QDC documentation Page."), + href = "https://cdu-data-science-team.github.io/PatientExperience-QDC/framework/framework3.html", + target = "_blank" + ) + )) ), ) } @@ -42,7 +55,6 @@ mod_documentation_page_server <- function(id) { # table output$framework_table <- DT::renderDT({ - # JaveScript code to collapse the table callback_js <- DT::JS( "table.on('click', 'tr.dtrg-group', function () {", diff --git a/R/tidy_data.R b/R/tidy_data.R index 69609a3e..0732aebd 100644 --- a/R/tidy_data.R +++ b/R/tidy_data.R @@ -12,10 +12,9 @@ tidy_all_trusts <- function(data) { dplyr::filter( !is.na(comment_txt), !is.null(comment_txt), - comment_txt != "NA", - comment_txt != "NULL", - comment_txt != "", - comment_txt != " ", + !comment_txt %in% c("Did not answer", "NULL", + "#NAME?", "NA", "N/A", "", " "), + nchar(comment_txt) > 1, hidden == 0 ) } else { diff --git a/R/tidy_upload.R b/R/tidy_upload.R index b2be4b39..288cf18a 100644 --- a/R/tidy_upload.R +++ b/R/tidy_upload.R @@ -75,13 +75,13 @@ clean_dataframe <- function(data, comment_column) { data %>% dplyr::mutate( dplyr::across( - where(is.character), + where(is.character), \(.x) stringr::str_replace_all(.x, "[^[:alnum:][:punct:]]+", " ") # remove non-graphical characters from all character columns, ‘⁠[:graph: is not reliable⁠’ - ), + ), dplyr::across( where(is.character), ~ dplyr::case_when( grepl("^[?]+$", .) ~ NA_character_, # remove multiple question marks - . %in% c("NULL", "#NAME?", "NA", "N/A", "", " ") ~ NA_character_, + . %in% c("NULL", "#NAME?", "NA", "N/A", "", " ") ~ NA_character_, TRUE ~ . ) ) @@ -106,9 +106,9 @@ clean_dataframe <- function(data, comment_column) { upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { # throw error if for any reason the trust_id is not same a trust name stopifnot("trust_id should be same as trust_name" = get_golem_config("trust_name") == trust_id) - - last_upload_date = Sys.time() # to track when the data upload started. to be used in the api job table and the main table - + + last_upload_date <- Sys.time() # to track when the data upload started. to be used in the api job table and the main table + # reformat and clean the uploaded data ---- required_cols <- c( "date", "pt_id", "location_1", "location_2", "location_3", @@ -134,9 +134,9 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { # parse the date (if it hasn't been parsed) and confirm if it's well parsed (the assumption here is that, data older than year 2000 won't be uploaded). 
data <- parse_date(data) stopifnot("Start year should reasonably be after year 2000" = lubridate::year(min(data$date)) > 2000) - + db_tidy <- data %>% - dplyr::arrange(date) %>% + dplyr::arrange(date) %>% dplyr::mutate(pt_id = seq.int(max_ptid + 1, max_ptid + nrow(.))) %>% # to uniquely identify individual responder tidyr::pivot_longer( cols = dplyr::starts_with("question"), @@ -146,8 +146,8 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { dplyr::select(dplyr::any_of(required_cols)) %>% clean_dataframe("comment_text") %>% dplyr::mutate( - comment_id = seq.int(max_id + 1, max_id + nrow(.)) - ) # to uniquely identify individual comment + comment_id = seq.int(max_id + 1, max_id + nrow(.)) # to uniquely identify individual comment + ) # do trust specific data cleaning ---- if (trust_id == "trust_GOSH") db_tidy <- db_tidy %>% tidy_trust_gosh() @@ -155,8 +155,7 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { if (trust_id == "trust_NTH") db_tidy <- db_tidy %>% tidy_trust_nth() # call API for predictions ---- - - # prepare the data for the API + ## prepare the data for the API ---- tidy_data <- db_tidy |> dplyr::mutate(question_type = comment_type) |> dplyr::mutate( @@ -181,15 +180,15 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { dplyr::filter(question_type == api_question_code(get_golem_config("comment_1"))) } - ## sentiment prediction ---- - cat("Making sentiment predictions for", nrow(db_tidy), "comments from pxtextming API \n") + ## get prediction url ---- + cat("Making sentiment and label predictions for", nrow(db_tidy), "comments from pxtextming API \n") api_result <- get_api_pred_url(tidy_data, Sys.getenv("API_key")) + ## update api job table ---- # get the maximum job id from the api job table max_job_id <- DBI::dbGetQuery(conn, paste0("SELECT MAX(job_id) FROM api_jobs"))$`MAX(job_id)` max_job_id <- if (is.na(max_job_id)) 0 else max_job_id - # update api job table ---- job_table <- dplyr::tibble( job_id = max_job_id + 1, date = last_upload_date, @@ -202,45 +201,23 @@ upload_data <- function(data, conn, trust_id, user, write_db = TRUE) { DBI::dbWriteTable(conn, "api_jobs", job_table, append = TRUE) - ## label prediction ---- - cat("\nMaking label predictions for", nrow(db_tidy), "comments from pxtextming API \n") - preds <- batch_predict(tidy_data) %>% - dplyr::mutate(comment_id = as.integer(comment_id)) - - cat("Done with API call. Remenber to get sentiment prediction from URL... \n") + cat("Done with API call. Remember to get prediction from URL... 
\n") - # rename the columns to make the data compatible with old data format currently in use + # final data tidy ---- + cat("Doing final data tidy \n") final_df <- db_tidy %>% - dplyr::left_join(preds, by = c("comment_id", "comment_text")) %>% + # rename the columns to make the data compatible with old data format currently in use dplyr::rename( - fft = fft_score, category = labels, + fft = fft_score, comment_txt = comment_text ) %>% dplyr::mutate( - hidden = 0 + hidden = 0, + comment_type = stringr::str_replace_all(.data$comment_type, "question", "comment"), + last_upload_date = last_upload_date # update the last upload date column with todays date ) - cat("Doing final data tidy \n") - final_df <- final_df %>% - dplyr::mutate( - comment_type = stringr::str_replace_all(.data$comment_type, "question", "comment") - ) %>% - # update the last upload date column with todays date - dplyr::mutate(last_upload_date = last_upload_date) %>% - # assign the super categories - tidyr::unnest(category) %>% # Unnest the category column into rows and columns - dplyr::mutate(super_category = assign_highlevel_categories(category)) %>% # assign super categories - dplyr::group_by(comment_id, comment_type) %>% - dplyr::summarise( - across(-tidyselect::all_of(c("category", "super_category")), unique), - across(c(category, super_category), list) # duplicate super category value is preserved. This will allow easy manipulation later. for ex. see get_tidy_filter_data() - ) %>% - dplyr::ungroup() %>% - # convert the category and super category column to raw json before loading into the database - dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, jsonlite::toJSON))) %>% - dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, charToRaw))) - - # Do a final check on the data before loading to db + # Do a final check on the data before loading to db ---- # throw error if comment_id is not unique stopifnot("values in 'comment ID' should be unique" = final_df$comment_id %>% duplicated() %>% sum() == 0) stopifnot("comment_id column should not be empty" = all(!is.na(final_df$comment_id) & final_df$comment_id != "")) diff --git a/data-raw/framework.R b/data-raw/framework.R index c60ebdff..7ac3fd7e 100644 --- a/data-raw/framework.R +++ b/data-raw/framework.R @@ -1,5 +1,10 @@ ## code to prepare `framework` dataset goes here -framework <- readxl::read_excel(here::here(app_sys(), "app/www", "FFT-QDC Framework v5 - 20230428.xlsx"), +framework <- readxl::read_excel( + here::here( + app_sys(), + "app/www", + "FFT-QDC_Framework_MVP_version_20230925.xlsx" + ), sheet = 2 ) %>% dplyr::arrange(Category, `Sub-category`) %>% diff --git a/data/framework.rda b/data/framework.rda index 0e408a7b..bd1647ce 100644 Binary files a/data/framework.rda and b/data/framework.rda differ diff --git a/inst/app/www/FFT-QDC Framework v5 - 20230428.xlsx b/inst/app/www/FFT-QDC Framework v5 - 20230428.xlsx deleted file mode 100644 index 7e307a4a..00000000 Binary files a/inst/app/www/FFT-QDC Framework v5 - 20230428.xlsx and /dev/null differ diff --git a/inst/app/www/FFT-QDC_Framework_MVP_version_20230925.xlsx b/inst/app/www/FFT-QDC_Framework_MVP_version_20230925.xlsx new file mode 100644 index 00000000..d017039d Binary files /dev/null and b/inst/app/www/FFT-QDC_Framework_MVP_version_20230925.xlsx differ diff --git a/inst/app/www/crit-table.css b/inst/app/www/crit-table.css deleted file mode 100644 index 12c0eea1..00000000 --- a/inst/app/www/crit-table.css +++ /dev/null @@ -1,52 +0,0 @@ -.tag { - display: inline-block; - padding: 2px 
12px; - border-radius: 15px; - font-weight: 600; - font-size: 12px; -} - -.crit_-5 { - background: #A50026; -} - -.crit_-4 { - background: #D73027; -} - -.crit_-3 { - background: #F46D43; -} - -.crit_-2 { - background: #FDAE61; -} - -.crit_-1 { - background: #FEE08B; -} - -.crit_0 { - background: #FFFFBF; -} - -.crit_1 { - background: #D9EF8B; -} - -.crit_2 { - background: #A6D96A; -} - -.crit_3 { - background: #66BD63; -} - -.crit_4 { - background: #1A9850; -} - -.crit_5 { - background: #006837; -} - diff --git a/inst/app/www/custom.css b/inst/app/www/custom.css deleted file mode 100644 index e69de29b..00000000 diff --git a/inst/app/www/framework_MVP_version.jpeg b/inst/app/www/framework_MVP_version.jpeg new file mode 100644 index 00000000..4b33715a Binary files /dev/null and b/inst/app/www/framework_MVP_version.jpeg differ diff --git a/inst/app/www/framework_v5.png b/inst/app/www/framework_v5.png deleted file mode 100644 index fdcccf66..00000000 Binary files a/inst/app/www/framework_v5.png and /dev/null differ diff --git a/inst/app/www/github_link.html b/inst/app/www/github_link.html deleted file mode 100644 index 8e1cf38e..00000000 --- a/inst/app/www/github_link.html +++ /dev/null @@ -1 +0,0 @@ -'' diff --git a/man/drop_na_for_col.Rd b/man/drop_na_for_col.Rd new file mode 100644 index 00000000..3a8bd469 --- /dev/null +++ b/man/drop_na_for_col.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/golem_utils_server.R +\name{drop_na_for_col} +\alias{drop_na_for_col} +\title{Find rows containing missing values in all specified +column while keeping rows where any contains values} +\usage{ +drop_na_for_col(df, vars, negate = TRUE) +} +\arguments{ +\item{df}{A data frame} + +\item{vars}{list of strings containing the columns} + +\item{negate}{logical, whether to return the rows where the NA check is true or otherwise +default is `TRUE`} +} +\value{ +data frame +} +\description{ +Find rows containing missing values in all specified +column while keeping rows where any contains values +} diff --git a/man/get_api_pred_url.Rd b/man/get_api_pred_url.Rd index 0e409d5e..bbc201f5 100644 --- a/man/get_api_pred_url.Rd +++ b/man/get_api_pred_url.Rd @@ -4,12 +4,15 @@ \alias{get_api_pred_url} \title{Get the prediction URL or the data from the `pxtextmining API`} \usage{ -get_api_pred_url(data, api_key) +get_api_pred_url(data, api_key, target = "ms") } \arguments{ \item{data}{Dataframe with column `comment_id`, `comment_text` and `question_type`} \item{api_key}{api key to access the api} + +\item{target}{to determine the type of prediction, the options are `m` for multilabel, +`s` for sentiment or `ms` for both. 
default to `ms`} } \value{ a dataframe or a url to get the data diff --git a/renv.lock b/renv.lock index 878f528b..9de1b28f 100644 --- a/renv.lock +++ b/renv.lock @@ -575,9 +575,9 @@ }, "datamods": { "Package": "datamods", - "Version": "1.4.1", + "Version": "1.4.2", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "data.table", @@ -595,13 +595,13 @@ "tools", "writexl" ], - "Hash": "bfe71f2780b426763072e059457f7650" + "Hash": "4966dd81eb82568e17834efec656a9db" }, "dbplyr": { "Package": "dbplyr", - "Version": "2.3.3", + "Version": "2.3.4", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "DBI", "R", @@ -623,7 +623,7 @@ "vctrs", "withr" ], - "Hash": "d6fd1b1440c1cacc6623aaa4e9fe352b" + "Hash": "63534894354af6b2587b7aa518a5193a" }, "desc": { "Package": "desc", @@ -766,15 +766,15 @@ }, "experiencesdashboard": { "Package": "experiencesdashboard", - "Version": "0.7.0", + "Version": "0.8.1", "Source": "GitHub", "Remotes": "nhs-r-community/NHSRtheme", "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteRepo": "experiencesdashboard", "RemoteUsername": "CDU-data-science-team", - "RemoteRef": "HEAD", - "RemoteSha": "d66c471fa48cbfeaa38c620b75de73dea0995645", + "RemoteRef": "185-update-framework", + "RemoteSha": "d4c79c21ad34158029d28de30b19d4c28518ca57", "Requirements": [ "ComplexUpset", "DBI", @@ -814,7 +814,7 @@ "writexl", "xml2" ], - "Hash": "6fd034f3941b2fd3ede91240d233bc2c" + "Hash": "fafe49bf06ef9aa3408c8e35c5d8f32d" }, "fansi": { "Package": "fansi", @@ -1293,16 +1293,16 @@ }, "lubridate": { "Package": "lubridate", - "Version": "1.9.2", + "Version": "1.9.3", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "generics", "methods", "timechange" ], - "Hash": "e25f18436e3efd42c7c590a1c4c15390" + "Hash": "680ad542fbcf801442c83a6ac5a2126c" }, "magrittr": { "Package": "magrittr", @@ -2650,16 +2650,16 @@ }, "withr": { "Package": "withr", - "Version": "2.5.0", + "Version": "2.5.1", "Source": "Repository", - "Repository": "CRAN", + "Repository": "RSPM", "Requirements": [ "R", "grDevices", "graphics", "stats" ], - "Hash": "c0e49a9760983e81e55cdd9be92e7182" + "Hash": "d77c6f74be05c33164e33fbc85540cae" }, "writexl": { "Package": "writexl", diff --git a/rsconnect/documents/API_url_tracker.Rmd/connect.strategyunitwm.nhs.uk/oluwasegun.apejoye/api_url_tracker.dcf b/rsconnect/documents/API_url_tracker.Rmd/connect.strategyunitwm.nhs.uk/oluwasegun.apejoye/api_url_tracker.dcf new file mode 100644 index 00000000..9a39bf91 --- /dev/null +++ b/rsconnect/documents/API_url_tracker.Rmd/connect.strategyunitwm.nhs.uk/oluwasegun.apejoye/api_url_tracker.dcf @@ -0,0 +1,12 @@ +name: api_url_tracker +title: API_url_tracker +username: oluwasegun.apejoye +account: oluwasegun.apejoye +server: connect.strategyunitwm.nhs.uk +hostUrl: https://connect.strategyunitwm.nhs.uk/__api__ +appId: 149 +bundleId: 915 +url: https://connect.strategyunitwm.nhs.uk/api_tracker/ +version: 1 +asMultiple: FALSE +asStatic: FALSE diff --git a/tests/p2_db_data_template.rds b/tests/p2_db_data_template.rds index c1fd5e2c..0b8a6dac 100644 Binary files a/tests/p2_db_data_template.rds and b/tests/p2_db_data_template.rds differ diff --git a/tests/prediction.rds b/tests/prediction.rds new file mode 100644 index 00000000..4df8a2b9 Binary files /dev/null and b/tests/prediction.rds differ diff --git a/tests/testthat/_snaps/app_ui.md b/tests/testthat/_snaps/app_ui.md index d418d487..2c9be26d 100644 --- 
a/tests/testthat/_snaps/app_ui.md +++ b/tests/testthat/_snaps/app_ui.md @@ -53,17 +53,6 @@
-     This dashboard uses a machine learning tool (pxtextmining API) to assign one or more sub-categories to free text comments.
-     The categories and subcategories developed in the Qualitative Data Categorisation (QDC) framework are used.
-     The visualisations and interactivity in this dashboard have been chosen to help users to engage with the comments and
-     not just quantify the data.
-     The QDC framework is an evidence-based work that has been carefully designed. it has multiple categories, each with its
-     own set of sub-categories. The category groups similar topics together in a meaningful way to help users navigate the framework
-     more easily. The sub-categories are the actual topics that better reflect the underlying data.
-     A high-level visual of the categories and sub-categories is displayed below:
-     To see detailed description of the sub-categories, please expand the categories below
+     Introduction to the Data Categorisation Framework
+     This dashboard utilizes the pxtextmining API, a machine learning tool, to assign one or more subcategories to free-text comments
+     based on the Qualitative Data Categorization (QDC) framework. The QDC framework is an evidence-based work that has been designed
+     with several categories, each with its own set of subcategories. The categories group similar topics together to make it easier
+     for users to navigate the framework, while the subcategories reflect the actual topics that better represent the underlying data.
+     The dashboard's visualizations and intuitive interactivity are thoughtfully created to help users effectively engage with the
+     comments and not merely quantify the data. Below is a high-level visual of the categories and subcategories:
+     To see detailed description of the sub-categories, kindly click on the category to expand it.
-     To get further detail about the data categorisation framework and the dashboard
+     Making best use of the qualitative comments
+     This dashboard should be used to facilitate initial exploration of your qualitative data, before
+     drawing fuller insight from the underlying qualitative comments. Before using the dashboard, you
+     should read the good practice guidance on the documentation page:
+     Good practice guidance.
+     This includes important
+     information, tips, and advice to help you maximise your use of the categorised qualitative comments,
+     whilst avoiding the risks around relying on the quantification of qualitative data.
+     To get further detail about the data categorisation framework and the dashboard
+     including some illustrative examples for each of the sub-categories.
-     Please see the
-     Patient Experience - QDC documentation Page
+     Please see the
+ Patient Experience - QDC documentation Page. # mod_trend_overlap_ui works diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 455df4df..207516c5 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -12,9 +12,7 @@ tidy_trust_data <- readr::read_csv(here::here("tests/test_data.csv"), phase_2_upload_data <- readRDS(here::here("tests/phase_2_test_template.rds")) # read data that mimic the database data -phase_2_db_data <- readRDS(here::here("tests/p2_db_data_template.rds")) %>% - dplyr::mutate(across(c(category, super_category), ~ purrr::map(.x, rawToChar))) %>% - dplyr::mutate(sentiment = sample(1:5, nrow(.), replace = T)) +phase_2_db_data <- readRDS(here::here("tests/p2_db_data_template.rds")) unique_data <- phase_2_db_data %>% dplyr::distinct(pt_id, .keep_all = TRUE) diff --git a/tests/testthat/test-all-modules.R b/tests/testthat/test-all-modules.R index af7882b9..70a543de 100644 --- a/tests/testthat/test-all-modules.R +++ b/tests/testthat/test-all-modules.R @@ -414,8 +414,8 @@ test_that("mod_summary_record_server works correctly", { expect_no_error(output$current_individualBox) # assert all global variable are expected values after a call to output$dynamic_summary_record - expect_equal(global$n_responses, 1981) - expect_equal(global$n_individuals, 1000) + expect_equal(global$n_responses, 1900) + expect_equal(global$n_individuals, 959) expect_equal(global$current_responses, 10) expect_equal(global$current_individuals, 5) }) @@ -734,8 +734,8 @@ test_that("mod_overlap_1_server works correctly when given some inputs", { expect_no_error(output$dynamic_select_category_ui) # the upset plot contents are working - expect_equal(nrow(upset_data()), 120) - expect_equal(length(all_categories()), 26) + expect_equal(nrow(upset_data()), 110) + expect_equal(length(all_categories()), 15) expect_equal(length(filtered_categories()), 3) expect_no_error(output$category_upset) @@ -787,4 +787,4 @@ test_that("module server works well if passed data is empty", { # show expected result expect_true(grepl("No data to show", session$returned)) }) -}) +}) \ No newline at end of file diff --git a/tests/testthat/test-database-data.R b/tests/testthat/test-database-data.R index 0beea112..93635c7a 100644 --- a/tests/testthat/test-database-data.R +++ b/tests/testthat/test-database-data.R @@ -20,8 +20,7 @@ test_that("Global Databse Data pass all checks", { add_results(report) # the single row per category data - phase_2_db_data %>% - get_tidy_filter_data(TRUE) %>% + single_labeled_filter_data %>% data.validator::validate(name = "Verifying Single row filter_data") %>% data.validator::validate_if(has_all_names( "date", "location_1", @@ -31,7 +30,7 @@ test_that("Global Databse Data pass all checks", { validate_rows(col_concat, is_uniq, comment_id, category, description = "comment_id and category combination is unique") %>% validate_if(!is.na(comment_id) & comment_id != "", description = "comment_id column is not empty") %>% validate_if(!is.na(pt_id) & pt_id != "", description = "pat_id column is not empty") %>% - validate_if(is.character(category), description = "category column is a list") %>% + validate_if(is.character(category), description = "category column is a character") %>% validate_if(inherits(date, "Date"), description = "date column is in date format") %>% validate_if(lubridate::year(min(date)) > 2015, description = "Start Date is after 2015") %>% add_results(report) diff --git a/tests/testthat/test-fct_api_pred.R b/tests/testthat/test-fct_api_pred.R index 0eb46ec0..da97dd12 100644 --- 
a/tests/testthat/test-fct_api_pred.R +++ b/tests/testthat/test-fct_api_pred.R @@ -1,16 +1,3 @@ -test_that("API and Framework are in sync", { - single_filter_data <- phase_2_db_data %>% - dplyr::mutate(across(category, ~ purrr::map(.x, jsonlite::fromJSON))) %>% # unserialise the category data from json into list - tidyr::unnest(category) %>% # Unnest the category column into rows and columns - dplyr::mutate(super_category = assign_highlevel_categories(category)) - - not_in_framewk <- single_filter_data %>% - filter(super_category == "Other Category") %>% - pull(category) %>% - unique() - expect_true(all(not_in_framewk == c("Labelling not possible", "Admission"))) -}) - test_that("api_pred and batch_predict is working...", { text_data <- data.frame( comment_id = c(1, 2, 3), @@ -21,7 +8,7 @@ test_that("api_pred and batch_predict is working...", { question_type = c("what_good", "could_improve", "nonspecific") ) - expect_error(api_pred(text_data |> + expect_no_error(api_pred(text_data |> select(-question_type))) expect_no_error(api_pred(text_data)) @@ -30,10 +17,51 @@ test_that("api_pred and batch_predict is working...", { batch_predict() expect_equal(nrow(preds), 3) - expect_true(all(c("comment_id", "comment_text", "labels") %in% names(preds))) + expect_true(all(c("comment_id", "labels") == names(preds))) expect_equal(sum(is.na(preds$labels)), 0) }) +test_that("assign_highlevel_categories function is working and API vs Framework are in sync", { + + withr::local_envvar("R_CONFIG_ACTIVE" = "phase_2_demo") + + text_data <- phase_2_db_data %>% + head(100) %>% + dplyr::mutate(comment_text = comment_txt, + question_type = comment_type) %>% + dplyr::select(comment_id, comment_text, question_type) %>% + dplyr::mutate( + question_type = stringr::str_replace_all( + question_type, "comment_1", + api_question_code(get_golem_config("comment_1")) + ) + ) |> + dplyr::mutate( + question_type = stringr::str_replace_all( + question_type, "comment_2", + api_question_code(get_golem_config("comment_2")) + ) + ) + + preds <- text_data |> + batch_predict() + + # assign the super category + preds <- preds %>% + rename(category = labels) %>% + tidyr::unnest(category) %>% + dplyr::mutate(super_category = assign_highlevel_categories(category)) + + not_in_framewk <- preds %>% + filter(super_category == "Unknown Category") %>% + pull(category) %>% + unique() + + # all assigned labels must be in framework + expect_true(all(preds$category %in% framework$`Sub-category`)) + expect_true(length(not_in_framewk) == 0) +}) + test_that("api_question_code works", { expect_equal(api_question_code("What did we do well"), "what_good") expect_equal(api_question_code("What could be improved"), "could_improve") @@ -63,12 +91,16 @@ test_that("get_api_pred_url works and return expected result", { expect_equal(get_api_pred_url(df, "api_key"), "data") - # throw expected error - stub(get_api_pred_url, "httr::POST", list(message = "failed call")) + # throw expected error - when api call fails + stub(get_api_pred_url, "httr::POST", list(status_code = 404, message = "failed call")) stub(get_api_pred_url, "httr::http_status", identity) stub(get_api_pred_url, "httr::status_code", identity) expect_error(get_api_pred_url(df, "api_key"), "failed call") + + # throw expected error - when wrong parameter is supplied + expect_error(get_api_pred_url(df, "api_key", "o"), + "target must be one of 'ms', 'm' or 's'") }) test_that("get_pred_from_url works and return expected result", { @@ -93,8 +125,22 @@ test_that("get_pred_from_url works and return expected 
result", { expect_error(get_pred_from_url(df), "can't reach server") }) +test_that("transform_prediction_for_database works and return expected result", { + prediction <- readRDS(here::here("tests/prediction.rds")) + + result <- transform_prediction_for_database(prediction) + + expect_true(nrow(result) == nrow(prediction)) + expect_true(result$comment_id |> duplicated() |> sum() == 0) + expect_true(inherits(result$super_category, "list")) + expect_true(inherits(result$category, "list")) + + # throw expected error + expect_error(transform_prediction_for_database(select(prediction, -sentiment))) +}) + test_that("track_api_job correctly handles completed job", { - test_pred <- data.frame(comment_id = 1:5, prediction = 1:5) + test_pred <- readRDS(here::here("tests/prediction.rds")) # Create a mock for the `get_pred_from_url` function stub(track_api_job, "get_pred_from_url", test_pred) # return a test prediction dataframe @@ -118,13 +164,18 @@ test_that("track_api_job correctly handles completed job", { # Create a new mock for the database connection (DBI::dbExecute) m2 <- mock(TRUE, cycle = TRUE) stub(track_api_job, "DBI::dbExecute", m2) # return a success status + m3 <- mock(TRUE) + stub(track_api_job, "dplyr::rows_update", m3) # mock dplyr::rows_update + stub(track_api_job, "dplyr::tbl", identical) + # Call the function with the mocks - Check it completes track_api_job(test_job, conn = NULL, write_db = TRUE) |> expect_output("Job 1 prediction has been successfully written to database") - # expect DBI::dbExecute is called twice - expect_called(m2, 2) + + expect_called(m2, 2) # expect DBI::dbExecute is called twice + expect_called(m3, 1) # expect dplyr::rows_update is called once }) test_that("track_api_job correctly handles pending job", { @@ -165,45 +216,47 @@ test_that("track_api_job correctly handles failed job", { test_that("check_api_job works and return expected result", { - api_table <- data.frame( job_id = 1, url = "url", date = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), - no_comments = 5000, + no_comments = 5000, trist_id = "phase_2_demo", email = NA, user = NA, status = "uploaded" ) - + # mock lubridate::now to return a particular time (20mins from the job table time) stub(check_api_job, "lubridate::now", as.POSIXct("2023-09-15 12:20:00", tz = "UTC")) - + # no pending job - status uploaded # mock database connection dplyr::tbl to return job table with status 'completed' stub(check_api_job, "dplyr::tbl", api_table) - result1 <- check_api_job('pool', 'phase_2_demo', schedule_time = 10) + result1 <- check_api_job("pool", "phase_2_demo", schedule_time = 10) expect_equal(result1, list("latest_time" = NULL, "estimated_wait_time" = 0)) - - + + # still busy if job status is submitted # mock database connection dplyr::tbl to return job table with status 'submitted' stub(check_api_job, "dplyr::tbl", mutate(api_table, status = "submitted")) - - result2 <- check_api_job('pool', 'phase_2_demo', schedule_time = 10) - - expect_equal(result2, list("latest_time" = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), - "estimated_wait_time" = 20)) - - + + result2 <- check_api_job("pool", "phase_2_demo", schedule_time = 10) + + expect_equal(result2, list( + "latest_time" = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), + "estimated_wait_time" = 20 + )) + + # still busy if job status is completed # mock database connection dplyr::tbl to return job table with status 'completed' stub(check_api_job, "dplyr::tbl", mutate(api_table, status = "submitted")) - - result3 <- check_api_job('pool', 'phase_2_demo', 
schedule_time = 10) - - expect_equal(result3, list("latest_time" = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), - "estimated_wait_time" = 20)) - -}) \ No newline at end of file + + result3 <- check_api_job("pool", "phase_2_demo", schedule_time = 10) + + expect_equal(result3, list( + "latest_time" = as.POSIXct("2023-09-15 12:00:00", tz = "UTC"), + "estimated_wait_time" = 20 + )) +}) diff --git a/tests/testthat/test-general_helpers.R b/tests/testthat/test-general_helpers.R index 16019cc8..3e6d5d76 100644 --- a/tests/testthat/test-general_helpers.R +++ b/tests/testthat/test-general_helpers.R @@ -194,4 +194,20 @@ test_that("transform_sentiment works and return expected result", { expect_equal(as.character(result$sentiment), c("Negative", "Positive", NA, NA)) expect_equal(nrow(df), nrow(result)) -}) \ No newline at end of file +}) + +test_that("drop_na_for_col works and return expected result", { + + df <- data.frame(id = 1:4, x = c(1, 2, NA, NA), y = c("a", NA, "b", NA), z = c("a", NA, "b", NA)) + vars <- c('x', 'y', 'z') + vars2 = c('z', 'y') + + result1 <- drop_na_for_col(df, vars) + result2 <- drop_na_for_col(df, vars2) + result3 <- drop_na_for_col(df, vars2, F) + + expect_identical(result1, filter(df, id != 4)) + expect_identical(result2, filter(df, id %in% c(1,3))) + expect_identical(result3, filter(df, id %in% c(2,4))) + expect_identical(names(df), names(result1), names(result2), names(result3)) +}) diff --git a/tests/testthat/test-tidy_data.R b/tests/testthat/test-tidy_data.R index bcdedfe8..732c9b4f 100644 --- a/tests/testthat/test-tidy_data.R +++ b/tests/testthat/test-tidy_data.R @@ -1,7 +1,7 @@ test_that("Data tidies in all trusts", { new_data <- tidy_trust_data |> - mutate(comment_txt = sample(c(unique(comment_txt), "", NA, " ", 'NA', 'NULL', NA), + mutate(comment_txt = sample(c(unique(comment_txt), "", NA, " ", 'NA', 'NULL', NA, "A"), nrow(tidy_trust_data))) db_data <- new_data %>% @@ -12,6 +12,11 @@ test_that("Data tidies in all trusts", { expect_true(all(text != '')) expect_true(all(text != ' ')) expect_true(all(!is.na(text))) + + db_data |> + mutate(no_char = nchar(comment_txt)) |> + filter(no_char <= 1) |> + nrow() |> expect_equal(0) testthat::expect_gt(nrow(db_data), 0) }) diff --git a/tests/testthat/test-tidy_upload.R b/tests/testthat/test-tidy_upload.R index 82e8d3fb..8abbe6e3 100644 --- a/tests/testthat/test-tidy_upload.R +++ b/tests/testthat/test-tidy_upload.R @@ -133,13 +133,11 @@ test_that("uploaded data works", { conn = NULL, trust_id = get_golem_config("trust_name"), user = "test user", - write_db = F + write_db = FALSE ) expect_called(m, 1) expect_true(inherits(test_upload, "data.frame")) - expect_true(inherits(test_upload$super_category, "list")) - expect_true(inherits(test_upload$category, "list")) expect_no_error( upload_data( @@ -147,7 +145,7 @@ test_that("uploaded data works", { conn = NULL, trust_id = get_golem_config("trust_name"), user = "test user", - write_db = T + write_db = TRUE ) ) })
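
Below is an illustrative sketch (not part of the changeset) of how the newly exported `drop_na_for_col()` helper behaves. The column names mirror its use in `R/app_server.R`, and the expected results follow the new unit test; the `df` object is a made-up example.

```r
# Sketch only: assumes the package is installed with the new export available.
library(experiencesdashboard)

df <- data.frame(
  id             = 1:3,
  category       = c("Staff", NA, NA),
  super_category = c("Staff", NA, NA),
  sentiment      = c(1L, 2L, NA)
)

# Default (negate = TRUE): keep rows where at least one of the listed columns has a value.
drop_na_for_col(df, c("category", "super_category", "sentiment"))
#> rows 1 and 2 are kept; row 3 (all three columns NA) is dropped

# negate = FALSE: return only the rows where all of the listed columns are NA.
drop_na_for_col(df, c("category", "super_category", "sentiment"), negate = FALSE)
#> only row 3 is returned
```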
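
For context, a hedged sketch of the asynchronous prediction flow this changeset moves to. The function names, the `target` argument, and the `API_key` environment variable come from `R/fct_api_pred.R` and `R/tidy_upload.R`; the example comments are made up, and the sketch assumes the API replies with a callback URL rather than returning the data directly.

```r
# Sketch only: assumes a reachable pxtextmining API and a valid key in API_key.
library(experiencesdashboard)

tidy_data <- data.frame(
  comment_id    = 1:2,
  comment_text  = c("Staff were kind and helpful", "Parking was difficult"),
  question_type = c("what_good", "could_improve")
)

# Submit the comments; target = "ms" (the default) requests both multilabel and
# sentiment predictions in one call.
pred_url <- get_api_pred_url(tidy_data, Sys.getenv("API_key"), target = "ms")

# Later (e.g. from the scheduled API_url_tracker.Rmd), poll the URL and, once a
# data frame comes back, reshape it for the main database table.
# transform_prediction_for_database() is internal (@noRd), hence the ::: here.
prediction <- get_pred_from_url(pred_url)
if (is.data.frame(prediction)) {
  db_ready <- experiencesdashboard:::transform_prediction_for_database(prediction)
}
```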
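
Finally, a small sketch of the stricter comment filter now applied in `tidy_all_trusts()`. The data frame is a hypothetical minimal example containing only the columns the new predicates touch.

```r
# Sketch only: replicates the updated comment-text checks from R/tidy_data.R.
library(dplyr)

comments <- data.frame(
  comment_txt = c("Great care", "Did not answer", "#NAME?", "N/A", "A", "OK"),
  hidden      = 0
)

comments |>
  filter(
    !is.na(comment_txt),
    !comment_txt %in% c("Did not answer", "NULL", "#NAME?", "NA", "N/A", "", " "),
    nchar(comment_txt) > 1,
    hidden == 0
  )
#> keeps "Great care" and "OK"; placeholder and single-character comments are dropped
```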