attempt to fix #191

The-Strategy-Unit · Oct 30, 2023 · 0446515 · 0446515
1 parent 4ab268d
commit 0446515
Show file tree

Hide file tree

Showing 4 changed files with 49 additions and 44 deletions.
diff --git a/R/filter_text.R b/R/filter_text.R
@@ -23,11 +23,13 @@ filter_df <- function(text_data, comment_type_filter = NULL) {
 #'
 #' @noRd
 #'
-#' @return string as a vector
+#' @return vector of string
 input_sanitizer <- function(filter_text) {
-  strsplit(filter_text, ",")[[1]] %>%
+  sanitized_input <- strsplit(filter_text, ",")[[1]] %>%
     stringr::str_to_lower() %>%
     stringr::str_remove_all("[^[:alnum:]]")
+
+  return(sanitized_input[sanitized_input != ""])
 }
 
 #' Takes a list of comments and return it in lowercase
@@ -43,19 +45,29 @@ lowered_comments <- function(comments) {
     stringr::str_to_lower()
 }
 
-#' Find list of words in a string
+#' Check if a word or its variation (singular or plural
+#' version - if they are valid words) exist in a comment
 #'
-#' @param string a string
-#' @param search_strings list of strings with search terms in it
+#' @param comment string, a single comment
 #' @param search_fn type of search ('and', 'or')
+#' @param sanitized_input list of strings, best derived from `input_sanitizer``
+#' @return logical
 #' @noRd
-check_match <- function(string, search_strings, search_fn) {
-  search_fn(
-    lapply(
-      search_strings,
-      \(p) grepl(paste0("\\b.*", p, ".*\\b"), string)
-    ) %>% unlist()
-  )
+match_term_or_stem <- function(comment, search_fn, sanitized_input) {
+  tokens <- gsub("[[:punct:] ]+", " ", comment) %>%
+    strsplit(" ") %>%
+    unlist()
+
+  lapply(sanitized_input, \(x) any(
+    x %in% tokens, # actual word
+    ifelse(stringr::str_sub(x, start = -1L) == "s",
+      stringr::str_sub(x, end = -2L)  %in% tokens,
+      FALSE
+    ), # attempt to get the singular form of a word by removing last "s"
+    paste0(x, "s") %in% tokens # the plural version
+  )) %>%
+    unlist() %>%
+    search_fn()
 }
 
 #' Find comment in list of comments where all/any of search strings exist
@@ -78,23 +90,10 @@ check_match <- function(string, search_strings, search_fn) {
 #' )
 matched_comments <- function(lowered_comments, search_fn, search_strings) {
   lowered_comments %>%
-    lapply(\(comment) check_match(comment, search_strings, search_fn)) %>%
+    lapply(\(comment) match_term_or_stem(comment, search_fn, search_strings)) %>%
     unlist()
 }
 
-#' Find the stem word version of each search term
-#'
-#' @description sanitise the input strings and add their stemmed words to the list of words to search
-#' @param filter_text comma separated string with search terms in
-#' @noRd
-sanitized_search_strings <- function(filter_text) {
-  sanitized_input <- input_sanitizer(filter_text)
-  stemmed_input <- tm::stemDocument(sanitized_input)
-  search_strings <- stemmed_input[stemmed_input != ""]
-
-  return(search_strings)
-}
-
 #' Return text from a freetext search
 #' @description combine search terms with OR and AND then return text from a specific
 #' question
@@ -132,7 +131,7 @@ return_search_text <- function(text_data, filter_text, comment_type_filter = NUL
   )
 
   # sanitise the input strings and add their stemmed words to the list of words to search
-  search_strings <- sanitized_search_strings(filter_text)
+  search_strings <- input_sanitizer(filter_text)
   cat("search strings: ") # for logging
   print(search_strings) # for logging
 

diff --git a/R/mod_search_text.R b/R/mod_search_text.R
@@ -29,11 +29,14 @@ mod_search_text_server <- function(id, filter_data) {
     ns <- session$ns
 
     output$dynamic_comment_ui <- renderUI({
-      req(text_search())
+      validate(
+        need(text_search(), "Please enter a search term")
+      )
       req(return_data())
 
       tagList(
-        uiOutput(ns("comment_output"))
+        uiOutput(ns("comment_output")) %>%
+          shinycssloaders::withSpinner()
       )
     })
 
@@ -60,17 +63,12 @@ mod_search_text_server <- function(id, filter_data) {
         prep_data_for_comment_table(in_tidy_format = FALSE)
     })
 
-
     ## the comments tables ----
     output$comment_output <- renderUI({
-      validate(
-        need(text_search(), "Please enter a search term")
-      )
-
       mod_comment_download_server(
         ns("comment_download_1"),
         return_data(),
-        filepath = sanitized_search_strings(text_search()) %>%
+        filepath = input_sanitizer(text_search()) %>%
           paste(collapse = "_") %>%
           paste0("-")
       )

diff --git a/tests/testthat/test-filter_text.R b/tests/testthat/test-filter_text.R
@@ -1,4 +1,12 @@
 test_that("filter_text works", {
+
+  # check_match and match_term_or_stem works as expected
+  comment = "There was a bit of a wait, which can’t be helped during shortage of doctors"
+  match_term_or_stem(comment, all, "doctors") |>
+    expect_true()
+  match_term_or_stem(comment, all, "doctor") |>
+    expect_true()
+
   # lowered_comments works as expected
   expect_equal("this   and ThaT" %>%
     lowered_comments(), "this and that")
@@ -23,7 +31,7 @@ test_that("filter_text works", {
       ),
       comment_type = c("comment_1", "comment_1", "comment_1")
     ),
-    filter_text = "Uick, time, appraisal$, &!",
+    filter_text = "qUick, time, appraisal$, &!",
     comment_type_filter = "comment_1", search_type = "and",
     return_dataframe = FALSE
   )
@@ -48,8 +56,8 @@ test_that("filter_text works", {
 
   expect_equal(test3, c("<hr/>no matching result"))
 
-  # test 4 sanitized_search_strings() work correctly
-  expect_equal(sanitized_search_strings("DocTORS, staffs"), c("doctor", "staff"))
+  # test 4 input_sanitizer() work correctly
+  expect_equal(input_sanitizer("DocTORS, staffs"), c("doctors", "staffs"))
 })
 
 test_that("filter_text works - stemmed words version of each search term are searched and return", {
@@ -66,13 +74,13 @@ test_that("filter_text works - stemmed words version of each search term are sea
   search_type <- "and"
   return_dataframe <- FALSE
 
-  search_strings <- sanitized_search_strings(filter_text)
-  expect_equal(search_strings, c("doctor"))
+  search_strings <- input_sanitizer(filter_text)
+  expect_equal(search_strings, c("doctors"))
 
-  t_d <- filter_df(text_data, comment_type_filter)
-  expect_equal(nrow(t_d), 3)
+  result <- filter_df(text_data, comment_type_filter)
+  expect_equal(nrow(result), 3)
 
-  comments <- t_d |>
+  comments <- result |>
     dplyr::pull(comment_txt)
   expect_equal(comments, c(
     "tricky times, I need emergency doctor appointment",

diff --git a/tests/testthat/test-search_text.R b/tests/testthat/test-search_text.R
@@ -40,7 +40,7 @@ test_that("Searching text works", {
     comment_type_filter = "comment_2",
     search_type = "or"
   )
-  expect_equal(nrow(test_text), 8)
+  expect_equal(nrow(test_text), 3)
 
   test_text <- return_search_text(
     text_data = tidy_trust_data,