diff --git a/R/filter_text.R b/R/filter_text.R index ac369be..f567e36 100644 --- a/R/filter_text.R +++ b/R/filter_text.R @@ -23,11 +23,13 @@ filter_df <- function(text_data, comment_type_filter = NULL) { #' #' @noRd #' -#' @return string as a vector +#' @return vector of string input_sanitizer <- function(filter_text) { - strsplit(filter_text, ",")[[1]] %>% + sanitized_input <- strsplit(filter_text, ",")[[1]] %>% stringr::str_to_lower() %>% stringr::str_remove_all("[^[:alnum:]]") + + return(sanitized_input[sanitized_input != ""]) } #' Takes a list of comments and return it in lowercase @@ -43,19 +45,29 @@ lowered_comments <- function(comments) { stringr::str_to_lower() } -#' Find list of words in a string +#' Check if a word or its variation (singular or plural +#' version - if they are valid words) exist in a comment #' -#' @param string a string -#' @param search_strings list of strings with search terms in it +#' @param comment string, a single comment #' @param search_fn type of search ('and', 'or') +#' @param sanitized_input list of strings, best derived from `input_sanitizer`` +#' @return logical #' @noRd -check_match <- function(string, search_strings, search_fn) { - search_fn( - lapply( - search_strings, - \(p) grepl(paste0("\\b.*", p, ".*\\b"), string) - ) %>% unlist() - ) +match_term_or_stem <- function(comment, search_fn, sanitized_input) { + tokens <- gsub("[[:punct:] ]+", " ", comment) %>% + strsplit(" ") %>% + unlist() + + lapply(sanitized_input, \(x) any( + x %in% tokens, # actual word + ifelse(stringr::str_sub(x, start = -1L) == "s", + stringr::str_sub(x, end = -2L) %in% tokens, + FALSE + ), # attempt to get the singular form of a word by removing last "s" + paste0(x, "s") %in% tokens # the plural version + )) %>% + unlist() %>% + search_fn() } #' Find comment in list of comments where all/any of search strings exist @@ -78,23 +90,10 @@ check_match <- function(string, search_strings, search_fn) { #' ) matched_comments <- function(lowered_comments, search_fn, search_strings) { lowered_comments %>% - lapply(\(comment) check_match(comment, search_strings, search_fn)) %>% + lapply(\(comment) match_term_or_stem(comment, search_fn, search_strings)) %>% unlist() } -#' Find the stem word version of each search term -#' -#' @description sanitise the input strings and add their stemmed words to the list of words to search -#' @param filter_text comma separated string with search terms in -#' @noRd -sanitized_search_strings <- function(filter_text) { - sanitized_input <- input_sanitizer(filter_text) - stemmed_input <- tm::stemDocument(sanitized_input) - search_strings <- stemmed_input[stemmed_input != ""] - - return(search_strings) -} - #' Return text from a freetext search #' @description combine search terms with OR and AND then return text from a specific #' question @@ -132,7 +131,7 @@ return_search_text <- function(text_data, filter_text, comment_type_filter = NUL ) # sanitise the input strings and add their stemmed words to the list of words to search - search_strings <- sanitized_search_strings(filter_text) + search_strings <- input_sanitizer(filter_text) cat("search strings: ") # for logging print(search_strings) # for logging diff --git a/R/mod_search_text.R b/R/mod_search_text.R index 4d795c4..9a826d8 100644 --- a/R/mod_search_text.R +++ b/R/mod_search_text.R @@ -29,11 +29,14 @@ mod_search_text_server <- function(id, filter_data) { ns <- session$ns output$dynamic_comment_ui <- renderUI({ - req(text_search()) + validate( + need(text_search(), "Please enter a search term") + ) req(return_data()) tagList( - uiOutput(ns("comment_output")) + uiOutput(ns("comment_output")) %>% + shinycssloaders::withSpinner() ) }) @@ -60,17 +63,12 @@ mod_search_text_server <- function(id, filter_data) { prep_data_for_comment_table(in_tidy_format = FALSE) }) - ## the comments tables ---- output$comment_output <- renderUI({ - validate( - need(text_search(), "Please enter a search term") - ) - mod_comment_download_server( ns("comment_download_1"), return_data(), - filepath = sanitized_search_strings(text_search()) %>% + filepath = input_sanitizer(text_search()) %>% paste(collapse = "_") %>% paste0("-") ) diff --git a/tests/testthat/test-filter_text.R b/tests/testthat/test-filter_text.R index 0bd984e..75d1742 100644 --- a/tests/testthat/test-filter_text.R +++ b/tests/testthat/test-filter_text.R @@ -1,4 +1,12 @@ test_that("filter_text works", { + + # check_match and match_term_or_stem works as expected + comment = "There was a bit of a wait, which can’t be helped during shortage of doctors" + match_term_or_stem(comment, all, "doctors") |> + expect_true() + match_term_or_stem(comment, all, "doctor") |> + expect_true() + # lowered_comments works as expected expect_equal("this and ThaT" %>% lowered_comments(), "this and that") @@ -23,7 +31,7 @@ test_that("filter_text works", { ), comment_type = c("comment_1", "comment_1", "comment_1") ), - filter_text = "Uick, time, appraisal$, &!", + filter_text = "qUick, time, appraisal$, &!", comment_type_filter = "comment_1", search_type = "and", return_dataframe = FALSE ) @@ -48,8 +56,8 @@ test_that("filter_text works", { expect_equal(test3, c("
no matching result")) - # test 4 sanitized_search_strings() work correctly - expect_equal(sanitized_search_strings("DocTORS, staffs"), c("doctor", "staff")) + # test 4 input_sanitizer() work correctly + expect_equal(input_sanitizer("DocTORS, staffs"), c("doctors", "staffs")) }) test_that("filter_text works - stemmed words version of each search term are searched and return", { @@ -66,13 +74,13 @@ test_that("filter_text works - stemmed words version of each search term are sea search_type <- "and" return_dataframe <- FALSE - search_strings <- sanitized_search_strings(filter_text) - expect_equal(search_strings, c("doctor")) + search_strings <- input_sanitizer(filter_text) + expect_equal(search_strings, c("doctors")) - t_d <- filter_df(text_data, comment_type_filter) - expect_equal(nrow(t_d), 3) + result <- filter_df(text_data, comment_type_filter) + expect_equal(nrow(result), 3) - comments <- t_d |> + comments <- result |> dplyr::pull(comment_txt) expect_equal(comments, c( "tricky times, I need emergency doctor appointment", diff --git a/tests/testthat/test-search_text.R b/tests/testthat/test-search_text.R index 26bea06..46027a2 100644 --- a/tests/testthat/test-search_text.R +++ b/tests/testthat/test-search_text.R @@ -40,7 +40,7 @@ test_that("Searching text works", { comment_type_filter = "comment_2", search_type = "or" ) - expect_equal(nrow(test_text), 8) + expect_equal(nrow(test_text), 3) test_text <- return_search_text( text_data = tidy_trust_data,