Skip to content

Commit

Permalink
attempt to fix #191
Browse files Browse the repository at this point in the history
  • Loading branch information
asegun-cod committed Oct 30, 2023
1 parent 4ab268d commit 0446515
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 44 deletions.
53 changes: 26 additions & 27 deletions R/filter_text.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ filter_df <- function(text_data, comment_type_filter = NULL) {
#'
#' @noRd
#'
#' @return string as a vector
#' @return vector of string
input_sanitizer <- function(filter_text) {
strsplit(filter_text, ",")[[1]] %>%
sanitized_input <- strsplit(filter_text, ",")[[1]] %>%
stringr::str_to_lower() %>%
stringr::str_remove_all("[^[:alnum:]]")

return(sanitized_input[sanitized_input != ""])
}

#' Takes a list of comments and return it in lowercase
Expand All @@ -43,19 +45,29 @@ lowered_comments <- function(comments) {
stringr::str_to_lower()
}

#' Find list of words in a string
#' Check if a word or its variation (singular or plural
#' version - if they are valid words) exist in a comment
#'
#' @param string a string
#' @param search_strings list of strings with search terms in it
#' @param comment string, a single comment
#' @param search_fn type of search ('and', 'or')
#' @param sanitized_input list of strings, best derived from `input_sanitizer``
#' @return logical
#' @noRd
check_match <- function(string, search_strings, search_fn) {
search_fn(
lapply(
search_strings,
\(p) grepl(paste0("\\b.*", p, ".*\\b"), string)
) %>% unlist()
)
match_term_or_stem <- function(comment, search_fn, sanitized_input) {
tokens <- gsub("[[:punct:] ]+", " ", comment) %>%
strsplit(" ") %>%
unlist()

lapply(sanitized_input, \(x) any(
x %in% tokens, # actual word
ifelse(stringr::str_sub(x, start = -1L) == "s",
stringr::str_sub(x, end = -2L) %in% tokens,
FALSE
), # attempt to get the singular form of a word by removing last "s"
paste0(x, "s") %in% tokens # the plural version
)) %>%
unlist() %>%
search_fn()
}

#' Find comment in list of comments where all/any of search strings exist
Expand All @@ -78,23 +90,10 @@ check_match <- function(string, search_strings, search_fn) {
#' )
matched_comments <- function(lowered_comments, search_fn, search_strings) {
lowered_comments %>%
lapply(\(comment) check_match(comment, search_strings, search_fn)) %>%
lapply(\(comment) match_term_or_stem(comment, search_fn, search_strings)) %>%
unlist()
}

#' Find the stem word version of each search term
#'
#' @description sanitise the input strings and add their stemmed words to the list of words to search
#' @param filter_text comma separated string with search terms in
#' @noRd
sanitized_search_strings <- function(filter_text) {
sanitized_input <- input_sanitizer(filter_text)
stemmed_input <- tm::stemDocument(sanitized_input)
search_strings <- stemmed_input[stemmed_input != ""]

return(search_strings)
}

#' Return text from a freetext search
#' @description combine search terms with OR and AND then return text from a specific
#' question
Expand Down Expand Up @@ -132,7 +131,7 @@ return_search_text <- function(text_data, filter_text, comment_type_filter = NUL
)

# sanitise the input strings and add their stemmed words to the list of words to search
search_strings <- sanitized_search_strings(filter_text)
search_strings <- input_sanitizer(filter_text)
cat("search strings: ") # for logging
print(search_strings) # for logging

Expand Down
14 changes: 6 additions & 8 deletions R/mod_search_text.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ mod_search_text_server <- function(id, filter_data) {
ns <- session$ns

output$dynamic_comment_ui <- renderUI({
req(text_search())
validate(
need(text_search(), "Please enter a search term")
)
req(return_data())

tagList(
uiOutput(ns("comment_output"))
uiOutput(ns("comment_output")) %>%
shinycssloaders::withSpinner()
)
})

Expand All @@ -60,17 +63,12 @@ mod_search_text_server <- function(id, filter_data) {
prep_data_for_comment_table(in_tidy_format = FALSE)
})


## the comments tables ----
output$comment_output <- renderUI({
validate(
need(text_search(), "Please enter a search term")
)

mod_comment_download_server(
ns("comment_download_1"),
return_data(),
filepath = sanitized_search_strings(text_search()) %>%
filepath = input_sanitizer(text_search()) %>%
paste(collapse = "_") %>%
paste0("-")
)
Expand Down
24 changes: 16 additions & 8 deletions tests/testthat/test-filter_text.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
test_that("filter_text works", {

# check_match and match_term_or_stem works as expected
comment = "There was a bit of a wait, which can’t be helped during shortage of doctors"
match_term_or_stem(comment, all, "doctors") |>
expect_true()
match_term_or_stem(comment, all, "doctor") |>
expect_true()

# lowered_comments works as expected
expect_equal("this and ThaT" %>%
lowered_comments(), "this and that")
Expand All @@ -23,7 +31,7 @@ test_that("filter_text works", {
),
comment_type = c("comment_1", "comment_1", "comment_1")
),
filter_text = "Uick, time, appraisal$, &!",
filter_text = "qUick, time, appraisal$, &!",
comment_type_filter = "comment_1", search_type = "and",
return_dataframe = FALSE
)
Expand All @@ -48,8 +56,8 @@ test_that("filter_text works", {

expect_equal(test3, c("<hr/>no matching result"))

# test 4 sanitized_search_strings() work correctly
expect_equal(sanitized_search_strings("DocTORS, staffs"), c("doctor", "staff"))
# test 4 input_sanitizer() work correctly
expect_equal(input_sanitizer("DocTORS, staffs"), c("doctors", "staffs"))
})

test_that("filter_text works - stemmed words version of each search term are searched and return", {
Expand All @@ -66,13 +74,13 @@ test_that("filter_text works - stemmed words version of each search term are sea
search_type <- "and"
return_dataframe <- FALSE

search_strings <- sanitized_search_strings(filter_text)
expect_equal(search_strings, c("doctor"))
search_strings <- input_sanitizer(filter_text)
expect_equal(search_strings, c("doctors"))

t_d <- filter_df(text_data, comment_type_filter)
expect_equal(nrow(t_d), 3)
result <- filter_df(text_data, comment_type_filter)
expect_equal(nrow(result), 3)

comments <- t_d |>
comments <- result |>
dplyr::pull(comment_txt)
expect_equal(comments, c(
"tricky times, I need emergency doctor appointment",
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-search_text.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ test_that("Searching text works", {
comment_type_filter = "comment_2",
search_type = "or"
)
expect_equal(nrow(test_text), 8)
expect_equal(nrow(test_text), 3)

test_text <- return_search_text(
text_data = tidy_trust_data,
Expand Down

0 comments on commit 0446515

Please sign in to comment.