Skip to content

Commit

Permalink
feat: ordering of dictionaries + simple tests
Browse files Browse the repository at this point in the history
  • Loading branch information
advieser committed Oct 1, 2024
1 parent e335493 commit 8559688
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 20 deletions.
3 changes: 1 addition & 2 deletions R/Dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ Dictionary = R6::R6Class("Dictionary",
)

dictionary_get = function(self, key, ..., .dicts_suggest) {
obj = dictionary_retrieve_item(self, key, .dicts_suggest = .dicts_suggest)
obj = dictionary_retrieve_item(self, key, .dicts_suggest)
dots = assert_list(list(...), names = "unique", .var.name = "arguments passed to Dictionary")
dictionary_initialize_item(key, obj, dots)
}
Expand Down Expand Up @@ -209,7 +209,6 @@ dictionary_initialize_item = function(key, obj, cargs = list()) {
}
}


#' @export
as.data.table.Dictionary = function(x, ...) {
setkeyv(as.data.table(list(key = x$keys())), "key")[]
Expand Down
60 changes: 42 additions & 18 deletions R/did_you_mean.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
#' @examples
#' did_you_mean("yep", c("yes", "no"))
did_you_mean = function(str, candidates) {
suggested = find_suggested(str, candidates, threshold = 0.2)
suggestions = find_suggestions(str, candidates, threshold = 0.2, max_candidates = 3L, ret_dist = FALSE)

if (!length(suggested)) {
if (!length(suggestions)) {
return("")
}
sprintf(" Did you mean %s?", str_collapse(suggested, quote = "'", sep = " / "))
sprintf(" Did you mean %s?", str_collapse(suggestions, quote = "'", sep = " / "))
}

#' @title Suggest Alternatives from Given Dictionaries
Expand All @@ -36,27 +36,42 @@ did_you_mean_dicts = function(key, dicts) {
if (is.null(dicts)) {
return("")
}
# Iterate through dicts, get suggestions, paste as messages
suggested = character(length(dicts))

# Initialize variables to store suggestions and minimum distances
suggestions = character(length(dicts))
min_distance_per_dict = numeric(length(dicts))

for (i in seq_along(dicts)) {
entries = find_suggested(key, dicts[[i]]$keys())
# Get distances and the corresponding entries for the current dictionary
distances = find_suggestions(key, dicts[[i]]$keys(), ret_dist = TRUE)
entries = names(distances)

if (length(entries)) {
suggested[[i]] = sprintf("%s: %s", names(dicts)[[i]],
str_collapse(entries, quote = "'", sep = " / "))
# Handle the case of no matches: skip the dictionary
if (!length(entries)) {
min_distance_per_dict[[i]] = NA
next
}
# Record the closest distance
min_distance_per_dict[[i]] = min(distances)

# Create a suggestion message for the current dictionary
suggestions[[i]] = sprintf("%s: %s", names(dicts)[[i]],
str_collapse(entries, quote = "'", sep = " / "))
}
# Drop elements for dicts for which no suggestions could be made
suggested = suggested[nchar(suggested) > 0L]

if (!length(suggested)) {
# Order the suggestions by their closest match
suggestions = suggestions[order(min_distance_per_dict)]
# Remove empty suggestions (i.e., dictionaries with no close matches)
valid_suggestions = suggestions[nchar(suggestions) > 0L]
# Only show 3 dictionaries with best matches
# valid_suggestions = head(valid_suggestions, 3L)

# If no valid suggestions, return an empty string
if (!length(valid_suggestions)) {
return("")
}
sprintf(" Similar entries in other dictionaries, %s?", str_collapse(suggested, sep = " or "))

# TODO: handle ordering for exact hits (order dicts appropriately?)
# TODO: maximum number of suggestions (within dict is handled by find_suggested, but not if we are looking at many dicts)
# TODO: Tests
sprintf("\nSimilar entries in other dictionaries, %s.", str_collapse(valid_suggestions, sep = ", or "))
}

#' @title Find Suggestions
Expand All @@ -67,9 +82,18 @@ did_you_mean_dicts = function(key, dicts) {
#' Candidate strings.
#' @param threshold (`numeric(1)`)\cr
#' Fraction of the number of characters in `str`. Only candidates whose edit
#' distance to `str` is at most `ceiling(threshold * nchar(str))` are kept.
#' @param max_candidates (`integer(1)`)\cr
#' Maximum number of candidates to return.
#' @param ret_dist (`logical(1)`)\cr
#' If `TRUE`, return the edit distances as a numeric vector named by candidate
#' instead of only the candidate names.
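#' @return (`character()` | named `numeric()`). The suggested candidates from `candidates`, sorted by increasing distance, or an empty vector if no close match is found. If `ret_dist` is `TRUE`, the distances named by candidate are returned instead.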
find_suggested = function(str, candidates, threshold = 0.2) {
find_suggestions = function(str, candidates, threshold = 0.2, max_candidates = 3L, ret_dist = FALSE) {
candidates = unique(candidates)
D = set_names(adist(str, candidates, ignore.case = TRUE, partial = TRUE)[1L, ], candidates)
names(head(sort(D[D <= ceiling(threshold * nchar(str))]), 3L))
sorted = head(sort(D[D <= ceiling(threshold * nchar(str))]), max_candidates)
if (ret_dist) {
sorted
} else {
names(sorted)
}
}
21 changes: 21 additions & 0 deletions tests/testthat/test_Dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,24 @@ test_that("#115", {
d$add("a", function() A$new())
expect_error(dictionary_sugar_get(d, "a", y = 10), "Did you mean")
})

test_that("similar entries in other dictionaries", {
obj = R6Class("A", public = list(x = NULL))

d = Dictionary$new()
d$add("abc", obj)

d_lookup1 = Dictionary$new()
d_lookup1$add("cde", obj)

expect_error(dictionary_sugar_get(d, "cde", .dicts_suggest = list("lookup1" = d_lookup1)), "Similar entries in other dictionaries")

d_lookup2 = Dictionary$new()
d_lookup2$add("bcd", obj)

# Dictionaries ordered by closest match per dictionary
expect_error(
dictionary_sugar_get(d, "cde", .dicts_suggest = list("lookup1" = d_lookup1, "lookup2" = d_lookup2)),
"Similar entries in other dictionaries.*lookup1.*lookup2"
)
})

0 comments on commit 8559688

Please sign in to comment.