Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search suffix tree implementation #51954

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions src/CONST.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1274,6 +1274,9 @@ const CONST = {
SEARCH_OPTION_LIST_DEBOUNCE_TIME: 300,
RESIZE_DEBOUNCE_TIME: 100,
UNREAD_UPDATE_DEBOUNCE_TIME: 300,
SEARCH_CONVERT_SEARCH_VALUES: 'search_convert_search_values',
SEARCH_MAKE_TREE: 'search_make_tree',
SEARCH_BUILD_TREE: 'search_build_tree',
SEARCH_FILTER_OPTIONS: 'search_filter_options',
USE_DEBOUNCED_STATE_DELAY: 300,
LIST_SCROLLING_DEBOUNCE_TIME: 200,
Expand Down
69 changes: 64 additions & 5 deletions src/components/Search/SearchRouter/SearchRouter.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import usePolicy from '@hooks/usePolicy';
import useResponsiveLayout from '@hooks/useResponsiveLayout';
import useThemeStyles from '@hooks/useThemeStyles';
import * as CardUtils from '@libs/CardUtils';
import FastSearch from '@libs/FastSearch';
import * as OptionsListUtils from '@libs/OptionsListUtils';
import {getAllTaxRates} from '@libs/PolicyUtils';
import type {OptionData} from '@libs/ReportUtils';
Expand Down Expand Up @@ -77,6 +78,49 @@ function SearchRouter({onRouterClose}: SearchRouterProps) {
return OptionsListUtils.getSearchOptions(options, '', betas ?? []);
}, [areOptionsInitialized, betas, options]);

/**
 * Memoized search function backed by a suffix tree.
 * The tree is built from the personal details and the recent reports of `searchOptions`
 * and is only rebuilt when one of those two lists changes.
 * The returned function yields the matching entries of both lists for a given search input.
 */
const findInSearchTree = useMemo(() => {
    const fastSearch = FastSearch.createFastSearch([
        {
            data: searchOptions.personalDetails,
            // Personal details are searchable by login and, when it differs from the login, by display name:
            toSearchableString: (option) => {
                const displayName = option.participantsList?.[0]?.displayName ?? '';
                const searchTerms = [option.login ?? '', option.login !== displayName ? displayName : ''];
                return searchTerms.join();
            },
        },
        {
            data: searchOptions.recentReports,
            // Reports are searchable by text and login. Threads additionally by their alternate text,
            // chat rooms and policy expense chats additionally by their subtitle:
            toSearchableString: (option) => {
                const searchTerms = [option.text ?? '', option.login ?? ''];
                const isRoomOrPolicyExpenseChat = !!option.isChatRoom || !!option.isPolicyExpenseChat;

                if (option.isThread && option.alternateText) {
                    searchTerms.push(option.alternateText);
                } else if (!option.isThread && isRoomOrPolicyExpenseChat && option.subtitle) {
                    searchTerms.push(option.subtitle);
                }

                return searchTerms.join();
            },
        },
    ]);

    // The results come back in the same order as the data sets were registered above.
    return (searchInput: string) => {
        const [personalDetails, recentReports] = fastSearch.search(searchInput);
        return {personalDetails, recentReports};
    };
}, [searchOptions.personalDetails, searchOptions.recentReports]);

const filteredOptions = useMemo(() => {
if (debouncedInputValue.trim() === '') {
return {
Expand All @@ -87,15 +131,30 @@ function SearchRouter({onRouterClose}: SearchRouterProps) {
}

Timing.start(CONST.TIMING.SEARCH_FILTER_OPTIONS);
const newOptions = OptionsListUtils.filterOptions(searchOptions, debouncedInputValue, {sortByReportTypeInSearch: true, preferChatroomsOverThreads: true});
const newOptions = findInSearchTree(debouncedInputValue);
Timing.end(CONST.TIMING.SEARCH_FILTER_OPTIONS);

// See OptionsListUtils.filterOptions#sortByReportTypeInSearch:
const noneReportPersonalDetails = OptionsListUtils.filteredPersonalDetailsOfRecentReports(newOptions.recentReports, newOptions.personalDetails);
const recentReportsWithPersonalDetails = newOptions.recentReports.concat(noneReportPersonalDetails);
const sortedReports = OptionsListUtils.orderOptions(recentReportsWithPersonalDetails, debouncedInputValue, {
preferChatroomsOverThreads: true,
});

const userToInvite = OptionsListUtils.pickUserToInvite({
canInviteUser: true,
recentReports: sortedReports,
personalDetails: [],
searchValue: debouncedInputValue,
optionsToExclude: [{login: CONST.EMAIL.NOTIFICATIONS}],
});

return {
recentReports: newOptions.recentReports,
personalDetails: newOptions.personalDetails,
userToInvite: newOptions.userToInvite,
recentReports: sortedReports,
personalDetails: [],
userToInvite,
};
}, [debouncedInputValue, searchOptions]);
}, [debouncedInputValue, findInSearchTree]);

const recentReports: OptionData[] = useMemo(() => {
if (debouncedInputValue === '') {
Expand Down
140 changes: 140 additions & 0 deletions src/libs/FastSearch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/* eslint-disable rulesdir/prefer-at */
import CONST from '@src/CONST';
import Timing from './actions/Timing';
import SuffixUkkonenTree from './SuffixUkkonenTree';

/**
 * One data set that can be registered with `createFastSearch`:
 * the entries themselves plus a function that turns an entry into the text it can be found by.
 */
type SearchableData<T> = {
/**
 * The data that should be searchable
 */
data: T[];
/**
 * A function that generates a string from a data entry. The string's value is used for searching.
 * If you have multiple fields that should be searchable, simply concatenate them into one string and return it.
 */
toSearchableString: (data: T) => string;
};

// Certain characters appear very often in our search data (e.g. in email addresses) and don't need to be searchable.
// This set is passed as `charSetToSkip` to every `SuffixUkkonenTree.stringToNumeric` call in this file.
const charSetToSkip = new Set(['@', '.', '#', '$', '%', '&', '*', '+', '-', '/', ':', ';', '<', '=', '>', '?', '_', '~', '!', ' ', ',']);

/**
 * Creates a new "FastSearch" instance, which uses a suffix tree to find substrings in a list of strings.
 * Several data sets can be registered at once; `search` reports its results per data set, in the same order.
 *
 * Note: Building an instance for a lot of data is computationally expensive, so create it once and reuse it.
 * The searches themselves are very fast, even with a lot of data.
 *
 * @param dataSets - The data sets to index, each with a function producing the searchable string of an entry
 * @returns An object exposing `search(searchInput)`, which returns one array of unique matches per data set
 */
function createFastSearch<T>(dataSets: Array<SearchableData<T>>) {
    Timing.start(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES);
    const bufferCapacity = 400_000;
    // All data sets are written one after another into this single numeric buffer:
    let numericBuffer = new Uint8Array(bufferCapacity);
    // Maps a position in the numeric buffer back to the index of the entry inside its own data set:
    const occurrenceToIndex = new Uint32Array(bufferCapacity * 4);
    // Write cursor for the buffers. It is an object so the conversion helper can advance it in place:
    const offset = {value: 1};
    // For every data set the cursor position right after its last entry. Used to attribute an occurrence to a data set:
    const listOffsets: number[] = [];

    for (const dataSet of dataSets) {
        // Performance critical: the buffers are shared by reference, so no intermediate arrays are created per data set:
        dataToNumericRepresentation(numericBuffer, occurrenceToIndex, offset, {data: dataSet.data, toSearchableString: dataSet.toSearchableString});
        listOffsets.push(offset.value);
    }
    // Terminate the whole sequence and let the last data set include the terminator position:
    numericBuffer[offset.value] = SuffixUkkonenTree.END_CHAR_CODE;
    offset.value += 1;
    listOffsets[listOffsets.length - 1] = offset.value;
    Timing.end(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES);

    // The buffer was allocated with a fixed capacity, cut it down to the part that was actually written:
    numericBuffer = numericBuffer.slice(0, offset.value);

    // Create the suffix tree ...
    Timing.start(CONST.TIMING.SEARCH_MAKE_TREE);
    const suffixTree = SuffixUkkonenTree.makeTree(numericBuffer);
    Timing.end(CONST.TIMING.SEARCH_MAKE_TREE);

    // ... and build it:
    Timing.start(CONST.TIMING.SEARCH_BUILD_TREE);
    suffixTree.build();
    Timing.end(CONST.TIMING.SEARCH_BUILD_TREE);

    /**
     * Looks up the given input in the tree and returns the unique matching entries of each data set.
     * Throws if an occurrence reported by the tree cannot be mapped back to a data set entry.
     */
    function search(searchInput: string): T[][] {
        const {numeric} = SuffixUkkonenTree.stringToNumeric(cleanString(searchInput), {
            charSetToSkip,
            // stringToNumeric might return a list that is larger than necessary, so we clamp it to the actual size
            // (otherwise the search could fail as we include in our search empty array values):
            clamp: true,
        });
        const occurrences = suffixTree.findSubstring(Array.from(numeric));

        // One set per data set, so an entry that matches at several positions is only reported once:
        const matchesByDataSet = Array.from({length: dataSets.length}, () => new Set<T>());
        // eslint-disable-next-line @typescript-eslint/prefer-for-of
        for (let i = 0; i < occurrences.length; i++) {
            const occurrenceIndex = occurrences[i];

            // The first data set whose end offset lies behind the occurrence is the one that contains it:
            const dataSetIndex = listOffsets.findIndex((listOffset) => occurrenceIndex < listOffset);
            if (dataSetIndex === -1) {
                throw new Error(`[FastSearch] The occurrence index ${occurrenceIndex} is not in any dataset`);
            }

            const itemIndexInDataSet = occurrenceToIndex[occurrenceIndex];
            const item = dataSets[dataSetIndex].data[itemIndexInDataSet];
            if (!item) {
                throw new Error(`[FastSearch] The item with index ${itemIndexInDataSet} in dataset ${dataSetIndex} is not defined`);
            }
            matchesByDataSet[dataSetIndex].add(item);
        }

        return matchesByDataSet.map((matches) => Array.from(matches));
    }

    return {search};
}

/**
 * The suffix tree can only store string-like values and keeps them internally as numbers.
 * This function appends the numeric representation of every entry of one data set to the shared buffers.
 * After each entry a delimiter is written, and `occurrenceToIndex` records which entry a position belongs to,
 * so occurrences found later can be mapped back to the original data.
 * Entries whose searchable string is empty are skipped entirely.
 *
 * @param concatenatedNumericList - Shared output buffer for the numeric values (mutated in place)
 * @param occurrenceToIndex - Shared output buffer mapping a buffer position to the entry's index in `data` (mutated in place)
 * @param offset - Shared write cursor, advanced in place
 * @param dataSet - The entries to convert and the function producing their searchable string
 */
function dataToNumericRepresentation<T>(concatenatedNumericList: Uint8Array, occurrenceToIndex: Uint32Array, offset: {value: number}, {data, toSearchableString}: SearchableData<T>): void {
    data.forEach((option, index) => {
        const searchableText = cleanString(toSearchableString(option));

        if (searchableText.length > 0) {
            SuffixUkkonenTree.stringToNumeric(searchableText, {
                charSetToSkip,
                out: {
                    outArray: concatenatedNumericList,
                    offset,
                    outOccurrenceToIndex: occurrenceToIndex,
                    index,
                },
            });

            // Close the entry with a delimiter that is attributed to the same entry:
            const delimiterPosition = offset.value;
            // eslint-disable-next-line no-param-reassign
            occurrenceToIndex[delimiterPosition] = index;
            // eslint-disable-next-line no-param-reassign
            concatenatedNumericList[delimiterPosition] = SuffixUkkonenTree.DELIMITER_CHAR_CODE;
            // eslint-disable-next-line no-param-reassign
            offset.value = delimiterPosition + 1;
        }
    });
}

/**
 * Normalizes a string before it is converted for the tree or used as a search input.
 * Everything in the tree is treated as lowercase, so the input is lowercased.
 */
function cleanString(input: string): string {
    const lowerCased = input.toLowerCase();
    return lowerCased;
}

/**
 * Default export of this module. It exposes `createFastSearch` as its only member.
 */
const FastSearch = {
createFastSearch,
};

export default FastSearch;
68 changes: 46 additions & 22 deletions src/libs/OptionsListUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2477,6 +2477,39 @@ function getPersonalDetailSearchTerms(item: Partial<ReportUtils.OptionData>) {
function getCurrentUserSearchTerms(item: ReportUtils.OptionData) {
return [item.text ?? '', item.login ?? '', item.login?.replace(CONST.EMAIL_SEARCH_REGEX, '') ?? ''];
}

/**
 * Arguments of `pickUserToInvite`.
 */
type PickUserToInviteParams = {
/** When false, `pickUserToInvite` returns null without looking at anything else */
canInviteUser: boolean;
/** Reports that matched the search. An invite option is only created when this and `personalDetails` are both empty */
recentReports: ReportUtils.OptionData[];
/** Personal details that matched the search. An invite option is only created when this and `recentReports` are both empty */
personalDetails: ReportUtils.OptionData[];
/** The search value, forwarded to `getUserToInviteOption` */
searchValue: string;
/** Optional filter config. `pickUserToInvite` only reads `selectedOptions` from it */
config?: FilterOptionsConfig;
/** Options to exclude, forwarded to `getUserToInviteOption` */
optionsToExclude: Option[];
};

/**
 * Returns the option to invite a new user for the given search value, or null.
 * An invite option is only requested from `getUserToInviteOption` when inviting is allowed
 * and the search produced neither recent reports nor personal details.
 */
const pickUserToInvite = ({canInviteUser, recentReports, personalDetails, searchValue, config, optionsToExclude}: PickUserToInviteParams) => {
    if (!canInviteUser) {
        return null;
    }

    // Any existing match takes precedence over inviting somebody new:
    if (recentReports.length !== 0 || personalDetails.length !== 0) {
        return null;
    }

    return getUserToInviteOption({
        searchValue,
        selectedOptions: config?.selectedOptions,
        optionsToExclude,
    });
};

/**
 * Drops every personal detail whose login already appears among the recent reports,
 * so a DM that is listed as a report is not shown a second time as a personal detail.
 */
function filteredPersonalDetailsOfRecentReports(recentReports: ReportUtils.OptionData[], personalDetails: ReportUtils.OptionData[]) {
    const loginsOfRecentReports = new Set<ReportUtils.OptionData['login']>();
    recentReports.forEach((report) => {
        loginsOfRecentReports.add(report.login);
    });

    const isNotInRecentReports = (personalDetail: ReportUtils.OptionData) => !loginsOfRecentReports.has(personalDetail.login);
    return personalDetails.filter(isNotInRecentReports);
}

/**
* Filters options based on the search input value
*/
Expand All @@ -2490,11 +2523,6 @@ function filterOptions(options: Options, searchInputValue: string, config?: Filt
preferPolicyExpenseChat = false,
preferRecentExpenseReports = false,
} = config ?? {};
// Remove the personal details for the DMs that are already in the recent reports so that we don't show duplicates
function filteredPersonalDetailsOfRecentReports(recentReports: ReportUtils.OptionData[], personalDetails: ReportUtils.OptionData[]) {
const excludedLogins = new Set(recentReports.map((report) => report.login));
return personalDetails.filter((personalDetail) => !excludedLogins.has(personalDetail.login));
}
if (searchInputValue.trim() === '' && maxRecentReportsToShow > 0) {
const recentReports = options.recentReports.slice(0, maxRecentReportsToShow);
const personalDetails = filteredPersonalDetailsOfRecentReports(recentReports, options.personalDetails);
Expand Down Expand Up @@ -2555,34 +2583,27 @@ function filterOptions(options: Options, searchInputValue: string, config?: Filt
};
}, options);

let {recentReports, personalDetails} = matchResults;
const {recentReports, personalDetails} = matchResults;

const noneReportPersonalDetails = filteredPersonalDetailsOfRecentReports(recentReports, personalDetails);

let filteredPersonalDetails: ReportUtils.OptionData[] = noneReportPersonalDetails;
let filteredRecentReports: ReportUtils.OptionData[] = recentReports;
if (sortByReportTypeInSearch) {
personalDetails = filteredPersonalDetailsOfRecentReports(recentReports, personalDetails);
recentReports = recentReports.concat(personalDetails);
personalDetails = [];
recentReports = orderOptions(recentReports, searchValue);
filteredRecentReports = recentReports.concat(noneReportPersonalDetails);
filteredPersonalDetails = [];
}

let userToInvite = null;
if (canInviteUser) {
if (recentReports.length === 0 && personalDetails.length === 0) {
userToInvite = getUserToInviteOption({
searchValue,
selectedOptions: config?.selectedOptions,
optionsToExclude,
});
}
}
const userToInvite = pickUserToInvite({canInviteUser, recentReports, personalDetails, searchValue, config, optionsToExclude});

if (maxRecentReportsToShow > 0 && recentReports.length > maxRecentReportsToShow) {
recentReports.splice(maxRecentReportsToShow);
}
const filteredPersonalDetails = filteredPersonalDetailsOfRecentReports(recentReports, personalDetails);

const sortedRecentReports = orderOptions(filteredRecentReports, searchValue, {preferChatroomsOverThreads, preferPolicyExpenseChat, preferRecentExpenseReports});
return {
personalDetails: filteredPersonalDetails,
recentReports: orderOptions(recentReports, searchValue, {preferChatroomsOverThreads, preferPolicyExpenseChat, preferRecentExpenseReports}),
recentReports: sortedRecentReports,
userToInvite,
currentUserOption: matchResults.currentUserOption,
categoryOptions: [],
Expand Down Expand Up @@ -2643,6 +2664,7 @@ export {
formatMemberForList,
formatSectionsFromSearchTerm,
getShareLogOptions,
orderOptions,
filterOptions,
createOptionList,
createOptionFromReport,
Expand All @@ -2657,7 +2679,9 @@ export {
shouldUseBoldText,
getAttendeeOptions,
getAlternateText,
pickUserToInvite,
hasReportErrors,
filteredPersonalDetailsOfRecentReports,
};

export type {MemberForList, CategorySection, CategoryTreeSection, Options, OptionList, SearchOption, PayeePersonalDetails, Category, Tax, TaxRatesOption, Option, OptionTree};
Loading
Loading