| // Copyright 2022 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| import {Trie} from './structs/trie.js'; |
| import {EmojiVariants} from './types.js'; |
| |
| /** |
| * Preprocess a phrase by the following operations: |
| * (1) remove white whitespace at both ends of the phrase. |
| * (2) convert all letters into lowercase. |
| */ |
| function sanitize(phrase: string) { |
| return phrase.trim().toLowerCase(); |
| } |
| |
| export class EmojiPrefixSearch { |
| private tokenTrie = new Trie(); |
| private wordToEmojisMap: Map<string, Set<string>> = new Map(); |
| private emojiMap: Map<string, EmojiVariants> = new Map(); |
| |
| setCollection(collection: EmojiVariants[]) { |
| this.clear(); |
| for (const record of collection) { |
| if (!record.base.string) { |
| continue; |
| } |
| const string = record.base.string; |
| const name = record.base.name; |
| const terms = this.tokenize(name).map(term => sanitize(term)); |
| terms.forEach(term => { |
| if (!this.wordToEmojisMap.has(term)) { |
| this.wordToEmojisMap.set(term, new Set()); |
| } |
| this.wordToEmojisMap.get(term)?.add(string); |
| this.emojiMap.set(string, record); |
| this.tokenTrie.add(term); |
| }); |
| } |
| } |
| |
| /** |
| * Returns all items whose name contains the prefix. |
| */ |
| matchPrefixToEmojis(prefix: string): string[] { |
| const results: Set<string> = new Set(); |
| const terms = this.tokenTrie.getKeys(prefix); |
| for (const term of terms) { |
| const matchedItems = this.wordToEmojisMap.get(term); |
| if (matchedItems !== undefined) { |
| matchedItems.forEach(item => { |
| results.add(item); |
| }); |
| } |
| } |
| return Array.from(results); |
| } |
| |
| /** |
| * Clear trie and lookup table. |
| */ |
| clear(): void { |
| this.tokenTrie.clear(); |
| this.wordToEmojisMap.clear(); |
| this.emojiMap.clear(); |
| } |
| |
| private tokenize(phrase: string): string[] { |
| return phrase.split(' ').filter(token => token.length > 0); |
| } |
| |
| /** |
| * Fetch all words from the emoji data that have 'term' has prefix and attach |
| * matching metadata for each word. |
| */ |
| getMatchedKeywords(emoji: EmojiVariants, term: string): Array<{ |
| pos: number, |
| isMatched: boolean, |
| token: string, |
| weight: number, |
| }> { |
| const PRIMARY_NAME_WEIGHT = 1; |
| return this.tokenize(sanitize(emoji.base.name)) |
| .map((token, pos) => ({ |
| pos, |
| isMatched: token.startsWith(term), |
| token, |
| weight: PRIMARY_NAME_WEIGHT, |
| })) |
| .filter(item => item.isMatched); |
| } |
| |
| /** |
| * Calculate the matching score of a term against a given emoji. |
| */ |
| scoreTermAgainstEmoji(emoji: EmojiVariants, term: string) { |
| let score = 0; |
| for (const item of this.getMatchedKeywords(emoji, term)) { |
| if (item.token.length === 0) { |
| throw new Error('Token can not be empty.'); |
| } |
| // Link to single-word match score formula: |
| // https://docs.google.com/document/d/1Ub89xsElqVyRaq8tldhligd29-iXsSMWlAdL6Q-Xpr8/edit# |
| score += |
| (item.weight / (1 + item.pos)) * (term.length / item.token.length); |
| } |
| return score; |
| } |
| |
| /** |
| * Search for all items that match with the given query |
| */ |
| search(query: string) { |
| const queryScores = new Map(); |
| const sanitizedQuery = sanitize(query); |
| this.tokenize(sanitizedQuery).forEach((term, idx) => { |
| // For each token |
| const termScores = new Map(); |
| const candidateEmojis = this.matchPrefixToEmojis(term); |
| |
| for (const emoji of candidateEmojis) { |
| const emojiRecord = this.emojiMap.get(emoji); |
| if (emojiRecord) { |
| termScores.set(emoji, this.scoreTermAgainstEmoji(emojiRecord, term)); |
| } |
| } |
| |
| for (const emoji of termScores.keys()) { |
| // If it is the first term in the query phrase, we apply the |
| // normalization factor. |
| if (idx === 0) { |
| const emojiName = this.emojiMap.get(emoji)?.base?.name ?? ' '; |
| queryScores.set(emoji, sanitizedQuery.length / emojiName.length); |
| } |
| if (queryScores.has(emoji)) { |
| queryScores.set( |
| emoji, queryScores.get(emoji) * termScores.get(emoji)); |
| } |
| } |
| |
| // Remove any emoji at query level if it does not match at term level. |
| for (const emoji of queryScores.keys()) { |
| if (!termScores.has(emoji)) { |
| queryScores.delete(emoji); |
| } |
| } |
| }); |
| |
| const results = |
| Array.from(queryScores.keys()).map(emoji => ({ |
| item: this.emojiMap.get(emoji) as |
| EmojiVariants, |
| score: queryScores.get(emoji), |
| })); |
| return this.sort(results); |
| } |
| |
| /** |
| * Sort the array of Emoji objects by relevance score in descending order |
| */ |
| private sort(results: Array<{item: EmojiVariants, score: number}>) { |
| return results.sort((emoji1, emoji2) => emoji2.score - emoji1.score); |
| } |
| } |