| // Copyright 2019 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef CHROMEOS_ASH_COMPONENTS_STRING_MATCHING_TERM_BREAK_ITERATOR_H_ |
| #define CHROMEOS_ASH_COMPONENTS_STRING_MATCHING_TERM_BREAK_ITERATOR_H_ |
| |
| #include <stddef.h> |
| |
| #include <memory> |
| #include <string> |
| |
| #include "base/memory/raw_ref.h" |
| |
| namespace base::i18n { |
| class UTF16CharIterator; |
| } |
| |
| namespace ash::string_matching { |
| |
| // TermBreakIterator splits one word into terms. A term boundary falls on a |
| // camel-case transition or an alpha/number transition, where a number is |
| // [0-9\.,]+. Each successful Advance() selects the next term, which |
| // GetCurrentTerm() then returns. Examples: |
| // CamelCase -> Camel, Case |
| // Python2.7 -> Python, 2.7 |
| class TermBreakIterator { |
| public: |
| // |word| must outlive this iterator (|word_| is a raw_ref, not an owned copy). |
| explicit TermBreakIterator(const std::u16string& word); |
| |
| TermBreakIterator(const TermBreakIterator&) = delete; |
| TermBreakIterator& operator=(const TermBreakIterator&) = delete; |
| |
| ~TermBreakIterator(); |
| |
| // Advances to the next term. Returns false if at the end of the word. |
| bool Advance(); |
| |
| // Returns (by value) the current term: the substring of |word_| in the |
| // half-open range [prev_, pos_). prev() and pos() below expose those offsets. |
| const std::u16string GetCurrentTerm() const; |
| |
| size_t prev() const { return prev_; } |
| size_t pos() const { return pos_; } |
| |
| static const size_t npos = static_cast<size_t>(-1); |
| |
| private: |
| enum State { |
| STATE_START, // Initial state. |
| STATE_NUMBER, // Current char is part of a number: [0-9\.,]. |
| STATE_UPPER, // Current char is upper case. |
| STATE_LOWER, // Current char is lower case. |
| STATE_CHAR, // Current char has no case, e.g. a CJK char. |
| STATE_LAST, |
| }; |
| |
| // Returns the new State implied by the character |ch|. |
| State GetNewState(char16_t ch); |
| |
| const raw_ref<const std::u16string> word_; |
| size_t prev_; |
| size_t pos_; |
| |
| std::unique_ptr<base::i18n::UTF16CharIterator> iter_; |
| State state_; |
| }; |
| |
| } // namespace ash::string_matching |
| |
| #endif // CHROMEOS_ASH_COMPONENTS_STRING_MATCHING_TERM_BREAK_ITERATOR_H_ |