| //===--- FuzzySymbolIndex.cpp - Lookup symbols for autocomplete -*- C++ -*-===// | 
 | // | 
 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
 | // See https://llvm.org/LICENSE.txt for license information. | 
 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 | #include "FuzzySymbolIndex.h" | 
 | #include "llvm/Support/Regex.h" | 
 |  | 
 | using clang::find_all_symbols::SymbolAndSignals; | 
 | using llvm::StringRef; | 
 |  | 
 | namespace clang { | 
 | namespace include_fixer { | 
 | namespace { | 
 |  | 
 | class MemSymbolIndex : public FuzzySymbolIndex { | 
 | public: | 
 |   MemSymbolIndex(std::vector<SymbolAndSignals> Symbols) { | 
 |     for (auto &Symbol : Symbols) { | 
 |       auto Tokens = tokenize(Symbol.Symbol.getName()); | 
 |       this->Symbols.emplace_back( | 
 |           StringRef(llvm::join(Tokens.begin(), Tokens.end(), " ")), | 
 |           std::move(Symbol)); | 
 |     } | 
 |   } | 
 |  | 
 |   std::vector<SymbolAndSignals> search(StringRef Query) override { | 
 |     auto Tokens = tokenize(Query); | 
 |     llvm::Regex Pattern("^" + queryRegexp(Tokens)); | 
 |     std::vector<SymbolAndSignals> Results; | 
 |     for (const Entry &E : Symbols) | 
 |       if (Pattern.match(E.first)) | 
 |         Results.push_back(E.second); | 
 |     return Results; | 
 |   } | 
 |  | 
 | private: | 
 |   using Entry = std::pair<llvm::SmallString<32>, SymbolAndSignals>; | 
 |   std::vector<Entry> Symbols; | 
 | }; | 
 |  | 
 | // Helpers for tokenize state machine. | 
 | enum TokenizeState { | 
 |   EMPTY,      // No pending characters. | 
 |   ONE_BIG,    // Read one uppercase letter, could be WORD or Word. | 
 |   BIG_WORD,   // Reading an uppercase WORD. | 
 |   SMALL_WORD, // Reading a lowercase word. | 
 |   NUMBER      // Reading a number. | 
 | }; | 
 |  | 
 | enum CharType { UPPER, LOWER, DIGIT, MISC }; | 
 | CharType classify(char c) { | 
 |   if (isupper(c)) | 
 |     return UPPER; | 
 |   if (islower(c)) | 
 |     return LOWER; | 
 |   if (isdigit(c)) | 
 |     return DIGIT; | 
 |   return MISC; | 
 | } | 
 |  | 
 | } // namespace | 
 |  | 
 | std::vector<std::string> FuzzySymbolIndex::tokenize(StringRef Text) { | 
 |   std::vector<std::string> Result; | 
 |   // State describes the treatment of text from Start to I. | 
 |   // Once text is Flush()ed into Result, we're done with it and advance Start. | 
 |   TokenizeState State = EMPTY; | 
 |   size_t Start = 0; | 
 |   auto Flush = [&](size_t End) { | 
 |     if (State != EMPTY) { | 
 |       Result.push_back(Text.substr(Start, End - Start).lower()); | 
 |       State = EMPTY; | 
 |     } | 
 |     Start = End; | 
 |   }; | 
 |   for (size_t I = 0; I < Text.size(); ++I) { | 
 |     CharType Type = classify(Text[I]); | 
 |     if (Type == MISC) | 
 |       Flush(I); | 
 |     else if (Type == LOWER) | 
 |       switch (State) { | 
 |       case BIG_WORD: | 
 |         Flush(I - 1); // FOOBar: first token is FOO, not FOOB. | 
 |         [[fallthrough]]; | 
 |       case ONE_BIG: | 
 |         State = SMALL_WORD; | 
 |         [[fallthrough]]; | 
 |       case SMALL_WORD: | 
 |         break; | 
 |       default: | 
 |         Flush(I); | 
 |         State = SMALL_WORD; | 
 |       } | 
 |     else if (Type == UPPER) | 
 |       switch (State) { | 
 |       case ONE_BIG: | 
 |         State = BIG_WORD; | 
 |         [[fallthrough]]; | 
 |       case BIG_WORD: | 
 |         break; | 
 |       default: | 
 |         Flush(I); | 
 |         State = ONE_BIG; | 
 |       } | 
 |     else if (Type == DIGIT && State != NUMBER) { | 
 |       Flush(I); | 
 |       State = NUMBER; | 
 |     } | 
 |   } | 
 |   Flush(Text.size()); | 
 |   return Result; | 
 | } | 
 |  | 
 | std::string | 
 | FuzzySymbolIndex::queryRegexp(const std::vector<std::string> &Tokens) { | 
 |   std::string Result; | 
 |   for (size_t I = 0; I < Tokens.size(); ++I) { | 
 |     if (I) | 
 |       Result.append("[[:alnum:]]* "); | 
 |     for (size_t J = 0; J < Tokens[I].size(); ++J) { | 
 |       if (J) | 
 |         Result.append("([[:alnum:]]* )?"); | 
 |       Result.push_back(Tokens[I][J]); | 
 |     } | 
 |   } | 
 |   return Result; | 
 | } | 
 |  | 
 | llvm::Expected<std::unique_ptr<FuzzySymbolIndex>> | 
 | FuzzySymbolIndex::createFromYAML(StringRef FilePath) { | 
 |   auto Buffer = llvm::MemoryBuffer::getFile(FilePath); | 
 |   if (!Buffer) | 
 |     return llvm::errorCodeToError(Buffer.getError()); | 
 |   return std::make_unique<MemSymbolIndex>( | 
 |       find_all_symbols::ReadSymbolInfosFromYAML(Buffer.get()->getBuffer())); | 
 | } | 
 |  | 
 | } // namespace include_fixer | 
 | } // namespace clang |