|  | //===- DFAEmitter.cpp - Finite state automaton emitter --------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // | 
|  | // This class can produce a generic deterministic finite state automaton (DFA), | 
|  | // given a set of possible states and transitions. | 
|  | // | 
|  | // The input transitions can be nondeterministic - this class will produce the | 
|  | // deterministic equivalent state machine. | 
|  | // | 
|  | // The generated code can run the DFA and produce an accepted / not accepted | 
|  | // state and also produce, given a sequence of transitions that results in an | 
|  | // accepted state, the sequence of intermediate states. This is useful if the | 
|  | // initial automaton was nondeterministic - it allows mapping back from the DFA | 
|  | // to the NFA. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | #define DEBUG_TYPE "dfa-emitter" | 
|  |  | 
|  | #include "DFAEmitter.h" | 
|  | #include "CodeGenTarget.h" | 
|  | #include "SequenceToOffsetTable.h" | 
|  | #include "TableGenBackends.h" | 
|  | #include "llvm/ADT/SmallVector.h" | 
|  | #include "llvm/ADT/StringExtras.h" | 
|  | #include "llvm/ADT/UniqueVector.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  | #include "llvm/TableGen/Record.h" | 
|  | #include "llvm/TableGen/TableGenBackend.h" | 
|  | #include <cassert> | 
|  | #include <cstdint> | 
|  | #include <map> | 
|  | #include <set> | 
|  | #include <string> | 
|  | #include <vector> | 
|  |  | 
|  | using namespace llvm; | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // DfaEmitter implementation. This is independent of the GenAutomaton backend. | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | void DfaEmitter::addTransition(state_type From, state_type To, action_type A) { | 
|  | Actions.insert(A); | 
|  | NfaStates.insert(From); | 
|  | NfaStates.insert(To); | 
|  | NfaTransitions[{From, A}].push_back(To); | 
|  | ++NumNfaTransitions; | 
|  | } | 
|  |  | 
|  | void DfaEmitter::visitDfaState(DfaState DS) { | 
|  | // For every possible action... | 
|  | auto FromId = DfaStates.idFor(DS); | 
|  | for (action_type A : Actions) { | 
|  | DfaState NewStates; | 
|  | DfaTransitionInfo TI; | 
|  | // For every represented state, word pair in the original NFA... | 
|  | for (state_type &FromState : DS) { | 
|  | // If this action is possible from this state add the transitioned-to | 
|  | // states to NewStates. | 
|  | auto I = NfaTransitions.find({FromState, A}); | 
|  | if (I == NfaTransitions.end()) | 
|  | continue; | 
|  | for (state_type &ToState : I->second) { | 
|  | NewStates.push_back(ToState); | 
|  | TI.emplace_back(FromState, ToState); | 
|  | } | 
|  | } | 
|  | if (NewStates.empty()) | 
|  | continue; | 
|  | // Sort and unique. | 
|  | sort(NewStates); | 
|  | NewStates.erase(std::unique(NewStates.begin(), NewStates.end()), | 
|  | NewStates.end()); | 
|  | sort(TI); | 
|  | TI.erase(std::unique(TI.begin(), TI.end()), TI.end()); | 
|  | unsigned ToId = DfaStates.insert(NewStates); | 
|  | DfaTransitions.emplace(std::make_pair(FromId, A), std::make_pair(ToId, TI)); | 
|  | } | 
|  | } | 
|  |  | 
|  | void DfaEmitter::constructDfa() { | 
|  | DfaState Initial(1, /*NFA initial state=*/0); | 
|  | DfaStates.insert(Initial); | 
|  |  | 
|  | // Note that UniqueVector starts indices at 1, not zero. | 
|  | unsigned DfaStateId = 1; | 
|  | while (DfaStateId <= DfaStates.size()) | 
|  | visitDfaState(DfaStates[DfaStateId++]); | 
|  | } | 
|  |  | 
|  | void DfaEmitter::emit(StringRef Name, raw_ostream &OS) { | 
|  | constructDfa(); | 
|  |  | 
|  | OS << "// Input NFA has " << NfaStates.size() << " states with " | 
|  | << NumNfaTransitions << " transitions.\n"; | 
|  | OS << "// Generated DFA has " << DfaStates.size() << " states with " | 
|  | << DfaTransitions.size() << " transitions.\n\n"; | 
|  |  | 
|  | // Implementation note: We don't bake a simple std::pair<> here as it requires | 
|  | // significantly more effort to parse. A simple test with a large array of | 
|  | // struct-pairs (N=100000) took clang-10 6s to parse. The same array of | 
|  | // std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to | 
|  | // define the pair type. | 
|  | // | 
|  | // FIXME: It may make sense to emit these as ULEB sequences instead of | 
|  | // pairs of uint64_t. | 
|  | OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n"; | 
|  | OS << "// transition implies a set of NFA transitions. These are referred\n"; | 
|  | OS << "// to by index in " << Name << "Transitions[].\n"; | 
|  |  | 
|  | SequenceToOffsetTable<DfaTransitionInfo> Table; | 
|  | std::map<DfaTransitionInfo, unsigned> EmittedIndices; | 
|  | for (auto &T : DfaTransitions) | 
|  | Table.add(T.second.second); | 
|  | Table.layout(); | 
|  | OS << "std::array<NfaStatePair, " << Table.size() << "> " << Name | 
|  | << "TransitionInfo = {{\n"; | 
|  | Table.emit( | 
|  | OS, | 
|  | [](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) { | 
|  | OS << "{" << P.first << ", " << P.second << "}"; | 
|  | }, | 
|  | "{0ULL, 0ULL}"); | 
|  |  | 
|  | OS << "}};\n\n"; | 
|  |  | 
|  | OS << "// A transition in the generated " << Name << " DFA.\n"; | 
|  | OS << "struct " << Name << "Transition {\n"; | 
|  | OS << "  unsigned FromDfaState; // The transitioned-from DFA state.\n"; | 
|  | OS << "  "; | 
|  | printActionType(OS); | 
|  | OS << " Action;       // The input symbol that causes this transition.\n"; | 
|  | OS << "  unsigned ToDfaState;   // The transitioned-to DFA state.\n"; | 
|  | OS << "  unsigned InfoIdx;      // Start index into " << Name | 
|  | << "TransitionInfo.\n"; | 
|  | OS << "};\n\n"; | 
|  |  | 
|  | OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n"; | 
|  | OS << "// The initial state is 1, not zero.\n"; | 
|  | OS << "std::array<" << Name << "Transition, " << DfaTransitions.size() << "> " | 
|  | << Name << "Transitions = {{\n"; | 
|  | for (auto &KV : DfaTransitions) { | 
|  | dfa_state_type From = KV.first.first; | 
|  | dfa_state_type To = KV.second.first; | 
|  | action_type A = KV.first.second; | 
|  | unsigned InfoIdx = Table.get(KV.second.second); | 
|  | OS << "  {" << From << ", "; | 
|  | printActionValue(A, OS); | 
|  | OS << ", " << To << ", " << InfoIdx << "},\n"; | 
|  | } | 
|  | OS << "\n}};\n\n"; | 
|  | } | 
|  |  | 
|  | void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; } | 
|  |  | 
|  | void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; } | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // AutomatonEmitter implementation | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | namespace { | 
|  | // FIXME: This entire discriminated union could be removed with c++17: | 
|  | //   using Action = std::variant<Record *, unsigned, std::string>; | 
|  | struct Action { | 
|  | Record *R = nullptr; | 
|  | unsigned I = 0; | 
|  | std::string S = nullptr; | 
|  |  | 
|  | Action() = default; | 
|  | Action(Record *R, unsigned I, std::string S) : R(R), I(I), S(S) {} | 
|  |  | 
|  | void print(raw_ostream &OS) const { | 
|  | if (R) | 
|  | OS << R->getName(); | 
|  | else if (!S.empty()) | 
|  | OS << '"' << S << '"'; | 
|  | else | 
|  | OS << I; | 
|  | } | 
|  | bool operator<(const Action &Other) const { | 
|  | return std::make_tuple(R, I, S) < | 
|  | std::make_tuple(Other.R, Other.I, Other.S); | 
|  | } | 
|  | }; | 
|  |  | 
|  | using ActionTuple = std::vector<Action>; | 
|  | class Automaton; | 
|  |  | 
|  | class Transition { | 
|  | uint64_t NewState; | 
|  | // The tuple of actions that causes this transition. | 
|  | ActionTuple Actions; | 
|  | // The types of the actions; this is the same across all transitions. | 
|  | SmallVector<std::string, 4> Types; | 
|  |  | 
|  | public: | 
|  | Transition(Record *R, Automaton *Parent); | 
|  | const ActionTuple &getActions() { return Actions; } | 
|  | SmallVector<std::string, 4> getTypes() { return Types; } | 
|  |  | 
|  | bool canTransitionFrom(uint64_t State); | 
|  | uint64_t transitionFrom(uint64_t State); | 
|  | }; | 
|  |  | 
|  | class Automaton { | 
|  | RecordKeeper &Records; | 
|  | Record *R; | 
|  | std::vector<Transition> Transitions; | 
|  | /// All possible action tuples, uniqued. | 
|  | UniqueVector<ActionTuple> Actions; | 
|  | /// The fields within each Transition object to find the action symbols. | 
|  | std::vector<StringRef> ActionSymbolFields; | 
|  |  | 
|  | public: | 
|  | Automaton(RecordKeeper &Records, Record *R); | 
|  | void emit(raw_ostream &OS); | 
|  |  | 
|  | ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; } | 
|  | /// If the type of action A has been overridden (there exists a field | 
|  | /// "TypeOf_A") return that, otherwise return the empty string. | 
|  | StringRef getActionSymbolType(StringRef A); | 
|  | }; | 
|  |  | 
|  | class AutomatonEmitter { | 
|  | RecordKeeper &Records; | 
|  |  | 
|  | public: | 
|  | AutomatonEmitter(RecordKeeper &R) : Records(R) {} | 
|  | void run(raw_ostream &OS); | 
|  | }; | 
|  |  | 
|  | /// A DfaEmitter implementation that can print our variant action type. | 
|  | class CustomDfaEmitter : public DfaEmitter { | 
|  | const UniqueVector<ActionTuple> &Actions; | 
|  | std::string TypeName; | 
|  |  | 
|  | public: | 
|  | CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName) | 
|  | : Actions(Actions), TypeName(TypeName) {} | 
|  |  | 
|  | void printActionType(raw_ostream &OS) override; | 
|  | void printActionValue(action_type A, raw_ostream &OS) override; | 
|  | }; | 
|  | } // namespace | 
|  |  | 
|  | void AutomatonEmitter::run(raw_ostream &OS) { | 
|  | for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) { | 
|  | Automaton A(Records, R); | 
|  | OS << "#ifdef GET_" << R->getName() << "_DECL\n"; | 
|  | A.emit(OS); | 
|  | OS << "#endif  // GET_" << R->getName() << "_DECL\n"; | 
|  | } | 
|  | } | 
|  |  | 
|  | Automaton::Automaton(RecordKeeper &Records, Record *R) | 
|  | : Records(Records), R(R) { | 
|  | LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n"); | 
|  | ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields"); | 
|  | } | 
|  |  | 
|  | void Automaton::emit(raw_ostream &OS) { | 
|  | StringRef TransitionClass = R->getValueAsString("TransitionClass"); | 
|  | for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) { | 
|  | assert(T->isSubClassOf("Transition")); | 
|  | Transitions.emplace_back(T, this); | 
|  | Actions.insert(Transitions.back().getActions()); | 
|  | } | 
|  |  | 
|  | LLVM_DEBUG(dbgs() << "  Action alphabet cardinality: " << Actions.size() | 
|  | << "\n"); | 
|  | LLVM_DEBUG(dbgs() << "  Each state has " << Transitions.size() | 
|  | << " potential transitions.\n"); | 
|  |  | 
|  | StringRef Name = R->getName(); | 
|  |  | 
|  | CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action"); | 
|  | // Starting from the initial state, build up a list of possible states and | 
|  | // transitions. | 
|  | std::deque<uint64_t> Worklist(1, 0); | 
|  | std::set<uint64_t> SeenStates; | 
|  | unsigned NumTransitions = 0; | 
|  | SeenStates.insert(Worklist.front()); | 
|  | while (!Worklist.empty()) { | 
|  | uint64_t State = Worklist.front(); | 
|  | Worklist.pop_front(); | 
|  | for (Transition &T : Transitions) { | 
|  | if (!T.canTransitionFrom(State)) | 
|  | continue; | 
|  | uint64_t NewState = T.transitionFrom(State); | 
|  | if (SeenStates.emplace(NewState).second) | 
|  | Worklist.emplace_back(NewState); | 
|  | ++NumTransitions; | 
|  | Emitter.addTransition(State, NewState, Actions.idFor(T.getActions())); | 
|  | } | 
|  | } | 
|  | LLVM_DEBUG(dbgs() << "  NFA automaton has " << SeenStates.size() | 
|  | << " states with " << NumTransitions << " transitions.\n"); | 
|  |  | 
|  | const auto &ActionTypes = Transitions.back().getTypes(); | 
|  | OS << "// The type of an action in the " << Name << " automaton.\n"; | 
|  | if (ActionTypes.size() == 1) { | 
|  | OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n"; | 
|  | } else { | 
|  | OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ") | 
|  | << ">;\n"; | 
|  | } | 
|  | OS << "\n"; | 
|  |  | 
|  | Emitter.emit(Name, OS); | 
|  | } | 
|  |  | 
|  | StringRef Automaton::getActionSymbolType(StringRef A) { | 
|  | Twine Ty = "TypeOf_" + A; | 
|  | if (!R->getValue(Ty.str())) | 
|  | return ""; | 
|  | return R->getValueAsString(Ty.str()); | 
|  | } | 
|  |  | 
|  | Transition::Transition(Record *R, Automaton *Parent) { | 
|  | BitsInit *NewStateInit = R->getValueAsBitsInit("NewState"); | 
|  | NewState = 0; | 
|  | assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 && | 
|  | "State cannot be represented in 64 bits!"); | 
|  | for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) { | 
|  | if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) { | 
|  | if (Bit->getValue()) | 
|  | NewState |= 1ULL << I; | 
|  | } | 
|  | } | 
|  |  | 
|  | for (StringRef A : Parent->getActionSymbolFields()) { | 
|  | RecordVal *SymbolV = R->getValue(A); | 
|  | if (auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) { | 
|  | Actions.emplace_back(R->getValueAsDef(A), 0, ""); | 
|  | Types.emplace_back(Ty->getAsString()); | 
|  | } else if (isa<IntRecTy>(SymbolV->getType())) { | 
|  | Actions.emplace_back(nullptr, R->getValueAsInt(A), ""); | 
|  | Types.emplace_back("unsigned"); | 
|  | } else if (isa<StringRecTy>(SymbolV->getType()) || | 
|  | isa<CodeRecTy>(SymbolV->getType())) { | 
|  | Actions.emplace_back(nullptr, 0, R->getValueAsString(A)); | 
|  | Types.emplace_back("std::string"); | 
|  | } else { | 
|  | report_fatal_error("Unhandled symbol type!"); | 
|  | } | 
|  |  | 
|  | StringRef TypeOverride = Parent->getActionSymbolType(A); | 
|  | if (!TypeOverride.empty()) | 
|  | Types.back() = TypeOverride; | 
|  | } | 
|  | } | 
|  |  | 
|  | bool Transition::canTransitionFrom(uint64_t State) { | 
|  | if ((State & NewState) == 0) | 
|  | // The bits we want to set are not set; | 
|  | return true; | 
|  | return false; | 
|  | } | 
|  |  | 
|  | uint64_t Transition::transitionFrom(uint64_t State) { | 
|  | return State | NewState; | 
|  | } | 
|  |  | 
|  | void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; } | 
|  |  | 
|  | void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { | 
|  | const ActionTuple &AT = Actions[A]; | 
|  | if (AT.size() > 1) | 
|  | OS << "std::make_tuple("; | 
|  | bool First = true; | 
|  | for (const auto &SingleAction : AT) { | 
|  | if (!First) | 
|  | OS << ", "; | 
|  | First = false; | 
|  | SingleAction.print(OS); | 
|  | } | 
|  | if (AT.size() > 1) | 
|  | OS << ")"; | 
|  | } | 
|  |  | 
|  | namespace llvm { | 
|  |  | 
|  | void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) { | 
|  | AutomatonEmitter(RK).run(OS); | 
|  | } | 
|  |  | 
|  | } // namespace llvm |