diff options
| author | Patrick Schönberger | 2021-08-14 14:56:12 +0200 |
|---|---|---|
| committer | Patrick Schönberger | 2021-08-14 14:56:12 +0200 |
| commit | c6ad2948bb98d42f8e0883ef82cd14cd2d5eda60 (patch) | |
| tree | 9e83d6d8f61e56f5d3425b8709314d6bdb9315a9 /antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa | |
| parent | 9f94b672a5dc32da5ad01742bd4e976315a30d9c (diff) | |
| download | toc-main.tar.gz toc-main.zip | |
Diffstat (limited to 'antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa')
8 files changed, 604 insertions, 0 deletions
diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFA.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFA.cpp new file mode 100644 index 0000000..3f83180 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFA.cpp @@ -0,0 +1,127 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFASerializer.h" +#include "dfa/LexerDFASerializer.h" +#include "support/CPPUtils.h" +#include "atn/StarLoopEntryState.h" +#include "atn/ATNConfigSet.h" + +#include "dfa/DFA.h" + +using namespace antlr4; +using namespace antlr4::dfa; +using namespace antlrcpp; + +DFA::DFA(atn::DecisionState *atnStartState) : DFA(atnStartState, 0) { +} + +DFA::DFA(atn::DecisionState *atnStartState, size_t decision) + : atnStartState(atnStartState), s0(nullptr), decision(decision) { + + _precedenceDfa = false; + if (is<atn::StarLoopEntryState *>(atnStartState)) { + if (static_cast<atn::StarLoopEntryState *>(atnStartState)->isPrecedenceDecision) { + _precedenceDfa = true; + s0 = new DFAState(std::unique_ptr<atn::ATNConfigSet>(new atn::ATNConfigSet())); + s0->isAcceptState = false; + s0->requiresFullContext = false; + } + } +} + +DFA::DFA(DFA &&other) : atnStartState(other.atnStartState), decision(other.decision) { + // Source states are implicitly cleared by the move. + states = std::move(other.states); + + other.atnStartState = nullptr; + other.decision = 0; + s0 = other.s0; + other.s0 = nullptr; + _precedenceDfa = other._precedenceDfa; + other._precedenceDfa = false; +} + +DFA::~DFA() { + bool s0InList = (s0 == nullptr); + for (auto *state : states) { + if (state == s0) + s0InList = true; + delete state; + } + + if (!s0InList) + delete s0; +} + +bool DFA::isPrecedenceDfa() const { + return _precedenceDfa; +} + +DFAState* DFA::getPrecedenceStartState(int precedence) const { + assert(_precedenceDfa); // Only precedence DFAs may contain a precedence start state. + + auto iterator = s0->edges.find(precedence); + if (iterator == s0->edges.end()) + return nullptr; + + return iterator->second; +} + +void DFA::setPrecedenceStartState(int precedence, DFAState *startState, SingleWriteMultipleReadLock &lock) { + if (!isPrecedenceDfa()) { + throw IllegalStateException("Only precedence DFAs may contain a precedence start state."); + } + + if (precedence < 0) { + return; + } + + { + lock.writeLock(); + s0->edges[precedence] = startState; + lock.writeUnlock(); + } +} + +std::vector<DFAState *> DFA::getStates() const { + std::vector<DFAState *> result; + for (auto *state : states) + result.push_back(state); + + std::sort(result.begin(), result.end(), [](DFAState *o1, DFAState *o2) -> bool { + return o1->stateNumber < o2->stateNumber; + }); + + return result; +} + +std::string DFA::toString(const std::vector<std::string> &tokenNames) { + if (s0 == nullptr) { + return ""; + } + DFASerializer serializer(this, tokenNames); + + return serializer.toString(); +} + +std::string DFA::toString(const Vocabulary &vocabulary) const { + if (s0 == nullptr) { + return ""; + } + + DFASerializer serializer(this, vocabulary); + return serializer.toString(); +} + +std::string DFA::toLexerString() { + if (s0 == nullptr) { + return ""; + } + LexerDFASerializer serializer(this); + + return serializer.toString(); +} + diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFA.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFA.h new file mode 100644 index 0000000..99daf0a --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFA.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "dfa/DFAState.h" + +namespace antlrcpp { + class SingleWriteMultipleReadLock; +} + +namespace antlr4 { +namespace dfa { + + class ANTLR4CPP_PUBLIC DFA { + public: + /// A set of all DFA states. Use a map so we can get old state back. + /// Set only allows you to see if it's there. + + /// From which ATN state did we create this DFA? + atn::DecisionState *atnStartState; + std::unordered_set<DFAState *, DFAState::Hasher, DFAState::Comparer> states; // States are owned by this class. + DFAState *s0; + size_t decision; + + DFA(atn::DecisionState *atnStartState); + DFA(atn::DecisionState *atnStartState, size_t decision); + DFA(const DFA &other) = delete; + DFA(DFA &&other); + virtual ~DFA(); + + /** + * Gets whether this DFA is a precedence DFA. Precedence DFAs use a special + * start state {@link #s0} which is not stored in {@link #states}. The + * {@link DFAState#edges} array for this start state contains outgoing edges + * supplying individual start states corresponding to specific precedence + * values. + * + * @return {@code true} if this is a precedence DFA; otherwise, + * {@code false}. + * @see Parser#getPrecedence() + */ + bool isPrecedenceDfa() const; + + /** + * Get the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @return The start state corresponding to the specified precedence, or + * {@code null} if no start state exists for the specified precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + DFAState* getPrecedenceStartState(int precedence) const; + + /** + * Set the start state for a specific precedence value. + * + * @param precedence The current precedence. + * @param startState The start state corresponding to the specified + * precedence. + * + * @throws IllegalStateException if this is not a precedence DFA. + * @see #isPrecedenceDfa() + */ + void setPrecedenceStartState(int precedence, DFAState *startState, antlrcpp::SingleWriteMultipleReadLock &lock); + + /// Return a list of all states in this DFA, ordered by state number. + virtual std::vector<DFAState *> getStates() const; + + /** + * @deprecated Use {@link #toString(Vocabulary)} instead. + */ + virtual std::string toString(const std::vector<std::string>& tokenNames); + std::string toString(const Vocabulary &vocabulary) const; + + virtual std::string toLexerString(); + + private: + /** + * {@code true} if this DFA is for a precedence decision; otherwise, + * {@code false}. This is the backing field for {@link #isPrecedenceDfa}. + */ + bool _precedenceDfa; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFASerializer.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFASerializer.cpp new file mode 100644 index 0000000..d27e53f --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFASerializer.cpp @@ -0,0 +1,67 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "dfa/DFA.h" +#include "Vocabulary.h" + +#include "dfa/DFASerializer.h" + +using namespace antlr4::dfa; + +DFASerializer::DFASerializer(const DFA *dfa, const std::vector<std::string>& tokenNames) + : DFASerializer(dfa, Vocabulary::fromTokenNames(tokenNames)) { +} + +DFASerializer::DFASerializer(const DFA *dfa, const Vocabulary &vocabulary) : _dfa(dfa), _vocabulary(vocabulary) { +} + +DFASerializer::~DFASerializer() { +} + +std::string DFASerializer::toString() const { + if (_dfa->s0 == nullptr) { + return ""; + } + + std::stringstream ss; + std::vector<DFAState *> states = _dfa->getStates(); + for (auto *s : states) { + for (size_t i = 0; i < s->edges.size(); i++) { + DFAState *t = s->edges[i]; + if (t != nullptr && t->stateNumber != INT32_MAX) { + ss << getStateString(s); + std::string label = getEdgeLabel(i); + ss << "-" << label << "->" << getStateString(t) << "\n"; + } + } + } + + return ss.str(); +} + +std::string DFASerializer::getEdgeLabel(size_t i) const { + return _vocabulary.getDisplayName(i); // ml: no longer needed -1 as we use a map for edges, without offset. +} + +std::string DFASerializer::getStateString(DFAState *s) const { + size_t n = s->stateNumber; + + const std::string baseStateStr = std::string(s->isAcceptState ? ":" : "") + "s" + std::to_string(n) + + (s->requiresFullContext ? "^" : ""); + + if (s->isAcceptState) { + if (!s->predicates.empty()) { + std::string buf; + for (size_t i = 0; i < s->predicates.size(); i++) { + buf.append(s->predicates[i]->toString()); + } + return baseStateStr + "=>" + buf; + } else { + return baseStateStr + "=>" + std::to_string(s->prediction); + } + } else { + return baseStateStr; + } +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFASerializer.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFASerializer.h new file mode 100644 index 0000000..a1fe5a5 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFASerializer.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "Vocabulary.h" + +namespace antlr4 { +namespace dfa { + + /// A DFA walker that knows how to dump them to serialized strings. + class ANTLR4CPP_PUBLIC DFASerializer { + public: + DFASerializer(const DFA *dfa, const std::vector<std::string>& tnames); + DFASerializer(const DFA *dfa, const Vocabulary &vocabulary); + virtual ~DFASerializer(); + + virtual std::string toString() const; + + protected: + virtual std::string getEdgeLabel(size_t i) const; + virtual std::string getStateString(DFAState *s) const; + + private: + const DFA *_dfa; + const Vocabulary &_vocabulary; + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFAState.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFAState.cpp new file mode 100644 index 0000000..998fed3 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFAState.cpp @@ -0,0 +1,100 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATNConfigSet.h" +#include "atn/SemanticContext.h" +#include "atn/ATNConfig.h" +#include "misc/MurmurHash.h" + +#include "dfa/DFAState.h" + +using namespace antlr4::dfa; +using namespace antlr4::atn; + +DFAState::PredPrediction::PredPrediction(const Ref<SemanticContext> &pred, int alt) : pred(pred) { + InitializeInstanceFields(); + this->alt = alt; +} + +DFAState::PredPrediction::~PredPrediction() { +} + +std::string DFAState::PredPrediction::toString() { + return std::string("(") + pred->toString() + ", " + std::to_string(alt) + ")"; +} + +void DFAState::PredPrediction::InitializeInstanceFields() { + alt = 0; +} + +DFAState::DFAState() { + InitializeInstanceFields(); +} + +DFAState::DFAState(int state) : DFAState() { + stateNumber = state; +} + +DFAState::DFAState(std::unique_ptr<ATNConfigSet> configs_) : DFAState() { + configs = std::move(configs_); +} + +DFAState::~DFAState() { + for (auto *predicate : predicates) { + delete predicate; + } +} + +std::set<size_t> DFAState::getAltSet() { + std::set<size_t> alts; + if (configs != nullptr) { + for (size_t i = 0; i < configs->size(); i++) { + alts.insert(configs->get(i)->alt); + } + } + return alts; +} + +size_t DFAState::hashCode() const { + size_t hash = misc::MurmurHash::initialize(7); + hash = misc::MurmurHash::update(hash, configs->hashCode()); + hash = misc::MurmurHash::finish(hash, 1); + return hash; +} + +bool DFAState::operator == (const DFAState &o) const { + // compare set of ATN configurations in this set with other + if (this == &o) { + return true; + } + + return *configs == *o.configs; +} + +std::string DFAState::toString() { + std::stringstream ss; + ss << stateNumber; + if (configs) { + ss << ":" << configs->toString(); + } + if (isAcceptState) { + ss << " => "; + if (!predicates.empty()) { + for (size_t i = 0; i < predicates.size(); i++) { + ss << predicates[i]->toString(); + } + } else { + ss << prediction; + } + } + return ss.str(); +} + +void DFAState::InitializeInstanceFields() { + stateNumber = -1; + isAcceptState = false; + prediction = 0; + requiresFullContext = false; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFAState.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFAState.h new file mode 100644 index 0000000..2f0ddba --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/DFAState.h @@ -0,0 +1,144 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace dfa { + + /// <summary> + /// A DFA state represents a set of possible ATN configurations. + /// As Aho, Sethi, Ullman p. 117 says "The DFA uses its state + /// to keep track of all possible states the ATN can be in after + /// reading each input symbol. That is to say, after reading + /// input a1a2..an, the DFA is in a state that represents the + /// subset T of the states of the ATN that are reachable from the + /// ATN's start state along some path labeled a1a2..an." + /// In conventional NFA->DFA conversion, therefore, the subset T + /// would be a bitset representing the set of states the + /// ATN could be in. We need to track the alt predicted by each + /// state as well, however. More importantly, we need to maintain + /// a stack of states, tracking the closure operations as they + /// jump from rule to rule, emulating rule invocations (method calls). + /// I have to add a stack to simulate the proper lookahead sequences for + /// the underlying LL grammar from which the ATN was derived. + /// <p/> + /// I use a set of ATNConfig objects not simple states. An ATNConfig + /// is both a state (ala normal conversion) and a RuleContext describing + /// the chain of rules (if any) followed to arrive at that state. + /// <p/> + /// A DFA state may have multiple references to a particular state, + /// but with different ATN contexts (with same or different alts) + /// meaning that state was reached via a different set of rule invocations. + /// </summary> + class ANTLR4CPP_PUBLIC DFAState { + public: + class PredPrediction { + public: + Ref<atn::SemanticContext> pred; // never null; at least SemanticContext.NONE + int alt; + + PredPrediction(const Ref<atn::SemanticContext> &pred, int alt); + virtual ~PredPrediction(); + + virtual std::string toString(); + + private: + void InitializeInstanceFields(); + }; + + int stateNumber; + + std::unique_ptr<atn::ATNConfigSet> configs; + + /// {@code edges[symbol]} points to target of symbol. Shift up by 1 so (-1) + /// <seealso cref="Token#EOF"/> maps to {@code edges[0]}. + // ml: this is a sparse list, so we use a map instead of a vector. + // Watch out: we no longer have the -1 offset, as it isn't needed anymore. + std::unordered_map<size_t, DFAState *> edges; + + bool isAcceptState; + + /// if accept state, what ttype do we match or alt do we predict? + /// This is set to <seealso cref="ATN#INVALID_ALT_NUMBER"/> when <seealso cref="#predicates"/>{@code !=null} or + /// <seealso cref="#requiresFullContext"/>. + size_t prediction; + + Ref<atn::LexerActionExecutor> lexerActionExecutor; + + /// <summary> + /// Indicates that this state was created during SLL prediction that + /// discovered a conflict between the configurations in the state. Future + /// <seealso cref="ParserATNSimulator#execATN"/> invocations immediately jumped doing + /// full context prediction if this field is true. + /// </summary> + bool requiresFullContext; + + /// <summary> + /// During SLL parsing, this is a list of predicates associated with the + /// ATN configurations of the DFA state. When we have predicates, + /// <seealso cref="#requiresFullContext"/> is {@code false} since full context prediction evaluates predicates + /// on-the-fly. If this is not null, then <seealso cref="#prediction"/> is + /// <seealso cref="ATN#INVALID_ALT_NUMBER"/>. + /// <p/> + /// We only use these for non-<seealso cref="#requiresFullContext"/> but conflicting states. That + /// means we know from the context (it's $ or we don't dip into outer + /// context) that it's an ambiguity not a conflict. + /// <p/> + /// This list is computed by <seealso cref="ParserATNSimulator#predicateDFAState"/>. + /// </summary> + std::vector<PredPrediction *> predicates; + + /// Map a predicate to a predicted alternative. + DFAState(); + DFAState(int state); + DFAState(std::unique_ptr<atn::ATNConfigSet> configs); + virtual ~DFAState(); + + /// <summary> + /// Get the set of all alts mentioned by all ATN configurations in this + /// DFA state. + /// </summary> + virtual std::set<size_t> getAltSet(); + + virtual size_t hashCode() const; + + /// Two DFAState instances are equal if their ATN configuration sets + /// are the same. This method is used to see if a state already exists. + /// + /// Because the number of alternatives and number of ATN configurations are + /// finite, there is a finite number of DFA states that can be processed. + /// This is necessary to show that the algorithm terminates. + /// + /// Cannot test the DFA state numbers here because in + /// ParserATNSimulator#addDFAState we need to know if any other state + /// exists that has this exact set of ATN configurations. The + /// stateNumber is irrelevant. + bool operator == (const DFAState &o) const; + + virtual std::string toString(); + + struct Hasher + { + size_t operator()(DFAState *k) const { + return k->hashCode(); + } + }; + + struct Comparer { + bool operator()(DFAState *lhs, DFAState *rhs) const + { + return *lhs == *rhs; + } + }; + + private: + void InitializeInstanceFields(); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/LexerDFASerializer.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/LexerDFASerializer.cpp new file mode 100644 index 0000000..c3af41c --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/LexerDFASerializer.cpp @@ -0,0 +1,20 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "Vocabulary.h" + +#include "dfa/LexerDFASerializer.h" + +using namespace antlr4::dfa; + +LexerDFASerializer::LexerDFASerializer(DFA *dfa) : DFASerializer(dfa, Vocabulary::EMPTY_VOCABULARY) { +} + +LexerDFASerializer::~LexerDFASerializer() { +} + +std::string LexerDFASerializer::getEdgeLabel(size_t i) const { + return std::string("'") + static_cast<char>(i) + "'"; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/LexerDFASerializer.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/LexerDFASerializer.h new file mode 100644 index 0000000..d157107 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/dfa/LexerDFASerializer.h @@ -0,0 +1,23 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "dfa/DFASerializer.h" + +namespace antlr4 { +namespace dfa { + + class ANTLR4CPP_PUBLIC LexerDFASerializer : public DFASerializer { + public: + LexerDFASerializer(DFA *dfa); + virtual ~LexerDFASerializer(); + + protected: + virtual std::string getEdgeLabel(size_t i) const override; + }; + +} // namespace atn +} // namespace antlr4 |
