From c6ad2948bb98d42f8e0883ef82cd14cd2d5eda60 Mon Sep 17 00:00:00 2001 From: Patrick Schönberger Date: Sat, 14 Aug 2021 14:56:12 +0200 Subject: add antlr source code and ReadMe --- .../runtime/src/misc/InterpreterDataReader.cpp | 124 +++++ .../runtime/src/misc/InterpreterDataReader.h | 31 ++ .../runtime/src/misc/Interval.cpp | 89 ++++ .../runtime/src/misc/Interval.h | 84 ++++ .../runtime/src/misc/IntervalSet.cpp | 521 +++++++++++++++++++++ .../runtime/src/misc/IntervalSet.h | 198 ++++++++ .../runtime/src/misc/MurmurHash.cpp | 134 ++++++ .../runtime/src/misc/MurmurHash.h | 82 ++++ .../runtime/src/misc/Predicate.cpp | 4 + .../runtime/src/misc/Predicate.h | 21 + 10 files changed, 1288 insertions(+) create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/InterpreterDataReader.cpp create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/InterpreterDataReader.h create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Interval.cpp create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Interval.h create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.cpp create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.h create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/MurmurHash.cpp create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/MurmurHash.h create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Predicate.cpp create mode 100644 antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Predicate.h (limited to 'antlr4-cpp-runtime-4.9.2-source/runtime/src/misc') diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/InterpreterDataReader.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/InterpreterDataReader.cpp new file mode 100644 index 0000000..c77b8bc --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/InterpreterDataReader.cpp @@ -0,0 +1,124 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "atn/ATN.h" +#include "atn/ATNDeserializer.h" +#include "Vocabulary.h" + +#include "misc/InterpreterDataReader.h" + +using namespace antlr4::dfa; +using namespace antlr4::atn; +using namespace antlr4::misc; + +InterpreterData::InterpreterData(std::vector const& literalNames, std::vector const& symbolicNames) +: vocabulary(literalNames, symbolicNames) { +} + +InterpreterData InterpreterDataReader::parseFile(std::string const& fileName) { + // The structure of the data file is very simple. Everything is line based with empty lines + // separating the different parts. For lexers the layout is: + // token literal names: + // ... + // + // token symbolic names: + // ... + // + // rule names: + // ... + // + // channel names: + // ... + // + // mode names: + // ... + // + // atn: + // enclosed in a pair of squared brackets. + // + // Data for a parser does not contain channel and mode names. + + std::ifstream input(fileName); + if (!input.good()) + return {}; + + std::vector literalNames; + std::vector symbolicNames; + + std::string line; + + std::getline(input, line, '\n'); + assert(line == "token literal names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + literalNames.push_back(line == "null" ? "" : line); + }; + + std::getline(input, line, '\n'); + assert(line == "token symbolic names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + symbolicNames.push_back(line == "null" ? "" : line); + }; + InterpreterData result(literalNames, symbolicNames); + + std::getline(input, line, '\n'); + assert(line == "rule names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.ruleNames.push_back(line); + }; + + std::getline(input, line, '\n'); + if (line == "channel names:") { + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.channels.push_back(line); + }; + + std::getline(input, line, '\n'); + assert(line == "mode names:"); + while (true) { + std::getline(input, line, '\n'); + if (line.empty()) + break; + + result.modes.push_back(line); + }; + } + + std::vector serializedATN; + + std::getline(input, line, '\n'); + assert(line == "atn:"); + std::getline(input, line, '\n'); + std::stringstream tokenizer(line); + std::string value; + while (tokenizer.good()) { + std::getline(tokenizer, value, ','); + unsigned long number; + if (value[0] == '[') + number = std::strtoul(&value[1], nullptr, 10); + else + number = std::strtoul(value.c_str(), nullptr, 10); + serializedATN.push_back(static_cast(number)); + } + + ATNDeserializer deserializer; + result.atn = deserializer.deserialize(serializedATN); + return result; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/InterpreterDataReader.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/InterpreterDataReader.h new file mode 100644 index 0000000..0c32ac6 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/InterpreterDataReader.h @@ -0,0 +1,31 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace misc { + + struct InterpreterData { + atn::ATN atn; + dfa::Vocabulary vocabulary; + std::vector ruleNames; + std::vector channels; // Only valid for lexer grammars. + std::vector modes; // ditto + + InterpreterData() {}; // For invalid content. + InterpreterData(std::vector const& literalNames, std::vector const& symbolicNames); + }; + + // A class to read plain text interpreter data produced by ANTLR. + class ANTLR4CPP_PUBLIC InterpreterDataReader { + public: + static InterpreterData parseFile(std::string const& fileName); + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Interval.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Interval.cpp new file mode 100644 index 0000000..bb521eb --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Interval.cpp @@ -0,0 +1,89 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/Interval.h" + +using namespace antlr4::misc; + +size_t antlr4::misc::numericToSymbol(ssize_t v) { + return static_cast(v); +} + +ssize_t antlr4::misc::symbolToNumeric(size_t v) { + return static_cast(v); +} + +Interval const Interval::INVALID; + +Interval::Interval() : Interval(static_cast(-1), -2) { // Need an explicit cast here for VS. +} + +Interval::Interval(size_t a_, size_t b_) : Interval(symbolToNumeric(a_), symbolToNumeric(b_)) { +} + +Interval::Interval(ssize_t a_, ssize_t b_) : a(a_), b(b_) { +} + +size_t Interval::length() const { + if (b < a) { + return 0; + } + return size_t(b - a + 1); +} + +bool Interval::operator == (const Interval &other) const { + return a == other.a && b == other.b; +} + +size_t Interval::hashCode() const { + size_t hash = 23; + hash = hash * 31 + static_cast(a); + hash = hash * 31 + static_cast(b); + return hash; +} + +bool Interval::startsBeforeDisjoint(const Interval &other) const { + return a < other.a && b < other.a; +} + +bool Interval::startsBeforeNonDisjoint(const Interval &other) const { + return a <= other.a && b >= other.a; +} + +bool Interval::startsAfter(const Interval &other) const { + return a > other.a; +} + +bool Interval::startsAfterDisjoint(const Interval &other) const { + return a > other.b; +} + +bool Interval::startsAfterNonDisjoint(const Interval &other) const { + return a > other.a && a <= other.b; // b >= other.b implied +} + +bool Interval::disjoint(const Interval &other) const { + return startsBeforeDisjoint(other) || startsAfterDisjoint(other); +} + +bool Interval::adjacent(const Interval &other) const { + return a == other.b + 1 || b == other.a - 1; +} + +bool Interval::properlyContains(const Interval &other) const { + return other.a >= a && other.b <= b; +} + +Interval Interval::Union(const Interval &other) const { + return Interval(std::min(a, other.a), std::max(b, other.b)); +} + +Interval Interval::intersection(const Interval &other) const { + return Interval(std::max(a, other.a), std::min(b, other.b)); +} + +std::string Interval::toString() const { + return std::to_string(a) + ".." + std::to_string(b); +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Interval.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Interval.h new file mode 100644 index 0000000..0198ee5 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Interval.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace misc { + + // Helpers to convert certain unsigned symbols (e.g. Token::EOF) to their original numeric value (e.g. -1) + // and vice versa. This is needed mostly for intervals to keep their original order and for toString() + // methods to print the original numeric value (e.g. for tests). + size_t numericToSymbol(ssize_t v); + ssize_t symbolToNumeric(size_t v); + + /// An immutable inclusive interval a..b + class ANTLR4CPP_PUBLIC Interval { + public: + static const Interval INVALID; + + // Must stay signed to guarantee the correct sort order. + ssize_t a; + ssize_t b; + + Interval(); + explicit Interval(size_t a_, size_t b_); // For unsigned -> signed mappings. + Interval(ssize_t a_, ssize_t b_); + + /// return number of elements between a and b inclusively. x..x is length 1. + /// if b < a, then length is 0. 9..10 has length 2. + size_t length() const; + + bool operator == (const Interval &other) const; + + size_t hashCode() const; + + /// + /// Does this start completely before other? Disjoint + bool startsBeforeDisjoint(const Interval &other) const; + + /// + /// Does this start at or before other? Nondisjoint + bool startsBeforeNonDisjoint(const Interval &other) const; + + /// + /// Does this.a start after other.b? May or may not be disjoint + bool startsAfter(const Interval &other) const; + + /// + /// Does this start completely after other? Disjoint + bool startsAfterDisjoint(const Interval &other) const; + + /// + /// Does this start after other? NonDisjoint + bool startsAfterNonDisjoint(const Interval &other) const; + + /// + /// Are both ranges disjoint? I.e., no overlap? + bool disjoint(const Interval &other) const; + + /// + /// Are two intervals adjacent such as 0..41 and 42..42? + bool adjacent(const Interval &other) const; + + bool properlyContains(const Interval &other) const; + + /// + /// Return the interval computed from combining this and other + Interval Union(const Interval &other) const; + + /// + /// Return the interval in common between this and o + Interval intersection(const Interval &other) const; + + std::string toString() const; + + private: + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.cpp new file mode 100644 index 0000000..80182da --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.cpp @@ -0,0 +1,521 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" +#include "Lexer.h" +#include "Exceptions.h" +#include "Vocabulary.h" + +#include "misc/IntervalSet.h" + +using namespace antlr4; +using namespace antlr4::misc; + +IntervalSet const IntervalSet::COMPLETE_CHAR_SET = + IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE); + +IntervalSet const IntervalSet::EMPTY_SET; + +IntervalSet::IntervalSet() : _intervals() { +} + +IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() { + _intervals = set._intervals; +} + +IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) { +} + +IntervalSet::IntervalSet(std::vector&& intervals) : _intervals(std::move(intervals)) { +} + +IntervalSet& IntervalSet::operator=(const IntervalSet& other) { + _intervals = other._intervals; + return *this; +} + +IntervalSet& IntervalSet::operator=(IntervalSet&& other) { + _intervals = move(other._intervals); + return *this; +} + +IntervalSet IntervalSet::of(ssize_t a) { + return IntervalSet({ Interval(a, a) }); +} + +IntervalSet IntervalSet::of(ssize_t a, ssize_t b) { + return IntervalSet({ Interval(a, b) }); +} + +void IntervalSet::clear() { + _intervals.clear(); +} + +void IntervalSet::add(ssize_t el) { + add(el, el); +} + +void IntervalSet::add(ssize_t a, ssize_t b) { + add(Interval(a, b)); +} + +void IntervalSet::add(const Interval &addition) { + if (addition.b < addition.a) { + return; + } + + // find position in list + for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) { + Interval r = *iterator; + if (addition == r) { + return; + } + + if (addition.adjacent(r) || !addition.disjoint(r)) { + // next to each other, make a single larger interval + Interval bigger = addition.Union(r); + *iterator = bigger; + + // make sure we didn't just create an interval that + // should be merged with next interval in list + while (iterator + 1 != _intervals.end()) { + Interval next = *++iterator; + if (!bigger.adjacent(next) && bigger.disjoint(next)) { + break; + } + + // if we bump up against or overlap next, merge + iterator = _intervals.erase(iterator);// remove this one + --iterator; // move backwards to what we just set + *iterator = bigger.Union(next); // set to 3 merged ones + // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result + } + return; + } + + if (addition.startsBeforeDisjoint(r)) { + // insert before r + //--iterator; + _intervals.insert(iterator, addition); + return; + } + + // if disjoint and after r, a future iteration will handle it + } + + // ok, must be after last interval (and disjoint from last interval) + // just add it + _intervals.push_back(addition); +} + +IntervalSet IntervalSet::Or(const std::vector &sets) { + IntervalSet result; + for (const auto &s : sets) { + result.addAll(s); + } + return result; +} + +IntervalSet& IntervalSet::addAll(const IntervalSet &set) { + // walk set and add each interval + for (auto const& interval : set._intervals) { + add(interval); + } + return *this; +} + +IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const { + return complement(IntervalSet::of(minElement, maxElement)); +} + +IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const { + return vocabulary.subtract(*this); +} + +IntervalSet IntervalSet::subtract(const IntervalSet &other) const { + return subtract(*this, other); +} + +IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) { + if (left.isEmpty()) { + return IntervalSet(); + } + + if (right.isEmpty()) { + // right set has no elements; just return the copy of the current set + return left; + } + + IntervalSet result(left); + size_t resultI = 0; + size_t rightI = 0; + while (resultI < result._intervals.size() && rightI < right._intervals.size()) { + Interval &resultInterval = result._intervals[resultI]; + const Interval &rightInterval = right._intervals[rightI]; + + // operation: (resultInterval - rightInterval) and update indexes + + if (rightInterval.b < resultInterval.a) { + rightI++; + continue; + } + + if (rightInterval.a > resultInterval.b) { + resultI++; + continue; + } + + Interval beforeCurrent; + Interval afterCurrent; + if (rightInterval.a > resultInterval.a) { + beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1); + } + + if (rightInterval.b < resultInterval.b) { + afterCurrent = Interval(rightInterval.b + 1, resultInterval.b); + } + + if (beforeCurrent.a > -1) { // -1 is the default value + if (afterCurrent.a > -1) { + // split the current interval into two + result._intervals[resultI] = beforeCurrent; + result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent); + resultI++; + rightI++; + } else { + // replace the current interval + result._intervals[resultI] = beforeCurrent; + resultI++; + } + } else { + if (afterCurrent.a > -1) { + // replace the current interval + result._intervals[resultI] = afterCurrent; + rightI++; + } else { + // remove the current interval (thus no need to increment resultI) + result._intervals.erase(result._intervals.begin() + resultI); + } + } + } + + // If rightI reached right.intervals.size(), no more intervals to subtract from result. + // If resultI reached result.intervals.size(), we would be subtracting from an empty set. + // Either way, we are done. + return result; +} + +IntervalSet IntervalSet::Or(const IntervalSet &a) const { + IntervalSet result; + result.addAll(*this); + result.addAll(a); + return result; +} + +IntervalSet IntervalSet::And(const IntervalSet &other) const { + IntervalSet intersection; + size_t i = 0; + size_t j = 0; + + // iterate down both interval lists looking for nondisjoint intervals + while (i < _intervals.size() && j < other._intervals.size()) { + Interval mine = _intervals[i]; + Interval theirs = other._intervals[j]; + + if (mine.startsBeforeDisjoint(theirs)) { + // move this iterator looking for interval that might overlap + i++; + } else if (theirs.startsBeforeDisjoint(mine)) { + // move other iterator looking for interval that might overlap + j++; + } else if (mine.properlyContains(theirs)) { + // overlap, add intersection, get next theirs + intersection.add(mine.intersection(theirs)); + j++; + } else if (theirs.properlyContains(mine)) { + // overlap, add intersection, get next mine + intersection.add(mine.intersection(theirs)); + i++; + } else if (!mine.disjoint(theirs)) { + // overlap, add intersection + intersection.add(mine.intersection(theirs)); + + // Move the iterator of lower range [a..b], but not + // the upper range as it may contain elements that will collide + // with the next iterator. So, if mine=[0..115] and + // theirs=[115..200], then intersection is 115 and move mine + // but not theirs as theirs may collide with the next range + // in thisIter. + // move both iterators to next ranges + if (mine.startsAfterNonDisjoint(theirs)) { + j++; + } else if (theirs.startsAfterNonDisjoint(mine)) { + i++; + } + } + } + + return intersection; +} + +bool IntervalSet::contains(size_t el) const { + return contains(symbolToNumeric(el)); +} + +bool IntervalSet::contains(ssize_t el) const { + if (_intervals.empty()) + return false; + + if (el < _intervals[0].a) // list is sorted and el is before first interval; not here + return false; + + for (const auto &interval : _intervals) { + if (el >= interval.a && el <= interval.b) { + return true; // found in this interval + } + } + return false; +} + +bool IntervalSet::isEmpty() const { + return _intervals.empty(); +} + +ssize_t IntervalSet::getSingleElement() const { + if (_intervals.size() == 1) { + if (_intervals[0].a == _intervals[0].b) { + return _intervals[0].a; + } + } + + return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work? +} + +ssize_t IntervalSet::getMaxElement() const { + if (_intervals.empty()) { + return Token::INVALID_TYPE; + } + + return _intervals.back().b; +} + +ssize_t IntervalSet::getMinElement() const { + if (_intervals.empty()) { + return Token::INVALID_TYPE; + } + + return _intervals[0].a; +} + +std::vector const& IntervalSet::getIntervals() const { + return _intervals; +} + +size_t IntervalSet::hashCode() const { + size_t hash = MurmurHash::initialize(); + for (const auto &interval : _intervals) { + hash = MurmurHash::update(hash, interval.a); + hash = MurmurHash::update(hash, interval.b); + } + + return MurmurHash::finish(hash, _intervals.size() * 2); +} + +bool IntervalSet::operator == (const IntervalSet &other) const { + if (_intervals.empty() && other._intervals.empty()) + return true; + + if (_intervals.size() != other._intervals.size()) + return false; + + return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin()); +} + +std::string IntervalSet::toString() const { + return toString(false); +} + +std::string IntervalSet::toString(bool elemAreChar) const { + if (_intervals.empty()) { + return "{}"; + } + + std::stringstream ss; + size_t effectiveSize = size(); + if (effectiveSize > 1) { + ss << "{"; + } + + bool firstEntry = true; + for (const auto &interval : _intervals) { + if (!firstEntry) + ss << ", "; + firstEntry = false; + + ssize_t a = interval.a; + ssize_t b = interval.b; + if (a == b) { + if (a == -1) { + ss << ""; + } else if (elemAreChar) { + ss << "'" << static_cast(a) << "'"; + } else { + ss << a; + } + } else { + if (elemAreChar) { + ss << "'" << static_cast(a) << "'..'" << static_cast(b) << "'"; + } else { + ss << a << ".." << b; + } + } + } + if (effectiveSize > 1) { + ss << "}"; + } + + return ss.str(); +} + +std::string IntervalSet::toString(const std::vector &tokenNames) const { + return toString(dfa::Vocabulary::fromTokenNames(tokenNames)); +} + +std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const { + if (_intervals.empty()) { + return "{}"; + } + + std::stringstream ss; + size_t effectiveSize = size(); + if (effectiveSize > 1) { + ss << "{"; + } + + bool firstEntry = true; + for (const auto &interval : _intervals) { + if (!firstEntry) + ss << ", "; + firstEntry = false; + + ssize_t a = interval.a; + ssize_t b = interval.b; + if (a == b) { + ss << elementName(vocabulary, a); + } else { + for (ssize_t i = a; i <= b; i++) { + if (i > a) { + ss << ", "; + } + ss << elementName(vocabulary, i); + } + } + } + if (effectiveSize > 1) { + ss << "}"; + } + + return ss.str(); +} + +std::string IntervalSet::elementName(const std::vector &tokenNames, ssize_t a) const { + return elementName(dfa::Vocabulary::fromTokenNames(tokenNames), a); +} + +std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const { + if (a == -1) { + return ""; + } else if (a == -2) { + return ""; + } else { + return vocabulary.getDisplayName(a); + } +} + +size_t IntervalSet::size() const { + size_t result = 0; + for (const auto &interval : _intervals) { + result += size_t(interval.b - interval.a + 1); + } + return result; +} + +std::vector IntervalSet::toList() const { + std::vector result; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + result.push_back(v); + } + } + return result; +} + +std::set IntervalSet::toSet() const { + std::set result; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + result.insert(v); + } + } + return result; +} + +ssize_t IntervalSet::get(size_t i) const { + size_t index = 0; + for (const auto &interval : _intervals) { + ssize_t a = interval.a; + ssize_t b = interval.b; + for (ssize_t v = a; v <= b; v++) { + if (index == i) { + return v; + } + index++; + } + } + return -1; +} + +void IntervalSet::remove(size_t el) { + remove(symbolToNumeric(el)); +} + +void IntervalSet::remove(ssize_t el) { + for (size_t i = 0; i < _intervals.size(); ++i) { + Interval &interval = _intervals[i]; + ssize_t a = interval.a; + ssize_t b = interval.b; + if (el < a) { + break; // list is sorted and el is before this interval; not here + } + + // if whole interval x..x, rm + if (el == a && el == b) { + _intervals.erase(_intervals.begin() + (long)i); + break; + } + // if on left edge x..b, adjust left + if (el == a) { + interval.a++; + break; + } + // if on right edge a..x, adjust right + if (el == b) { + interval.b--; + break; + } + // if in middle a..x..b, split interval + if (el > a && el < b) { // found in this interval + ssize_t oldb = interval.b; + interval.b = el - 1; // [a..x-1] + add(el + 1, oldb); // add [x+1..b] + + break; // ml: not in the Java code but I believe we also should stop searching here, as we found x. + } + } +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.h new file mode 100644 index 0000000..aa2adf6 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/IntervalSet.h @@ -0,0 +1,198 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "misc/Interval.h" +#include "Exceptions.h" + +namespace antlr4 { +namespace misc { + + /** + * This class implements the {@link IntSet} backed by a sorted array of + * non-overlapping intervals. It is particularly efficient for representing + * large collections of numbers, where the majority of elements appear as part + * of a sequential range of numbers that are all part of the set. For example, + * the set { 1, 2, 3, 4, 7, 8 } may be represented as { [1, 4], [7, 8] }. + * + *

+ * This class is able to represent sets containing any combination of values in + * the range {@link Integer#MIN_VALUE} to {@link Integer#MAX_VALUE} + * (inclusive).

+ */ + class ANTLR4CPP_PUBLIC IntervalSet { + public: + static IntervalSet const COMPLETE_CHAR_SET; + static IntervalSet const EMPTY_SET; + + private: + /// The list of sorted, disjoint intervals. + std::vector _intervals; + + explicit IntervalSet(std::vector&& intervals); + + public: + IntervalSet(); + IntervalSet(IntervalSet const& set); + IntervalSet(IntervalSet&& set); + + template + IntervalSet(int, T1 t1, T_NEXT&&... next) : IntervalSet() { + // The first int argument is an ignored count for compatibility + // with the previous varargs based interface. + addItems(t1, std::forward(next)...); + } + + IntervalSet& operator=(IntervalSet const& set); + IntervalSet& operator=(IntervalSet&& set); + + /// Create a set with a single element, el. + static IntervalSet of(ssize_t a); + + /// Create a set with all ints within range [a..b] (inclusive) + static IntervalSet of(ssize_t a, ssize_t b); + + void clear(); + + /// Add a single element to the set. An isolated element is stored + /// as a range el..el. + void add(ssize_t el); + + /// Add interval; i.e., add all integers from a to b to set. + /// If b &sets); + + // Copy on write so we can cache a..a intervals and sets of that. + void add(const Interval &addition); + IntervalSet& addAll(const IntervalSet &set); + + template + void addItems(T1 t1, T_NEXT&&... next) { + add(t1); + addItems(std::forward(next)...); + } + + IntervalSet complement(ssize_t minElement, ssize_t maxElement) const; + + /// Given the set of possible values (rather than, say UNICODE or MAXINT), + /// return a new set containing all elements in vocabulary, but not in + /// this. The computation is (vocabulary - this). + /// + /// 'this' is assumed to be either a subset or equal to vocabulary. + IntervalSet complement(const IntervalSet &vocabulary) const; + + /// Compute this-other via this&~other. + /// Return a new set containing all elements in this but not in other. + /// other is assumed to be a subset of this; + /// anything that is in other but not in this will be ignored. + IntervalSet subtract(const IntervalSet &other) const; + + /** + * Compute the set difference between two interval sets. The specific + * operation is {@code left - right}. If either of the input sets is + * {@code null}, it is treated as though it was an empty set. + */ + static IntervalSet subtract(const IntervalSet &left, const IntervalSet &right); + + IntervalSet Or(const IntervalSet &a) const; + + /// Return a new set with the intersection of this set with other. Because + /// the intervals are sorted, we can use an iterator for each list and + /// just walk them together. This is roughly O(min(n,m)) for interval + /// list lengths n and m. + IntervalSet And(const IntervalSet &other) const; + + /// Is el in any range of this set? + bool contains(size_t el) const; // For mapping of e.g. Token::EOF to -1 etc. + bool contains(ssize_t el) const; + + /// return true if this set has no members + bool isEmpty() const; + + /// If this set is a single integer, return it otherwise Token.INVALID_TYPE. + ssize_t getSingleElement() const; + + /** + * Returns the maximum value contained in the set. + * + * @return the maximum value contained in the set. If the set is empty, this + * method returns {@link Token#INVALID_TYPE}. + */ + ssize_t getMaxElement() const; + + /** + * Returns the minimum value contained in the set. + * + * @return the minimum value contained in the set. If the set is empty, this + * method returns {@link Token#INVALID_TYPE}. + */ + ssize_t getMinElement() const; + + /// + /// Return a list of Interval objects. + std::vector const& getIntervals() const; + + size_t hashCode() const; + + /// Are two IntervalSets equal? Because all intervals are sorted + /// and disjoint, equals is a simple linear walk over both lists + /// to make sure they are the same. + bool operator == (const IntervalSet &other) const; + std::string toString() const; + std::string toString(bool elemAreChar) const; + + /** + * @deprecated Use {@link #toString(Vocabulary)} instead. + */ + std::string toString(const std::vector &tokenNames) const; + std::string toString(const dfa::Vocabulary &vocabulary) const; + + protected: + /** + * @deprecated Use {@link #elementName(Vocabulary, int)} instead. + */ + std::string elementName(const std::vector &tokenNames, ssize_t a) const; + std::string elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const; + + public: + size_t size() const; + std::vector toList() const; + std::set toSet() const; + + /// Get the ith element of ordered set. Used only by RandomPhrase so + /// don't bother to implement if you're not doing that for a new + /// ANTLR code gen target. + ssize_t get(size_t i) const; + void remove(size_t el); // For mapping of e.g. Token::EOF to -1 etc. + void remove(ssize_t el); + + private: + void addItems() { /* No-op */ } + }; + +} // namespace atn +} // namespace antlr4 + +// Hash function for IntervalSet. + +namespace std { + using antlr4::misc::IntervalSet; + + template <> struct hash + { + size_t operator() (const IntervalSet &x) const + { + return x.hashCode(); + } + }; +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/MurmurHash.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/MurmurHash.cpp new file mode 100644 index 0000000..55e96c9 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/MurmurHash.cpp @@ -0,0 +1,134 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#include "misc/MurmurHash.h" + +using namespace antlr4::misc; + +// A variation of the MurmurHash3 implementation (https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp) +// Here we unrolled the loop used there into individual calls to update(), as we usually hash object fields +// instead of entire buffers. + +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define FORCE_INLINE __forceinline + +#include + +#define ROTL32(x,y) _rotl(x,y) +#define ROTL64(x,y) _rotl64(x,y) + +#define BIG_CONSTANT(x) (x) + +#else // defined(_MSC_VER) + +// Other compilers + +#define FORCE_INLINE inline __attribute__((always_inline)) + +inline uint32_t rotl32 (uint32_t x, int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} + +inline uint64_t rotl64 (uint64_t x, int8_t r) +{ + return (x << r) | (x >> (64 - r)); +} + +#define ROTL32(x,y) rotl32(x,y) +#define ROTL64(x,y) rotl64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +size_t MurmurHash::initialize() { + return initialize(DEFAULT_SEED); +} + +size_t MurmurHash::initialize(size_t seed) { + return seed; +} + +#if defined(_WIN32) || defined(_WIN64) + #if _WIN64 + #define ENVIRONMENT64 + #else + #define ENVIRONMENT32 + #endif +#endif + +#if defined(__GNUC__) + #if defined(__x86_64__) || defined(__ppc64__) + #define ENVIRONMENT64 + #else + #define ENVIRONMENT32 + #endif +#endif + +#if defined(ENVIRONMENT32) + +size_t MurmurHash::update(size_t hash, size_t value) { + static const size_t c1 = 0xCC9E2D51; + static const size_t c2 = 0x1B873593; + + size_t k1 = value; + k1 *= c1; + k1 = ROTL32(k1, 15); + k1 *= c2; + + hash ^= k1; + hash = ROTL32(hash, 13); + hash = hash * 5 + 0xE6546B64; + + return hash; +} + + +size_t MurmurHash::finish(size_t hash, size_t entryCount) { + hash ^= entryCount * 4; + hash ^= hash >> 16; + hash *= 0x85EBCA6B; + hash ^= hash >> 13; + hash *= 0xC2B2AE35; + hash ^= hash >> 16; + return hash; +} + +#else + +size_t MurmurHash::update(size_t hash, size_t value) { + static const size_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + static const size_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + size_t k1 = value; + k1 *= c1; + k1 = ROTL64(k1, 31); + k1 *= c2; + + hash ^= k1; + hash = ROTL64(hash, 27); + hash = hash * 5 + 0x52dce729; + + return hash; +} + + +size_t MurmurHash::finish(size_t hash, size_t entryCount) { + hash ^= entryCount * 8; + hash ^= hash >> 33; + hash *= 0xff51afd7ed558ccd; + hash ^= hash >> 33; + hash *= 0xc4ceb9fe1a85ec53; + hash ^= hash >> 33; + return hash; +} + +#endif diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/MurmurHash.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/MurmurHash.h new file mode 100644 index 0000000..598e13d --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/MurmurHash.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace misc { + + class ANTLR4CPP_PUBLIC MurmurHash { + + private: +#if __cplusplus >= 201703L + static constexpr size_t DEFAULT_SEED = 0; +#else + enum : size_t { + DEFAULT_SEED = 0, + }; +#endif + + /// Initialize the hash using the default seed value. + /// Returns the intermediate hash value. + public: + static size_t initialize(); + + /// Initialize the hash using the specified seed. + static size_t initialize(size_t seed); + + /// Update the intermediate hash value for the next input {@code value}. + /// the intermediate hash value + /// the value to add to the current hash + /// Returns the updated intermediate hash value. + static size_t update(size_t hash, size_t value); + + /** + * Update the intermediate hash value for the next input {@code value}. + * + * @param hash the intermediate hash value + * @param value the value to add to the current hash + * @return the updated intermediate hash value + */ + template + static size_t update(size_t hash, Ref const& value) { + return update(hash, value != nullptr ? value->hashCode() : 0); + } + + template + static size_t update(size_t hash, T *value) { + return update(hash, value != nullptr ? value->hashCode() : 0); + } + + /// + /// Apply the final computation steps to the intermediate value {@code hash} + /// to form the final result of the MurmurHash 3 hash function. + /// + /// the intermediate hash value + /// the number of calls to update() before calling finish() + /// the final hash result + static size_t finish(size_t hash, size_t entryCount); + + /// Utility function to compute the hash code of an array using the MurmurHash3 algorithm. + /// + /// @param the array element type + /// the array data + /// the seed for the MurmurHash algorithm + /// the hash code of the data + template // where T is C array type + static size_t hashCode(const std::vector> &data, size_t seed) { + size_t hash = initialize(seed); + for (auto entry : data) { + hash = update(hash, entry->hashCode()); + } + + return finish(hash, data.size()); + } + }; + +} // namespace atn +} // namespace antlr4 diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Predicate.cpp b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Predicate.cpp new file mode 100644 index 0000000..c35f192 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Predicate.cpp @@ -0,0 +1,4 @@ +#include "misc/Predicate.h" + +antlr4::misc::Predicate::~Predicate() { +} diff --git a/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Predicate.h b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Predicate.h new file mode 100644 index 0000000..1032d53 --- /dev/null +++ b/antlr4-cpp-runtime-4.9.2-source/runtime/src/misc/Predicate.h @@ -0,0 +1,21 @@ +/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. + * Use of this file is governed by the BSD 3-clause license that + * can be found in the LICENSE.txt file in the project root. + */ + +#pragma once + +#include "antlr4-common.h" + +namespace antlr4 { +namespace misc { + + class ANTLR4CPP_PUBLIC Predicate { + public: + virtual ~Predicate(); + + virtual bool test(tree::ParseTree *t) = 0; + }; + +} // namespace tree +} // namespace antlr4 -- cgit v1.2.3