Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ Dockerfile
Makefile
.qmake.stash
SyntaxTutor
.cache
10 changes: 5 additions & 5 deletions src/backend/grammar_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ bool GrammarFactory::HasUnreachableSymbols(Grammar& grammar) const {
if (it != grammar.g_.end()) {
for (const auto& production : it->second) {
for (const auto& symbol : production) {
if (!grammar.st_.IsTerminal(symbol) &&
if (grammar.st_.IsNonTerminal(symbol) &&
!reachable.contains(symbol)) {
reachable.insert(symbol);
pending.push(symbol);
Expand Down Expand Up @@ -283,7 +283,7 @@ bool GrammarFactory::IsInfinite(Grammar& grammar) const {
for (const auto& prod : productions) {
bool all_generating = true;
for (const auto& symbol : prod) {
if (!grammar.st_.IsTerminal(symbol) &&
if (grammar.st_.IsNonTerminal(symbol) &&
!generating.contains(symbol)) {
all_generating = false;
break;
Expand Down Expand Up @@ -321,11 +321,11 @@ bool GrammarFactory::HasIndirectLeftRecursion(const Grammar& grammar) const {
for (const auto& [nt, productions] : grammar.g_) {
graph[nt] = {};
for (const production& prod : productions) {
if (!grammar.st_.IsTerminal(prod[0])) {
if (grammar.st_.IsNonTerminal(prod[0])) {
graph[nt].insert(prod[0]);
}
for (size_t i = 1; i < prod.size(); ++i) {
if (grammar.st_.IsTerminal(prod[i])) {
if (!grammar.st_.IsNonTerminal(prod[i])) {
break;
}
graph[nt].insert(prod[i]);
Expand Down Expand Up @@ -576,7 +576,7 @@ void GrammarFactory::NormalizeNonTerminals(FactoryItem& item,
for (auto& [old_nt, prods] : item.g_) {
for (auto& prod : prods) {
for (auto& symbol : prod) {
if (!item.st_.IsTerminal(symbol)) {
if (item.st_.IsNonTerminal(symbol)) {
symbol = nt;
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/backend/ll1_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ void LL1Parser::ComputeFollowSets() {
for (const production& rhs : rule.second) {
for (size_t i = 0; i < rhs.size(); ++i) {
const std::string& symbol = rhs[i];
if (!gr_.st_.IsTerminal(symbol)) {
if (gr_.st_.IsNonTerminal(symbol)) {
std::unordered_set<std::string> first_remaining;

if (i + 1 < rhs.size()) {
Expand Down Expand Up @@ -201,4 +201,4 @@ LL1Parser::PredictionSymbols(const std::string& antecedent,
hd.erase(gr_.st_.EPSILON_);
hd.merge(Follow(antecedent));
return hd;
}
}
4 changes: 2 additions & 2 deletions src/backend/slr1_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ void SLR1Parser::ClosureUtil(std::unordered_set<Lr0Item>& items,
if (next == gr_.st_.EPSILON_) {
continue;
}
if (!gr_.st_.IsTerminal(next) &&
if (gr_.st_.IsNonTerminal(next) &&
std::find(visited.cbegin(), visited.cend(), next) ==
visited.cend()) {
const std::vector<production>& rules = gr_.g_.at(next);
Expand Down Expand Up @@ -328,7 +328,7 @@ void SLR1Parser::ComputeFollowSets() {
for (const production& rhs : rule.second) {
for (size_t i = 0; i < rhs.size(); ++i) {
const std::string& symbol = rhs[i];
if (!gr_.st_.IsTerminal(symbol)) {
if (gr_.st_.IsNonTerminal(symbol)) {
std::unordered_set<std::string> first_remaining;

if (i + 1 < rhs.size()) {
Expand Down
38 changes: 35 additions & 3 deletions src/backend/symbol_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,32 @@
#include <vector>

void SymbolTable::PutSymbol(const std::string& identifier, bool isTerminal) {
if (identifier == EPSILON_) {
st_[identifier] = symbol_type::META;
meta_symbols_.insert(identifier);
terminals_.erase(identifier);
terminals_wtho_eol_.erase(identifier);
non_terminals_.erase(identifier);
return;
}

if (identifier == EOL_) {
st_[identifier] = symbol_type::TERMINAL;
terminals_.insert(identifier);
terminals_wtho_eol_.erase(identifier);
non_terminals_.erase(identifier);
meta_symbols_.erase(identifier);
return;
}

if (st_.contains(identifier)) {
return;
}

if (isTerminal) {
st_.insert({identifier, symbol_type::TERMINAL});
terminals_.insert(identifier);
terminals_wtho_eol_.insert(identifier);

} else {
st_.insert({identifier, symbol_type::NO_TERMINAL});
non_terminals_.insert(identifier);
Expand All @@ -37,9 +58,20 @@ bool SymbolTable::In(const std::string& s) const {
}

// Reports whether `s` is classified as a terminal symbol.
//
// NOTE(review): two alternative bodies appear below. As written, the first
// `return` (membership in `terminals_`) decides the result, and the `st_`
// lookup that follows it is unreachable. This looks like the before/after
// lines of a change shown together -- confirm which body is intended and
// drop the other.
bool SymbolTable::IsTerminal(const std::string& s) const {
return terminals_.contains(s);
auto it = st_.find(s);
return it != st_.end() && it->second == symbol_type::TERMINAL;
}

// Reports whether `s` counts as a terminal for the "without EOL" view.
//
// NOTE(review): two consecutive `return` statements appear below. As written,
// the first one (not EPSILON_ and present in `terminals_`) decides the result,
// and the `terminals_wtho_eol_` lookup after it is unreachable. This looks
// like the before/after lines of a change shown together -- confirm which
// one is intended and drop the other.
bool SymbolTable::IsTerminalWthoEol(const std::string& s) const {
return s != EPSILON_ && terminals_.contains(s);
return terminals_wtho_eol_.contains(s);
}

// Reports whether `s` is recorded in `st_` with type NO_TERMINAL.
// Symbols that are absent from `st_` yield false.
bool SymbolTable::IsNonTerminal(const std::string& s) const {
    const auto entry = st_.find(s);
    if (entry == st_.end()) {
        return false;
    }
    return entry->second == symbol_type::NO_TERMINAL;
}

// Reports whether `s` is recorded in `st_` with type META.
// Symbols that are absent from `st_` yield false.
bool SymbolTable::IsMeta(const std::string& s) const {
    const auto entry = st_.find(s);
    if (entry == st_.end()) {
        return false;
    }
    return entry->second == symbol_type::META;
}
42 changes: 31 additions & 11 deletions src/backend/symbol_table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,20 @@
* @enum symbol_type
* @brief Represents the type of a grammar symbol.
*
* This enum distinguishes between terminal and non-terminal symbols
* This enum distinguishes between terminal, non-terminal, and meta symbols
* within the grammar and the symbol table.
*/
enum class symbol_type { NO_TERMINAL, TERMINAL };
enum class symbol_type { NO_TERMINAL, TERMINAL, META };

/**
* @struct SymbolTable
* @brief Stores and manages grammar symbols, including their classification and
* special markers.
*
* This structure holds information about all terminals and non-terminals used
* in a grammar, as well as special symbols such as EPSILON and the end-of-line
* marker ($). It supports symbol classification, membership checks, and
* filtered views such as terminals excluding $.
* in a grammar, as well as special symbols such as EPSILON (meta symbol) and
* the end-of-line marker ($). It supports symbol classification, membership
* checks, and filtered views such as terminals excluding $.
*/
struct SymbolTable {
/// @brief End-of-line symbol used in parsing, initialized as "$".
Expand All @@ -50,10 +50,9 @@ struct SymbolTable {
/// "EPSILON".
std::string EPSILON_{"EPSILON"};

/// @brief Main symbol table, mapping identifiers to a pair of symbol type
/// and its regex.
/// @brief Main symbol table, mapping identifiers to their symbol type.
std::unordered_map<std::string, symbol_type> st_{
{EOL_, symbol_type::TERMINAL}, {EPSILON_, symbol_type::TERMINAL}};
{EOL_, symbol_type::TERMINAL}, {EPSILON_, symbol_type::META}};

/**
* @brief Set of all terminal symbols (including EOL).
Expand All @@ -71,10 +70,15 @@ struct SymbolTable {
std::unordered_set<std::string> non_terminals_;

/**
* @brief Adds a non-terminal symbol to the symbol table.
* @brief Set of meta symbols (e.g., EPSILON).
*/
std::unordered_set<std::string> meta_symbols_{EPSILON_};

/**
* @brief Adds a symbol to the symbol table.
*
* @param identifier Name of the symbol.
* @param isTerminal True if the identifier is a terminal symbol
* @param identifier Name of the symbol.
* @param isTerminal True if the identifier is a terminal symbol.
*/
void PutSymbol(const std::string& identifier, bool isTerminal);

Expand All @@ -101,4 +105,20 @@ struct SymbolTable {
* @return true if the symbol is terminal, otherwise false.
*/
bool IsTerminalWthoEol(const std::string& s) const;

/**
* @brief Checks if a symbol is a non-terminal.
*
* @param s Symbol identifier to check.
* @return true if the symbol is non-terminal, otherwise false.
*/
bool IsNonTerminal(const std::string& s) const;

/**
* @brief Checks if a symbol is a meta symbol (e.g., EPSILON).
*
* @param s Symbol identifier to check.
* @return true if the symbol is meta, otherwise false.
*/
bool IsMeta(const std::string& s) const;
};
54 changes: 6 additions & 48 deletions src/backend/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ TEST(GrammarTest, GrammarWithEpsilon) {

ASSERT_EQ(gr.axiom_, "S");
ASSERT_EQ(gr.g_, expected);
ASSERT_TRUE(gr.st_.terminals_.contains(eps));
ASSERT_TRUE(gr.st_.IsMeta(eps));
ASSERT_TRUE(gr.st_.non_terminals_.contains("A"));
}

Expand Down Expand Up @@ -145,7 +145,7 @@ TEST(GrammarTest, ComplexGrammarWithEpsilonAndRecursion) {
ASSERT_TRUE(gr.st_.terminals_.contains("a"));
ASSERT_TRUE(gr.st_.terminals_.contains("b"));
ASSERT_TRUE(gr.st_.terminals_.contains("c"));
ASSERT_TRUE(gr.st_.terminals_.contains(eps));
ASSERT_TRUE(gr.st_.IsMeta(eps));
ASSERT_TRUE(gr.st_.terminals_.contains("$"));
ASSERT_TRUE(gr.st_.non_terminals_.contains("A"));
ASSERT_TRUE(gr.st_.non_terminals_.contains("B"));
Expand Down Expand Up @@ -435,28 +435,6 @@ TEST(GrammarFactoryTest, GeneratedLv5LL1GrammarIsAlwaysLL1) {
}
}

// Generates ten grammars with GenLL1Grammar(6) and asserts that
// CreateLL1Table() returns true for each of them.
TEST(GrammarFactoryTest, GeneratedLv6LL1GrammarIsAlwaysLL1) {
    constexpr int kSamples = 10;

    GrammarFactory factory;
    factory.Init();

    for (int sample = 0; sample < kSamples; ++sample) {
        Grammar generated = factory.GenLL1Grammar(6);
        LL1Parser parser(generated);
        ASSERT_TRUE(parser.CreateLL1Table());
    }
}

// Generates three grammars with GenLL1Grammar(7) and asserts that
// CreateLL1Table() returns true for each of them.
TEST(GrammarFactoryTest, GeneratedLv7LL1GrammarIsAlwaysLL1) {
    constexpr int kSamples = 3;

    GrammarFactory factory;
    factory.Init();

    for (int sample = 0; sample < kSamples; ++sample) {
        Grammar generated = factory.GenLL1Grammar(7);
        LL1Parser parser(generated);
        ASSERT_TRUE(parser.CreateLL1Table());
    }
}

TEST(GrammarFactoryTest, GeneratedLv1SLR1GrammarIsAlwaysSLR1) {
GrammarFactory factory;

Expand Down Expand Up @@ -512,28 +490,6 @@ TEST(GrammarFactoryTest, GeneratedLv5SLR1GrammarIsAlwaysSLR1) {
}
}

// Generates five grammars with GenSLR1Grammar(6) and asserts that
// MakeParser() returns true for each of them.
TEST(GrammarFactoryTest, GeneratedLv6SLR1GrammarIsAlwaysSLR1) {
    constexpr int kSamples = 5;

    GrammarFactory factory;
    factory.Init();

    for (int sample = 0; sample < kSamples; ++sample) {
        Grammar generated = factory.GenSLR1Grammar(6);
        SLR1Parser parser(generated);
        ASSERT_TRUE(parser.MakeParser());
    }
}

// Generates five grammars with GenSLR1Grammar(7) and asserts that
// MakeParser() returns true for each of them.
TEST(GrammarFactoryTest, GeneratedLv7SLR1GrammarIsAlwaysSLR1) {
    constexpr int kSamples = 5;

    GrammarFactory factory;
    factory.Init();

    for (int sample = 0; sample < kSamples; ++sample) {
        Grammar generated = factory.GenSLR1Grammar(7);
        SLR1Parser parser(generated);
        ASSERT_TRUE(parser.MakeParser());
    }
}

TEST(GrammarFactoryTest, NormalizeNonTerminals_Basic) {
GrammarFactory factory;

Expand Down Expand Up @@ -2333,6 +2289,7 @@ TEST(LL1__Test, FollowSet2) {
Grammar g;
g.st_.PutSymbol("S'", false);
g.st_.PutSymbol("S", false);
g.st_.PutSymbol("A", false);
g.st_.PutSymbol("A'", false);
g.st_.PutSymbol("B", false);
g.st_.PutSymbol("C", false);
Expand All @@ -2354,7 +2311,7 @@ TEST(LL1__Test, FollowSet2) {
g.AddProduction("C", {"c"});

LL1Parser ll1(g);

ASSERT_TRUE(ll1.CreateLL1Table());
std::unordered_set<std::string> result;
std::unordered_set<std::string> expected{"b", "c", "a", g.st_.EOL_};
result = ll1.Follow("A");
Expand Down Expand Up @@ -3574,7 +3531,8 @@ TEST(SymbolTableTest, IsTerminalWthoEol_ExcludesEpsilon) {
SymbolTable st;
st.PutSymbol(st.EPSILON_, true);

EXPECT_TRUE(st.IsTerminal(st.EPSILON_));
EXPECT_TRUE(st.IsMeta(st.EPSILON_));
EXPECT_FALSE(st.IsTerminal(st.EPSILON_));
EXPECT_FALSE(st.IsTerminalWthoEol(st.EPSILON_));
}

Expand Down
Loading