-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCFG.h
More file actions
130 lines (107 loc) · 4.38 KB
/
CFG.h
File metadata and controls
130 lines (107 loc) · 4.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#pragma once
#include "Languages.h"
#include "Automaton.h"
#include <string>
#include <memory>
#include <vector>
#include <set>
#include <ostream>
#include <functional>
#include <map>
namespace CFG {
/* A symbol in a production. */
struct Symbol {
enum class Type {
TERMINAL,
NONTERMINAL
};
Type type;
char32_t ch;
};
/* Convenience helpers. */
inline Symbol terminal(char32_t ch) {
return { Symbol::Type::TERMINAL, ch };
}
inline Symbol nonterminal(char32_t ch) {
return { Symbol::Type::NONTERMINAL, ch };
}
/* Type representing a production rule. */
struct Production {
char32_t nonterminal;
std::vector<Symbol> replacement;
};
/* Type representing a CFG. This is a "pure data" version of CFG; operations on
* CFGs that need to do more elaborate processing might bundle it with some
* other information.
*/
struct CFG {
Languages::Alphabet alphabet;
std::set<char32_t> nonterminals;
char32_t startSymbol = char32_t(0);
std::vector<Production> productions;
};
/* Type representing a derivation of a string from a grammar. Each item in the
* vector is a pair of the form (production, position).
*/
using Derivation = std::vector<std::pair<Production, std::size_t>>;
/* Input is a string, output is a boolean for whether we match. */
using Matcher = std::function<bool(const std::string&)>;
/* Input is a string, output is a derivation. */
using Deriver = std::function<Derivation (const std::string&)>;
/* Input is a length, output is a pair of "can we make it?" and a string. */
using Generator = std::function<std::pair<bool, std::string>(std::size_t)>;
/* We support three different matchers. */
enum class MatcherType {
EARLEY_LR0, // General purpose, time-optimized. Use as default.
EARLEY, // General purpose, fast for unambiguous grammars, slower as it gets more ambiguous
CYK, // Only works on (weak) CNF; somewhat slow.
};
Matcher matcherFor(const CFG& cfg, MatcherType type = MatcherType::EARLEY_LR0);
Deriver deriverFor(const CFG& cfg); // Earley
Generator generatorFor(const CFG& cfg); // McKenzie
/* * * * * CFG Utility Functions * * * * */
/* Converts a grammar to Chomsky normal form. The nonterminals in the resulting
* grammar may have names that bear no resemblance to the original grammar's
* nonterminal names.
*/
CFG toCNF(const CFG& cfg);
/* Converts a grammar to "weak CNF." This is like CNF except that unit
* productions are permitted as long as they don't form a cycle. This helps
* keep the size of the resulting grammar smaller than regular CNF.
*/
CFG toWeakCNF(const CFG& cfg);
/* * * * * Language Transforms * * * * */
/* Returns a new CFG whose language is the intersection of the languages of the
* input CFG and DFA. (CFLs aren't closed under intersection, but they are closed
* under intersection with regular languages.) The inputs must have the same
* alphabets for this construction to work.
*
* The nonterminals in the resulting CFG will be chosen Arbitrarily and
* Capriciously.
*/
CFG intersect(const CFG& lhs, const Automata::DFA& rhs);
/* Returns a new CFG whose language is the union of the languages of the
* input CFGs. The inputs must have the same alphabets for this construction
* to work.
*
* The nonterminals in the resulting CFG will be chosen Arbitrarily and
* Capriciously.
*/
CFG unionOf(const CFG& lhs, const CFG& rhs);
/* * * * * C++ Utility Functions * * * * */
bool operator== (const Symbol& lhs, const Symbol& rhs);
bool operator< (const Production& lhs, const Production& rhs);
bool operator< (const Symbol& lhs, const Symbol& rhs);
std::ostream& operator<< (std::ostream& out, const Symbol& symbol);
std::ostream& operator<< (std::ostream& out, const Production& production);
std::ostream& operator<< (std::ostream& out, const Derivation& derivation);
std::ostream& operator<< (std::ostream& out, const CFG& cfg);
}
/* Hash support. */
namespace std {
template <> struct hash<CFG::Symbol> {
std::size_t operator()(const CFG::Symbol& s) const {
return s.ch * 997 + std::size_t(s.type);
}
};
}