1- #include " RegexPostfix.hpp"
2- #include " pz_error.hpp"
3-
4- st32 Postfix::get_precedence (TokenType type) {
5- switch (type) {
6- case TokenType::STAR:
7- case TokenType::PLUS:
8- case TokenType::QUESTION:
9- case TokenType::QUANTIFIER_RANGE:
10- return 3 ; // Unary postfix operators
11- case TokenType::CONCAT:
12- return 2 ; // Implicit concatenation
13- case TokenType::ALTERNATION:
14- return 1 ; // Lowest precedence
15- default :
16- return 0 ;
17- }
18- }
19-
20- std::vector<Token> Postfix::convert (const std::vector<Token> &infix) {
21- std::vector<Token> postfix;
22- std::stack<Token> operators;
23- TokenType last_type = TokenType::END; // Tracks previous token for validation
24-
25- for (const auto &t : infix) {
26- switch (t.type ) {
27- // Operands go directly to output
28- case TokenType::LITERAL:
29- case TokenType::DOT:
30- case TokenType::CHAR_CLASS:
31- case TokenType::CARET:
32- case TokenType::DOLLAR:
33- postfix.push_back (t);
34- break ;
35-
36- // '(' is pushed to operator stack and output (for NFA grouping)
37- case TokenType::LPAREN: {
38- postfix.push_back (t);
39- operators.push (t);
40- break ;
41- }
42-
43- // Pop operators until matching '(' is found
44- case TokenType::RPAREN: {
45- if (last_type == TokenType::LPAREN)
46- PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
47- " Empty Parentheses at position " +
48- std::to_string (t.pos ));
49- while (!operators.empty () && operators.top ().type != TokenType::LPAREN) {
50- postfix.push_back (operators.top ());
51- operators.pop ();
52- }
53- if (operators.empty ())
54- PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
55- " Mismatched ')' at position " +
56- std::to_string (t.pos ));
57- operators.pop (); // Discard '('
58- postfix.push_back (t);
59- break ;
60- }
61- // Unary postfix operators must follow a valid expression
62- case TokenType::STAR:
63- case TokenType::PLUS:
64- case TokenType::QUESTION:
65- case TokenType::QUANTIFIER_RANGE:
66- if (last_type != TokenType::LITERAL && last_type != TokenType::DOT &&
67- last_type != TokenType::CHAR_CLASS &&
68- last_type != TokenType::RPAREN) {
69- PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
70- " Quantifier used without a valid preceding "
71- " expression at position " +
72- std::to_string (t.pos ));
73- }
74- postfix.push_back (t);
75- break ;
76-
77- case TokenType::ALTERNATION:
78- // '|' must separate two valid expressions
79- if (last_type == TokenType::END || last_type == TokenType::LPAREN ||
80- last_type == TokenType::ALTERNATION) {
81- PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
82- " Invalid '|' at position " +
83- std::to_string (t.pos ) +
84- " . It must separate two expressions." );
85- }
86- goto push_operator;
87-
88- // Binary operators handled via precedence rules
89- case TokenType::CONCAT:
90- push_operator:
91- while (!operators.empty () && operators.top ().type != TokenType::LPAREN &&
92- get_precedence (operators.top ().type ) >= get_precedence (t.type )) {
93- postfix.push_back (operators.top ());
94- operators.pop ();
95- }
96- operators.push (t);
97- break ;
98-
99- default :
100- break ;
101- }
102-
103- if (t.type != TokenType::END)
104- last_type = t.type ;
105- }
106-
107- // Pattern must not end with a binary operator
108- if (last_type == TokenType::ALTERNATION || last_type == TokenType::CONCAT) {
109- PzError::report_error (
110- PzError::PzErrorType::PZ_INVALID_INPUT,
111- " Trailing binary operator at end of pattern at position " +
112- std::to_string (infix.back ().pos ));
113- }
114-
115- // Drain remaining operators
116- while (!operators.empty ()) {
117- if (operators.top ().type == TokenType::LPAREN)
118- PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
119- " Unmatched '(' at position " +
120- std::to_string (operators.top ().pos ));
121- postfix.push_back (operators.top ());
122- operators.pop ();
123- }
124-
125- return postfix;
1+ #include " RegexPostfix.hpp"
2+ #include " pz_error.hpp"
3+
4+ st32 Postfix::get_precedence (TokenType type) {
5+ switch (type) {
6+ case TokenType::STAR:
7+ case TokenType::PLUS:
8+ case TokenType::QUESTION:
9+ case TokenType::QUANTIFIER_RANGE:
10+ return 3 ; // Unary postfix operators
11+ case TokenType::CONCAT:
12+ return 2 ; // Implicit concatenation
13+ case TokenType::ALTERNATION:
14+ return 1 ; // Lowest precedence
15+ default :
16+ return 0 ;
17+ }
18+ }
19+
20+ std::vector<Token> Postfix::convert (const std::vector<Token> &infix) {
21+ std::vector<Token> postfix;
22+ std::stack<Token> operators;
23+ TokenType last_type = TokenType::END; // Tracks previous token for validation
24+
25+ for (const auto &t : infix) {
26+ switch (t.type ) {
27+ // Operands go directly to output
28+ case TokenType::LITERAL:
29+ case TokenType::DOT:
30+ case TokenType::CHAR_CLASS:
31+ case TokenType::CARET:
32+ case TokenType::DOLLAR:
33+ postfix.push_back (t);
34+ break ;
35+
36+ // '(' is pushed to operator stack and output (for NFA grouping)
37+ case TokenType::LPAREN: {
38+ postfix.push_back (t);
39+ operators.push (t);
40+ break ;
41+ }
42+
43+ // Pop operators until matching '(' is found
44+ case TokenType::RPAREN: {
45+ if (last_type == TokenType::LPAREN)
46+ PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
47+ " Empty Parentheses at position " +
48+ std::to_string (t.pos ));
49+ while (!operators.empty () && operators.top ().type != TokenType::LPAREN) {
50+ postfix.push_back (operators.top ());
51+ operators.pop ();
52+ }
53+ if (operators.empty ())
54+ PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
55+ " Mismatched ')' at position " +
56+ std::to_string (t.pos ));
57+ operators.pop (); // Discard '('
58+ postfix.push_back (t);
59+ break ;
60+ }
61+ // Unary postfix operators must follow a valid expression
62+ case TokenType::STAR:
63+ case TokenType::PLUS:
64+ case TokenType::QUESTION:
65+ case TokenType::QUANTIFIER_RANGE:
66+ if (last_type != TokenType::LITERAL && last_type != TokenType::DOT &&
67+ last_type != TokenType::CHAR_CLASS &&
68+ last_type != TokenType::RPAREN) {
69+ PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
70+ " Quantifier used without a valid preceding "
71+ " expression at position " +
72+ std::to_string (t.pos ));
73+ }
74+ postfix.push_back (t);
75+ break ;
76+
77+ case TokenType::ALTERNATION:
78+ // '|' must separate two valid expressions
79+ if (last_type == TokenType::END || last_type == TokenType::LPAREN ||
80+ last_type == TokenType::ALTERNATION) {
81+ PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
82+ " Invalid '|' at position " +
83+ std::to_string (t.pos ) +
84+ " . It must separate two expressions." );
85+ }
86+ goto push_operator;
87+
88+ // Binary operators handled via precedence rules
89+ case TokenType::CONCAT:
90+ push_operator:
91+ while (!operators.empty () && operators.top ().type != TokenType::LPAREN &&
92+ get_precedence (operators.top ().type ) >= get_precedence (t.type )) {
93+ postfix.push_back (operators.top ());
94+ operators.pop ();
95+ }
96+ operators.push (t);
97+ break ;
98+
99+ default :
100+ break ;
101+ }
102+
103+ if (t.type != TokenType::END)
104+ last_type = t.type ;
105+ }
106+
107+ // Pattern must not end with a binary operator
108+ if (last_type == TokenType::ALTERNATION || last_type == TokenType::CONCAT) {
109+ PzError::report_error (
110+ PzError::PzErrorType::PZ_INVALID_INPUT,
111+ " Trailing binary operator at end of pattern at position " +
112+ std::to_string (infix.back ().pos ));
113+ }
114+
115+ // Drain remaining operators
116+ while (!operators.empty ()) {
117+ if (operators.top ().type == TokenType::LPAREN)
118+ PzError::report_error (PzError::PzErrorType::PZ_INVALID_INPUT,
119+ " Unmatched '(' at position " +
120+ std::to_string (operators.top ().pos ));
121+ postfix.push_back (operators.top ());
122+ operators.pop ();
123+ }
124+
125+ return postfix;
126126}
0 commit comments