Programming-Club-Org
diff --git a/‎libpz/include/Nfa.hpp‎
Lines changed: 112 additions & 0 deletions b/‎libpz/include/Nfa.hpp‎
Lines changed: 112 additions & 0 deletions
diff --git a/‎libpz/include/NfaBuilder.hpp‎
Lines changed: 62 additions & 0 deletions b/‎libpz/include/NfaBuilder.hpp‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎libpz/include/RegexTokenizer.hpp‎
Lines changed: 1 addition & 1 deletion b/‎libpz/include/RegexTokenizer.hpp‎
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,112 @@
+#ifndef NFA_HPP
+#define NFA_HPP
+
+#include <utility>
+
+#include <RegexTokenizer.hpp>
+#include <pz_cxx_std.hpp>
+#include <pz_types.hpp>
+
+/**
+ * @brief Types of NFA states used in regex matching.
+ *
+ * The type of a State selects which of its members are meaningful (see the
+ * note on each enumerator). No enumerator has an explicit value, so the
+ * underlying values follow declaration order.
+ */
+enum class StateType {
+  /** Match a single literal character (held in State::c) */
+  CHAR,
+
+  /** Match any character (.) */
+  DOT,
+
+  /** Match a character class ([...]); the ranges are held in State::ranges */
+  CHAR_CLASS,
+
+  /** Accepting (final) state */
+  MATCH,
+
+  /** ε-transition with two outgoing branches (State::out and State::out1) */
+  SPLIT,
+
+  /** Save input position (for capture groups); slot is State::save_id */
+  SAVE,
+
+  /** Start-of-input anchor (^) */
+  ANCHOR_START,
+
+  /** End-of-input anchor ($) */
+  ANCHOR_END
+};
+
+/**
+ * @brief Represents a single state in the NFA.
+ *
+ * Which members are meaningful depends on @ref type. Every member other than
+ * @ref type carries a default initializer, so a freshly constructed state
+ * never holds indeterminate data and can be copied safely regardless of its
+ * type. Transitions are plain pointers; State never deletes the states it
+ * points to.
+ */
+struct State {
+  /** Kind of state; selects which of the members below are meaningful. */
+  StateType type;
+
+  /**
+   * Literal character to match (meaningful only for CHAR states).
+   * Value-initialised so that states of other types do not carry an
+   * indeterminate byte around when they are copied.
+   */
+  ut8 c{};
+
+  /**
+   * Capture group identifier (used by SAVE states to store input positions).
+   * Even IDs represent group start, odd IDs represent group end.
+   * Defaults to -1.
+   */
+  st32 save_id = -1;
+
+  /** Character ranges for CHAR_CLASS states. */
+  std::vector<CharRange> ranges;
+
+  /**
+   * NOTE(review): presumably true when the character class is negated
+   * ([^...]) — confirm against the matcher. Defaults to false.
+   */
+  bool negated = false;
+
+  /** Primary outgoing transition. */
+  State *out = nullptr;
+
+  /** Secondary outgoing transition (used only by SPLIT states). */
+  State *out1 = nullptr;
+
+  /**
+   * @brief Marker used during NFA simulation.
+   *
+   * Marks whether this state has already been added to the current
+   * active-states list. This prevents revisiting the same state multiple
+   * times in a single step, avoiding duplicate entries, duplicate work and
+   * infinite ε-transition loops. Defaults to -1.
+   */
+  st32 last_list = -1;
+
+  /**
+   * @brief Construct a state of the given type.
+   *
+   * All other members keep their default initializers.
+   *
+   * @param t Type of the new state.
+   */
+  State(StateType t) : type(t) {}
+};
+
+/**
+ * @brief Represents a partially constructed NFA fragment.
+ *
+ * A fragment consists of:
+ *  - a start state
+ *  - a list of dangling outgoing transitions that must be patched later
+ *
+ * The dangling transitions are stored as addresses of pointer members inside
+ * State objects, so those State objects must stay alive (and at the same
+ * address) for as long as the fragment may still be patched.
+ */
+struct Frag {
+  /** Entry state of the fragment. */
+  State *start;
+
+  /** Addresses of state pointers that need to be connected later. */
+  std::vector<State **> out_ptrs;
+
+  /**
+   * @brief Construct a fragment with a single dangling exit.
+   *
+   * The exit is the address of `s->out`, so @p s must not be null.
+   *
+   * @param s Start state; its primary transition becomes the dangling exit.
+   */
+  Frag(State *s) : start(s) { out_ptrs.push_back(&s->out); }
+
+  /**
+   * @brief Construct a fragment with multiple dangling exits.
+   *
+   * @param s   Start state of the fragment.
+   * @param out Dangling exits. Taken by value and moved into place, so a
+   *            caller passing a temporary or std::move()-ing its vector pays
+   *            for no extra copy.
+   */
+  Frag(State *s, std::vector<State **> out)
+      : start(s), out_ptrs(std::move(out)) {}
+
+  /**
+   * @brief Patch all dangling exits to point to the given state.
+   *
+   * Null entries in the exit list are skipped, and an exit that already
+   * points somewhere is left untouched, so an already connected transition
+   * is never overwritten.
+   *
+   * @param s State every still-dangling exit is connected to.
+   */
+  void patch(State *s) {
+    for (State **slot : out_ptrs) {
+      // Only patch if the slot exists and is currently unconnected.
+      if (slot && !*slot) {
+        *slot = s;
+      }
+    }
+  }
+};
+
+#endif // NFA_HPP
@@ -0,0 +1,62 @@
+#ifndef NFA_BUILDER_HPP
+#define NFA_BUILDER_HPP
+
+#include <Nfa.hpp>
+
+/**
+ * @brief Builds an ε-NFA from a postfix regex token sequence.
+ *
+ * Implements Thompson-style construction to convert postfix regex tokens
+ * into an NFA graph. All states created during construction are owned
+ * internally and cleaned up automatically, which means every State pointer
+ * handed out by this class is only valid while the builder itself is alive.
+ */
+class NfaBuilder {
+public:
+  /**
+   * @brief Build an NFA from a postfix regex.
+   *
+   * The resulting NFA has a single accepting state of type
+   * StateType::MATCH. The returned pointer refers to the start state.
+   * The states stay owned by this builder: the caller must not delete the
+   * returned pointer and must not use it after the builder is destroyed.
+   *
+   * @param postfix Regex tokens in postfix (RPN) form.
+   * @return Pointer to the start state of the constructed NFA.
+   */
+  State *build(const std::vector<Token> &postfix);
+
+  /**
+   * @brief Create a deep copy of an NFA fragment.
+   *
+   * Used for handling quantifiers that require duplication of subgraphs
+   * (e.g. {m,n}, *, +).
+   *
+   * @param frag Fragment to duplicate (passed by value).
+   * @return The copied fragment.
+   */
+  Frag copy_fragment(Frag frag);
+
+  /**
+   * @brief Deep copy an NFA subgraph starting from a given state.
+   *
+   * Keeps a lookup map to avoid duplicating already-copied states.
+   *
+   * @param s Original state to copy.
+   * @param lookup Map from original states to their copies.
+   * @return Pointer to the copied state.
+   */
+  State *copy_state(State *s, std::unordered_map<State *, State *> &lookup);
+
+private:
+  /**
+   * @brief Allocate a new NFA state and store it in the internal pool.
+   *
+   * Ownership is retained by the builder to ensure correct lifetime.
+   *
+   * @param type Type of the state to create.
+   * @return Pointer to the newly created state.
+   */
+  State *create_state(StateType type);
+
+  /**
+   * @brief Owns all NFA states created during construction.
+   *
+   * Ensures that all State objects remain valid for the lifetime
+   * of the NfaBuilder and are automatically destroyed via RAII.
+   * Because the pool holds std::unique_ptr elements, the builder cannot be
+   * copied.
+   */
+  std::vector<std::unique_ptr<State>> state_pool;
+};
+
+#endif // NFA_BUILDER_HPP
@@ -108,7 +108,7 @@ class Tokenizer {
   /** Input regex pattern */
   std::string_view pattern;
   /** Current cursor position */
-  size_t i = 0;
+  size_t cursor_pos = 0;
   /** Counter for assigning group IDs */
   st32 group_counter = 0;
   /** Stack for nested group tracking */