From 52917352389427facfc0717f187b6586dce68417 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 13 Apr 2026 12:22:37 +0000 Subject: [PATCH] fix: preserve trailing punctuation when extracting PIB words MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parse_hybrid_line() was appending words with trailing commas (e.g. "Володимировича,") to pib_words, causing the punctuation to be swallowed during masking replacement. Now strips trailing punctuation (,.!?;:) from each PIB word before adding it. https://claude.ai/code/session_01KownKVoEBudRVp7npQ1idz --- data_masking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_masking.py b/data_masking.py index 8ecdce2..88fe398 100644 --- a/data_masking.py +++ b/data_masking.py @@ -738,7 +738,7 @@ def parse_hybrid_line(line: str) -> Tuple[Optional[str], Optional[str], Optional pib_words = [] for word in parts[pib_start_index:pib_start_index + 3]: if looks_like_name(word): - pib_words.append(word) + pib_words.append(word.rstrip(',.!?;:')) else: break pib = " ".join(pib_words) if pib_words else None