From 99c4ddd0f676b13ad01c46b2639b96438f2125a4 Mon Sep 17 00:00:00 2001
From: egenthon-cmd <egenthon@gladia.io>
Date: Fri, 10 Apr 2026 10:45:01 +0200
Subject: [PATCH 01/10] feat: added italian language

---
 normalization/languages/__init__.py           |   4 +-
 normalization/languages/italian/__init__.py   |   7 ++
 normalization/languages/italian/operators.py  | 116 ++++++++++++++++++
 .../languages/italian/replacements.py         |  11 ++
 4 files changed, 136 insertions(+), 2 deletions(-)
 create mode 100644 normalization/languages/italian/__init__.py
 create mode 100644 normalization/languages/italian/operators.py
 create mode 100644 normalization/languages/italian/replacements.py

diff --git a/normalization/languages/__init__.py b/normalization/languages/__init__.py
index 5674874..838b5ad 100644
--- a/normalization/languages/__init__.py
+++ b/normalization/languages/__init__.py
@@ -1,7 +1,7 @@
-from . import english, french
+from . import english, french, italian
 from .base import LanguageOperators
 from .registry import get_language_registry, register_language
 
 register_language(LanguageOperators)
 
-__all__ = ["english", "french", "get_language_registry"]
+__all__ = ["english", "french", "italian", "get_language_registry"]
diff --git a/normalization/languages/italian/__init__.py b/normalization/languages/italian/__init__.py
new file mode 100644
index 0000000..9278e42
--- /dev/null
+++ b/normalization/languages/italian/__init__.py
@@ -0,0 +1,7 @@
+from .operators import ItalianOperators
+from .replacements import ITALIAN_REPLACEMENTS
+
+__all__ = [
+    "ItalianOperators",
+    "ITALIAN_REPLACEMENTS",
+]
diff --git a/normalization/languages/italian/operators.py b/normalization/languages/italian/operators.py
new file mode 100644
index 0000000..ed48304
--- /dev/null
+++ b/normalization/languages/italian/operators.py
@@ -0,0 +1,116 @@
+import re
+
+from normalization.languages.base import LanguageConfig, LanguageOperators
+from normalization.languages.italian.replacements import ITALIAN_REPLACEMENTS
+from normalization.languages.registry import register_language
+
+# Single digits 1–9: shared by digit_words and any future time/compound helpers.
+_ONE_TO_NINE: dict[str, str] = {
+    "uno": "1",
+    "due": "2",
+    "tre": "3",
+    "quattro": "4",
+    "cinque": "5",
+    "sei": "6",
+    "sette": "7",
+    "otto": "8",
+    "nove": "9",
+}
+
+ITALIAN_SENTENCE_REPLACEMENTS: dict[str, str] = {
+    # Spoken percentages (“dieci per cento”) → one canonical form aligned with “%” → percento
+    "per cento": "percento",
+}
+
+ITALIAN_CONFIG = LanguageConfig(
+    code="it",
+    decimal_separator=",",
+    decimal_word="virgola",
+    thousand_separator=".",
+    symbols_to_words={
+        "@": "chiocciola",
+        ".": "punto",
+        "+": "più",
+        "=": "uguale a",
+        ">": "maggiore di",
+        "<": "minore di",
+        "°": "grado",
+        "°C": "gradi celsius",
+        "°F": "gradi fahrenheit",
+        "%": "percento",
+    },
+    currency_symbol_to_word={
+        "€": "euro",
+        "$": "dollari",
+        "£": "sterline",
+        "¢": "centesimi",
+        "¥": "yen",
+    },
+    filler_words=[
+        "eh",
+        "ehm",
+        "mm",
+        "mh",
+        "cioè",
+        "cioe",
+        "tipo",
+        "insomma",
+        "allora",
+        "beh",
+        "bah",
+        "dunque",
+        "magari",
+        "praticamente",
+    ],
+    sentence_replacements=ITALIAN_SENTENCE_REPLACEMENTS,
+    digit_words={"zero": "0", **_ONE_TO_NINE},
+    number_words=[
+        "zero",
+        *_ONE_TO_NINE,
+        "dieci",
+        "undici",
+        "dodici",
+        "tredici",
+        "quattordici",
+        "quindici",
+        "sedici",
+        "diciassette",
+        "diciotto",
+        "diciannove",
+        "venti",
+        "trenta",
+        "quaranta",
+        "cinquanta",
+        "sessanta",
+        "settanta",
+        "ottanta",
+        "novanta",
+        "cento",
+        "mille",
+        "mila",
+        "milione",
+        "milioni",
+        "miliardo",
+        "miliardi",
+    ],
+    plus_word="più",
+)
+
+
+@register_language
+class ItalianOperators(LanguageOperators):  # Italian rules driven by ITALIAN_CONFIG
+    def __init__(self):
+        super().__init__(ITALIAN_CONFIG)
+
+    def fix_one_word_in_numeric_contexts(self, text: str) -> str:  # "uno" -> "1"
+        text = re.sub(r"(\d+)\s+uno\s+uno\b", r"\1 1 1", text)  # 5 uno uno -> 5 1 1
+        text = re.sub(r"\buno\s+uno\s+(\d)", r"1 1 \1", text)  # uno uno 5 -> 1 1 5
+        text = re.sub(r"(\d+)\s+uno\s+(\d)", r"\1 1 \2", text)  # 5 uno 7 -> 5 1 7
+        text = re.sub(r"(\d+)\s+uno\b", r"\1 1", text)  # 5 uno -> 5 1
+        text = re.sub(r"\b(\d+)uno\b", r"\1 1", text)  # 5uno -> 5 1
+        text = re.sub(r"\buno\s+(\d)", r"1 \1", text)  # uno 5 -> 1 5
+        text = re.sub(r"^uno\s+(?=[a-z])", "1 ", text)  # leading "uno x" -> "1 x"
+        return text
+
+    def get_word_replacements(self) -> dict[str, str]:  # abbreviation -> expansion
+        return ITALIAN_REPLACEMENTS
diff --git a/normalization/languages/italian/replacements.py b/normalization/languages/italian/replacements.py
new file mode 100644
index 0000000..12c5e72
--- /dev/null
+++ b/normalization/languages/italian/replacements.py
@@ -0,0 +1,11 @@
+ITALIAN_REPLACEMENTS: dict[str, str] = {
+    "avv": "avvocato",
+    "dott": "dottor",
+    "dr": "dottor",
+    "ecc": "eccetera",
+    "etc": "eccetera",
+    "prof": "professore",
+    "tel": "telefono",
+    "versus": "contro",
+    "vs": "contro",
+}

From ac743b238086cd74aeba045267a94d5730924b37 Mon Sep 17 00:00:00 2001
From: egenthon-cmd <egenthon@gladia.io>
Date: Fri, 10 Apr 2026 10:45:42 +0200
Subject: [PATCH 02/10] test: added italian language tests

---
 tests/e2e/files/gladia-3.csv | 38 +++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/tests/e2e/files/gladia-3.csv b/tests/e2e/files/gladia-3.csv
index 85c749b..0096bd3 100644
--- a/tests/e2e/files/gladia-3.csv
+++ b/tests/e2e/files/gladia-3.csv
@@ -123,4 +123,40 @@ x = 5,x equals 5,en
 ¥1000,1000 yens,en
 ø in Danish,o in danish,en
 €20 or €30,20 euros or 30 euros,en
-my  name is bob,my name is bob,en
\ No newline at end of file
+my  name is bob,my name is bob,en
+#1 spot,1 spot,it
+It costs €50,it costs 50 euro,it
+"3,14",3 virgola 14,it
+"1.234,56",1234 virgola 56,it
+2 < 5,2 minore di 5,it
+5 > 3,5 maggiore di 3,it
+50°C,50 gradi celsius,it
+dieci per cento,dieci percento,it
+vs milan,contro milan,it
+dott rossi,dottor rossi,it
+dr rossi,dottor rossi,it
+ehm tipo insomma ciao,ciao,it
+admin+tag@example.com,admin tag chiocciola example punto com,it
+test@example.com,test chiocciola example punto com,it
+uno apple,1 apple,it
+x = 5,x uguale a 5,it
+"The price is 99,99",the price is 99 virgola 99,it
+francois.dupont@gladia.io,francois punto dupont chiocciola gladia punto io,it
+www.example.com,w w w punto example punto com,it
+Version 1.0.0 released,version 1 punto 0 punto 0 released,it
+api.endpoint.v2,api punto endpoint punto v2,it
+fail-safe,fail safe,it
+U.S.A.,u s a,it
+prof bianchi,professore bianchi,it
+avv verdi,avvocato verdi,it
+versus inter,contro inter,it
+tel 123,telefono 123,it
+ecc.,eccetera,it
+etc subito,eccetera subito,it
++1 (619) 981-0181,+16199810181,it
+10:12 pm,10:12 pm,it
+Contact me at john@example.com please,contact me at john chiocciola example punto com please,it
+"Temperature is 98,6 degrees",temperature is 98 virgola 6 degrees,it
+The word [inaudible] is here,the word inaudible is here,it
+$5 and $10,5 dollari and 10 dollari,it
+my  name is bob,my name is bob,it
\ No newline at end of file

From a6958ed6921e65651689ebe1790556957e7f6b03 Mon Sep 17 00:00:00 2001
From: egenthon-cmd <egenthon@gladia.io>
Date: Fri, 10 Apr 2026 11:01:06 +0200
Subject: [PATCH 03/10] feat: add german language

---
 normalization/languages/__init__.py           |  4 +-
 normalization/languages/german/__init__.py    |  7 +++
 normalization/languages/german/operators.py   | 43 +++++++++++++++++++
 .../languages/german/replacements.py          | 11 +++++
 .../languages/german/sentence_replacements.py | 16 +++++++
 5 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 normalization/languages/german/__init__.py
 create mode 100644 normalization/languages/german/operators.py
 create mode 100644 normalization/languages/german/replacements.py
 create mode 100644 normalization/languages/german/sentence_replacements.py

diff --git a/normalization/languages/__init__.py b/normalization/languages/__init__.py
index 838b5ad..706540b 100644
--- a/normalization/languages/__init__.py
+++ b/normalization/languages/__init__.py
@@ -1,7 +1,7 @@
-from . import english, french, italian
+from . import english, french, german, italian
 from .base import LanguageOperators
 from .registry import get_language_registry, register_language
 
 register_language(LanguageOperators)
 
-__all__ = ["english", "french", "italian", "get_language_registry"]
+__all__ = ["english", "french", "german", "italian", "get_language_registry"]
diff --git a/normalization/languages/german/__init__.py b/normalization/languages/german/__init__.py
new file mode 100644
index 0000000..838e83b
--- /dev/null
+++ b/normalization/languages/german/__init__.py
@@ -0,0 +1,7 @@
+from .operators import GermanOperators
+from .replacements import GERMAN_REPLACEMENTS
+
+__all__ = [
+    "GermanOperators",
+    "GERMAN_REPLACEMENTS",
+]
diff --git a/normalization/languages/german/operators.py b/normalization/languages/german/operators.py
new file mode 100644
index 0000000..ca3397a
--- /dev/null
+++ b/normalization/languages/german/operators.py
@@ -0,0 +1,43 @@
+from normalization.languages.base import LanguageConfig, LanguageOperators
+from normalization.languages.german.replacements import GERMAN_REPLACEMENTS
+from normalization.languages.german.sentence_replacements import (
+    GERMAN_SENTENCE_REPLACEMENTS,
+)
+from normalization.languages.registry import register_language
+
+GERMAN_CONFIG = LanguageConfig(
+    code="de",
+    decimal_separator=",",
+    decimal_word="komma",
+    thousand_separator=".",
+    symbols_to_words={
+        "@": "at",
+        ".": "punkt",
+        "+": "plus",
+        "=": "gleich",
+        ">": "größer als",
+        "<": "kleiner als",
+        "°": "grad",
+        "°C": "grad celsius",
+        "°F": "grad fahrenheit",
+        "%": "prozent",
+    },
+    currency_symbol_to_word={
+        "€": "euros",  # NOTE(review): these look like English plurals
+        "$": "dollars",  # (German: euro, dollar, pfund, cent, yen);
+        "£": "pounds",  # the e2e fixtures expect them as written,
+        "¢": "cents",  # so confirm intent before changing.
+        "¥": "yens",
+    },
+    filler_words=["äh", "ähm", "hm", "also", "naja", "halt"],
+    sentence_replacements=GERMAN_SENTENCE_REPLACEMENTS,
+)
+
+
+@register_language
+class GermanOperators(LanguageOperators):  # German rules driven by GERMAN_CONFIG
+    def __init__(self):
+        super().__init__(GERMAN_CONFIG)
+
+    def get_word_replacements(self) -> dict[str, str]:  # word -> replacement map
+        return GERMAN_REPLACEMENTS
diff --git a/normalization/languages/german/replacements.py b/normalization/languages/german/replacements.py
new file mode 100644
index 0000000..4b220b1
--- /dev/null
+++ b/normalization/languages/german/replacements.py
@@ -0,0 +1,11 @@
+GERMAN_REPLACEMENTS: dict[str, str] = {
+    "u.": "unter",
+    "chr.": "christus",
+    "rissströmungen": "riss-strömungen",
+    "kilometer": "km",
+    "xdrtb": "xdr-tb",
+    "dualradio": "dual-radio",
+    "st.": "sankt",
+    "bis": "-",
+    "maubewegung": "mau-bewegung",
+}
diff --git a/normalization/languages/german/sentence_replacements.py b/normalization/languages/german/sentence_replacements.py
new file mode 100644
index 0000000..31086fa
--- /dev/null
+++ b/normalization/languages/german/sentence_replacements.py
@@ -0,0 +1,16 @@
+GERMAN_SENTENCE_REPLACEMENTS: dict[str, str] = {
+    "regimeet kritischen": "regimekritischen",
+    "cannabis joints": "cannabisjoints",
+    "kampf handlungen": "kampfhandlungen",
+    "erwachsenen pornografie": "erwachsenenpornographie",
+    "standbild format": "standbildformat",
+    "internet radio seite": "internetradioseite",
+    "alt gedienten": "altgedienten",
+    "6 tage krieg": "sechstagekrieg",
+    "kreuzungs punkt": "kreuzungspunkt",
+    "wild card": "wildcard",
+    "national parks": "nationalparks",
+    "internet suche": "internetsuche",
+    "gleichgewicht geschlechtliche": "gleichgeschlechtlichen",
+    "welt kulturerbegebiete": "weltkulturerbegebiete",
+}

From 49339e469175380b27da412015968901627e653d Mon Sep 17 00:00:00 2001
From: egenthon-cmd <egenthon@gladia.io>
Date: Fri, 10 Apr 2026 11:04:14 +0200
Subject: [PATCH 04/10] test: added german language tests

---
 tests/e2e/files/gladia-3.csv | 38 +++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/tests/e2e/files/gladia-3.csv b/tests/e2e/files/gladia-3.csv
index 0096bd3..d91c3c6 100644
--- a/tests/e2e/files/gladia-3.csv
+++ b/tests/e2e/files/gladia-3.csv
@@ -159,4 +159,40 @@ Contact me at john@example.com please,contact me at john chiocciola example punt
 "Temperature is 98,6 degrees",temperature is 98 virgola 6 degrees,it
 The word [inaudible] is here,the word inaudible is here,it
 $5 and $10,5 dollari and 10 dollari,it
-my  name is bob,my name is bob,it
\ No newline at end of file
+my  name is bob,my name is bob,it
+#1 spot,1 spot,de
+It costs €50,it costs 50 euros,de
+"3,14",3 komma 14,de
+"1.234,56",1234 komma 56,de
+2 < 5,2 kleiner als 5,de
+5 > 3,5 grosser als 3,de
+50°C,50 grad celsius,de
+admin+tag@example.com,admin tag at example punkt com,de
+test@example.com,test at example punkt com,de
+x = 5,x gleich 5,de
+"The price is 99,99",the price is 99 komma 99,de
+francois.dupont@gladia.io,francois punkt dupont at gladia punkt io,de
+www.example.com,w w w punkt example punkt com,de
+Version 1.0.0 released,version 1 punkt 0 punkt 0 released,de
+api.endpoint.v2,api punkt endpoint punkt v2,de
+fail-safe,fail safe,de
+U.S.A.,u s a,de
++1 (619) 981-0181,+16199810181,de
+10:12 pm,10:12 pm,de
+Contact me at john@example.com please,contact me at john at example punkt com please,de
+"Temperature is 98,6 degrees",temperature is 98 komma 6 degrees,de
+The word [inaudible] is here,the word inaudible is here,de
+$5 and $10,5 dollars and 10 dollars,de
+my  name is bob,my name is bob,de
+5 bis 10,5 - 10,de
+kilometer weg,km weg,de
+internet suche heute,internetsuche heute,de
+wild card spiel,wildcard spiel,de
+national parks tour,nationalparks tour,de
+also naja hallo,hallo,de
+äh ähm hallo,ah ahm hallo,de
+hm okay,okay,de
+halt mal so,mal so,de
+st. petersburg,st petersburg,de
+6 tage krieg,sechstagekrieg,de
+kreuzungs punkt,kreuzungspunkt,de
\ No newline at end of file

From 8318ca2b6f5078ecfc123bd31d04b58a83dd1e71 Mon Sep 17 00:00:00 2001
From: egenthon-cmd <egenthon@gladia.io>
Date: Fri, 10 Apr 2026 11:28:33 +0200
Subject: [PATCH 05/10] feat: added spanish language

---
 normalization/languages/__init__.py           |   4 +-
 normalization/languages/spanish/__init__.py   |   7 +
 .../languages/spanish/number_normalizer.py    | 196 ++++++++++++++++++
 normalization/languages/spanish/operators.py  | 139 +++++++++++++
 .../languages/spanish/replacements.py         |  30 +++
 5 files changed, 374 insertions(+), 2 deletions(-)
 create mode 100644 normalization/languages/spanish/__init__.py
 create mode 100644 normalization/languages/spanish/number_normalizer.py
 create mode 100644 normalization/languages/spanish/operators.py
 create mode 100644 normalization/languages/spanish/replacements.py

diff --git a/normalization/languages/__init__.py b/normalization/languages/__init__.py
index 706540b..18e07a6 100644
--- a/normalization/languages/__init__.py
+++ b/normalization/languages/__init__.py
@@ -1,7 +1,7 @@
-from . import english, french, german, italian
+from . import english, french, german, italian, spanish
 from .base import LanguageOperators
 from .registry import get_language_registry, register_language
 
 register_language(LanguageOperators)
 
-__all__ = ["english", "french", "german", "italian", "get_language_registry"]
+__all__ = ["english", "french", "german", "italian", "spanish", "get_language_registry"]
diff --git a/normalization/languages/spanish/__init__.py b/normalization/languages/spanish/__init__.py
new file mode 100644
index 0000000..1df78ad
--- /dev/null
+++ b/normalization/languages/spanish/__init__.py
@@ -0,0 +1,7 @@
+from .operators import SpanishOperators
+from .replacements import SPANISH_REPLACEMENTS
+
+__all__ = [
+    "SpanishOperators",
+    "SPANISH_REPLACEMENTS",
+]
diff --git a/normalization/languages/spanish/number_normalizer.py b/normalization/languages/spanish/number_normalizer.py
new file mode 100644
index 0000000..632a3fc
--- /dev/null
+++ b/normalization/languages/spanish/number_normalizer.py
@@ -0,0 +1,196 @@
+"""Convert common Spanish spelled-out numbers to digits (STT-oriented).
+
+Covers 0–999, ``mil`` compounds, and informal ``veinte tres`` → ``23``.
+Accepts spellings with or without accents (common in transcripts).
+"""
+
+from __future__ import annotations
+
+import unicodedata
+
+
+def _fold(s: str) -> str:
+    """Lower-case ``s`` and drop combining marks (accents) for loose matching."""
+    decomposed = unicodedata.normalize("NFD", s.lower())
+    # "Mn" = nonspacing mark, i.e. the accent split off by NFD decomposition.
+    return "".join(c for c in decomposed if unicodedata.category(c) != "Mn")
+
+
+def _get(table: dict[str, int], word: str) -> int | None:
+    """Return the value for ``word`` in ``table``, ignoring case and accents.
+
+    Table keys must already be folded (lower-case, unaccented): one dict lookup.
+    """
+    return table.get(_fold(word))
+
+
+_ONES_1_9: dict[str, int] = {
+    "uno": 1,
+    "dos": 2,
+    "tres": 3,
+    "cuatro": 4,
+    "cinco": 5,
+    "seis": 6,
+    "siete": 7,
+    "ocho": 8,
+    "nueve": 9,
+}
+
+_TEENS: dict[str, int] = {
+    "diez": 10,
+    "once": 11,
+    "doce": 12,
+    "trece": 13,
+    "catorce": 14,
+    "quince": 15,
+    "dieciseis": 16,
+    "diecisiete": 17,
+    "dieciocho": 18,
+    "diecinueve": 19,
+}
+
+_VEINTI: dict[str, int] = {
+    "veintiuno": 21,
+    "veintidos": 22,
+    "veintitres": 23,
+    "veinticuatro": 24,
+    "veinticinco": 25,
+    "veintiseis": 26,
+    "veintisiete": 27,
+    "veintiocho": 28,
+    "veintinueve": 29,
+}
+
+_TENS: dict[str, int] = {
+    "treinta": 30,
+    "cuarenta": 40,
+    "cincuenta": 50,
+    "sesenta": 60,
+    "setenta": 70,
+    "ochenta": 80,
+    "noventa": 90,
+}
+
+_HUNDREDS: dict[str, int] = {
+    "cien": 100,
+    "ciento": 100,
+    "doscientos": 200,
+    "trescientos": 300,
+    "cuatrocientos": 400,
+    "quinientos": 500,
+    "seiscientos": 600,
+    "setecientos": 700,
+    "ochocientos": 800,
+    "novecientos": 900,
+}
+
+
+class SpanishNumberNormalizer:
+    """Replace spelled-out Spanish numbers (0 to 999 999) in text with digits.
+
+    Matching is per word and ignores case and accents (see ``_fold``).
+    """
+
+    def __call__(self, text: str) -> str:
+        """Return ``text`` with every recognised number phrase digitised.
+
+        Words are re-joined with single spaces; whitespace-only input is
+        returned unchanged.
+        """
+        if not text.strip():
+            return text
+        words = text.split()
+        out: list[str] = []
+        i = 0
+        n = len(words)
+        while i < n:
+            parsed = self._parse_number(words, i, n)
+            if parsed is not None:
+                end, value = parsed
+                out.append(str(value))
+                i = end
+            else:
+                out.append(words[i])
+                i += 1
+        return " ".join(out)
+
+    def _parse_number(self, words: list[str], i: int, n: int) -> tuple[int, int] | None:
+        """Return (exclusive_end_index, value) if words[i:] start with a number.
+
+        Grammar: ``cero`` | [1-999] ``mil`` [1-999] | 1-999. A bare ``mil``
+        counts as 1000, so ``mil doscientos`` yields 1200.
+        """
+        if i >= n:
+            return None
+        fw = _fold(words[i])
+        if fw == "cero":
+            return i + 1, 0
+
+        if fw == "mil":
+            # "mil" alone means 1000 (Spanish does not say "uno mil").
+            j, thousands = i + 1, 1
+        else:
+            head = self._parse_1_999(words, i, n)
+            if head is None:
+                return None
+            j, value = head
+            if j >= n or _fold(words[j]) != "mil":
+                return j, value
+            # The whole 1-999 group multiplies "mil" ("doscientos cincuenta mil").
+            j, thousands = j + 1, value
+
+        # Optional remainder below one thousand; never another "mil" or "cero".
+        tail = self._parse_1_999(words, j, n)
+        if tail is None:
+            return j, thousands * 1000
+        end, rest = tail
+        return end, thousands * 1000 + rest
+
+    def _parse_1_999(self, words: list[str], i: int, n: int) -> tuple[int, int] | None:
+        """Parse an optional hundreds word followed by an optional 1-99 group."""
+        if i >= n:
+            return None
+        hundreds = _get(_HUNDREDS, words[i])
+        if hundreds is None:
+            return self._parse_0_99(words, i, n)
+        sub = self._parse_0_99(words, i + 1, n)
+        if sub is None:
+            return i + 1, hundreds
+        end, value = sub
+        return end, hundreds + value
+
+    def _parse_0_99(self, words: list[str], i: int, n: int) -> tuple[int, int] | None:
+        """Parse a 1-99 number at words[i]; ``cero`` is handled by the caller.
+
+        Accepts ``treinta y dos``, fused ``veintidos`` and the informal forms
+        ``veinte tres`` / ``treinta dos`` (no ``y``).
+        """
+        if i >= n:
+            return None
+        w = words[i]
+
+        if _fold(w) == "veinte":
+            if i + 1 < n:
+                o = _get(_ONES_1_9, words[i + 1])
+                if o is not None:
+                    return i + 2, 20 + o
+            return i + 1, 20
+
+        # Single-word numbers: 21-29, 10-19 and 1-9.
+        for table in (_VEINTI, _TEENS, _ONES_1_9):
+            v = _get(table, w)
+            if v is not None:
+                return i + 1, v
+
+        tens = _get(_TENS, w)
+        if tens is None:
+            return None
+        # Units may follow directly or after "y"; a dangling "y" is not consumed.
+        j = i + 1
+        if j < n and _fold(words[j]) == "y":
+            j += 1
+        if j < n:
+            o = _get(_ONES_1_9, words[j])
+            if o is not None:
+                return j + 1, tens + o
+        return i + 1, tens
diff --git a/normalization/languages/spanish/operators.py b/normalization/languages/spanish/operators.py
new file mode 100644
index 0000000..61c0dbd
--- /dev/null
+++ b/normalization/languages/spanish/operators.py
@@ -0,0 +1,139 @@
+import re
+
+from normalization.languages.base import LanguageConfig, LanguageOperators
+from normalization.languages.registry import register_language
+from normalization.languages.spanish.number_normalizer import SpanishNumberNormalizer
+from normalization.languages.spanish.replacements import SPANISH_REPLACEMENTS
+
+_ONE_TO_NINE: dict[str, str] = {
+    "uno": "1",
+    "dos": "2",
+    "tres": "3",
+    "cuatro": "4",
+    "cinco": "5",
+    "seis": "6",
+    "siete": "7",
+    "ocho": "8",
+    "nueve": "9",
+}
+
+SPANISH_CONFIG = LanguageConfig(
+    code="es",
+    decimal_separator=",",
+    decimal_word="punto",  # NOTE(review): a decimal comma is read "coma" — confirm
+    thousand_separator=".",
+    symbols_to_words={
+        "@": "arroba",
+        ".": "punto",
+        "+": "más",
+        "=": "igual a",
+        ">": "mayor que",
+        "<": "menor que",
+        "°": "grado",
+        "°C": "grados celsius",
+        "°F": "grados fahrenheit",
+        "%": "por ciento",
+    },
+    currency_symbol_to_word={
+        "€": "euros",
+        "$": "dólares",
+        "£": "libras",
+        "¢": "céntimos",
+        "¥": "yenes",
+    },
+    filler_words=[
+        "eh",
+        "ehm",
+        "mm",
+        "mh",
+        "bueno",
+        "pues",
+        "o sea",
+        "tipo",
+        "vale",
+        "vaya",
+        "mira",
+        "hombre",
+        "mujer",
+        "digo",
+        "entonces",
+        "claro",
+        "vamos",
+        "este",  # NOTE(review): "este"/"esta" are also demonstratives
+        "esta",  # ("esta casa"); confirm they should always be dropped
+    ],
+    sentence_replacements=None,
+    digit_words={"cero": "0", **_ONE_TO_NINE},
+    number_words=[
+        "cero",
+        *_ONE_TO_NINE,  # unpacks the dict keys "uno" ... "nueve"
+        "diez",
+        "once",
+        "doce",
+        "trece",
+        "catorce",
+        "quince",
+        "dieciséis",
+        "dieciseis",
+        "diecisiete",
+        "dieciocho",
+        "diecinueve",
+        "veinte",
+        "veintiuno",
+        "veintidos",  # NOTE(review): no accented "veintidós"/"veintitrés"/
+        "veintitres",  # "veintiséis" here, unlike "dieciséis" — confirm
+        "veinticuatro",
+        "veinticinco",
+        "veintiseis",
+        "veintisiete",
+        "veintiocho",
+        "veintinueve",
+        "treinta",
+        "cuarenta",
+        "cincuenta",
+        "sesenta",
+        "setenta",
+        "ochenta",
+        "noventa",
+        "cien",
+        "ciento",
+        "doscientos",
+        "trescientos",
+        "cuatrocientos",
+        "quinientos",
+        "seiscientos",
+        "setecientos",
+        "ochocientos",
+        "novecientos",
+        "mil",
+        "millón",
+        "millones",
+        "mil millones",  # NOTE(review): only two-word entry — confirm it can match
+        "billón",
+        "billones",
+    ],
+    plus_word="más",
+)
+
+
+@register_language
+class SpanishOperators(LanguageOperators):  # Spanish rules driven by SPANISH_CONFIG
+    def __init__(self):
+        super().__init__(SPANISH_CONFIG)
+        self._number_normalizer = SpanishNumberNormalizer()  # used by expand_written_numbers
+
+    def fix_one_word_in_numeric_contexts(self, text: str) -> str:  # "uno" -> "1"
+        text = re.sub(r"(\d+)\s+uno\s+uno\b", r"\1 1 1", text)  # 5 uno uno -> 5 1 1
+        text = re.sub(r"\buno\s+uno\s+(\d)", r"1 1 \1", text)  # uno uno 5 -> 1 1 5
+        text = re.sub(r"(\d+)\s+uno\s+(\d)", r"\1 1 \2", text)  # 5 uno 7 -> 5 1 7
+        text = re.sub(r"(\d+)\s+uno\b", r"\1 1", text)  # 5 uno -> 5 1
+        text = re.sub(r"\b(\d+)uno\b", r"\1 1", text)  # 5uno -> 5 1
+        text = re.sub(r"\buno\s+(\d)", r"1 \1", text)  # uno 5 -> 1 5
+        text = re.sub(r"^uno\s+(?=[a-z])", "1 ", text)  # leading "uno x" -> "1 x"
+        return text
+
+    def get_word_replacements(self) -> dict[str, str]:  # abbreviation -> expansion
+        return SPANISH_REPLACEMENTS
+
+    def expand_written_numbers(self, text: str) -> str:  # spelled numbers -> digits
+        return self._number_normalizer(text)
diff --git a/normalization/languages/spanish/replacements.py b/normalization/languages/spanish/replacements.py
new file mode 100644
index 0000000..a4eda19
--- /dev/null
+++ b/normalization/languages/spanish/replacements.py
@@ -0,0 +1,30 @@
+SPANISH_REPLACEMENTS: dict[str, str] = {
+    "aprox": "aproximadamente",
+    "av": "avenida",
+    "cta": "cuenta",
+    "d": "don",
+    "da": "doña",
+    "dept": "departamento",
+    "depto": "departamento",
+    "doc": "documento",
+    "dr": "doctor",
+    "dra": "doctora",
+    "etc": "etcétera",
+    "ej": "ejemplo",
+    "ext": "extensión",
+    "hab": "habitación",
+    "ing": "ingeniero",
+    "núm": "número",
+    "pag": "página",
+    "prof": "profesor",
+    "profa": "profesora",
+    "pza": "plaza",
+    "tel": "teléfono",
+    "tfno": "teléfono",
+    "ud": "usted",
+    "uds": "ustedes",
+    "vd": "usted",
+    "vds": "ustedes",
+    "versus": "versus",
+    "vs": "versus",
+}

From a6029dcf0bc4bcf8dd6206e8b15d05071eaf1a64 Mon Sep 17 00:00:00 2001
From: karamouche <hugo4ibt@gmail.com>
Date: Mon, 13 Apr 2026 17:18:49 -0400
Subject: [PATCH 06/10] refactor: restructure normalization tests to group by
 language

---
 tests/e2e/normalization_test.py | 43 +++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/tests/e2e/normalization_test.py b/tests/e2e/normalization_test.py
index daa1b3d..5663fd3 100644
--- a/tests/e2e/normalization_test.py
+++ b/tests/e2e/normalization_test.py
@@ -32,7 +32,16 @@ def _load_tests_from_csv(csv_path: Path) -> list[NormalizationTest]:
 
 
 def _case_ids(cases: list[NormalizationTest]) -> list[str]:
-    return [f"{test.language}:{test.input[:60]}" for test in cases]
+    return [test.input[:60] for test in cases]
+
+
+def _group_by_language(
+    tests: list[NormalizationTest],
+) -> dict[str, list[NormalizationTest]]:
+    groups: dict[str, list[NormalizationTest]] = {}
+    for t in tests:
+        groups.setdefault(t.language, []).append(t)
+    return groups
 
 
 def _load_pipeline(preset_name_or_path: str, language: str) -> NormalizationPipeline:
@@ -53,16 +62,24 @@ def _load_pipeline(preset_name_or_path: str, language: str) -> NormalizationPipe
 _GLADIA_3_PIPELINES: dict[str, NormalizationPipeline] = {}
 
 
-@pytest.mark.parametrize(
-    "test",
-    _GLADIA_3_TESTS,
-    ids=_case_ids(_GLADIA_3_TESTS),
-)
-def test_gladia_3(test: NormalizationTest) -> None:
-    pipeline = _load_pipeline("gladia-3", test.language)
-    result = pipeline.normalize(test.input)
-    assert result == test.expected, (
-        f"\n  input:    {test.input!r}"
-        f"\n  expected: {test.expected!r}"
-        f"\n  got:      {result!r}"
+def _make_gladia_3_test(language: str, cases: list[NormalizationTest]):
+    @pytest.mark.parametrize("test", cases, ids=_case_ids(cases))
+    def _test(test: NormalizationTest) -> None:
+        pipeline = _load_pipeline("gladia-3", language)
+        result = pipeline.normalize(test.input)
+        assert result == test.expected, (
+            f"\n  input:    {test.input!r}"
+            f"\n  expected: {test.expected!r}"
+            f"\n  got:      {result!r}"
+        )
+
+    _test.__name__ = f"test_gladia_3_{language}"
+    return _test
+
+
+_GLADIA_3_BY_LANGUAGE = _group_by_language(_GLADIA_3_TESTS)
+
+for _language in sorted(_GLADIA_3_BY_LANGUAGE):
+    globals()[f"test_gladia_3_{_language}"] = _make_gladia_3_test(
+        _language, _GLADIA_3_BY_LANGUAGE[_language]
     )

From 18ebc5ac7224788f161216a595181ac0ccb0e7a4 Mon Sep 17 00:00:00 2001
From: karamouche <hugo4ibt@gmail.com>
Date: Mon, 13 Apr 2026 17:39:57 -0400
Subject: [PATCH 07/10] refactor: test files structure for language
 normalization

---
 .github/pull_request_template.md     |   2 +-
 .gitignore                           |   1 +
 AGENTS.md                            |   2 +-
 CONTRIBUTING.md                      |   2 +-
 docs/contributing-guide.md           |  52 +++---
 tests/e2e/files/gladia-3.csv         | 251 ---------------------------
 tests/e2e/files/gladia-3/de.csv      |  37 ++++
 tests/e2e/files/gladia-3/default.csv |   9 +
 tests/e2e/files/gladia-3/en.csv      | 126 ++++++++++++++
 tests/e2e/files/gladia-3/fr.csv      |  46 +++++
 tests/e2e/files/gladia-3/it.csv      |  37 ++++
 tests/e2e/normalization_test.py      |  43 +++--
 12 files changed, 311 insertions(+), 297 deletions(-)
 delete mode 100644 tests/e2e/files/gladia-3.csv
 create mode 100644 tests/e2e/files/gladia-3/de.csv
 create mode 100644 tests/e2e/files/gladia-3/default.csv
 create mode 100644 tests/e2e/files/gladia-3/en.csv
 create mode 100644 tests/e2e/files/gladia-3/fr.csv
 create mode 100644 tests/e2e/files/gladia-3/it.csv

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 92570a6..efcc3e9 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -20,7 +20,7 @@
 - [ ] Decorated operators class with `@register_language`
 - [ ] Added one import line to `languages/__init__.py`
 - [ ] Added unit tests in `tests/unit/languages/`
-- [ ] Added e2e test rows in `tests/e2e/files/`
+- [ ] Added a per-language CSV in `tests/e2e/files/{preset}/` (e.g. `tests/e2e/files/gladia-3/fr.csv`)
 
 ### New step
 
diff --git a/.gitignore b/.gitignore
index e5bab35..4d7ac38 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@ wheels/
 *.egg-info
 .ruff_cache/
 .pytest_cache/
+.DS_Store
 
 
 # Virtual environments
diff --git a/AGENTS.md b/AGENTS.md
index 0c769fe..74de346 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -144,7 +144,7 @@ Never modify a published preset YAML. Never let a preset reference a step that h
 - [ ] Decorate the class with `@register_language`
 - [ ] Add one import to `languages/__init__.py`
 - [ ] Add tests in `tests/unit/languages/`
-- [ ] Add test rows to `tests/e2e/files/` for the new language
+- [ ] Add a CSV file `tests/e2e/files/{preset}/{language_code}.csv` for each relevant preset (e.g. `tests/e2e/files/gladia-3/fr.csv`)
 
 ## Adding a new step — checklist
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index fcc3211..2448a49 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -18,7 +18,7 @@ A new language requires:
 2. Put all word-level substitutions in `replacements.py`
 3. Instantiate a `LanguageConfig` and subclass `LanguageOperators` in `operators.py`
 4. Decorate with `@register_language` and add one import to `normalization/languages/__init__.py`
-5. Add tests under `tests/unit/languages/` and e2e fixture rows in `tests/e2e/files/`
+5. Add tests under `tests/unit/languages/` and a per-language CSV in `tests/e2e/files/{preset}/` (e.g. `tests/e2e/files/gladia-3/fr.csv`)
 
 See [docs/contributing-guide.md](docs/contributing-guide.md) for the full checklist and design rules.
 
diff --git a/docs/contributing-guide.md b/docs/contributing-guide.md
index 5c038a9..488f5ec 100644
--- a/docs/contributing-guide.md
+++ b/docs/contributing-guide.md
@@ -26,7 +26,7 @@ This ordering is a hard constraint — some steps depend on earlier steps having
 - [ ] Decorate the class with `@register_language`
 - [ ] Add one import to `languages/__init__.py`
 - [ ] Add tests in `tests/unit/languages/`
-- [ ] Add test rows to `tests/e2e/files/` for the new language
+- [ ] Add a CSV file `tests/e2e/files/{preset}/{language_code}.csv` for each relevant preset (e.g. `tests/e2e/files/gladia-3/fr.csv`)
 
 ### Language data vs. language behavior
 
@@ -159,42 +159,46 @@ def test_my_step_with_english(english_operators):
 
 ### E2E tests for a preset
 
-E2E tests validate the full pipeline (preset + language) against a CSV fixture. The test runner lives in `tests/e2e/normalization_test.py` and CSV files go in `tests/e2e/files/`.
+E2E tests validate the full pipeline (preset + language) against CSV fixtures. The test runner lives in `tests/e2e/normalization_test.py` and CSV files are organized under `tests/e2e/files/`.
 
-**CSV format** — three columns, no quoting needed unless the value contains a comma:
+**Directory structure** — one folder per preset, one CSV per language:
 
 ```
-input,expected,language
-$1,000,000,1000000 dollars,en
-hello world,hello world,fr
+tests/e2e/files/
+  gladia-3/
+    default.csv
+    de.csv
+    en.csv
+    fr.csv
+    it.csv
 ```
 
-Each row is one test case. The `language` column must match a registered language code (or `default`).
+**CSV format** — an `input,expected` header row, then one test case per row; wrap a value in double quotes only when it contains a comma:
 
-**Registering a new CSV** — add a block to `normalization_test.py` following the existing pattern:
+```
+input,expected
+"$1,000,000",1000000 dollars
+hello world,hello world
+```
+
+The language is derived from the filename (e.g. `fr.csv` → language code `fr`), so the filename stem must match a registered language code. Use `default.csv` for the language-agnostic fallback.
+
+**Adding test cases for an existing preset** — drop rows into the appropriate `{language_code}.csv` file, or create a new CSV if the language isn't covered yet. Tests are discovered automatically.
+
+**Registering a new preset** — add a block to `normalization_test.py` following the existing pattern:
 
 ```python
-_MY_PRESET_CSV = _FILES_DIR / "my-preset.csv"
-_MY_PRESET_TESTS = _load_tests_from_csv(_MY_PRESET_CSV) if _MY_PRESET_CSV.exists() else []
+_MY_PRESET_DIR = _FILES_DIR / "my-preset"
+_MY_PRESET_BY_LANGUAGE = _discover_preset_tests(_MY_PRESET_DIR)
 _MY_PRESET_PIPELINES: dict[str, NormalizationPipeline] = {}
 
-
-@pytest.mark.parametrize(
-    "test",
-    _MY_PRESET_TESTS,
-    ids=_case_ids(_MY_PRESET_TESTS),
-)
-def test_my_preset(test: NormalizationTest) -> None:
-    pipeline = _load_pipeline("my-preset", test.language)
-    result = pipeline.normalize(test.input)
-    assert result == test.expected, (
-        f"\n  input:    {test.input!r}"
-        f"\n  expected: {test.expected!r}"
-        f"\n  got:      {result!r}"
+for _language in sorted(_MY_PRESET_BY_LANGUAGE):
+    globals()[f"test_my_preset_{_language}"] = _make_test(
+        "my-preset", _language, _MY_PRESET_BY_LANGUAGE[_language], _MY_PRESET_PIPELINES
     )
 ```
 
-Pipelines are cached per language inside `_MY_PRESET_PIPELINES` to avoid reloading for each parametrized case — follow the `_load_pipeline` helper pattern already in the file.
+Pipelines are cached per language to avoid reloading for each parametrized case.
 
 ---
 
diff --git a/tests/e2e/files/gladia-3.csv b/tests/e2e/files/gladia-3.csv
deleted file mode 100644
index 1381186..0000000
--- a/tests/e2e/files/gladia-3.csv
+++ /dev/null
@@ -1,251 +0,0 @@
-input,expected,language
-#1 spot,1 spot,en
-"$1,000,000",1000000 dollars,en
-$5 and $10,5 dollars and 10 dollars,en
-$50.75 total,50 point 75 dollars total,en
-+1 (619) 981-0181,+16199810181,en
-05 45 pm,5:45 pm,en
-05:45pm,05:45 pm,en
-"1,234.56",1234 point 56,en
-1.1.1.1,1 dot 1 dot 1 dot 1,en
-10 a m,10 am,en
-1012 am,10:12 am,en
-10:00 pm,10 pm,en
-10:12 pm,10:12 pm,en
-10:54 a m,10:54 am,en
-11 a m,11 am,en
-1145 pm,11:45 pm,en
-12 p m,12 pm,en
-12:34 p m,12:34 pm,en
-Let's meet at noon o'clock,let us meet at 12:00,en
-Appointment is at 3 o'clock,appointment is at 3:00,en
-192.168.1.1,192 dot 168 dot 1 dot 1,en
-2 < 5,2 less than 5,en
-3.14,3 point 14,en
-5 45 p m,5:45 pm,en
-5 > 3,5 greater than 3,en
-5 a m,5 am,en
-5.45 p.m.,5:45 pm,en
-50°C,50 degree celsius,en
-545 pm,5:45 pm,en
-6 p m,6 pm,en
-602 am,6:02 am,en
-6:00 am,6 am,en
-6:24 am,6:24 am,en
-9.8 m/s,9 point 8 m/s,en
-About ¢25,about 25 cents,en
-About ¢25 cents only,about 25 cents only,en
-About ¥1000 yen total,about 1000 yens total,en
-admin+tag@example.com,admin tag at example dot com,en
-api.endpoint.v2,api dot endpoint dot v2,en
-at eleven twenty-five a.m,at 11:25 am,en
-at French numbers plus three three oh six six two seven three two six four three,at french numbers +330662732643,en
-at ten o'clock,at 10:00,en
-at two p.m,at 2 pm,en
-at two thirty p.m,at 2:30 pm,en
-bob b-o-b dupov d-u-p-o-v,bob b o b dupov d u p o v,en
-CAPS@EXAMPLE.COM,c a p s at e x a m p l e dot c o m,en
-Contact john.doe@company.co.uk now,contact john dot doe at company dot co dot uk now,en
-Contact me at john@example.com please,contact me at john at example dot com please,en
-example.com,example dot com,en
-fail-safe,fail safe,en
-file.txt,file dot txt,en
-first.last+tag@subdomain.example.com,1st dot last tag at subdomain dot example dot com,en
-francois.dupont@gladia.io,francois dot dupont at gladia dot io,en
-good bye,goodbye,en
-he ain't gonna,he is not going to,en
-hello (yeah) there,hello there,en
-hello (yeah) there,hello there,en
-I have $20 dollars here,i have 20 dollars here,en
-is +16209113040,is +16209113040,en
-is one 620 911 3040,is 16209113040,en
-is one 620 911 3040,is 16209113040,en
-is plus 33 6 80 63 10 00,is +33680631000,en
-is plus 330662732643,is +330662732643,en
-is plus one 620 911 3040,is +16209113040,en
-It costs £30 pounds,it costs 30 pounds,en
-It costs €50,it costs 50 euros,en
-It's 12.5 dollars,it is 12 point 5 dollars,en
-j o h a n n,j o h a n n,en
-jane_smith@test.co.uk,jane smith at test dot co dot uk,en
-john j-o-h-n doe d-o-e,john j o h n doe d o e,en
-my name is B.O.B,my name is b o b,en
-john.doe@company.org,john dot doe at company dot org,en
-lemme see,let me see,en
-my phone number is o 4 5 o 6 4 3 2 1 1 and,my phone number is 0450643211 and,en
-ninety nine items,99 items,en
-o 4 5 o 6 4 3 2 1 1,0450643211,en
-o 4 5 o 6 4 3 2 1 6,0450643216,en
-o 4 5 o 6 4 3 2 1 o,0450643210,en
-one apple,1 apple,en
-one billion dollars,1000000000 dollars,en
-one hundred,100,en
-one hundred people,100 people,en
-one hundred thousand items,100000 items,en
-one million dollars,1000000 dollars,en
-One million dollars total,1000000 dollars total,en
-one thousand dollars,1000 dollars,en
-one thousand three hundred and thirty seven,1337,en
-"one, two, three",123,en
-Plus 1 16 plus equals one.,+116 plus equals one,en
-plus 1-619-981-0181,+16199810181,en
-she ain't gonna,she is not going to,en
-Temperature is 98.6 degrees,temperature is 98 point 6 degrees,en
-ten thousand people,10000 people,en
-test@example.com,test at example dot com,en
-test@example.com.,test at example dot com,en
-test@gladia.io.,test at gladia dot io,en
-That's £100,that is 100 pounds,en
-The code is ABC123 here,the code is a b c 123 here,en
-The price is $99,the price is 99 dollars,en
-The price is 99.99,the price is 99 point 99,en
-The price is €50 euros,the price is 50 euros,en
-The word [inaudible] is here,the word inaudible is here,en
-twenty one dogs,21 dogs,en
-twenty three people,23 people,en
-twenty two items,22 items,en
-two hundred items,200 items,en
-two million people,2000000 people,en
-two thousand,2000,en
-two thousand items,2000 items,en
-U.S.A.,u s a,en
-user.name@domain.com,user dot name at domain dot com,en
-user@domain.com,user at domain dot com,en
-user@sub.domain.example.org,user at sub dot domain dot example dot org,en
-Version 1.0.0 released,version 1 dot 0 dot 0 released,en
-version 2.5.1,version 2 dot 5 dot 1,en
-Visit www.example.com today,visit w w w dot example dot com today,en
-Look at my v12 motor,look at my v12 motor,en
-www.example.com,w w w dot example dot com,en
-x = 5,x equals 5,en
-"zip code 92103, U.S.",zip code 92103 u s,en
-£5.50,5 point 50 pounds,en
-¥1000,1000 yens,en
-ø in Danish,o in danish,en
-€20 or €30,20 euros or 30 euros,en
-my  name is bob,my name is bob,en
-#1 spot,1 spot,it
-It costs €50,it costs 50 euro,it
-"3,14",3 virgola 14,it
-"1.234,56",1234 virgola 56,it
-2 < 5,2 minore di 5,it
-5 > 3,5 maggiore di 3,it
-50°C,50 gradi celsius,it
-dieci per cento,dieci percento,it
-vs milan,contro milan,it
-dott rossi,dottor rossi,it
-dr rossi,dottor rossi,it
-ehm tipo insomma ciao,ciao,it
-admin+tag@example.com,admin tag chiocciola example punto com,it
-test@example.com,test chiocciola example punto com,it
-uno apple,1 apple,it
-x = 5,x uguale a 5,it
-"The price is 99,99",the price is 99 virgola 99,it
-francois.dupont@gladia.io,francois punto dupont chiocciola gladia punto io,it
-www.example.com,w w w punto example punto com,it
-Version 1.0.0 released,version 1 punto 0 punto 0 released,it
-api.endpoint.v2,api punto endpoint punto v2,it
-fail-safe,fail safe,it
-U.S.A.,u s a,it
-prof bianchi,professore bianchi,it
-avv verdi,avvocato verdi,it
-versus inter,contro inter,it
-tel 123,telefono 123,it
-ecc.,eccetera,it
-etc subito,eccetera subito,it
-+1 (619) 981-0181,+16199810181,it
-10:12 pm,10:12 pm,it
-Contact me at john@example.com please,contact me at john chiocciola example punto com please,it
-"Temperature is 98,6 degrees",temperature is 98 virgola 6 degrees,it
-The word [inaudible] is here,the word inaudible is here,it
-$5 and $10,5 dollari and 10 dollari,it
-my  name is bob,my name is bob,it
-#1 spot,1 spot,de
-It costs €50,it costs 50 euros,de
-"3,14",3 komma 14,de
-"1.234,56",1234 komma 56,de
-2 < 5,2 kleiner als 5,de
-5 > 3,5 grosser als 3,de
-50°C,50 grad celsius,de
-admin+tag@example.com,admin tag at example punkt com,de
-test@example.com,test at example punkt com,de
-x = 5,x gleich 5,de
-"The price is 99,99",the price is 99 komma 99,de
-francois.dupont@gladia.io,francois punkt dupont at gladia punkt io,de
-www.example.com,w w w punkt example punkt com,de
-Version 1.0.0 released,version 1 punkt 0 punkt 0 released,de
-api.endpoint.v2,api punkt endpoint punkt v2,de
-fail-safe,fail safe,de
-U.S.A.,u s a,de
-+1 (619) 981-0181,+16199810181,de
-10:12 pm,10:12 pm,de
-Contact me at john@example.com please,contact me at john at example punkt com please,de
-"Temperature is 98,6 degrees",temperature is 98 komma 6 degrees,de
-The word [inaudible] is here,the word inaudible is here,de
-$5 and $10,5 dollars and 10 dollars,de
-my  name is bob,my name is bob,de
-5 bis 10,5 - 10,de
-kilometer weg,km weg,de
-internet suche heute,internetsuche heute,de
-wild card spiel,wildcard spiel,de
-national parks tour,nationalparks tour,de
-also naja hallo,hallo,de
-äh ähm hallo,ah ahm hallo,de
-hm okay,okay,de
-halt mal so,mal so,de
-st. petersburg,st petersburg,de
-6 tage krieg,sechstagekrieg,de
-kreuzungs punkt,kreuzungspunkt,de
-j'ai dit c'est bien,j ai dit c est bien,fr
-vingt trois pommes,23 pommes,fr
-3 milliards d euros,3000000000 d euros,fr
-euh alors hein bah oui,alors oui,fr
-"12,5 degrés",12 virgule 5 degres,fr
-pour 100 de réduction,pourcent de reduction,fr
-pour cent de réduction,pourcent de reduction,fr
-"Hello, world!",hello world,default
-ça va?!,ca va,default
-$100,$100,default
-80 €,80 €,default
-test@example.com,test@example.com,default
-+1234567890,+1234567890,default
-one two three,one two three,default
-5:30 pm,5:30 pm,default
-d'accord,d accord,fr
-qu'il vient,qu il vient,fr
-n'est pas,n est pas,fr
-l'ordinateur,l ordinateur,fr
-m'appelle,m appelle,fr
-s'il vous plait,s il vous plait,fr
-t'as vu,t as vu,fr
-cent euros,100 euros,fr
-mille deux cents,1200,fr
-cinquante trois,53,fr
-contact@exemple.fr,contact arobase exemple point fr,fr
-"2 < 5",2 plus petit que 5,fr
-50°C,50 degres celsius,fr
-ca coute €50,ca coute 50 euros,fr
-euh bonjour hein,bonjour,fr
-mme dupont,madame dupont,fr
-mlle dubois,mademoiselle dubois,fr
-dr martin,docteur martin,fr
-prof dupont,professeur dupont,fr
-st jean,saint jean,fr
-ping pong,pingpong,fr
-volley ball,volleyball,fr
-basket ball,basketball,fr
-hand ball,handball,fr
-water polo,waterpolo,fr
-t shirt,tshirt,fr
-cd rom,cdrom,fr
-super predateur,superpredateur,fr
-"3,14 pi",3 virgule 14 pi,fr
-soixante-dix,70,fr
-quatre-vingts,80,fr
-quatre-vingt-un,81,fr
-nonante-neuf,99,fr
-septante et un,71,fr
-x = 5,x egal a 5,fr
-test@example.com,test arobase example point com,fr
-bonjour (euh) ami,bonjour ami,fr
-ça date d'hier,ca date d hier,fr
diff --git a/tests/e2e/files/gladia-3/de.csv b/tests/e2e/files/gladia-3/de.csv
new file mode 100644
index 0000000..4021dc8
--- /dev/null
+++ b/tests/e2e/files/gladia-3/de.csv
@@ -0,0 +1,37 @@
+input,expected
+#1 spot,1 spot
+It costs €50,it costs 50 euros
+"3,14",3 komma 14
+"1.234,56",1234 komma 56
+2 < 5,2 kleiner als 5
+5 > 3,5 grosser als 3
+50°C,50 grad celsius
+admin+tag@example.com,admin tag at example punkt com
+test@example.com,test at example punkt com
+x = 5,x gleich 5
+"The price is 99,99",the price is 99 komma 99
+francois.dupont@gladia.io,francois punkt dupont at gladia punkt io
+www.example.com,w w w punkt example punkt com
+Version 1.0.0 released,version 1 punkt 0 punkt 0 released
+api.endpoint.v2,api punkt endpoint punkt v2
+fail-safe,fail safe
+U.S.A.,u s a
++1 (619) 981-0181,+16199810181
+10:12 pm,10:12 pm
+Contact me at john@example.com please,contact me at john at example punkt com please
+"Temperature is 98,6 degrees",temperature is 98 komma 6 degrees
+The word [inaudible] is here,the word inaudible is here
+$5 and $10,5 dollars and 10 dollars
+my  name is bob,my name is bob
+5 bis 10,5 - 10
+kilometer weg,km weg
+internet suche heute,internetsuche heute
+wild card spiel,wildcard spiel
+national parks tour,nationalparks tour
+also naja hallo,hallo
+äh ähm hallo,ah ahm hallo
+hm okay,okay
+halt mal so,mal so
+st. petersburg,st petersburg
+6 tage krieg,sechstagekrieg
+kreuzungs punkt,kreuzungspunkt
\ No newline at end of file
diff --git a/tests/e2e/files/gladia-3/default.csv b/tests/e2e/files/gladia-3/default.csv
new file mode 100644
index 0000000..4258ba0
--- /dev/null
+++ b/tests/e2e/files/gladia-3/default.csv
@@ -0,0 +1,9 @@
+input,expected
+"Hello, world!",hello world
+ça va?!,ca va
+$100,$100
+80 €,80 €
+test@example.com,test@example.com
++1234567890,+1234567890
+one two three,one two three
+5:30 pm,5:30 pm
\ No newline at end of file
diff --git a/tests/e2e/files/gladia-3/en.csv b/tests/e2e/files/gladia-3/en.csv
new file mode 100644
index 0000000..25ca0d6
--- /dev/null
+++ b/tests/e2e/files/gladia-3/en.csv
@@ -0,0 +1,126 @@
+input,expected
+#1 spot,1 spot
+"$1,000,000",1000000 dollars
+$5 and $10,5 dollars and 10 dollars
+$50.75 total,50 point 75 dollars total
++1 (619) 981-0181,+16199810181
+05 45 pm,5:45 pm
+05:45pm,05:45 pm
+"1,234.56",1234 point 56
+1.1.1.1,1 dot 1 dot 1 dot 1
+10 a m,10 am
+1012 am,10:12 am
+10:00 pm,10 pm
+10:12 pm,10:12 pm
+10:54 a m,10:54 am
+11 a m,11 am
+1145 pm,11:45 pm
+12 p m,12 pm
+12:34 p m,12:34 pm
+Let's meet at noon o'clock,let us meet at 12:00
+Appointment is at 3 o'clock,appointment is at 3:00
+192.168.1.1,192 dot 168 dot 1 dot 1
+2 < 5,2 less than 5
+3.14,3 point 14
+5 45 p m,5:45 pm
+5 > 3,5 greater than 3
+5 a m,5 am
+5.45 p.m.,5:45 pm
+50°C,50 degree celsius
+545 pm,5:45 pm
+6 p m,6 pm
+602 am,6:02 am
+6:00 am,6 am
+6:24 am,6:24 am
+9.8 m/s,9 point 8 m/s
+About ¢25,about 25 cents
+About ¢25 cents only,about 25 cents only
+About ¥1000 yen total,about 1000 yens total
+admin+tag@example.com,admin tag at example dot com
+api.endpoint.v2,api dot endpoint dot v2
+at eleven twenty-five a.m,at 11:25 am
+at French numbers plus three three oh six six two seven three two six four three,at french numbers +330662732643
+at ten o'clock,at 10:00
+at two p.m,at 2 pm
+at two thirty p.m,at 2:30 pm
+bob b-o-b dupov d-u-p-o-v,bob b o b dupov d u p o v
+CAPS@EXAMPLE.COM,c a p s at e x a m p l e dot c o m
+Contact john.doe@company.co.uk now,contact john dot doe at company dot co dot uk now
+Contact me at john@example.com please,contact me at john at example dot com please
+example.com,example dot com
+fail-safe,fail safe
+file.txt,file dot txt
+first.last+tag@subdomain.example.com,1st dot last tag at subdomain dot example dot com
+francois.dupont@gladia.io,francois dot dupont at gladia dot io
+good bye,goodbye
+he ain't gonna,he is not going to
+hello (yeah) there,hello there
+hello (yeah) there,hello there
+I have $20 dollars here,i have 20 dollars here
+is +16209113040,is +16209113040
+is one 620 911 3040,is 16209113040
+is one 620 911 3040,is 16209113040
+is plus 33 6 80 63 10 00,is +33680631000
+is plus 330662732643,is +330662732643
+is plus one 620 911 3040,is +16209113040
+It costs £30 pounds,it costs 30 pounds
+It costs €50,it costs 50 euros
+It's 12.5 dollars,it is 12 point 5 dollars
+j o h a n n,j o h a n n
+jane_smith@test.co.uk,jane smith at test dot co dot uk
+john j-o-h-n doe d-o-e,john j o h n doe d o e
+my name is B.O.B,my name is b o b
+john.doe@company.org,john dot doe at company dot org
+lemme see,let me see
+my phone number is o 4 5 o 6 4 3 2 1 1 and,my phone number is 0450643211 and
+ninety nine items,99 items
+o 4 5 o 6 4 3 2 1 1,0450643211
+o 4 5 o 6 4 3 2 1 6,0450643216
+o 4 5 o 6 4 3 2 1 o,0450643210
+one apple,1 apple
+one billion dollars,1000000000 dollars
+one hundred,100
+one hundred people,100 people
+one hundred thousand items,100000 items
+one million dollars,1000000 dollars
+One million dollars total,1000000 dollars total
+one thousand dollars,1000 dollars
+one thousand three hundred and thirty seven,1337
+"one, two, three",123
+Plus 1 16 plus equals one.,+116 plus equals one
+plus 1-619-981-0181,+16199810181
+she ain't gonna,she is not going to
+Temperature is 98.6 degrees,temperature is 98 point 6 degrees
+ten thousand people,10000 people
+test@example.com,test at example dot com
+test@example.com.,test at example dot com
+test@gladia.io.,test at gladia dot io
+That's £100,that is 100 pounds
+The code is ABC123 here,the code is a b c 123 here
+The price is $99,the price is 99 dollars
+The price is 99.99,the price is 99 point 99
+The price is €50 euros,the price is 50 euros
+The word [inaudible] is here,the word inaudible is here
+twenty one dogs,21 dogs
+twenty three people,23 people
+twenty two items,22 items
+two hundred items,200 items
+two million people,2000000 people
+two thousand,2000
+two thousand items,2000 items
+U.S.A.,u s a
+user.name@domain.com,user dot name at domain dot com
+user@domain.com,user at domain dot com
+user@sub.domain.example.org,user at sub dot domain dot example dot org
+Version 1.0.0 released,version 1 dot 0 dot 0 released
+version 2.5.1,version 2 dot 5 dot 1
+Visit www.example.com today,visit w w w dot example dot com today
+Look at my v12 motor,look at my v12 motor
+www.example.com,w w w dot example dot com
+x = 5,x equals 5
+"zip code 92103, U.S.",zip code 92103 u s
+£5.50,5 point 50 pounds
+¥1000,1000 yens
+ø in Danish,o in danish
+€20 or €30,20 euros or 30 euros
+my  name is bob,my name is bob
\ No newline at end of file
diff --git a/tests/e2e/files/gladia-3/fr.csv b/tests/e2e/files/gladia-3/fr.csv
new file mode 100644
index 0000000..da16c21
--- /dev/null
+++ b/tests/e2e/files/gladia-3/fr.csv
@@ -0,0 +1,46 @@
+input,expected
+j'ai dit c'est bien,j ai dit c est bien
+vingt trois pommes,23 pommes
+3 milliards d euros,3000000000 d euros
+euh alors hein bah oui,alors oui
+"12,5 degrés",12 virgule 5 degres
+pour 100 de réduction,pourcent de reduction
+pour cent de réduction,pourcent de reduction
+d'accord,d accord
+qu'il vient,qu il vient
+n'est pas,n est pas
+l'ordinateur,l ordinateur
+m'appelle,m appelle
+s'il vous plait,s il vous plait
+t'as vu,t as vu
+cent euros,100 euros
+mille deux cents,1200
+cinquante trois,53
+contact@exemple.fr,contact arobase exemple point fr
+"2 < 5",2 plus petit que 5
+50°C,50 degres celsius
+ca coute €50,ca coute 50 euros
+euh bonjour hein,bonjour
+mme dupont,madame dupont
+mlle dubois,mademoiselle dubois
+dr martin,docteur martin
+prof dupont,professeur dupont
+st jean,saint jean
+ping pong,pingpong
+volley ball,volleyball
+basket ball,basketball
+hand ball,handball
+water polo,waterpolo
+t shirt,tshirt
+cd rom,cdrom
+super predateur,superpredateur
+"3,14 pi",3 virgule 14 pi
+soixante-dix,70
+quatre-vingts,80
+quatre-vingt-un,81
+nonante-neuf,99
+septante et un,71
+x = 5,x egal a 5
+test@example.com,test arobase example point com
+bonjour (euh) ami,bonjour ami
+ça date d'hier,ca date d hier
\ No newline at end of file
diff --git a/tests/e2e/files/gladia-3/it.csv b/tests/e2e/files/gladia-3/it.csv
new file mode 100644
index 0000000..895daae
--- /dev/null
+++ b/tests/e2e/files/gladia-3/it.csv
@@ -0,0 +1,37 @@
+input,expected
+#1 spot,1 spot
+It costs €50,it costs 50 euro
+"3,14",3 virgola 14
+"1.234,56",1234 virgola 56
+2 < 5,2 minore di 5
+5 > 3,5 maggiore di 3
+50°C,50 gradi celsius
+dieci per cento,dieci percento
+vs milan,contro milan
+dott rossi,dottor rossi
+dr rossi,dottor rossi
+ehm tipo insomma ciao,ciao
+admin+tag@example.com,admin tag chiocciola example punto com
+test@example.com,test chiocciola example punto com
+uno apple,1 apple
+x = 5,x uguale a 5
+"The price is 99,99",the price is 99 virgola 99
+francois.dupont@gladia.io,francois punto dupont chiocciola gladia punto io
+www.example.com,w w w punto example punto com
+Version 1.0.0 released,version 1 punto 0 punto 0 released
+api.endpoint.v2,api punto endpoint punto v2
+fail-safe,fail safe
+U.S.A.,u s a
+prof bianchi,professore bianchi
+avv verdi,avvocato verdi
+versus inter,contro inter
+tel 123,telefono 123
+ecc.,eccetera
+etc subito,eccetera subito
++1 (619) 981-0181,+16199810181
+10:12 pm,10:12 pm
+Contact me at john@example.com please,contact me at john chiocciola example punto com please
+"Temperature is 98,6 degrees",temperature is 98 virgola 6 degrees
+The word [inaudible] is here,the word inaudible is here
+$5 and $10,5 dollari and 10 dollari
+my  name is bob,my name is bob
\ No newline at end of file
diff --git a/tests/e2e/normalization_test.py b/tests/e2e/normalization_test.py
index 5663fd3..666b284 100644
--- a/tests/e2e/normalization_test.py
+++ b/tests/e2e/normalization_test.py
@@ -12,7 +12,6 @@
 
 @dataclass
 class NormalizationTest:
-    language: str
     input: str
     expected: str
 
@@ -23,7 +22,6 @@ def _load_tests_from_csv(csv_path: Path) -> list[NormalizationTest]:
         for row in csv.DictReader(f):
             rows.append(
                 NormalizationTest(
-                    language=row["language"],
                     input=row["input"],
                     expected=row["expected"],
                 )
@@ -35,13 +33,31 @@ def _case_ids(cases: list[NormalizationTest]) -> list[str]:
     return [test.input[:60] for test in cases]
 
 
-def _group_by_language(
-    tests: list[NormalizationTest],
+def _discover_preset_tests(
+    preset_dir: Path,
 ) -> dict[str, list[NormalizationTest]]:
-    groups: dict[str, list[NormalizationTest]] = {}
-    for t in tests:
-        groups.setdefault(t.language, []).append(t)
-    return groups
+    """Scan a preset directory for per-language CSV files.
+
+    Returns a dict mapping language code (filename stem) to its non-empty test cases.
+    """
+    tests: dict[str, list[NormalizationTest]] = {}
+    if not preset_dir.is_dir():
+        return tests
+    for csv_path in sorted(preset_dir.glob("*.csv")):
+        language = csv_path.stem
+        cases = _load_tests_from_csv(csv_path)
+        if cases:
+            tests[language] = cases
+    return tests
+
+
+# ---------------------------------------------------------------------------
+# gladia_3
+# ---------------------------------------------------------------------------
+
+_GLADIA_3_DIR = _FILES_DIR / "gladia-3"
+_GLADIA_3_BY_LANGUAGE = _discover_preset_tests(_GLADIA_3_DIR)
+_GLADIA_3_PIPELINES: dict[str, NormalizationPipeline] = {}
 
 
 def _load_pipeline(preset_name_or_path: str, language: str) -> NormalizationPipeline:
@@ -53,15 +69,6 @@ def _load_pipeline(preset_name_or_path: str, language: str) -> NormalizationPipe
     return _GLADIA_3_PIPELINES[language]
 
 
-# ---------------------------------------------------------------------------
-# gladia_3
-# ---------------------------------------------------------------------------
-
-_GLADIA_3_CSV = _FILES_DIR / "gladia-3.csv"
-_GLADIA_3_TESTS = _load_tests_from_csv(_GLADIA_3_CSV) if _GLADIA_3_CSV.exists() else []
-_GLADIA_3_PIPELINES: dict[str, NormalizationPipeline] = {}
-
-
 def _make_gladia_3_test(language: str, cases: list[NormalizationTest]):
     @pytest.mark.parametrize("test", cases, ids=_case_ids(cases))
     def _test(test: NormalizationTest) -> None:
@@ -77,8 +84,6 @@ def _test(test: NormalizationTest) -> None:
     return _test
 
 
-_GLADIA_3_BY_LANGUAGE = _group_by_language(_GLADIA_3_TESTS)
-
 for _language in sorted(_GLADIA_3_BY_LANGUAGE):
     globals()[f"test_gladia_3_{_language}"] = _make_gladia_3_test(
         _language, _GLADIA_3_BY_LANGUAGE[_language]

From 693499210f9a9c02007e1536b5daf0a2c60d4a70 Mon Sep 17 00:00:00 2001
From: karamouche <hugo4ibt@gmail.com>
Date: Mon, 13 Apr 2026 18:24:27 -0400
Subject: [PATCH 08/10] test: update language normalization tests for German,
 French, Italian and Spanish languages

---
 tests/e2e/files/gladia-3/de.csv      | 24 ++++++++++------------
 tests/e2e/files/gladia-3/default.csv |  4 +++-
 tests/e2e/files/gladia-3/en.csv      |  6 +-----
 tests/e2e/files/gladia-3/es.csv      | 30 ++++++++++++++++++++++++++++
 tests/e2e/files/gladia-3/fr.csv      |  2 +-
 tests/e2e/files/gladia-3/it.csv      | 24 ++++++++++------------
 6 files changed, 55 insertions(+), 35 deletions(-)
 create mode 100644 tests/e2e/files/gladia-3/es.csv

diff --git a/tests/e2e/files/gladia-3/de.csv b/tests/e2e/files/gladia-3/de.csv
index 4021dc8..9704616 100644
--- a/tests/e2e/files/gladia-3/de.csv
+++ b/tests/e2e/files/gladia-3/de.csv
@@ -1,6 +1,6 @@
 input,expected
-#1 spot,1 spot
-It costs €50,it costs 50 euros
+#1 Platz,1 platz
+Es kostet €50,es kostet 50 euros
 "3,14",3 komma 14
 "1.234,56",1234 komma 56
 2 < 5,2 kleiner als 5
@@ -9,20 +9,16 @@ It costs €50,it costs 50 euros
 admin+tag@example.com,admin tag at example punkt com
 test@example.com,test at example punkt com
 x = 5,x gleich 5
-"The price is 99,99",the price is 99 komma 99
+"Der Preis ist 99,99",der preis ist 99 komma 99
 francois.dupont@gladia.io,francois punkt dupont at gladia punkt io
 www.example.com,w w w punkt example punkt com
-Version 1.0.0 released,version 1 punkt 0 punkt 0 released
+Version 1.0.0 veröffentlicht,version 1 punkt 0 punkt 0 veroffentlicht
 api.endpoint.v2,api punkt endpoint punkt v2
-fail-safe,fail safe
-U.S.A.,u s a
-+1 (619) 981-0181,+16199810181
-10:12 pm,10:12 pm
-Contact me at john@example.com please,contact me at john at example punkt com please
-"Temperature is 98,6 degrees",temperature is 98 komma 6 degrees
-The word [inaudible] is here,the word inaudible is here
-$5 and $10,5 dollars and 10 dollars
-my  name is bob,my name is bob
+Kontaktiere mich bei john@example.com bitte,kontaktiere mich bei john at example punkt com bitte
+"Die Temperatur ist 98,6 Grad",die temperatur ist 98 komma 6 grad
+Das Wort [inaudible] ist hier,das wort inaudible ist hier
+$5 und $10,5 dollars und 10 dollars
+mein  Name ist Bob,mein name ist bob
 5 bis 10,5 - 10
 kilometer weg,km weg
 internet suche heute,internetsuche heute
@@ -34,4 +30,4 @@ hm okay,okay
 halt mal so,mal so
 st. petersburg,st petersburg
 6 tage krieg,sechstagekrieg
-kreuzungs punkt,kreuzungspunkt
\ No newline at end of file
+kreuzungs punkt,kreuzungspunkt
diff --git a/tests/e2e/files/gladia-3/default.csv b/tests/e2e/files/gladia-3/default.csv
index 4258ba0..3c369cf 100644
--- a/tests/e2e/files/gladia-3/default.csv
+++ b/tests/e2e/files/gladia-3/default.csv
@@ -6,4 +6,6 @@ $100,$100
 test@example.com,test@example.com
 +1234567890,+1234567890
 one two three,one two three
-5:30 pm,5:30 pm
\ No newline at end of file
+5:30 pm,5:30 pm
+fail-safe,fail safe
+U.S.A.,u s a
diff --git a/tests/e2e/files/gladia-3/en.csv b/tests/e2e/files/gladia-3/en.csv
index 25ca0d6..d37c965 100644
--- a/tests/e2e/files/gladia-3/en.csv
+++ b/tests/e2e/files/gladia-3/en.csv
@@ -48,18 +48,15 @@ CAPS@EXAMPLE.COM,c a p s at e x a m p l e dot c o m
 Contact john.doe@company.co.uk now,contact john dot doe at company dot co dot uk now
 Contact me at john@example.com please,contact me at john at example dot com please
 example.com,example dot com
-fail-safe,fail safe
 file.txt,file dot txt
 first.last+tag@subdomain.example.com,1st dot last tag at subdomain dot example dot com
 francois.dupont@gladia.io,francois dot dupont at gladia dot io
 good bye,goodbye
 he ain't gonna,he is not going to
 hello (yeah) there,hello there
-hello (yeah) there,hello there
 I have $20 dollars here,i have 20 dollars here
 is +16209113040,is +16209113040
 is one 620 911 3040,is 16209113040
-is one 620 911 3040,is 16209113040
 is plus 33 6 80 63 10 00,is +33680631000
 is plus 330662732643,is +330662732643
 is plus one 620 911 3040,is +16209113040
@@ -108,7 +105,6 @@ two hundred items,200 items
 two million people,2000000 people
 two thousand,2000
 two thousand items,2000 items
-U.S.A.,u s a
 user.name@domain.com,user dot name at domain dot com
 user@domain.com,user at domain dot com
 user@sub.domain.example.org,user at sub dot domain dot example dot org
@@ -123,4 +119,4 @@ x = 5,x equals 5
 ¥1000,1000 yens
 ø in Danish,o in danish
 €20 or €30,20 euros or 30 euros
-my  name is bob,my name is bob
\ No newline at end of file
+my  name is bob,my name is bob
diff --git a/tests/e2e/files/gladia-3/es.csv b/tests/e2e/files/gladia-3/es.csv
new file mode 100644
index 0000000..2162d6d
--- /dev/null
+++ b/tests/e2e/files/gladia-3/es.csv
@@ -0,0 +1,30 @@
+input,expected
+$99,99 dólares
+"100,50 €",100 punto 50
+9.8 m/s,9 punto 8 m/s
+admin+tag@example.com,admin tag arroba example punto com
+cien mil items,100000 items
+"Cuesta 12,5 euros",cuesta 12 punto 5 euros
+diez mil personas,10000 personas
+dos mil items,2000 items
+dos millones de personas,2 millones de personas
+"El precio es 99,99 €",el precio es 99 punto 99
+Escribe a test@ejemplo.es por favor,escribe a test arroba ejemplo punto es por favor
+first.last+tag@subdomain.example.com,first punto last tag arroba subdomain punto example punto com
+hola (bien) aquí,hola bien aqui
+jane_smith@test.co.uk,jane smith arroba test punto co punto uk
+john.doe@company.org,john punto doe arroba company punto org
+mil millones de dólares,mil millones de dolares
+noventa y nueve items,99 items
+Señal%Marca,senal%marca
+Tengo 20€ euros aquí,tengo 20 euros euros aqui
+un millón de dólares,un millon de dolares
+user@domain.com,user arroba domain punto com
+veintidós items,22 items
+veintitrés personas,23 personas
+Veintitrés personas aquí,23 personas aqui
+Visita www.ejemplo.com ahora,visita w w w punto ejemplo punto com ahora
+www.gladia.io,w w w punto gladia punto io
+¢25,25 céntimos
+£50,50 libras
+¥1000,1000 yenes
diff --git a/tests/e2e/files/gladia-3/fr.csv b/tests/e2e/files/gladia-3/fr.csv
index da16c21..cd713cb 100644
--- a/tests/e2e/files/gladia-3/fr.csv
+++ b/tests/e2e/files/gladia-3/fr.csv
@@ -43,4 +43,4 @@ septante et un,71
 x = 5,x egal a 5
 test@example.com,test arobase example point com
 bonjour (euh) ami,bonjour ami
-ça date d'hier,ca date d hier
\ No newline at end of file
+ça date d'hier,ca date d hier
diff --git a/tests/e2e/files/gladia-3/it.csv b/tests/e2e/files/gladia-3/it.csv
index 895daae..c383407 100644
--- a/tests/e2e/files/gladia-3/it.csv
+++ b/tests/e2e/files/gladia-3/it.csv
@@ -1,6 +1,6 @@
 input,expected
-#1 spot,1 spot
-It costs €50,it costs 50 euro
+#1 posto,1 posto
+Costa €50,costa 50 euro
 "3,14",3 virgola 14
 "1.234,56",1234 virgola 56
 2 < 5,2 minore di 5
@@ -13,25 +13,21 @@ dr rossi,dottor rossi
 ehm tipo insomma ciao,ciao
 admin+tag@example.com,admin tag chiocciola example punto com
 test@example.com,test chiocciola example punto com
-uno apple,1 apple
+uno mela,1 mela
 x = 5,x uguale a 5
-"The price is 99,99",the price is 99 virgola 99
+"Il prezzo è 99,99",il prezzo e 99 virgola 99
 francois.dupont@gladia.io,francois punto dupont chiocciola gladia punto io
 www.example.com,w w w punto example punto com
-Version 1.0.0 released,version 1 punto 0 punto 0 released
+Versione 1.0.0 rilasciata,versione 1 punto 0 punto 0 rilasciata
 api.endpoint.v2,api punto endpoint punto v2
-fail-safe,fail safe
-U.S.A.,u s a
 prof bianchi,professore bianchi
 avv verdi,avvocato verdi
 versus inter,contro inter
 tel 123,telefono 123
 ecc.,eccetera
 etc subito,eccetera subito
-+1 (619) 981-0181,+16199810181
-10:12 pm,10:12 pm
-Contact me at john@example.com please,contact me at john chiocciola example punto com please
-"Temperature is 98,6 degrees",temperature is 98 virgola 6 degrees
-The word [inaudible] is here,the word inaudible is here
-$5 and $10,5 dollari and 10 dollari
-my  name is bob,my name is bob
\ No newline at end of file
+Contattami a john@example.com per favore,contattami a john chiocciola example punto com per favore
+"La temperatura è di 98,6 gradi",la temperatura e di 98 virgola 6 gradi
+La parola [inaudible] è qui,la parola inaudible e qui
+$5 e $10,5 dollari e 10 dollari
+mi  chiamo bob,mi chiamo bob

From f33ddb38f0138c11d835d9ddd4ab89337edb395c Mon Sep 17 00:00:00 2001
From: karamouche <hugo4ibt@gmail.com>
Date: Tue, 14 Apr 2026 09:09:19 -0400
Subject: [PATCH 09/10] fix(spanish): correct decimal word from "punto" to
 "coma" in Spanish language configuration

---
 normalization/languages/spanish/operators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/normalization/languages/spanish/operators.py b/normalization/languages/spanish/operators.py
index 61c0dbd..e6c9a45 100644
--- a/normalization/languages/spanish/operators.py
+++ b/normalization/languages/spanish/operators.py
@@ -20,7 +20,7 @@
 SPANISH_CONFIG = LanguageConfig(
     code="es",
     decimal_separator=",",
-    decimal_word="punto",
+    decimal_word="coma",
     thousand_separator=".",
     symbols_to_words={
         "@": "arroba",

From 8568b7f562f145dbaf071b71e9ef0d23c76f4934 Mon Sep 17 00:00:00 2001
From: karamouche <hugo4ibt@gmail.com>
Date: Tue, 14 Apr 2026 14:21:25 -0400
Subject: [PATCH 10/10] fix(german): remove incorrect replacement for "bis" in
 German language normalization

---
 normalization/languages/german/replacements.py | 1 -
 tests/e2e/files/gladia-3/de.csv                | 1 -
 tests/e2e/files/gladia-3/es.csv                | 8 ++++----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/normalization/languages/german/replacements.py b/normalization/languages/german/replacements.py
index 4b220b1..804c528 100644
--- a/normalization/languages/german/replacements.py
+++ b/normalization/languages/german/replacements.py
@@ -6,6 +6,5 @@
     "xdrtb": "xdr-tb",
     "dualradio": "dual-radio",
     "st.": "sankt",
-    "bis": "-",
     "maubewegung": "mau-bewegung",
 }
diff --git a/tests/e2e/files/gladia-3/de.csv b/tests/e2e/files/gladia-3/de.csv
index 9704616..91e265f 100644
--- a/tests/e2e/files/gladia-3/de.csv
+++ b/tests/e2e/files/gladia-3/de.csv
@@ -19,7 +19,6 @@ Kontaktiere mich bei john@example.com bitte,kontaktiere mich bei john at example
 Das Wort [inaudible] ist hier,das wort inaudible ist hier
 $5 und $10,5 dollars und 10 dollars
 mein  Name ist Bob,mein name ist bob
-5 bis 10,5 - 10
 kilometer weg,km weg
 internet suche heute,internetsuche heute
 wild card spiel,wildcard spiel
diff --git a/tests/e2e/files/gladia-3/es.csv b/tests/e2e/files/gladia-3/es.csv
index 2162d6d..bda514e 100644
--- a/tests/e2e/files/gladia-3/es.csv
+++ b/tests/e2e/files/gladia-3/es.csv
@@ -1,14 +1,14 @@
 input,expected
 $99,99 dólares
-"100,50 €",100 punto 50
-9.8 m/s,9 punto 8 m/s
+"100,50 €",100 coma 50
+9.8 m/s,9 coma 8 m/s
 admin+tag@example.com,admin tag arroba example punto com
 cien mil items,100000 items
-"Cuesta 12,5 euros",cuesta 12 punto 5 euros
+"Cuesta 12,5 euros",cuesta 12 coma 5 euros
 diez mil personas,10000 personas
 dos mil items,2000 items
 dos millones de personas,2 millones de personas
-"El precio es 99,99 €",el precio es 99 punto 99
+"El precio es 99,99 €",el precio es 99 coma 99
 Escribe a test@ejemplo.es por favor,escribe a test arroba ejemplo punto es por favor
 first.last+tag@subdomain.example.com,first punto last tag arroba subdomain punto example punto com
 hola (bien) aquí,hola bien aqui