gladiaio · Karamouche · May 5, 2026 · Apr 23, 2026 · May 4, 2026 · May 5, 2026
diff --git a/README.md b/README.md
@@ -110,15 +110,17 @@ Pipelines are defined declaratively in **YAML presets**. Each preset lists the s
 
 ## Supported languages
 
-| Code | Language |
-| ---- | -------- |
-| `en` | English  |
-| `fr` | French   |
-| `de` | German   |
-| `it` | Italian  |
-| `es` | Spanish  |
-| `nl` | Dutch    |
-| `sv` | Swedish  |
+| Code | Language  |
+| ---- | --------- |
+| `en` | English   |
+| `fr` | French    |
+| `de` | German    |
+| `it` | Italian   |
+| `es` | Spanish   |
+| `nl` | Dutch     |
+| `sv` | Swedish   |
+| `fi` | Finnish   |
+| `no` | Norwegian |
 
 Unsupported language codes fall back to a safe default that applies language-independent normalization only.
 

diff --git a/docs/steps.md b/docs/steps.md
@@ -75,6 +75,10 @@ Runs before expand_alphanumeric_codes to prevent 'VIII' -> 'V I I I'.
 Only converts ii-ix to avoid false positives with single letters like 'I'.
 Skips 'v' when adjacent to digits (version-like contexts: v2, v 12).
 
+When ``operators.config.roman_numerals_uppercase_only`` is True, multi-letter
+numerals match only in ALL CAPS (so Swedish/Norwegian ``vi`` / ``Vi`` are not
+read as 6). Standalone ``V`` still matches as 5 for titles like ``Louis V``.
+
 ### `convert_word_based_time_patterns`
 
 **Base class:** `TextStep`
@@ -95,8 +99,10 @@ on the step to avoid recompilation on every call.
 
 Space out uppercase words and alphanumeric codes.
 
-'ABC123' -> 'A B C 1 2 3', 'CNN' -> 'C N N'.
-Skips pure numbers, ordinals (1st, 2nd), and protection markers. Must run before casefold_text.
+'ABC123' -> 'A B C 1 2 3'; by default 'CNN' -> 'C N N' as well. When
+``operators.config.expand_all_caps_letter_by_letter`` is False, pure-letter ALL-CAPS
+tokens (e.g. SMS) are left intact for Nordic-style acronym handling. Skips pure
+numbers, ordinals (1st, 2nd), and protection markers. Must run before casefold_text.
 
 ### `expand_contractions`
 
@@ -332,17 +338,21 @@ Handles ¤ markers by processing segments separately.
 
 Remove currency symbols that are not adjacent to numbers.
 
-Single-character symbols use the between/start/end patterns. Each
-multi-character key (e.g. ``kr``) is stripped only when it appears as its own
-token (``\b...\b``), so it is not confused with a substring inside a word.
+Single-character symbols use the classic between/start/end patterns (not
+between two digits). Multi-character keys (e.g. ``kr``) are matched only as
+whole tokens (``\b...\b``), so ordinary words are not corrupted, and are left in
+place when only whitespace separates them from a digit (e.g. ``50 kr``).
 
 ### `remove_symbols`
 
 **Base class:** `TextStep`
 
 Replace markers, symbols, and punctuation with spaces.
 
-Preserves letters, digits, and all placeholder characters.
+Preserves letters, digits, and all placeholder characters. When
+``symbols_to_words`` defines a word for ``%``, the step expands ``%`` to that
+word only when it follows a decimal or integer literal (e.g. ``8,75%``);
+other uses of ``%`` are not expanded.
 
 ### `remove_thousand_separators`
 

diff --git a/normalization/languages/__init__.py b/normalization/languages/__init__.py
@@ -1,4 +1,14 @@
-from . import dutch, english, finnish, french, german, italian, spanish, swedish
+from . import (
+    dutch,
+    english,
+    finnish,
+    french,
+    german,
+    italian,
+    norwegian,
+    spanish,
+    swedish,
+)
 from .base import LanguageOperators
 from .registry import get_language_registry, register_language
 
@@ -11,6 +21,7 @@
     "french",
     "german",
     "italian",
+    "norwegian",
     "spanish",
     "swedish",
     "get_language_registry",

diff --git a/normalization/languages/base/language_config.py b/normalization/languages/base/language_config.py
@@ -75,6 +75,16 @@ class LanguageConfig:
     ordinal_suffixes: list[str] | None = None
     """Ordinal number suffixes for this language (e.g. ["st", "nd", "rd", "th"] for English).
     Used by steps that need to detect ordinal numbers. None = ordinal detection is skipped."""
+    roman_numerals_uppercase_only: bool = False
+    """When True, only treat Roman numerals as digits if they appear in ALL CAPS (e.g. VI, VIII).
+
+    Avoids collisions with Nordic pronouns spelled ``vi``/``Vi``. Default False preserves
+    legacy case-insensitive matching for other languages."""
+    expand_all_caps_letter_by_letter: bool = True
+    """When False, pure letter ALL-CAPS tokens (e.g. SMS) are not spaced into letters.
+
+    Nordic STT hypotheses often keep acronyms as one word; default True preserves
+    English-style letter-by-letter expansion for CAPS-only tokens."""
     am_word: str | None = None
     """Canonical AM time designator (e.g. "am" for English).
     Used by am/pm time formatting steps. None = am/pm steps are skipped."""

diff --git a/normalization/languages/norwegian/__init__.py b/normalization/languages/norwegian/__init__.py
@@ -0,0 +1,7 @@
+from .operators import NorwegianOperators
+from .replacements import NORWEGIAN_REPLACEMENTS
+
+__all__ = [
+    "NorwegianOperators",
+    "NORWEGIAN_REPLACEMENTS",
+]