diff --git a/generate_espanso.py b/generate_espanso.py
index b8deebd..b68a942 100755
--- a/generate_espanso.py
+++ b/generate_espanso.py
@@ -43,6 +43,16 @@
     'ì': ['i', "i'", 'i1'], 'ò': ['o', "o'", 'o1'], 'ù': ['u', "u'", 'u1'],
 }
 
+# Common English words that would be generated as typo triggers for dev terms,
+# causing false positives. These are excluded from all generated trigger lists.
+FALSE_POSITIVE_BLOCKLIST: frozenset[str] = frozenset({
+    # High-risk: everyday words
+    "asset", "cost", "cone", "filer", "fronted", "gird", "mere", "neural",
+    "reactor", "sash", "sate", "sinner", "sting", "tale", "thee",
+    # Medium-risk: less common but real English words
+    "bade", "borer", "brach", "deign", "lading", "outlie", "sider", "sprit", "tost",
+})
+
 REPO_DIR = os.path.dirname(os.path.abspath(__file__))
 DICT_DIR = os.path.join(REPO_DIR, "dictionaries")
 
@@ -149,6 +159,7 @@ def generate_all_typos(word, include_accents=True):
     if len(word) >= 5:
         typos |= generate_missing_char(word)
     typos -= ALL_WORDS
+    typos -= FALSE_POSITIVE_BLOCKLIST
     typos.discard(word)
     return {t for t in typos if len(t) >= 2}
 
diff --git a/tests/test_typo_generators.py b/tests/test_typo_generators.py
index 6cd37b6..551dde8 100644
--- a/tests/test_typo_generators.py
+++ b/tests/test_typo_generators.py
@@ -221,3 +221,38 @@ def test_reutrn_for_return(self):
 )
 def test_blocklisted_word_not_generated_as_typo(trigger, source):
     assert trigger not in ge.generate_all_typos(source)
+
+
+# ---------------------------------------------------------------------------
+# FALSE_POSITIVE_BLOCKLIST — no blocked word must appear as a trigger
+# ---------------------------------------------------------------------------
+
+class TestFalsePositiveBlocklist:
+    """No word in the blocklist must ever appear as a generated trigger."""
+
+    def test_blocklist_is_non_empty(self):
+        assert len(ge.FALSE_POSITIVE_BLOCKLIST) > 0
+
+    def test_no_blocklist_word_in_dev_triggers(self):
+        """All DEV words combined must not produce any blocked trigger."""
+        for word in ge.DEV_WORDS:
+            if len(word) < 3:
+                continue
+            typos = ge.generate_all_typos(word, include_accents=False)
+            blocked_hits = typos & ge.FALSE_POSITIVE_BLOCKLIST
+            assert not blocked_hits, (
+                f"Blocklisted triggers {blocked_hits!r} "
+                f"generated for dev word '{word}'"
+            )
+
+    def test_blocklist_words_are_excluded_regardless_of_all_words(self, monkeypatch):
+        """Blocklist takes effect even when ALL_WORDS is empty."""
+        monkeypatch.setattr(ge, "ALL_WORDS", set())
+        typos = ge.generate_all_typos("string", include_accents=False)
+        assert "sting" not in typos
+
+    def test_blocklist_does_not_affect_non_blocked_typos(self, monkeypatch):
+        """Legitimate typos that are not in the blocklist still appear."""
+        monkeypatch.setattr(ge, "ALL_WORDS", set())
+        typos = ge.generate_all_typos("string", include_accents=False)
+        assert len(typos) > 0
diff --git a/tests/test_yaml_generation.py b/tests/test_yaml_generation.py
index 0be7785..03addfc 100644
--- a/tests/test_yaml_generation.py
+++ b/tests/test_yaml_generation.py
@@ -42,7 +42,8 @@ def test_every_match_has_triggers_replace_word(self):
         doc = _parse(content)
         for match in doc["matches"]:
             assert "triggers" in match, f"Match missing triggers: {match}"
-            assert isinstance(match["triggers"], list), f"triggers should be a list: {match}"
+            assert isinstance(match["triggers"], list), f"triggers must be a list: {match}"
+            assert len(match["triggers"]) > 0, f"triggers list is empty: {match}"
             assert "replace" in match, f"Match missing replace: {match}"
             assert match.get("word") is True, f"Match missing word:true: {match}"
 