Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions generate_espanso.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@
'ì': ['i', "i'", 'i1'], 'ò': ['o', "o'", 'o1'], 'ù': ['u', "u'", 'u1'],
}

# Real English words that the typo generators would otherwise emit as triggers
# for dev terms. Auto-correcting them would be a false positive, so they are
# excluded from all generated trigger lists.
FALSE_POSITIVE_BLOCKLIST: frozenset[str] = frozenset(
    (
        # Everyday words (high risk)
        "asset",
        "cost",
        "cone",
        "filer",
        "fronted",
        "gird",
        "mere",
        "neural",
        "reactor",
        "sash",
        "sate",
        "sinner",
        "sting",
        "tale",
        "thee",
        # Rarer but still valid English words (medium risk)
        "bade",
        "borer",
        "brach",
        "deign",
        "lading",
        "outlie",
        "sider",
        "sprit",
        "tost",
    )
)

# Absolute path of the directory that contains this script.
REPO_DIR = os.path.split(os.path.abspath(__file__))[0]
# The "dictionaries" folder located directly inside REPO_DIR.
DICT_DIR = os.path.join(REPO_DIR, "dictionaries")

Expand Down Expand Up @@ -149,6 +159,7 @@ def generate_all_typos(word, include_accents=True):
if len(word) >= 5:
typos |= generate_missing_char(word)
typos -= ALL_WORDS
typos -= FALSE_POSITIVE_BLOCKLIST
typos.discard(word)
return {t for t in typos if len(t) >= 2}

Expand Down
35 changes: 35 additions & 0 deletions tests/test_typo_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,38 @@ def test_reutrn_for_return(self):
)
def test_blocklisted_word_not_generated_as_typo(trigger, source):
assert trigger not in ge.generate_all_typos(source)


# ---------------------------------------------------------------------------
# FALSE_POSITIVE_BLOCKLIST — no blocked word must appear as a trigger
# ---------------------------------------------------------------------------

class TestFalsePositiveBlocklist:
    """No word in the blocklist must ever appear as a generated trigger."""

    def test_blocklist_is_non_empty(self):
        """The blocklist must contain at least one word."""
        assert ge.FALSE_POSITIVE_BLOCKLIST

    def test_no_blocklist_word_in_dev_triggers(self):
        """All DEV words combined must not produce any blocked trigger."""
        # Collect every offender first so one failure reports all of them
        # instead of stopping at the first bad word.
        offenders = {}
        for word in ge.DEV_WORDS:
            if len(word) < 3:
                continue
            typos = ge.generate_all_typos(word, include_accents=False)
            blocked_hits = typos & ge.FALSE_POSITIVE_BLOCKLIST
            if blocked_hits:
                offenders[word] = blocked_hits
        assert not offenders, (
            f"Blocklisted triggers generated for dev words: {offenders!r}"
        )

    def test_blocklist_words_are_excluded_regardless_of_all_words(self, monkeypatch):
        """Blocklist takes effect even when ALL_WORDS is empty."""
        monkeypatch.setattr(ge, "ALL_WORDS", set())
        typos = ge.generate_all_typos("string", include_accents=False)
        assert "sting" not in typos

    def test_blocklist_does_not_affect_non_blocked_typos(self, monkeypatch):
        """Legitimate typos that are not in the blocklist still appear."""
        blocklist = ge.FALSE_POSITIVE_BLOCKLIST
        monkeypatch.setattr(ge, "ALL_WORDS", set())
        typos = ge.generate_all_typos("string", include_accents=False)

        # Baseline: the same generation with the blocklist disabled.
        monkeypatch.setattr(ge, "FALSE_POSITIVE_BLOCKLIST", frozenset())
        baseline = ge.generate_all_typos("string", include_accents=False)

        assert typos, "expected at least one typo for 'string'"
        # The blocklist may remove only its own members, nothing else.
        assert typos == baseline - blocklist
3 changes: 2 additions & 1 deletion tests/test_yaml_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def test_every_match_has_triggers_replace_word(self):
doc = _parse(content)
for match in doc["matches"]:
assert "triggers" in match, f"Match missing triggers: {match}"
assert isinstance(match["triggers"], list), f"triggers should be a list: {match}"
assert isinstance(match["triggers"], list), f"triggers must be a list: {match}"
assert len(match["triggers"]) > 0, f"triggers list is empty: {match}"
assert "replace" in match, f"Match missing replace: {match}"
assert match.get("word") is True, f"Match missing word:true: {match}"

Expand Down
Loading