diff --git a/pyproject.toml b/pyproject.toml index 09d87b2..9ad04d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ dependencies = [ "sacremoses>=0.1.1", "lingua-language-detector>=2.0.0", "tomli>=2.0.0;python_version<'3.11'", + "emoji>=2.15.0", ] [project.optional-dependencies] diff --git a/src/scribae/feedback.py b/src/scribae/feedback.py index 5c3d838..d60e449 100644 --- a/src/scribae/feedback.py +++ b/src/scribae/feedback.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import Any, Literal, cast +import emoji import frontmatter import yaml from pydantic import BaseModel, ConfigDict, ValidationError, field_validator @@ -26,33 +27,10 @@ logger = logging.getLogger(__name__) -# Pattern to match emoji characters across common Unicode ranges -_EMOJI_PATTERN = re.compile( - "[" - "\U0001f600-\U0001f64f" # emoticons - "\U0001f300-\U0001f5ff" # symbols & pictographs - "\U0001f680-\U0001f6ff" # transport & map symbols - "\U0001f1e0-\U0001f1ff" # flags - "\U00002700-\U000027bf" # dingbats - "\U0001f900-\U0001f9ff" # supplemental symbols & pictographs - "\U0001fa00-\U0001fa6f" # chess symbols, extended-A - "\U0001fa70-\U0001faff" # symbols & pictographs extended-A - "\U00002600-\U000026ff" # misc symbols - "\U0001f700-\U0001f77f" # alchemical symbols - "\U0001f780-\U0001f7ff" # geometric shapes extended - "\U0001f800-\U0001f8ff" # supplemental arrows-C - "\U0001f3fb-\U0001f3ff" # skin tone modifiers - "\ufe0f" # variation selector-16 (emoji presentation) - "\u200d" # zero-width joiner (used in combined emojis) - "]+", - flags=re.UNICODE, -) - def strip_emojis(value: str) -> str: """Remove emoji characters from a string and clean up extra whitespace.""" - result = _EMOJI_PATTERN.sub(" ", value) - # Collapse multiple spaces and strip + result = emoji.replace_emoji(value, replace=" ") return " ".join(result.split()) diff --git a/uv.lock b/uv.lock index 6d2e3b5..9bf8882 100644 --- a/uv.lock +++ b/uv.lock @@ -632,6 +632,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, ] +[[package]] +name = "emoji" +version = "2.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/78/0d2db9382c92a163d7095fc08efff7800880f830a152cfced40161e7638d/emoji-2.15.0.tar.gz", hash = "sha256:eae4ab7d86456a70a00a985125a03263a5eac54cd55e51d7e184b1ed3b6757e4", size = 615483, upload-time = "2025-09-21T12:13:02.755Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/5e/4b5aaaabddfacfe36ba7768817bd1f71a7a810a43705e531f3ae4c690767/emoji-2.15.0-py3-none-any.whl", hash = "sha256:205296793d66a89d88af4688fa57fd6496732eb48917a87175a023c8138995eb", size = 608433, upload-time = "2025-09-21T12:13:01.197Z" }, +] + [[package]] name = "eval-type-backport" version = "0.3.1" @@ -3444,6 +3453,7 @@ name = "scribae" version = "0.2.0" source = { editable = "." } dependencies = [ + { name = "emoji" }, { name = "lingua-language-detector" }, { name = "pydantic" }, { name = "pydantic-ai" }, @@ -3474,6 +3484,7 @@ dev = [ [package.metadata] requires-dist = [ + { name = "emoji", specifier = ">=2.15.0" }, { name = "lingua-language-detector", specifier = ">=2.0.0" }, { name = "pydantic", specifier = ">=2.12.4" }, { name = "pydantic-ai", specifier = ">=1.12.0" },