Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@
language: python
pass_filenames: false
always_run: true
- id: remove-em-dash
name: remove em-dash
description: replaces em-dashes with a plain hyphen.
entry: remove-em-dash
language: python
types: [text]
stages: [pre-commit, pre-push, manual]
minimum_pre_commit_version: 3.2.0
- id: requirements-txt-fixer
name: fix requirements.txt
description: sorts entries in requirements.txt.
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ the following commandline options:
- `--no-sort-keys` - when autofixing, retain the original key ordering (instead of sorting the keys)
- `--top-keys comma,separated,keys` - Keys to keep at the top of mappings.

#### `remove-em-dash`
Replaces em-dashes (Unicode `U+2014`) with a plain hyphen (`-`).
- Only the UTF-8 byte sequence of the em-dash (`E2 80 94`) is replaced; em-dashes
stored in other encodings (e.g. Windows-1252 `0x97`) are left untouched.

#### `requirements-txt-fixer`
Sorts entries in requirements.txt and constraints.txt and removes incorrect entry for `pkg-resources==0.0.0`

Expand Down
34 changes: 34 additions & 0 deletions pre_commit_hooks/remove_em_dash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from __future__ import annotations

import argparse
from collections.abc import Sequence

# Byte sequence searched for in files: U+2014 encoded with str.encode()'s
# default codec (UTF-8), so matching can be done on raw bytes.
EM_DASH = '\N{EM DASH}'.encode()


def _fix_file(filename: str) -> bool:
    """Replace every ``EM_DASH`` byte sequence in *filename* with ``b'-'``.

    The file is read and written in binary mode, so no decoding is
    performed and all other bytes (including line endings) are preserved.

    Returns ``True`` if the file was rewritten, ``False`` if it contained
    no em-dash and was left alone.
    """
    with open(filename, 'rb') as src:
        original = src.read()

    # Nothing to replace: avoid touching the file at all.
    if EM_DASH not in original:
        return False

    with open(filename, 'wb') as dst:
        dst.write(original.replace(EM_DASH, b'-'))
    return True


def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: fix each file named on the command line.

    *argv* is handed to ``argparse``; ``None`` means "use ``sys.argv``".

    Returns ``1`` if at least one file was modified (each such file is
    reported on stdout), otherwise ``0``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('filenames', nargs='*', help='Filenames to fix')
    options = cli.parse_args(argv)

    exit_code = 0
    for path in options.filenames:
        if not _fix_file(path):
            continue
        # Report as we go so output stays interleaved with processing.
        print(f'Fixing {path}')
        exit_code = 1
    return exit_code


if __name__ == '__main__':
raise SystemExit(main())
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ console_scripts =
no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main
pre-commit-hooks-removed = pre_commit_hooks.removed:main
pretty-format-json = pre_commit_hooks.pretty_format_json:main
remove-em-dash = pre_commit_hooks.remove_em_dash:main
requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:main
sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main
trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:main
Expand Down
36 changes: 36 additions & 0 deletions tests/remove_em_dash_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

import pytest

from pre_commit_hooks.remove_em_dash import main


@pytest.mark.parametrize(
    ('text', 'expected'),
    (
        ('foo\N{EM DASH}bar\n', b'foo-bar\n'),
        ('foo \N{EM DASH} bar\n', b'foo - bar\n'),
        ('a\N{EM DASH}b\N{EM DASH}c\n', b'a-b-c\n'),
        ('x\N{EM DASH}y\r\nz\r\n', b'x-y\r\nz\r\n'),
    ),
)
def test_fixes_em_dash(text, expected, tmpdir):
    """A file holding UTF-8 em-dashes is rewritten and main() returns 1."""
    # Arrange: write the sample text to disk as UTF-8 bytes.
    target = tmpdir.join('file.txt')
    target.write_binary(text.encode())

    # Act
    exit_code = main((str(target),))

    # Assert: the hook reports a change and the bytes on disk are fixed.
    assert exit_code == 1
    assert target.read_binary() == expected


@pytest.mark.parametrize(
    'contents',
    (
        pytest.param(b'foo-bar\n', id='plain-hyphen'),
        pytest.param(b'no em dashes here\n', id='no-dash'),
        pytest.param(b'<a>\x97</a>\n', id='windows-1252-em-dash'),
    ),
)
def test_noop_without_utf8_em_dash(contents, tmpdir):
    """Without a UTF-8 em-dash, main() returns 0 and the file is unchanged."""
    # Arrange: write the raw bytes exactly as given.
    target = tmpdir.join('file.txt')
    target.write_binary(contents)

    # Act
    exit_code = main((str(target),))

    # Assert: nothing reported and the bytes on disk are identical.
    assert exit_code == 0
    assert target.read_binary() == contents
Loading