diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 1540957..6f945a2 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3 @@ -26,14 +26,15 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest + pip install coverage pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Lint with ruff + uses: astral-sh/ruff-action@v3 + with: + args: check - name: Test with pytest run: | - pytest + coverage run -m pytest + - name: Show coverage report + run: | + coverage report -mi --fail-under=${{ vars.COVERAGE_FAIL_UNDER}} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 58d0934..cf37b2d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,19 +5,12 @@ repos: - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace -- repo: https://github.com/psf/black - rev: 22.8.0 + +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.5 hooks: - - id: black -- repo: https://github.com/pycqa/isort - rev: 5.12.0 - hooks: - - id: isort - name: isort (python) - args: ["--profile", "black"] -- repo: https://github.com/pycqa/flake8 - rev: 5.0.4 - hooks: - - id: flake8 - args: [--max-line-length=100, "-j8", "--ignore=E203,E501,P103,F403,F405,W503"] - additional_dependencies: [flake8-isort] + - id: ruff + types_or: [ python, pyi ] + args: [ --fix ] + - id: ruff-format + types_or: [ python, pyi ] diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..a3a1838 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "tests" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/filext/__init__.py b/filext/__init__.py index d778716..01953b7 100644 --- a/filext/__init__.py +++ b/filext/__init__.py @@ -1,67 +1,7 @@ -from typing import Union +from filext.functions import whatdoc, whatimage, whatfile -from filext.classifiers import * -from filext.utils import lookup_file_type - - -def whatfile(file: Union[str, bytes]) -> Union[str, None]: - """ - Identifies the file type by calling each classifier - - Args: - file (Union[str, bytes]): path to file or bytes of file - - Returns: - str: found file type or None - """ - classifiers = ( - whatdoc, - whatimage, - ) - for classifier in classifiers: - result = classifier(file) - if result: - return result - - -def whatdoc(file: Union[str, bytes]) -> Union[str, None]: - """ - Identifies the file type by calling each document classifier - - Args: - file (Union[str, bytes]): path to file or bytes of file - - Returns: - Union[str,None]: found file type or None - """ - file_types = { - "docx": is_docx, - "pptx": is_pptx, - "xlsx": is_xlsx, - "pdf": is_pdf, - "doc": is_doc, - "xls": is_xls, - "ppt": is_ppt, - } - return lookup_file_type(file, file_types) - - -def whatimage(file: Union[str, bytes]) -> Union[str, None]: - """ - Identifies the file type by calling each image classifier - - Args: - file (Union[str, bytes]): path to file or bytes of file - - Returns: - Union[str,None]: found file type or None - """ - file_types = { - "bmp": is_bmp, - "gif": is_gif, - "heic": is_heic, - "jpg": is_jpg, - "png": is_png, - "tif": is_tif, - } - return lookup_file_type(file, file_types) +__all__ = [ + "whatdoc", + "whatimage", + "whatfile", +] diff --git a/filext/functions.py b/filext/functions.py new file mode 100644 index 0000000..159a1f0 --- /dev/null +++ b/filext/functions.py @@ -0,0 +1,81 @@ +from typing import Union + +from filext.classifiers import ( + is_bmp, + is_doc, + is_docx, + is_gif, + is_heic, + is_jpg, + is_pdf, + is_png, + is_ppt, + is_pptx, + is_tif, + is_xls, + is_xlsx, +) +from filext.utils import lookup_file_type + + +def whatfile(file: Union[str, bytes]) -> Union[str, None]: + """ + Identifies the file type by calling each classifier + + Args: + file (Union[str, bytes]): path to file or bytes of file + + Returns: + str: found file type or None + """ + classifiers = ( + whatdoc, + whatimage, + ) + for classifier in classifiers: + result = classifier(file) + if result: + return result + + +def whatdoc(file: Union[str, bytes]) -> Union[str, None]: + """ + Identifies the file type by calling each document classifier + + Args: + file (Union[str, bytes]): path to file or bytes of file + + Returns: + Union[str,None]: found file type or None + """ + file_types = { + "docx": is_docx, + "pptx": is_pptx, + "xlsx": is_xlsx, + "pdf": is_pdf, + "doc": is_doc, + "xls": is_xls, + "ppt": is_ppt, + } + return lookup_file_type(file, file_types) + + +def whatimage(file: Union[str, bytes]) -> Union[str, None]: + """ + Identifies the file type by calling each image classifier + + Args: + file (Union[str, bytes]): path to file or bytes of file + + Returns: + Union[str,None]: found file type or None + """ + file_types = { + "bmp": is_bmp, + "gif": is_gif, + "heic": is_heic, + "jpg": is_jpg, + "png": is_png, + "tif": is_tif, + } + return lookup_file_type(file, file_types) diff --git a/filext/utils.py b/filext/utils.py index fbdf9b5..ba9500e 100644 --- a/filext/utils.py +++ b/filext/utils.py @@ -1,8 +1,9 @@ from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path from typing import Callable, Dict, Union -def get_bytes(file: Union[str, bytes]) -> bytes: +def get_bytes(file: Union[str, bytes, Path]) -> bytes: """ Returns the raw bytes of a given file. @@ -20,6 +21,8 @@ def get_bytes(file: Union[str, bytes]) -> bytes: file_bytes = fh.read() elif isinstance(file, bytes): file_bytes = file + elif isinstance(file, Path): + file_bytes = file.read_bytes() else: raise TypeError return file_bytes diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..fbac2ec --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,6 @@ +-r requirements.txt +commitizen +coverage +pre-commit +pytest +ruff diff --git a/tests/test_utils.py b/tests/test_utils.py index a5f8624..cd692c7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,9 +1,14 @@ +from pathlib import Path + import pytest from filext.utils import get_bytes -@pytest.mark.parametrize("file", ["tests/files/image.png"]) +@pytest.mark.parametrize( + "file", + ["tests/files/image.png", Path("tests/files/image.png")], +) def test_get_bytes_valid(file): assert isinstance(get_bytes(file), bytes)