Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 10 additions & 9 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11"]

steps:
- uses: actions/checkout@v3
Expand All @@ -26,14 +26,15 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install coverage pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Lint with ruff
uses: astral-sh/ruff-action@v3
with:
args: check
- name: Test with pytest
run: |
pytest
coverage run -m pytest
- name: Show coverage report
run: |
coverage report -mi --fail-under=${{ vars.COVERAGE_FAIL_UNDER}}
23 changes: 8 additions & 15 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,12 @@ repos:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 22.8.0

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.11.5
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.12.0
hooks:
- id: isort
name: isort (python)
args: ["--profile", "black"]
- repo: https://github.com/pycqa/flake8
rev: 5.0.4
hooks:
- id: flake8
args: [--max-line-length=100, "-j8", "--ignore=E203,E501,P103,F403,F405,W503"]
additional_dependencies: [flake8-isort]
- id: ruff
types_or: [ python, pyi ]
args: [ --fix ]
- id: ruff-format
types_or: [ python, pyi ]
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.testing.pytestArgs": [
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
72 changes: 6 additions & 66 deletions filext/__init__.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,7 @@
from typing import Union
from filext.functions import whatdoc, whatimage, whatfile

from filext.classifiers import *
from filext.utils import lookup_file_type


def whatfile(file: Union[str, bytes]) -> Union[str, None]:
    """
    Detect a file's type by trying the document and image classifiers in turn.

    Args:
        file (Union[str, bytes]): path to file or bytes of file

    Returns:
        Union[str, None]: the first truthy classifier result, or None when
        neither classifier produces one
    """
    # The generator is lazy: whatimage only runs when whatdoc gave no result.
    outcomes = (detect(file) for detect in (whatdoc, whatimage))
    return next((found for found in outcomes if found), None)


def whatdoc(file: Union[str, bytes]) -> Union[str, None]:
    """
    Detect a document type by handing the document classifiers to
    ``lookup_file_type``.

    Args:
        file (Union[str, bytes]): path to file or bytes of file

    Returns:
        Union[str, None]: whatever ``lookup_file_type`` reports for the
        document classifiers (a file type or None)
    """
    # Keys are the file-type labels; values are the matching classifiers.
    return lookup_file_type(
        file,
        {
            "docx": is_docx,
            "pptx": is_pptx,
            "xlsx": is_xlsx,
            "pdf": is_pdf,
            "doc": is_doc,
            "xls": is_xls,
            "ppt": is_ppt,
        },
    )


def whatimage(file: Union[str, bytes]) -> Union[str, None]:
    """
    Detect an image type by handing the image classifiers to
    ``lookup_file_type``.

    Args:
        file (Union[str, bytes]): path to file or bytes of file

    Returns:
        Union[str, None]: whatever ``lookup_file_type`` reports for the
        image classifiers (a file type or None)
    """
    # Keys are the file-type labels; values are the matching classifiers.
    return lookup_file_type(
        file,
        {
            "bmp": is_bmp,
            "gif": is_gif,
            "heic": is_heic,
            "jpg": is_jpg,
            "png": is_png,
            "tif": is_tif,
        },
    )
# Public names exported by the package; each is imported from filext.functions.
__all__ = [
"whatdoc",
"whatimage",
"whatfile",
]
81 changes: 81 additions & 0 deletions filext/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from typing import Union

from filext.classifiers import (
is_bmp,
is_doc,
is_docx,
is_gif,
is_heic,
is_jpg,
is_pdf,
is_png,
is_ppt,
is_pptx,
is_tif,
is_xls,
is_xlsx,
)
from filext.utils import lookup_file_type


def whatfile(file: Union[str, bytes]) -> Union[str, None]:
    """
    Detect a file's type by trying the document and image classifiers in turn.

    Args:
        file (Union[str, bytes]): path to file or bytes of file

    Returns:
        Union[str, None]: the first truthy classifier result, or None when
        neither classifier produces one
    """
    # The generator is lazy: whatimage only runs when whatdoc gave no result.
    outcomes = (detect(file) for detect in (whatdoc, whatimage))
    return next((found for found in outcomes if found), None)


def whatdoc(file: Union[str, bytes]) -> Union[str, None]:
    """
    Detect a document type by handing the document classifiers to
    ``lookup_file_type``.

    Args:
        file (Union[str, bytes]): path to file or bytes of file

    Returns:
        Union[str, None]: whatever ``lookup_file_type`` reports for the
        document classifiers (a file type or None)
    """
    # Keys are the file-type labels; values are the matching classifiers.
    return lookup_file_type(
        file,
        {
            "docx": is_docx,
            "pptx": is_pptx,
            "xlsx": is_xlsx,
            "pdf": is_pdf,
            "doc": is_doc,
            "xls": is_xls,
            "ppt": is_ppt,
        },
    )


def whatimage(file: Union[str, bytes]) -> Union[str, None]:
    """
    Detect an image type by handing the image classifiers to
    ``lookup_file_type``.

    Args:
        file (Union[str, bytes]): path to file or bytes of file

    Returns:
        Union[str, None]: whatever ``lookup_file_type`` reports for the
        image classifiers (a file type or None)
    """
    # Keys are the file-type labels; values are the matching classifiers.
    return lookup_file_type(
        file,
        {
            "bmp": is_bmp,
            "gif": is_gif,
            "heic": is_heic,
            "jpg": is_jpg,
            "png": is_png,
            "tif": is_tif,
        },
    )
5 changes: 4 additions & 1 deletion filext/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Callable, Dict, Union


def get_bytes(file: Union[str, bytes]) -> bytes:
def get_bytes(file: Union[str, bytes, Path]) -> bytes:
"""
Returns the raw bytes of a given file.

Expand All @@ -20,6 +21,8 @@ def get_bytes(file: Union[str, bytes]) -> bytes:
file_bytes = fh.read()
elif isinstance(file, bytes):
file_bytes = file
elif isinstance(file, Path):
file_bytes = file.read_bytes()
else:
raise TypeError
return file_bytes
Expand Down
6 changes: 6 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-r requirements.txt
commitizen
coverage
pre-commit
pytest
ruff
7 changes: 6 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from pathlib import Path

import pytest

from filext.utils import get_bytes


@pytest.mark.parametrize("file", ["tests/files/image.png"])
@pytest.mark.parametrize(
    "file",
    [
        "tests/files/image.png",
        Path("tests/files/image.png"),
    ],
)
def test_get_bytes_valid(file):
    """get_bytes yields a bytes object for a str path and for a Path."""
    payload = get_bytes(file)
    assert isinstance(payload, bytes)

Expand Down