-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathMakefile
More file actions
59 lines (46 loc) · 2.12 KB
/
Makefile
File metadata and controls
59 lines (46 loc) · 2.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# Every target in this file is a command, not a file it produces, so all of
# them are declared phony.
.PHONY: help install test test-ci lint format typecheck clean corpus-download bench-robust bench-retrieval bench

# Interpreter used by every recipe. Override from the environment or the
# command line, e.g. `make PYTHON=python3.12 test`.
PYTHON ?= python

# Project version, read from the [project] table of pyproject.toml.
#
# The lookup shells out to Python and needs `tomllib`, which only exists in
# Python 3.11+. It is therefore deferred: nothing runs while the Makefile is
# parsed, so targets that never reference PKG_VERSION (help, clean, ...) do not
# fork an interpreter or print a traceback on older Pythons. On first use the
# $(eval) redefines PKG_VERSION as a simple variable holding the result, so the
# shell command runs at most once per make invocation.
PKG_VERSION = $(eval PKG_VERSION := $(shell $(PYTHON) -c "import tomllib, pathlib; print(tomllib.loads(pathlib.Path('pyproject.toml').read_text())['project']['version'])"))$(PKG_VERSION)
# Print the list of commonly used targets. As the first rule in the file this
# is also what a bare `make` runs. Each argument below becomes one output line;
# the empty arguments produce the blank separator lines.
help:
	@printf '%s\n' \
	  "ks-xlsx-parser — common targets" \
	  "" \
	  " make install Install package and dev deps (editable)" \
	  " make test Run the default test suite" \
	  " make test-ci Run the suite with verbose output for CI" \
	  "" \
	  " make lint Ruff lint" \
	  " make format Ruff format" \
	  " make typecheck mypy" \
	  "" \
	  " make corpus-download Fetch SpreadsheetBench for benchmark runs" \
	  "" \
	  " make bench-robust Robustness on SpreadsheetBench (ks vs docling, ~20 min)" \
	  " make bench-retrieval Retrieval recall on SpreadsheetBench (ks vs docling, ~40 min)" \
	  " make bench Run both benchmarks back-to-back"
# Install the project from the current directory in editable mode (-e),
# together with its "dev" and "api" extras, using pip under $(PYTHON).
install:
	$(PYTHON) -m pip install -e ".[dev,api]"
# Arguments common to the local and CI pytest runs: collect from tests/,
# verbose output, short tracebacks, and UserWarning filtered out.
pytest_args := tests/ -v --tb=short -W ignore::UserWarning

# Default test run.
test:
	$(PYTHON) -m pytest $(pytest_args)

# Same run as `test`, additionally writing a JUnit XML report to
# reports/junit.xml.
test-ci:
	$(PYTHON) -m pytest $(pytest_args) --junitxml=reports/junit.xml
# Source trees handed to Ruff by both targets below.
ruff_paths := src/ tests/ scripts/

# Report lint findings (`ruff check`).
lint:
	$(PYTHON) -m ruff check $(ruff_paths)

# Run the Ruff formatter (`ruff format`) over the same trees.
format:
	$(PYTHON) -m ruff format $(ruff_paths)
# Type-check the package sources under src/xlsx_parser with mypy.
typecheck:
	$(PYTHON) -m mypy src/xlsx_parser
# Delete build output, egg-info metadata and tool caches, then remove every
# __pycache__ directory below the current directory. The globs are expanded by
# the shell, and `rm -rf` does not fail on paths that do not exist.
clean:
	rm -rf \
	  build/ dist/ \
	  *.egg-info src/*.egg-info \
	  .pytest_cache .ruff_cache .mypy_cache
	find . -type d -name __pycache__ -prune -exec rm -rf {} +
# Run the corpus download script (described by `make help` as fetching
# SpreadsheetBench for the benchmark targets). The script is invoked directly,
# so it must be executable.
corpus-download:
	./scripts/download_corpora.sh
# Robustness benchmark: run the tests.benchmarks.vs_hucre module with src/ on
# PYTHONPATH against the SpreadsheetBench corpus for the "ks" and "docling"
# parsers, with a per-file timeout of 120 (unit defined by the script;
# presumably seconds), writing results under
# tests/benchmarks/reports/spreadsheetbench.
#
# The first line is a guard: if the corpus directory is absent, the hint is
# printed to stderr (it is a diagnostic, not benchmark output) and the target
# fails before any Python starts.
bench-robust:
	@test -d data/corpora/spreadsheetbench || (echo "Corpus missing. Run 'make corpus-download' first." >&2 && exit 1)
	PYTHONPATH=src $(PYTHON) -m tests.benchmarks.vs_hucre \
	  --corpus data/corpora/spreadsheetbench --parsers ks,docling \
	  --per-file-timeout 120 \
	  --out tests/benchmarks/reports/spreadsheetbench
# Retrieval benchmark: run scripts/eval_retrieval.py with src/ on PYTHONPATH
# for the "ks" and "docling" parsers.
#
# The first line is a guard: if the SpreadsheetBench corpus directory is
# absent, the hint is printed to stderr (it is a diagnostic, not benchmark
# output) and the target fails before any Python starts.
bench-retrieval:
	@test -d data/corpora/spreadsheetbench || (echo "Corpus missing. Run 'make corpus-download' first." >&2 && exit 1)
	PYTHONPATH=src $(PYTHON) scripts/eval_retrieval.py --parsers ks,docling
# Run both benchmarks strictly one after the other: robustness first, then
# retrieval.
#
# They are invoked as sequential sub-makes rather than listed as
# prerequisites. Prerequisites may be built concurrently under `make -j`, which
# would make two long benchmarks compete for CPU and distort the per-file
# timeout. Recipe lines always run in order, and a failure in the first stops
# the second. $(MAKE) (not a literal `make`) keeps -n and command-line
# variables such as PYTHON=... flowing through.
bench:
	$(MAKE) bench-robust
	$(MAKE) bench-retrieval