From 5862f0b0769181d52e902744eae8e6bfc4f4fa33 Mon Sep 17 00:00:00 2001
From: airmang
Date: Tue, 10 Mar 2026 10:06:52 +0900
Subject: [PATCH 1/2] chore: align license metadata
---
CHANGELOG.md | 6 +++
DevDoc/license-alignment-audit.md | 41 +++++++++++++++
DevDoc/license-metadata-policy.md | 28 +++++++++++
README.md | 10 ++--
pyproject.toml | 9 ++--
tests/test_packaging_license_metadata.py | 63 ++++++++++++++++++++++++
6 files changed, 149 insertions(+), 8 deletions(-)
create mode 100644 DevDoc/license-alignment-audit.md
create mode 100644 DevDoc/license-metadata-policy.md
create mode 100644 tests/test_packaging_license_metadata.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e86e568..a4d169c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,12 @@
모든 중요한 변경 사항은 이 문서에 기록됩니다. 형식은 [Keep a Changelog](https://keepachangelog.com/ko/1.1.0/)과 [Semantic Versioning](https://semver.org/lang/ko/)을 따릅니다.
+## [2.8.3] - 2026-03-10
+### 변경
+- 저장소와 배포 메타데이터의 라이선스 표기를 실제 `LICENSE` 파일과 일치하도록 정렬했습니다.
+- `pyproject.toml`을 PEP 639 방식의 `LicenseRef-python-hwpx-NonCommercial` + `license-files` 구성으로 갱신하고, 잘못된 MIT 분류자를 제거했습니다.
+- README 라이선스 배지/섹션을 커스텀 비상업적 라이선스 기준으로 수정하고, wheel/sdist 산출물의 라이선스 메타데이터를 검증하는 회귀 테스트를 추가했습니다.
+
## [2.8.2] - 2026-03-08
### 변경
- README를 현재 공개 API와 CLI 범위에 맞춰 정리했습니다. Quick start, 텍스트 추출, 객체 검색 예시를 실제 호출 방식 기준으로 수정했습니다.
diff --git a/DevDoc/license-alignment-audit.md b/DevDoc/license-alignment-audit.md
new file mode 100644
index 0000000..bded63a
--- /dev/null
+++ b/DevDoc/license-alignment-audit.md
@@ -0,0 +1,41 @@
+# License Alignment Audit
+
+Date: 2026-03-10
+
+## Files inspected
+
+- `LICENSE`
+- `README.md`
+- `pyproject.toml`
+- `docs/conf.py`
+- `CONTRIBUTING.md`
+- `.github/workflows/release.yml`
+- `.github/workflows/tests.yml`
+- `scripts/build-and-publish.sh`
+- `tests/test_packaging_py_typed.py`
+- Repo-wide searches across `docs/`, `.github/`, `DevDoc/`, `CHANGELOG.md`, and the repository root for license-related metadata and MIT references
+
+## Contradictions found before this change
+
+- `LICENSE` defined a custom non-commercial license and named `python-hwpx Maintainers` as the copyright holder.
+- `README.md` showed an MIT badge and an MIT license section, which contradicted the actual license text.
+- `README.md` attributed the license line to `고규현 (Kyuhyun Koh)`, while the `LICENSE` file and package metadata used `python-hwpx Maintainers`.
+- `pyproject.toml` used the legacy `license = { file = "LICENSE" }` form and also published the classifier `License :: OSI Approved :: MIT License`, which falsely represented the distribution as MIT-licensed.
+
+## Source of truth
+
+- The repository root `LICENSE` file is the source of truth for license terms.
+- This audit treats the project as remaining under its existing custom non-commercial license. No evidence of an intentional relicensing to MIT was found elsewhere in the repository.
+
+## Decision summary
+
+- Preserve the current non-commercial custom license.
+- Align public-facing metadata and README wording to that license.
+- Use modern packaging metadata that points built distributions back to the root `LICENSE` file without inventing an OSI identifier.
+- Remove conflicting MIT wording and the MIT trove classifier rather than replacing it with another potentially ambiguous license classifier.
+
+## Notes on surfaces inspected
+
+- `docs/conf.py` already used `python-hwpx Maintainers` and did not restate MIT licensing.
+- No GitHub Pages or docs markdown pages were found to restate the project license.
+- The release workflow already builds distributions and runs `twine check`, so it was left in place and used for verification after the metadata update.
diff --git a/DevDoc/license-metadata-policy.md b/DevDoc/license-metadata-policy.md
new file mode 100644
index 0000000..0b46573
--- /dev/null
+++ b/DevDoc/license-metadata-policy.md
@@ -0,0 +1,28 @@
+# License Metadata Policy
+
+## Source of truth
+
+- The root `LICENSE` file defines the project's license terms.
+- Metadata changes must reflect the current `LICENSE` text. Do not treat README text, badges, or historical PyPI metadata as authoritative.
+
+## Packaging rule
+
+- `pyproject.toml` must represent the current custom license with `project.license = "LicenseRef-python-hwpx-NonCommercial"`.
+- `pyproject.toml` must list `project.license-files = ["LICENSE"]` so both `sdist` and `wheel` carry the license file.
+- Keep the build backend compatible with that metadata format by requiring `setuptools>=77.0.0`.
+
+## Classifier rule
+
+- Do not add `License ::` trove classifiers for this project unless the `LICENSE` file changes to a classifier-backed license and the classifier is verified to be accurate.
+- For the current custom non-commercial license, leaving license classifiers unset is less ambiguous than picking an approximate classifier.
+
+## README rule
+
+- The README badge and license section must describe the project as using a custom non-commercial license and link to `LICENSE`.
+- If contact information is updated, keep it distinct from the copyright/licensing line unless the `LICENSE` file is updated too.
+
+## Verification rule
+
+- Before release or after touching license metadata, run `python -m build` and `twine check dist/*`.
+- Inspect built `PKG-INFO` and wheel `METADATA` for `License-Expression: LicenseRef-python-hwpx-NonCommercial` and `License-File: LICENSE`.
+- Confirm the wheel contains `.dist-info/licenses/LICENSE` and the sdist contains the root `LICENSE` file.
diff --git a/README.md b/README.md
index 9153e84..b90bd97 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
-
+
@@ -286,13 +286,15 @@ pytest
## License
-[MIT](LICENSE) © 고규현 (Kyuhyun Koh)
+[Custom Non-Commercial License](LICENSE) © python-hwpx Maintainers
+
+Commercial use requires separate permission from the copyright holders.
-## Author
+## Maintainer
-**고규현** — 광교고등학교 정보·컴퓨터 교사
+Primary maintainer/contact: **고규현** — 광교고등학교 정보·컴퓨터 교사
- ✉️ [kokyuhyun@hotmail.com](mailto:kokyuhyun@hotmail.com)
- 🐙 [@airmang](https://github.com/airmang)
diff --git a/pyproject.toml b/pyproject.toml
index 844c0b6..85ed3d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,14 @@
[build-system]
-requires = ["setuptools", "wheel"]
+requires = ["setuptools>=77.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "python-hwpx"
-version = "2.8.2"
+version = "2.8.3"
description = "Hancom HWPX 패키지를 로드하고 편집하기 위한 Python 유틸리티 모음"
readme = { file = "README.md", content-type = "text/markdown" }
-license = { file = "LICENSE" }
+license = "LicenseRef-python-hwpx-NonCommercial"
+license-files = ["LICENSE"]
requires-python = ">=3.10"
authors = [
{ name = "python-hwpx Maintainers" },
@@ -16,7 +17,6 @@ keywords = ["hwp", "hwpx", "hancom", "opc", "xml"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
- "License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
@@ -35,6 +35,7 @@ dev = [
"pytest>=7.4",
]
test = [
+ "build>=1.0",
"pytest>=7.4",
"pytest-cov>=5.0",
]
diff --git a/tests/test_packaging_license_metadata.py b/tests/test_packaging_license_metadata.py
new file mode 100644
index 0000000..9f7ab19
--- /dev/null
+++ b/tests/test_packaging_license_metadata.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+
+import subprocess
+import sys
+import tarfile
+from pathlib import Path
+from zipfile import ZipFile
+
+import pytest
+
+
+LICENSE_EXPRESSION = "LicenseRef-python-hwpx-NonCommercial"
+
+
+def _build_distribution(tmp_path: Path, distribution: str) -> Path:
+    """Build the requested distribution into *tmp_path* and return the artifact path.
+
+    The calling test is skipped when the ``build`` package cannot be imported.
+    ``python -m build --<distribution>`` runs from the repository root and
+    must exit successfully.
+    """
+    pytest.importorskip("build")
+
+    repo_root = Path(__file__).resolve().parents[1]
+    subprocess.run(
+        [sys.executable, "-m", "build", f"--{distribution}", "--outdir", str(tmp_path)],
+        cwd=repo_root,
+        check=True,
+    )
+
+    if distribution == "wheel":
+        artifact_glob = "*.whl"
+    else:
+        artifact_glob = "*.tar.gz"
+    return next(tmp_path.glob(artifact_glob))
+
+
+@pytest.mark.parametrize("distribution", ["wheel", "sdist"])
+def test_built_distributions_expose_custom_license_metadata(
+    tmp_path: Path, distribution: str
+) -> None:
+    """Built wheel and sdist carry the custom license expression and the LICENSE file."""
+    built = _build_distribution(tmp_path, distribution)
+
+    if distribution == "wheel":
+        with ZipFile(built) as archive:
+            names = set(archive.namelist())
+            metadata_path = next(
+                entry for entry in names if entry.endswith(".dist-info/METADATA")
+            )
+            metadata = archive.read(metadata_path).decode("utf-8")
+        license_suffix = ".dist-info/licenses/LICENSE"
+    else:
+        with tarfile.open(built, "r:gz") as archive:
+            names = archive.getnames()
+            metadata_path = next(entry for entry in names if entry.endswith("/PKG-INFO"))
+            handle = archive.extractfile(metadata_path)
+            assert handle is not None
+            metadata = handle.read().decode("utf-8")
+        license_suffix = "/LICENSE"
+
+    assert f"License-Expression: {LICENSE_EXPRESSION}" in metadata
+    assert "License-File: LICENSE" in metadata
+    assert "Classifier: License ::" not in metadata
+    assert any(entry.endswith(license_suffix) for entry in names)
From 3e20156059a3562eb54001649162ae7c2eac4255 Mon Sep 17 00:00:00 2001
From: airmang
Date: Thu, 2 Apr 2026 12:32:43 +0900
Subject: [PATCH 2/2] feat: add table navigation helpers
---
CHANGELOG.md | 9 +
README.md | 12 +
docs/api_reference.md | 6 +
pyproject.toml | 2 +-
src/hwpx/__init__.py | 6 +-
src/hwpx/document.py | 33 ++-
src/hwpx/tools/__init__.py | 24 ++
src/hwpx/tools/table_navigation.py | 457 +++++++++++++++++++++++++++++
tests/test_table_navigation.py | 183 ++++++++++++
9 files changed, 729 insertions(+), 3 deletions(-)
create mode 100644 src/hwpx/tools/table_navigation.py
create mode 100644 tests/test_table_navigation.py
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a4d169c..24e35fc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,15 @@
모든 중요한 변경 사항은 이 문서에 기록됩니다. 형식은 [Keep a Changelog](https://keepachangelog.com/ko/1.1.0/)과 [Semantic Versioning](https://semver.org/lang/ko/)을 따릅니다.
+## [2.9.0] - 2026-04-02
+### 추가
+- `HwpxDocument.get_table_map()`, `find_cell_by_label()`, `fill_by_path()`를 추가해 HWPX 양식/템플릿 표를 문서 순서 기반으로 탐색하고 채울 수 있게 했습니다.
+- `hwpx.tools.table_navigation` 모듈을 추가해 엔진 레벨에서 재사용 가능한 표 탐색, 라벨 정규화, 방향 이동, 배치 채우기 helper를 공개했습니다.
+
+### 변경
+- 라벨 매칭이 공백 축약, 대소문자 무시, 후행 콜론 허용 규칙을 따르도록 정규화 로직을 추가했습니다.
+- 표 자동화 API에 대한 회귀 테스트와 README/API 레퍼런스 문서를 추가했습니다.
+
## [2.8.3] - 2026-03-10
### 변경
- 저장소와 배포 메타데이터의 라이선스 표기를 실제 `LICENSE` 파일과 일치하도록 정렬했습니다.
diff --git a/README.md b/README.md
index b90bd97..d110c41 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,7 @@ doc.save_to_path("결과물.hwpx")
| 📝 **단락** | 추가/삭제/편집/서식 | 텍스트 설정, 단락 삭제(`remove_paragraph`), 스타일 참조 |
| ✏️ **Run** | 텍스트 조각 | 추가, 교체, 볼드/이탤릭/밑줄/색상 서식 |
| 📊 **표(Table)** | 생성/편집/병합 | N×M 표 생성, 셀 텍스트, 셀 병합/분할, 중첩 테이블 |
+| 🧭 **표 자동화** | 탐색/채우기 | 테이블 맵, 라벨 기반 셀 탐색, 경로 기반 배치 채우기 |
| 📑 **섹션** | 추가/삭제 | `add_section(after=)`, `remove_section()`, manifest 자동 관리 |
| 🖼️ **이미지** | 임베드/삭제 | 바이너리 데이터 관리, manifest 자동 등록 |
| ✏️ **도형** | 선/사각형/타원 | OWPML 명세 준수 도형 삽입 |
@@ -126,6 +127,17 @@ doc.set_footer_text("1 / 10", page_type="BOTH")
# 표 셀 병합·분할
table.merge_cells(0, 0, 1, 1) # (0,0)~(1,1) 병합
table.set_cell_text(0, 0, "병합된 셀", logical=True, split_merged=True)
+
+# 양식형 표 자동 채우기
+form = doc.add_table(2, 2)
+form.cell(0, 0).text = "성명:"
+form.cell(1, 0).text = "소속"
+
+doc.find_cell_by_label("성명") # {"matches": [...], "count": 1}
+doc.fill_by_path({
+ "성명 > right": "홍길동",
+ "소속 > right": "플랫폼팀",
+})
```
### 🔍 텍스트 추출 & 검색
diff --git a/docs/api_reference.md b/docs/api_reference.md
index 6bd6d31..0924f2a 100644
--- a/docs/api_reference.md
+++ b/docs/api_reference.md
@@ -128,6 +128,12 @@
- 섹션을 삭제합니다. 인스턴스 또는 인덱스를 받습니다. 마지막 섹션 삭제 시 `ValueError`가 발생합니다.
- `add_table(rows, cols, ...) -> HwpxOxmlTable`
- 단락을 삽입하고 그 안에 표 인라인 객체를 생성한 후, 표 래퍼를 반환합니다. `border_fill_id_ref`를 생략하면 헤더 참조 목록에 기본 실선 `borderFill`을 생성하고 표와 셀에 자동으로 연결합니다.
+- `get_table_map() -> dict`
+ - 문서 순서대로 표를 스캔하고 `table_index`, `paragraph_index`, 행·열 수, 추정 헤더 텍스트, 첫 행 미리보기, 빈 표 여부를 반환합니다.
+- `find_cell_by_label(label_text, direction="right") -> dict`
+  - 모든 표를 순회하며 라벨 셀을 찾고, `right`/`down` 방향으로 인접한 타깃 셀 정보를 모두 반환합니다. 인접 셀이 표 범위를 벗어나는 라벨은 결과에서 제외됩니다. 라벨 매칭은 공백·대소문자·후행 콜론을 정규화합니다.
+- `fill_by_path(mappings) -> dict`
+  - `"라벨 > 방향 > 방향"` 형식의 경로를 해석해 셀 값을 일괄 기록합니다. 잘못된 경로 형식, 지원하지 않는 방향, 라벨 미발견, 다중 후보, 범위 초과는 개별 실패 항목으로 보고하고 나머지 매핑은 계속 처리합니다.
- `add_shape(shape_type, ...) -> HwpxOxmlInlineObject`
- 새 단락에 태그 이름을 사용하여 인라인 그리기 요소를 삽입합니다.
- `add_control(...) -> HwpxOxmlInlineObject`
diff --git a/pyproject.toml b/pyproject.toml
index 85ed3d5..f3bb140 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "python-hwpx"
-version = "2.8.3"
+version = "2.9.0"
description = "Hancom HWPX 패키지를 로드하고 편집하기 위한 Python 유틸리티 모음"
readme = { file = "README.md", content-type = "text/markdown" }
license = "LicenseRef-python-hwpx-NonCommercial"
diff --git a/src/hwpx/__init__.py b/src/hwpx/__init__.py
index 0a12491..593d43f 100644
--- a/src/hwpx/__init__.py
+++ b/src/hwpx/__init__.py
@@ -10,8 +10,12 @@ def _resolve_version() -> str:
except PackageNotFoundError:
return "0+unknown"
+def __getattr__(name: str) -> object:
+    """Resolve module attributes that are computed on access.
+
+    Only ``__version__`` is handled: it is produced by calling
+    ``_resolve_version()`` at lookup time. Any other name raises
+    :class:`AttributeError`.
+    """
-__version__ = _resolve_version()
+    if name == "__version__":
+        return _resolve_version()
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
from .tools.text_extractor import (
DEFAULT_NAMESPACES,
diff --git a/src/hwpx/document.py b/src/hwpx/document.py
index 99138f8..38701af 100644
--- a/src/hwpx/document.py
+++ b/src/hwpx/document.py
@@ -10,7 +10,7 @@
import uuid
from os import PathLike
-from typing import Any, BinaryIO, Iterator, Sequence, overload
+from typing import TYPE_CHECKING, Any, BinaryIO, Iterator, Mapping, Sequence, overload
from lxml import etree
@@ -53,6 +53,9 @@
logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+ from .tools.table_navigation import TableFillResult, TableLabelSearchResult, TableMapResult
+
def _append_element(
parent: Any,
@@ -741,6 +744,34 @@ def add_table(
char_pr_id_ref=char_pr_id_ref,
)
+    def get_table_map(self) -> TableMapResult:
+        """Summarise every table of this document in document order.
+
+        Thin wrapper that delegates to ``get_table_map`` in
+        ``hwpx.tools.table_navigation``; the helper is imported on call.
+        """
+
+        from .tools.table_navigation import get_table_map as _get_table_map
+
+        return _get_table_map(self)
+
+    def find_cell_by_label(
+        self,
+        label_text: str,
+        direction: str = "right",
+    ) -> TableLabelSearchResult:
+        """Look up cells labelled *label_text* and the cell next to each one.
+
+        Delegates to ``find_cell_by_label`` in ``hwpx.tools.table_navigation``,
+        forwarding *direction* as a keyword argument.
+        """
+
+        from .tools.table_navigation import find_cell_by_label as _find_cell_by_label
+
+        return _find_cell_by_label(self, label_text, direction=direction)
+
+    def fill_by_path(
+        self,
+        mappings: Mapping[str, str],
+    ) -> TableFillResult:
+        """Write values into table cells addressed by ``label > direction > ...`` paths.
+
+        Delegates to ``fill_by_path`` in ``hwpx.tools.table_navigation`` and
+        returns its summary unchanged.
+        """
+
+        from .tools.table_navigation import fill_by_path as _fill_by_path
+
+        return _fill_by_path(self, mappings)
+
def add_shape(
self,
shape_type: str,
diff --git a/src/hwpx/tools/__init__.py b/src/hwpx/tools/__init__.py
index 40880db..80c329c 100644
--- a/src/hwpx/tools/__init__.py
+++ b/src/hwpx/tools/__init__.py
@@ -25,6 +25,19 @@
describe_element_path,
strip_namespace,
)
+from .table_navigation import (
+ TableCellReference,
+ TableFillApplied,
+ TableFillFailed,
+ TableFillResult,
+ TableLabelMatch,
+ TableLabelSearchResult,
+ TableMapEntry,
+ TableMapResult,
+ fill_by_path,
+ find_cell_by_label,
+ get_table_map,
+)
from .validator import (
DocumentSchemas,
ValidationIssue,
@@ -41,6 +54,17 @@
"build_parent_map",
"describe_element_path",
"strip_namespace",
+ "TableCellReference",
+ "TableFillApplied",
+ "TableFillFailed",
+ "TableFillResult",
+ "TableLabelMatch",
+ "TableLabelSearchResult",
+ "TableMapEntry",
+ "TableMapResult",
+ "fill_by_path",
+ "find_cell_by_label",
+ "get_table_map",
"FoundElement",
"ObjectFinder",
"PackageValidationIssue",
diff --git a/src/hwpx/tools/table_navigation.py b/src/hwpx/tools/table_navigation.py
new file mode 100644
index 0000000..681d4d9
--- /dev/null
+++ b/src/hwpx/tools/table_navigation.py
@@ -0,0 +1,457 @@
+"""Reusable helpers for HWPX table discovery and form-like navigation."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+import re
+from typing import TYPE_CHECKING, Literal, Mapping, TypedDict
+
+from ..oxml import HwpxOxmlParagraph, HwpxOxmlTable
+
+if TYPE_CHECKING:
+ from ..document import HwpxDocument
+
+__all__ = [
+ "SearchDirection",
+ "PathDirection",
+ "TableCellReference",
+ "TableFillApplied",
+ "TableFillFailed",
+ "TableFillResult",
+ "TableLabelMatch",
+ "TableLabelSearchResult",
+ "TableMapEntry",
+ "TableMapResult",
+ "fill_by_path",
+ "find_cell_by_label",
+ "get_table_map",
+]
+
+_HP_NS = "http://www.hancom.co.kr/hwpml/2011/paragraph"
+_HP = f"{{{_HP_NS}}}"
+_WHITESPACE_RE = re.compile(r"\s+")
+
+SearchDirection = Literal["right", "down"]
+PathDirection = Literal["left", "right", "up", "down"]
+
+
+class TableMapEntry(TypedDict):
+    """Compact metadata describing a table in document order."""
+
+    # Zero-based position of the table among all collected tables.
+    table_index: int
+    # Index of the top-level document paragraph the table was collected under;
+    # nested tables carry the index of the same top-level paragraph.
+    paragraph_index: int
+    # Values of ``row_count`` / ``column_count`` reported by the table wrapper.
+    rows: int
+    cols: int
+    # Text found before the table (see ``_collect_tables_from_paragraph``);
+    # may be an empty string.
+    header_text: str
+    # Text of each cell in row 0, or an empty list when the table has no rows.
+    first_row_preview: list[str]
+    # True when every cell's text is blank after stripping whitespace.
+    is_empty: bool
+
+
+class TableMapResult(TypedDict):
+    """Collection of table metadata entries."""
+
+    # One entry per table, ordered by ``table_index``.
+    tables: list[TableMapEntry]
+
+
+class TableCellReference(TypedDict):
+    """A logical table cell position and its current text."""
+
+    # Zero-based row and column of the cell inside its table.
+    row: int
+    col: int
+    # Cell text as read at the time the reference was built.
+    text: str
+
+
+class TableLabelMatch(TypedDict):
+    """A label cell and the cell reached from it."""
+
+    # Index of the table (document order) that contains both cells.
+    table_index: int
+    # The cell whose normalized text matched the requested label; ``text``
+    # holds the raw, un-normalized cell text.
+    label_cell: TableCellReference
+    # The in-bounds neighbour of the label cell in the requested direction.
+    target_cell: TableCellReference
+
+
+class TableLabelSearchResult(TypedDict):
+    """Result payload returned by :func:`find_cell_by_label`."""
+
+    # Every label/target pair whose target stayed inside its table.
+    matches: list[TableLabelMatch]
+    # Always ``len(matches)``.
+    count: int
+
+
+class TableFillApplied(TypedDict):
+    """A successfully applied path-based fill operation."""
+
+    # The mapping key exactly as supplied by the caller.
+    path: str
+    # Table (document order) and final cell position that received the value.
+    table_index: int
+    row: int
+    col: int
+    # The written text, i.e. ``str()`` of the mapping value.
+    value: str
+
+
+class TableFillFailed(TypedDict):
+    """A failed path-based fill operation and its reason."""
+
+    # The mapping key exactly as supplied by the caller.
+    path: str
+    # Human-readable cause, e.g. "label not found", "ambiguous label",
+    # "navigation out of bounds" or "unsupported direction: <token>".
+    reason: str
+
+
+class TableFillResult(TypedDict):
+    """Batch fill summary for :func:`fill_by_path`."""
+
+    # Successful and failed operations, each in mapping iteration order.
+    applied: list[TableFillApplied]
+    failed: list[TableFillFailed]
+    # Always ``len(applied)`` and ``len(failed)`` respectively.
+    applied_count: int
+    failed_count: int
+
+
+@dataclass(frozen=True, slots=True)
+class _AnchoredTable:
+    """A table found during traversal, before it is given a document-wide index."""
+
+    # Wrapper around the table element.
+    table: HwpxOxmlTable
+    # Index of the top-level paragraph the traversal started from.
+    paragraph_index: int
+    # Header text chosen for the table at discovery time (may be empty).
+    header_text: str
+
+
+@dataclass(frozen=True, slots=True)
+class _IndexedTable:
+    """An ``_AnchoredTable`` plus its zero-based position in discovery order."""
+
+    # Position assigned by enumerating the collected tables.
+    table_index: int
+    # The remaining fields are copied from the corresponding ``_AnchoredTable``.
+    table: HwpxOxmlTable
+    paragraph_index: int
+    header_text: str
+
+
+@dataclass(frozen=True, slots=True)
+class _LabelCandidate:
+    """A cell whose normalized text equals the label being searched for."""
+
+    # Index (discovery order) and wrapper of the table containing the cell.
+    table_index: int
+    table: HwpxOxmlTable
+    # Zero-based position of the label cell inside ``table``.
+    row: int
+    col: int
+    # Raw cell text, before label normalization.
+    text: str
+
+
+def _collapse_whitespace(value: str) -> str:
+    """Squeeze each whitespace run in *value* to one space and trim both ends."""
+    squeezed = _WHITESPACE_RE.sub(" ", value)
+    return squeezed.strip()
+
+
+def _normalize_label_text(value: str) -> str:
+    """Return the canonical form of a label used for matching.
+
+    Whitespace runs are collapsed, the text is case-folded, and any trailing
+    colons (with the whitespace in front of them) are removed, so ``"Name :"``
+    and ``"name"`` compare equal.
+    """
+    normalized = _collapse_whitespace(value).casefold()
+    # "\uff1a" is FULLWIDTH COLON. The suffix tuple previously listed the ASCII
+    # colon twice, so a trailing full-width colon was never stripped. It is
+    # written as an escape so it cannot be folded back to ":" by a re-encode.
+    while normalized.endswith((":", "\uff1a")):
+        normalized = normalized[:-1].rstrip()
+    return normalized
+
+
+def _direct_paragraph_text(paragraph: HwpxOxmlParagraph) -> str:
+    """Return the whitespace-collapsed text of the paragraph's own runs.
+
+    Only ``t`` elements that are direct children of the paragraph's direct
+    ``run`` children are read, so text inside nested objects is not included.
+    """
+    text_tag = f"{_HP}t"
+    pieces = [
+        node.text
+        for run in paragraph.element.findall(f"{_HP}run")
+        for node in run
+        if node.tag == text_tag and node.text
+    ]
+    return _collapse_whitespace("".join(pieces))
+
+
+def _collect_tables_from_table(
+    table: HwpxOxmlTable,
+    *,
+    anchor_paragraph_index: int,
+    inherited_header_text: str,
+    sink: list[_AnchoredTable],
+) -> str:
+    """Collect tables nested inside *table* and return the updated header text.
+
+    Every paragraph of every cell is visited row by row. The header text
+    returned for one paragraph is passed on as the inherited header of the
+    next, and the final value is returned to the caller.
+    """
+    # Lazy generator: cells and paragraphs are read as the traversal advances.
+    cell_paragraphs = (
+        paragraph
+        for row in table.rows
+        for cell in row.cells
+        for paragraph in cell.paragraphs
+    )
+    running_header = inherited_header_text
+    for paragraph in cell_paragraphs:
+        running_header = _collect_tables_from_paragraph(
+            paragraph,
+            anchor_paragraph_index=anchor_paragraph_index,
+            inherited_header_text=running_header,
+            sink=sink,
+        )
+    return running_header
+
+
+def _collect_tables_from_paragraph(
+    paragraph: HwpxOxmlParagraph,
+    *,
+    anchor_paragraph_index: int,
+    inherited_header_text: str,
+    sink: list[_AnchoredTable],
+) -> str:
+    """Append every table under *paragraph* to *sink*, depth first.
+
+    The children of the paragraph's direct ``run`` elements are walked in
+    order. ``t`` children contribute to the paragraph text. Each ``tbl`` child
+    is recorded with a header equal to the paragraph text seen so far, or the
+    running header when that text is empty, and is then searched for nested
+    tables.
+
+    Returns the paragraph's collapsed text, or the running header when the
+    paragraph has no text of its own.
+    """
+    text_tag = f"{_HP}t"
+    table_tag = f"{_HP}tbl"
+    seen_text: list[str] = []
+    running_header = inherited_header_text
+
+    for run in paragraph.element.findall(f"{_HP}run"):
+        for node in run:
+            tag = node.tag
+            if tag == text_tag:
+                if node.text:
+                    seen_text.append(node.text)
+            elif tag == table_tag:
+                # Text preceding the table in this paragraph wins as its header.
+                prefix = _collapse_whitespace("".join(seen_text))
+                table_header = prefix or running_header
+                wrapped = HwpxOxmlTable(node, paragraph)
+                sink.append(
+                    _AnchoredTable(
+                        table=wrapped,
+                        paragraph_index=anchor_paragraph_index,
+                        header_text=table_header,
+                    )
+                )
+                # Whatever the nested walk last saw becomes the running header.
+                running_header = _collect_tables_from_table(
+                    wrapped,
+                    anchor_paragraph_index=anchor_paragraph_index,
+                    inherited_header_text=table_header,
+                    sink=sink,
+                )
+
+    own_text = _collapse_whitespace("".join(seen_text))
+    return own_text or running_header
+
+
+def _collect_document_tables(document: HwpxDocument) -> list[_IndexedTable]:
+    """Return every table reachable from the document's paragraphs, indexed.
+
+    Tables are gathered paragraph by paragraph. The most recent non-empty
+    top-level paragraph text is supplied as the inherited header for the next
+    paragraph. Indices are assigned in discovery order.
+    """
+    discovered: list[_AnchoredTable] = []
+    preceding_text = ""
+
+    for position, paragraph in enumerate(document.paragraphs):
+        _collect_tables_from_paragraph(
+            paragraph,
+            anchor_paragraph_index=position,
+            inherited_header_text=preceding_text,
+            sink=discovered,
+        )
+        # Keep the previous text when this paragraph has none of its own.
+        preceding_text = _direct_paragraph_text(paragraph) or preceding_text
+
+    indexed: list[_IndexedTable] = []
+    for ordinal, anchored in enumerate(discovered):
+        indexed.append(
+            _IndexedTable(
+                table_index=ordinal,
+                table=anchored.table,
+                paragraph_index=anchored.paragraph_index,
+                header_text=anchored.header_text,
+            )
+        )
+    return indexed
+
+
+def _cell_text(table: HwpxOxmlTable, row_index: int, col_index: int) -> str:
+    """Return the ``text`` of the cell at (*row_index*, *col_index*) of *table*."""
+    cell = table.cell(row_index, col_index)
+    return cell.text
+
+
+def _table_is_empty(table: HwpxOxmlTable) -> bool:
+    """Return True when no cell of *table* holds non-whitespace text."""
+    has_content = any(
+        _cell_text(table, row, col).strip()
+        for row in range(table.row_count)
+        for col in range(table.column_count)
+    )
+    return not has_content
+
+
+def _first_row_preview(table: HwpxOxmlTable) -> list[str]:
+    """Return the text of each cell in row 0, or ``[]`` for a table without rows."""
+    preview: list[str] = []
+    if table.row_count != 0:
+        for col in range(table.column_count):
+            preview.append(_cell_text(table, 0, col))
+    return preview
+
+
+def _direction_delta(direction: PathDirection) -> tuple[int, int]:
+    """Translate *direction* into a ``(row_delta, col_delta)`` step.
+
+    Any value other than ``"right"``, ``"left"`` or ``"down"`` yields the
+    upward step; callers validate the direction beforehand.
+    """
+    match direction:
+        case "right":
+            return (0, 1)
+        case "left":
+            return (0, -1)
+        case "down":
+            return (1, 0)
+        case _:
+            return (-1, 0)
+
+
+def _move(
+    table: HwpxOxmlTable,
+    row_index: int,
+    col_index: int,
+    direction: PathDirection,
+) -> tuple[int, int] | None:
+    """Return the position one step from the given cell, or ``None`` if it leaves *table*.
+
+    The step is valid when the resulting row lies in ``[0, table.row_count)``
+    and the resulting column lies in ``[0, table.column_count)``.
+    """
+    d_row, d_col = _direction_delta(direction)
+    row = row_index + d_row
+    col = col_index + d_col
+    if 0 <= row < table.row_count and 0 <= col < table.column_count:
+        return (row, col)
+    return None
+
+
+def _find_label_candidates(
+    tables: list[_IndexedTable],
+    label_text: str,
+) -> list[_LabelCandidate]:
+    """Return every cell in *tables* whose normalized text equals the normalized label.
+
+    Tables are scanned in list order, row by row and column by column. Each
+    candidate keeps the raw cell text.
+
+    Raises:
+        ValueError: if *label_text* is empty once normalized.
+    """
+    wanted = _normalize_label_text(label_text)
+    if not wanted:
+        raise ValueError("label_text must contain at least one non-whitespace character")
+
+    found: list[_LabelCandidate] = []
+    for entry in tables:
+        grid = entry.table
+        for row in range(grid.row_count):
+            for col in range(grid.column_count):
+                raw_text = _cell_text(grid, row, col)
+                if _normalize_label_text(raw_text) == wanted:
+                    found.append(
+                        _LabelCandidate(
+                            table_index=entry.table_index,
+                            table=grid,
+                            row=row,
+                            col=col,
+                            text=raw_text,
+                        )
+                    )
+    return found
+
+
+def _cell_reference(
+    table: HwpxOxmlTable,
+    row_index: int,
+    col_index: int,
+) -> TableCellReference:
+    """Build a ``TableCellReference`` holding the cell's position and current text."""
+    current_text = _cell_text(table, row_index, col_index)
+    return TableCellReference(row=row_index, col=col_index, text=current_text)
+
+
+def _parse_path(path: str) -> tuple[str | None, list[str], str | None]:
+    """Split a ``label > direction > ...`` path into its parts.
+
+    Returns ``(label, directions, error)``. ``error`` is ``None`` on success.
+    Otherwise it explains the problem and ``directions`` is empty. Segments
+    are stripped, and empty direction segments are ignored.
+    """
+    # ``str.split`` always yields at least one segment, so the unpack is safe.
+    label, *remainder = (segment.strip() for segment in path.split(">"))
+    if not label:
+        return (None, [], "path must start with a label")
+
+    directions = [segment for segment in remainder if segment]
+    if directions:
+        return (label, directions, None)
+    return (label, [], "path must include at least one direction")
+
+
+def get_table_map(document: HwpxDocument) -> TableMapResult:
+    """Describe every table of *document* in document order.
+
+    Each entry reports the table's index, the index of its anchoring
+    top-level paragraph, its row and column counts, its header text, the text
+    of its first row, and whether all of its cells are blank.
+    """
+
+    entries: list[TableMapEntry] = [
+        TableMapEntry(
+            table_index=ref.table_index,
+            paragraph_index=ref.paragraph_index,
+            rows=ref.table.row_count,
+            cols=ref.table.column_count,
+            header_text=ref.header_text,
+            first_row_preview=_first_row_preview(ref.table),
+            is_empty=_table_is_empty(ref.table),
+        )
+        for ref in _collect_document_tables(document)
+    ]
+    return TableMapResult(tables=entries)
+
+
+def find_cell_by_label(
+    document: HwpxDocument,
+    label_text: str,
+    direction: SearchDirection = "right",
+) -> TableLabelSearchResult:
+    """Locate cells labelled *label_text* and report the neighbour in *direction*.
+
+    Labels whose neighbour would fall outside the table are left out of the
+    result.
+
+    Raises:
+        ValueError: if *direction* is not ``"right"`` or ``"down"``, or if
+            *label_text* is empty once normalized.
+    """
+
+    if direction not in {"right", "down"}:
+        raise ValueError("direction must be one of: right, down")
+
+    candidates = _find_label_candidates(_collect_document_tables(document), label_text)
+
+    found: list[TableLabelMatch] = []
+    for hit in candidates:
+        neighbour = _move(hit.table, hit.row, hit.col, direction)
+        if neighbour is not None:
+            neighbour_row, neighbour_col = neighbour
+            found.append(
+                TableLabelMatch(
+                    table_index=hit.table_index,
+                    label_cell=TableCellReference(row=hit.row, col=hit.col, text=hit.text),
+                    target_cell=_cell_reference(hit.table, neighbour_row, neighbour_col),
+                )
+            )
+
+    return TableLabelSearchResult(matches=found, count=len(found))
+
+
+def fill_by_path(
+    document: HwpxDocument,
+    mappings: Mapping[str, str],
+) -> TableFillResult:
+    """Write each mapping value into the cell its navigation path leads to.
+
+    A path is ``label > direction > ...``. The label must match exactly one
+    cell across all tables, and each direction (``left``, ``right``, ``up``,
+    ``down``, case-insensitive) moves one cell within that table. Failures are
+    recorded per path and never stop the batch.
+
+    Returns the applied and failed operations together with their counts.
+    """
+
+    # Tables are collected once; label lookups read the live cell text.
+    indexed_tables = _collect_document_tables(document)
+    applied: list[TableFillApplied] = []
+    failed: list[TableFillFailed] = []
+
+    for path, value in mappings.items():
+        label_text, raw_directions, path_error = _parse_path(path)
+        if path_error is not None or label_text is None:
+            failed.append(TableFillFailed(path=path, reason=path_error or "invalid path"))
+            continue
+
+        try:
+            candidates = _find_label_candidates(indexed_tables, label_text)
+        except ValueError as exc:
+            failed.append(TableFillFailed(path=path, reason=str(exc)))
+            continue
+
+        if len(candidates) != 1:
+            reason = "ambiguous label" if candidates else "label not found"
+            failed.append(TableFillFailed(path=path, reason=reason))
+            continue
+
+        (anchor,) = candidates
+        row, col = anchor.row, anchor.col
+
+        for raw_direction in raw_directions:
+            step_name = raw_direction.casefold()
+            if step_name not in {"left", "right", "up", "down"}:
+                reason = f"unsupported direction: {raw_direction}"
+                break
+            landed = _move(anchor.table, row, col, step_name)
+            if landed is None:
+                reason = "navigation out of bounds"
+                break
+            row, col = landed
+        else:
+            # Every step stayed inside the table: write the value.
+            text_value = str(value)
+            anchor.table.set_cell_text(row, col, text_value, logical=True)
+            applied.append(
+                TableFillApplied(
+                    path=path,
+                    table_index=anchor.table_index,
+                    row=row,
+                    col=col,
+                    value=text_value,
+                )
+            )
+            continue
+
+        # Reached only when the navigation loop broke out early.
+        failed.append(TableFillFailed(path=path, reason=reason))
+
+    return TableFillResult(
+        applied=applied,
+        failed=failed,
+        applied_count=len(applied),
+        failed_count=len(failed),
+    )
diff --git a/tests/test_table_navigation.py b/tests/test_table_navigation.py
new file mode 100644
index 0000000..aed26d7
--- /dev/null
+++ b/tests/test_table_navigation.py
@@ -0,0 +1,183 @@
+from __future__ import annotations
+
+from hwpx import HwpxDocument
+
+
+def _paragraph_index(document: HwpxDocument, target) -> int:
+    """Return the index of the document paragraph that wraps the same element as *target*."""
+    position = next(
+        (
+            index
+            for index, paragraph in enumerate(document.paragraphs)
+            if paragraph.element is target.element
+        ),
+        None,
+    )
+    if position is None:
+        raise AssertionError("target paragraph was not found in document order")
+    return position
+
+
+def test_fill_by_path_handles_unique_labels_in_a_single_table() -> None:
+    """Two distinct labels in one table are each filled into their right-hand cell."""
+    doc = HwpxDocument.new()
+    doc.add_paragraph("1. 기본 현황")
+    form = doc.add_table(2, 2)
+    form.cell(0, 0).text = "성명:"
+    form.cell(1, 0).text = "소속"
+
+    requested = {
+        "성명 > right": "홍길동",
+        "소속 > right": "플랫폼팀",
+    }
+    outcome = doc.fill_by_path(requested)
+
+    assert outcome["applied_count"] == 2
+    assert outcome["failed_count"] == 0
+    assert form.cell(0, 1).text == "홍길동"
+    assert form.cell(1, 1).text == "플랫폼팀"
+
+
+def test_find_cell_by_label_normalizes_trailing_colons() -> None:
+    """A label stored with a trailing colon is found when queried without it."""
+    doc = HwpxDocument.new()
+    doc.add_paragraph("기본 정보")
+    form = doc.add_table(1, 2)
+    form.cell(0, 0).text = "성명:"
+
+    found = doc.find_cell_by_label("성명")
+
+    assert found["count"] == 1
+    only_match = found["matches"][0]
+    assert only_match["table_index"] == 0
+    assert only_match["label_cell"] == {"row": 0, "col": 0, "text": "성명:"}
+    assert only_match["target_cell"] == {"row": 0, "col": 1, "text": ""}
+
+
+def test_multiple_tables_with_the_same_label_return_all_matches_and_make_fill_ambiguous() -> None:
+    """A label present in two tables yields two matches and an ambiguous fill failure."""
+    doc = HwpxDocument.new()
+    doc.add_paragraph("1. 신청인")
+    applicant = doc.add_table(1, 2)
+    applicant.cell(0, 0).text = "성명"
+
+    doc.add_paragraph("2. 보호자")
+    guardian = doc.add_table(1, 2)
+    guardian.cell(0, 0).text = "성명"
+
+    found = doc.find_cell_by_label("성명")
+    outcome = doc.fill_by_path({"성명 > right": "홍길동"})
+
+    assert found["count"] == 2
+    table_indices = [match["table_index"] for match in found["matches"]]
+    assert table_indices == [0, 1]
+    assert outcome["applied_count"] == 0
+    assert outcome["failed_count"] == 1
+    expected_failure = {"path": "성명 > right", "reason": "ambiguous label"}
+    assert outcome["failed"][0] == expected_failure
+    assert applicant.cell(0, 1).text == ""
+    assert guardian.cell(0, 1).text == ""
+
+
+def test_out_of_bounds_candidates_are_skipped_and_reported_for_batch_fill() -> None:
+    """Labels on the table edge produce no search match and an out-of-bounds fill failure."""
+    doc = HwpxDocument.new()
+    doc.add_paragraph("경계 값")
+    grid = doc.add_table(2, 2)
+    grid.cell(0, 1).text = "마지막열"
+    grid.cell(1, 0).text = "마지막행"
+
+    to_the_right = doc.find_cell_by_label("마지막열", direction="right")
+    downwards = doc.find_cell_by_label("마지막행", direction="down")
+    requested = {
+        "마지막열 > right": "실패",
+        "마지막행 > down": "실패",
+    }
+    outcome = doc.fill_by_path(requested)
+
+    assert to_the_right["count"] == 0
+    assert downwards["count"] == 0
+    assert outcome["applied_count"] == 0
+    assert outcome["failed_count"] == 2
+    expected_failures = [
+        {"path": "마지막열 > right", "reason": "navigation out of bounds"},
+        {"path": "마지막행 > down", "reason": "navigation out of bounds"},
+    ]
+    assert outcome["failed"] == expected_failures
+
+
+def test_fill_by_path_supports_multi_step_navigation() -> None:
+    """A path with two directions lands on the diagonal neighbour of the label cell."""
+    doc = HwpxDocument.new()
+    doc.add_paragraph("정산표")
+    ledger = doc.add_table(3, 2)
+    ledger.cell(0, 0).text = "합계"
+
+    outcome = doc.fill_by_path({"합계 > down > right": "100"})
+
+    expected_applied = [
+        {
+            "path": "합계 > down > right",
+            "table_index": 0,
+            "row": 1,
+            "col": 1,
+            "value": "100",
+        }
+    ]
+    assert outcome["applied"] == expected_applied
+    assert outcome["failed"] == []
+    assert ledger.cell(1, 1).text == "100"
+
+
+def test_get_table_map_reports_stable_order_shape_and_header_text() -> None:
+    """The table map lists both tables in order with their shape, header and first row."""
+    doc = HwpxDocument.new()
+    doc.add_paragraph("1. 기본 현황")
+    roster = doc.add_table(2, 4)
+    roster.cell(0, 0).text = "성명"
+    roster.cell(0, 1).text = "소속"
+    roster.cell(0, 2).text = "직위"
+    roster.cell(0, 3).text = "연락처"
+    roster.cell(1, 0).text = "홍길동"
+
+    doc.add_paragraph("2. 비고")
+    notes = doc.add_table(1, 2)
+    notes.cell(0, 0).text = "항목"
+    notes.cell(0, 1).text = "값"
+
+    table_map = doc.get_table_map()
+
+    expected_roster = {
+        "table_index": 0,
+        "paragraph_index": _paragraph_index(doc, roster.paragraph),
+        "rows": 2,
+        "cols": 4,
+        "header_text": "1. 기본 현황",
+        "first_row_preview": ["성명", "소속", "직위", "연락처"],
+        "is_empty": False,
+    }
+    expected_notes = {
+        "table_index": 1,
+        "paragraph_index": _paragraph_index(doc, notes.paragraph),
+        "rows": 1,
+        "cols": 2,
+        "header_text": "2. 비고",
+        "first_row_preview": ["항목", "값"],
+        "is_empty": False,
+    }
+    assert table_map["tables"] == [expected_roster, expected_notes]
+
+
+def test_get_table_map_marks_tables_with_only_empty_strings_as_empty() -> None:
+    """A freshly created table with no cell text is reported as empty."""
+    doc = HwpxDocument.new()
+    doc.add_paragraph("빈 표")
+    blank = doc.add_table(2, 2)
+
+    table_map = doc.get_table_map()
+
+    expected_entry = {
+        "table_index": 0,
+        "paragraph_index": _paragraph_index(doc, blank.paragraph),
+        "rows": 2,
+        "cols": 2,
+        "header_text": "빈 표",
+        "first_row_preview": ["", ""],
+        "is_empty": True,
+    }
+    assert table_map["tables"] == [expected_entry]