Skip to content

Commit 1bece2d

Browse files
authored
Merge pull request #26 from airmang/pr2-hp-tab-support
Handle hp:tab across parsing and text export
2 parents 0d8ee35 + f411deb commit 1bece2d

File tree

6 files changed

+279
-168
lines changed

6 files changed

+279
-168
lines changed

src/hwpx/oxml/body.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
}
4444

4545
InlineMark = Union[GenericElement, "TrackChangeMark"]
46-
RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan"]
46+
RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan", "Tab"]
4747
ParagraphChild = Union["Run", GenericElement]
4848

4949

@@ -105,6 +105,12 @@ class InlineObject:
105105
children: List[GenericElement] = field(default_factory=list)
106106

107107

108+
@dataclass(slots=True)
109+
class Tab:
110+
tag: str
111+
attributes: Dict[str, str] = field(default_factory=dict)
112+
113+
108114
@dataclass(slots=True)
109115
class Table:
110116
tag: str
@@ -120,6 +126,7 @@ class Run:
120126
controls: List[Control] = field(default_factory=list)
121127
tables: List[Table] = field(default_factory=list)
122128
inline_objects: List[InlineObject] = field(default_factory=list)
129+
tabs: List[Tab] = field(default_factory=list)
123130
text_spans: List[TextSpan] = field(default_factory=list)
124131
other_children: List[GenericElement] = field(default_factory=list)
125132
attributes: Dict[str, str] = field(default_factory=dict)
@@ -227,6 +234,10 @@ def parse_table_element(node: etree._Element) -> Table:
227234
)
228235

229236

237+
def parse_tab_element(node: etree._Element) -> Tab:
    """Build a :class:`Tab` model from a parsed tab element.

    The element's tag is stored unchanged and its attributes are copied into
    a new dict, so later edits to the model do not write through to *node*.
    """
    copied_attributes = dict(node.attrib.items())
    return Tab(tag=node.tag, attributes=copied_attributes)
239+
240+
230241
def parse_run_element(node: etree._Element) -> Run:
231242
attributes = {key: value for key, value in node.attrib.items()}
232243
char_pr_id_ref = parse_int(attributes.pop("charPrIDRef", None))
@@ -247,6 +258,10 @@ def parse_run_element(node: etree._Element) -> Run:
247258
span = parse_text_span(child)
248259
run.text_spans.append(span)
249260
run.content.append(span)
261+
elif name == "tab":
262+
tab = parse_tab_element(child)
263+
run.tabs.append(tab)
264+
run.content.append(tab)
250265
elif name == "tbl":
251266
table = parse_table_element(child)
252267
run.tables.append(table)
@@ -342,6 +357,10 @@ def _text_span_to_xml(span: TextSpan) -> etree._Element:
342357
return node
343358

344359

360+
def _tab_to_xml(tab: Tab) -> etree._Element:
    """Serialize *tab* into a new XML element with no children.

    The element name comes from ``_qualified_tag`` (given the stored tag and
    the local name ``"tab"``); the attributes are passed as a copy so the
    model's own dict is not shared with the element.
    """
    element_name = _qualified_tag(tab.tag, "tab")
    element_attributes = dict(tab.attributes)
    return etree.Element(element_name, element_attributes)
362+
363+
345364
def _control_to_xml(control: Control) -> etree._Element:
346365
attrs = dict(control.attributes)
347366
if control.control_type is not None:
@@ -376,6 +395,8 @@ def serialize_run(run: Run) -> etree._Element:
376395
node.append(_text_span_to_xml(child))
377396
elif isinstance(child, Control):
378397
node.append(_control_to_xml(child))
398+
elif isinstance(child, Tab):
399+
node.append(_tab_to_xml(child))
379400
elif isinstance(child, Table):
380401
node.append(_table_to_xml(child))
381402
elif isinstance(child, InlineObject):

src/hwpx/oxml/document.py

Lines changed: 134 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,45 @@ def _memo_id() -> str:
121121
return str(uuid4().int & 0xFFFFFFFF)
122122

123123

124+
def _refresh_copied_paragraph_subtree_ids(paragraph: ET.Element) -> None:
    """Assign fresh local identifiers inside a copied paragraph subtree.

    This is intentionally narrow: it refreshes paragraph ids for the copied
    paragraph and any nested paragraphs (for example inside table cells), plus
    common object identifiers used by tables/shapes/notes. Reference-style
    attributes such as ``borderFillIDRef`` are left untouched.

    The subtree is modified in place; nothing is returned.
    """

    # The tag names never change during the walk, so build them once rather
    # than re-formatting ten f-strings for every visited node.
    paragraph_tag = f"{_HP}p"
    object_tags = frozenset(
        f"{_HP}{local_name}"
        for local_name in (
            "tbl",
            "pic",
            "container",
            "ole",
            "equation",
            "textart",
            "video",
            "header",
            "footer",
        )
    )

    for node in paragraph.iter():
        tag = node.tag
        if tag == paragraph_tag:
            # Paragraphs always receive an id, even if the copy had none;
            # no other attribute of a paragraph is touched.
            node.set("id", _paragraph_id())
            continue

        # Object ids are only replaced where one already exists.
        if tag in object_tags and "id" in node.attrib:
            node.set("id", _object_id())

        # ``instId`` is refreshed on any non-paragraph element carrying it.
        if "instId" in node.attrib:
            node.set("instId", _object_id())
153+
154+
155+
def _clone_paragraph_element(paragraph: ET.Element) -> ET.Element:
    """Deep-copy *paragraph* and return the copy with refreshed local ids.

    The id refresh is applied to the copy only; *paragraph* itself is never
    passed to it.
    """

    duplicate = deepcopy(paragraph)
    _refresh_copied_paragraph_subtree_ids(duplicate)
    return duplicate
161+
162+
124163
def _create_paragraph_element(
125164
text: str,
126165
*,
@@ -154,9 +193,7 @@ def _create_paragraph_element(
154193

155194
run = paragraph.makeelement(f"{_HP}run", run_attrs)
156195
paragraph.append(run)
157-
text_element = run.makeelement(f"{_HP}t", {})
158-
run.append(text_element)
159-
text_element.text = text
196+
_append_text_with_tabs(run, text)
160197
return paragraph
161198

162199

@@ -192,6 +229,20 @@ def _append_child(
192229
return child
193230

194231

232+
def _is_tab_control_element(node: ET.Element) -> bool:
    """Tell whether *node* is a ``ctrl`` element whose ``id`` is ``tab``.

    The ``id`` comparison ignores case; a missing or empty ``id`` never
    matches.
    """
    if node.tag != f"{_HP}ctrl":
        return False
    control_id = node.get("id") or ""
    return control_id.lower() == "tab"
234+
235+
236+
def _append_text_with_tabs(run: ET.Element, value: str) -> None:
    """Append *value* to *run*, turning each tab character into a tab element.

    *value* is split on ``"\\t"``. Every piece, including empty ones, becomes
    a text element holding the sanitized piece, and one empty tab element is
    placed between each pair of neighbouring pieces. A value without tabs
    therefore yields exactly one text element.
    """
    text_tag = f"{_HP}t"
    tab_tag = f"{_HP}tab"
    for position, piece in enumerate(value.split("\t")):
        if position:
            # A separator precedes every piece except the first one.
            run.append(run.makeelement(tab_tag, {}))
        text_node = run.makeelement(text_tag, {})
        text_node.text = _sanitize_text(piece)
        run.append(text_node)
244+
245+
195246
def _normalize_length(value: str | None) -> str:
196247
if value is None:
197248
return ""
@@ -2097,9 +2148,7 @@ def add_paragraph(
20972148
run_attrs["charPrIDRef"] = "0"
20982149

20992150
run = _append_child(paragraph, f"{_HP}run", run_attrs)
2100-
t = run.makeelement(f"{_HP}t", {})
2101-
t.text = _sanitize_text(text)
2102-
run.append(t)
2151+
_append_text_with_tabs(run, text)
21032152

21042153
self.table.mark_dirty()
21052154
section = self.table.paragraph.section
@@ -2762,9 +2811,13 @@ def runs(self) -> list[HwpxOxmlRun]:
27622811
def text(self) -> str:
27632812
"""Return the concatenated textual content of this paragraph."""
27642813
texts: list[str] = []
2765-
for text_element in self.element.findall(f".//{_HP}t"):
2766-
if text_element.text:
2767-
texts.append(text_element.text)
2814+
for run in self._run_elements():
2815+
for child in run:
2816+
if child.tag == f"{_HP}t":
2817+
if child.text:
2818+
texts.append(child.text)
2819+
elif child.tag == f"{_HP}tab" or _is_tab_control_element(child):
2820+
texts.append("\t")
27682821
return "".join(texts)
27692822

27702823
@text.setter
@@ -2780,10 +2833,10 @@ def text(self, value: str) -> None:
27802833
# Identify first run — its charPrIDRef will be kept.
27812834
first_run = self._ensure_run()
27822835

2783-
# Remove <hp:t> from ALL runs.
2836+
# Remove existing text/tab nodes from all runs.
27842837
for run in runs:
27852838
for child in list(run):
2786-
if child.tag == f"{_HP}t":
2839+
if child.tag == f"{_HP}t" or child.tag == f"{_HP}tab" or _is_tab_control_element(child):
27872840
run.remove(child)
27882841

27892842
# Remove non-first runs that are now empty (only had text).
@@ -2794,10 +2847,8 @@ def text(self, value: str) -> None:
27942847
if len(list(run)) == 0:
27952848
self.element.remove(run)
27962849

2797-
# Write the new text into the first run.
2798-
text_element = first_run.makeelement(f"{_HP}t", {})
2799-
text_element.text = _sanitize_text(value)
2800-
first_run.append(text_element)
2850+
# Write the new text into the first run, preserving tabs as <hp:tab/>.
2851+
_append_text_with_tabs(first_run, value)
28012852
_clear_paragraph_layout_cache(self.element)
28022853
self.section.mark_dirty()
28032854

@@ -3668,14 +3719,43 @@ def add_paragraph(
36683719

36693720
run = paragraph.makeelement(f"{_HP}run", run_attrs)
36703721
paragraph.append(run)
3671-
text_element = run.makeelement(f"{_HP}t", {})
3672-
text_element.text = text
3673-
run.append(text_element)
3722+
_append_text_with_tabs(run, text)
36743723

36753724
self._element.append(paragraph)
36763725
self._dirty = True
36773726
return HwpxOxmlParagraph(paragraph, self)
36783727

3728+
def insert_paragraphs(
    self,
    index: int,
    paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
) -> list[HwpxOxmlParagraph]:
    """Insert paragraph copies at *index* and return wrappers for them.

    Args:
        index: Paragraph position (0..len(self.paragraphs)) before which the
            copies are placed; ``len(self.paragraphs)`` inserts after the
            last paragraph.
        paragraphs: Paragraph wrappers or raw elements to copy. The sources
            are never moved or modified; each one is cloned with fresh ids.

    Returns:
        Wrappers for the inserted copies, in insertion order. Empty when
        *paragraphs* is empty, in which case the section is left untouched.

    Raises:
        IndexError: If *index* is outside ``0..len(self.paragraphs)``.
    """

    existing = self.paragraphs
    if index < 0 or index > len(existing):
        raise IndexError(f"단락 인덱스 {index}이(가) 범위를 벗어났습니다 (총 {len(existing)}개)")

    # Clone the whole batch before touching the tree, so an invalid item
    # cannot leave a partially inserted batch behind without the dirty flag.
    clones = [
        _clone_paragraph_element(
            item.element if isinstance(item, HwpxOxmlParagraph) else item
        )
        for item in paragraphs
    ]
    if not clones:
        return []

    # *index* counts paragraphs, while Element.insert() counts every child of
    # the section element. Translate through the neighbouring paragraph so
    # the two agree even if the section holds non-paragraph children.
    # NOTE(review): assumes self.paragraphs wraps direct children of
    # self._element in document order — confirm against the property.
    position = index
    siblings = list(self._element)
    try:
        if index < len(existing):
            position = siblings.index(existing[index].element)
        elif existing:
            position = siblings.index(existing[-1].element) + 1
    except ValueError:
        # The paragraph is not a direct child; keep the raw index.
        position = index

    for offset, cloned in enumerate(clones):
        self._element.insert(position + offset, cloned)

    self._dirty = True
    return [HwpxOxmlParagraph(cloned, self) for cloned in clones]
3749+
3750+
def copy_paragraph_range(self, start: int, end: int) -> list[ET.Element]:
    """Return deep-copied paragraph elements for the inclusive range.

    Both *start* and *end* are paragraph indexes into ``self.paragraphs`` and
    both are included in the result. The section itself is not modified.

    Raises:
        IndexError: If either bound is negative, either bound is past the
            last paragraph, or *start* is greater than *end*.
    """

    available = self.paragraphs
    total = len(available)
    range_is_valid = 0 <= start <= end < total
    if not range_is_valid:
        raise IndexError(f"문단 범위 {start}..{end}이(가) 유효하지 않습니다 (총 {total}개)")
    return [
        _clone_paragraph_element(available[position].element)
        for position in range(start, end + 1)
    ]
3758+
36793759
def mark_dirty(self) -> None:
36803760
self._dirty = True
36813761

@@ -4649,6 +4729,42 @@ def remove_paragraph(
46494729
else:
46504730
paragraph.remove()
46514731

4732+
def copy_paragraph_range(
    self,
    start: int,
    end: int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> list[ET.Element]:
    """Return deep-copied paragraph elements for an inclusive range.

    The work is delegated to one section, chosen in this order: an explicit
    *section*, then ``self._sections[section_index]``, then the last section
    of the document.

    Raises:
        ValueError: If no section was selected and the document has none.
    """

    if section is not None:
        # An explicit section wins; section_index is not consulted.
        return section.copy_paragraph_range(start, end)

    chosen = None if section_index is None else self._sections[section_index]
    if chosen is None:
        if not self._sections:
            raise ValueError("document does not contain any sections")
        chosen = self._sections[-1]
    return chosen.copy_paragraph_range(start, end)
4749+
4750+
def insert_paragraphs(
    self,
    index: int,
    paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> list[HwpxOxmlParagraph]:
    """Insert copied paragraphs into the requested section.

    The work is delegated to one section, chosen in this order: an explicit
    *section*, then ``self._sections[section_index]``, then the last section
    of the document. The return value is whatever that section's
    ``insert_paragraphs`` returns.

    Raises:
        ValueError: If no section was selected and the document has none.
    """

    if section is not None:
        # An explicit section wins; section_index is not consulted.
        return section.insert_paragraphs(index, paragraphs)

    chosen = None if section_index is None else self._sections[section_index]
    if chosen is None:
        if not self._sections:
            raise ValueError("document does not contain any sections")
        chosen = self._sections[-1]
    return chosen.insert_paragraphs(index, paragraphs)
4767+
46524768
# ------------------------------------------------------------------
46534769
# Section management
46544770
# ------------------------------------------------------------------

0 commit comments

Comments
 (0)