Skip to content

Commit c0f22bd

Browse files
committed
Handle hp:tab across parsing and text export
1 parent c82966e commit c0f22bd

File tree

6 files changed

+279
-167
lines changed

6 files changed

+279
-167
lines changed

src/hwpx/oxml/body.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
}
4444

4545
InlineMark = Union[GenericElement, "TrackChangeMark"]
46-
RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan"]
46+
RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan", "Tab"]
4747
ParagraphChild = Union["Run", GenericElement]
4848

4949

@@ -105,6 +105,12 @@ class InlineObject:
105105
children: List[GenericElement] = field(default_factory=list)
106106

107107

108+
@dataclass(slots=True)
109+
class Tab:
110+
tag: str
111+
attributes: Dict[str, str] = field(default_factory=dict)
112+
113+
108114
@dataclass(slots=True)
109115
class Table:
110116
tag: str
@@ -120,6 +126,7 @@ class Run:
120126
controls: List[Control] = field(default_factory=list)
121127
tables: List[Table] = field(default_factory=list)
122128
inline_objects: List[InlineObject] = field(default_factory=list)
129+
tabs: List[Tab] = field(default_factory=list)
123130
text_spans: List[TextSpan] = field(default_factory=list)
124131
other_children: List[GenericElement] = field(default_factory=list)
125132
attributes: Dict[str, str] = field(default_factory=dict)
@@ -227,6 +234,10 @@ def parse_table_element(node: etree._Element) -> Table:
227234
)
228235

229236

def parse_tab_element(node: etree._Element) -> Tab:
    """Build a :class:`Tab` from *node*.

    The element's tag is stored as-is and its attributes are copied into a
    new plain ``dict`` so the result does not alias the element's attribute
    mapping.
    """
    copied_attributes = dict(node.attrib.items())
    return Tab(tag=node.tag, attributes=copied_attributes)
239+
240+
230241
def parse_run_element(node: etree._Element) -> Run:
231242
attributes = {key: value for key, value in node.attrib.items()}
232243
char_pr_id_ref = parse_int(attributes.pop("charPrIDRef", None))
@@ -247,6 +258,10 @@ def parse_run_element(node: etree._Element) -> Run:
247258
span = parse_text_span(child)
248259
run.text_spans.append(span)
249260
run.content.append(span)
261+
elif name == "tab":
262+
tab = parse_tab_element(child)
263+
run.tabs.append(tab)
264+
run.content.append(tab)
250265
elif name == "tbl":
251266
table = parse_table_element(child)
252267
run.tables.append(table)
@@ -342,6 +357,10 @@ def _text_span_to_xml(span: TextSpan) -> etree._Element:
342357
return node
343358

344359

def _tab_to_xml(tab: Tab) -> etree._Element:
    """Serialize *tab* into a new element.

    The element name comes from ``_qualified_tag(tab.tag, "tab")`` and the
    attributes are passed as a copy, so the new element never shares the
    dataclass's dict.
    """
    element_name = _qualified_tag(tab.tag, "tab")
    attribute_copy = dict(tab.attributes)
    return etree.Element(element_name, attribute_copy)
362+
363+
345364
def _control_to_xml(control: Control) -> etree._Element:
346365
attrs = dict(control.attributes)
347366
if control.control_type is not None:
@@ -376,6 +395,8 @@ def serialize_run(run: Run) -> etree._Element:
376395
node.append(_text_span_to_xml(child))
377396
elif isinstance(child, Control):
378397
node.append(_control_to_xml(child))
398+
elif isinstance(child, Tab):
399+
node.append(_tab_to_xml(child))
379400
elif isinstance(child, Table):
380401
node.append(_table_to_xml(child))
381402
elif isinstance(child, InlineObject):

src/hwpx/oxml/document.py

Lines changed: 134 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,45 @@ def _memo_id() -> str:
119119
return str(uuid4().int & 0xFFFFFFFF)
120120

121121

def _refresh_copied_paragraph_subtree_ids(paragraph: ET.Element) -> None:
    """Assign fresh local identifiers inside a copied paragraph subtree.

    This is intentionally narrow: it refreshes paragraph ids for the copied
    paragraph and any nested paragraphs (for example inside table cells), plus
    the ``id`` of a fixed set of object elements and any ``instId`` attribute.
    Reference-style attributes such as ``borderFillIDRef`` are left untouched.

    Args:
        paragraph: Root of the copied subtree; it is modified in place.
    """

    # Build the tag lookups once instead of once per visited node.
    paragraph_tag = f"{_HP}p"
    object_tags = frozenset(
        f"{_HP}{local_name}"
        for local_name in (
            "tbl",
            "pic",
            "container",
            "ole",
            "equation",
            "textart",
            "video",
            "header",
            "footer",
        )
    )

    for node in paragraph.iter():
        if node.tag == paragraph_tag:
            # Paragraphs always receive an id, even if the source had none,
            # and are not considered for the object-id/instId handling below.
            node.set("id", _paragraph_id())
            continue

        # Only rewrite an object id that already exists; never add one.
        if "id" in node.attrib and node.tag in object_tags:
            node.set("id", _object_id())

        if "instId" in node.attrib:
            node.set("instId", _object_id())
151+
152+
def _clone_paragraph_element(paragraph: ET.Element) -> ET.Element:
    """Deep-copy *paragraph* and refresh the copy's local identifiers.

    The source element is not modified; only the returned copy is passed
    through ``_refresh_copied_paragraph_subtree_ids``.
    """

    duplicate = deepcopy(paragraph)
    _refresh_copied_paragraph_subtree_ids(duplicate)
    return duplicate
159+
160+
122161
def _create_paragraph_element(
123162
text: str,
124163
*,
@@ -147,8 +186,7 @@ def _create_paragraph_element(
147186
run_attrs.setdefault("charPrIDRef", "0")
148187

149188
run = ET.SubElement(paragraph, f"{_HP}run", run_attrs)
150-
text_element = ET.SubElement(run, f"{_HP}t")
151-
text_element.text = text
189+
_append_text_with_tabs(run, text)
152190
return paragraph
153191

154192

@@ -184,6 +222,20 @@ def _append_child(
184222
return child
185223

186224

def _is_tab_control_element(node: ET.Element) -> bool:
    """Return True when *node* is an ``hp:ctrl`` element whose ``id`` is "tab".

    The ``id`` comparison is case-insensitive; a missing or empty ``id``
    never matches.
    """
    if node.tag != f"{_HP}ctrl":
        return False
    control_id = node.get("id") or ""
    return control_id.lower() == "tab"
227+
228+
def _append_text_with_tabs(run: ET.Element, value: str) -> None:
    """Append *value* to *run* as ``hp:t`` elements separated by ``hp:tab``.

    *value* is split on the tab character. Every piece, including an empty
    one, becomes an ``hp:t`` element whose text is ``_sanitize_text(piece)``,
    and one empty ``hp:tab`` element is placed between consecutive pieces.
    A value without tabs therefore yields exactly one ``hp:t`` element.
    """
    text_tag = f"{_HP}t"
    tab_tag = f"{_HP}tab"
    for position, piece in enumerate(value.split("\t")):
        # A separator precedes every piece except the first one.
        if position:
            run.append(run.makeelement(tab_tag, {}))
        text_node = run.makeelement(text_tag, {})
        text_node.text = _sanitize_text(piece)
        run.append(text_node)
237+
238+
187239
def _normalize_length(value: str | None) -> str:
188240
if value is None:
189241
return ""
@@ -2084,9 +2136,7 @@ def add_paragraph(
20842136
run_attrs["charPrIDRef"] = "0"
20852137

20862138
run = _append_child(paragraph, f"{_HP}run", run_attrs)
2087-
t = run.makeelement(f"{_HP}t", {})
2088-
t.text = _sanitize_text(text)
2089-
run.append(t)
2139+
_append_text_with_tabs(run, text)
20902140

20912141
self.table.mark_dirty()
20922142
section = self.table.paragraph.section
@@ -2749,9 +2799,13 @@ def runs(self) -> list[HwpxOxmlRun]:
27492799
def text(self) -> str:
27502800
"""Return the concatenated textual content of this paragraph."""
27512801
texts: list[str] = []
2752-
for text_element in self.element.findall(f".//{_HP}t"):
2753-
if text_element.text:
2754-
texts.append(text_element.text)
2802+
for run in self._run_elements():
2803+
for child in run:
2804+
if child.tag == f"{_HP}t":
2805+
if child.text:
2806+
texts.append(child.text)
2807+
elif child.tag == f"{_HP}tab" or _is_tab_control_element(child):
2808+
texts.append("\t")
27552809
return "".join(texts)
27562810

27572811
@text.setter
@@ -2767,10 +2821,10 @@ def text(self, value: str) -> None:
27672821
# Identify first run — its charPrIDRef will be kept.
27682822
first_run = self._ensure_run()
27692823

2770-
# Remove <hp:t> from ALL runs.
2824+
# Remove existing text/tab nodes from all runs.
27712825
for run in runs:
27722826
for child in list(run):
2773-
if child.tag == f"{_HP}t":
2827+
if child.tag == f"{_HP}t" or child.tag == f"{_HP}tab" or _is_tab_control_element(child):
27742828
run.remove(child)
27752829

27762830
# Remove non-first runs that are now empty (only had text).
@@ -2781,10 +2835,8 @@ def text(self, value: str) -> None:
27812835
if len(list(run)) == 0:
27822836
self.element.remove(run)
27832837

2784-
# Write the new text into the first run.
2785-
text_element = first_run.makeelement(f"{_HP}t", {})
2786-
text_element.text = _sanitize_text(value)
2787-
first_run.append(text_element)
2838+
# Write the new text into the first run, preserving tabs as <hp:tab/>.
2839+
_append_text_with_tabs(first_run, value)
27882840
_clear_paragraph_layout_cache(self.element)
27892841
self.section.mark_dirty()
27902842

@@ -3655,14 +3707,43 @@ def add_paragraph(
36553707

36563708
run = paragraph.makeelement(f"{_HP}run", run_attrs)
36573709
paragraph.append(run)
3658-
text_element = run.makeelement(f"{_HP}t", {})
3659-
text_element.text = text
3660-
run.append(text_element)
3710+
_append_text_with_tabs(run, text)
36613711

36623712
self._element.append(paragraph)
36633713
self._dirty = True
36643714
return HwpxOxmlParagraph(paragraph, self)
36653715

def insert_paragraphs(
    self,
    index: int,
    paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
) -> list[HwpxOxmlParagraph]:
    """Insert copies of *paragraphs* starting at *index*.

    Each item may be a paragraph wrapper or a raw element. Items are cloned
    with ``_clone_paragraph_element`` before insertion, so the sources are
    never moved or modified. All clones are produced before the section
    element is touched, so a failure while cloning leaves the section
    unchanged instead of partially modified with the dirty flag unset.

    Args:
        index: Position of the first inserted paragraph; must satisfy
            ``0 <= index <= len(self.paragraphs)``.
        paragraphs: Paragraph wrappers and/or elements to copy.

    Returns:
        Wrappers for the inserted copies, in insertion order.

    Raises:
        IndexError: If *index* is outside the allowed range.
    """

    existing = self.paragraphs
    if index < 0 or index > len(existing):
        raise IndexError(f"단락 인덱스 {index}이(가) 범위를 벗어났습니다 (총 {len(existing)}개)")

    # Clone everything up front: if any item cannot be cloned, the exception
    # propagates before the tree has been mutated.
    clones = [
        _clone_paragraph_element(
            item.element if isinstance(item, HwpxOxmlParagraph) else item
        )
        for item in paragraphs
    ]

    # NOTE(review): *index* is applied as a child position of the section
    # element; this equals the paragraph index only if ``self.paragraphs``
    # covers every direct child of the element -- confirm.
    inserted: list[HwpxOxmlParagraph] = []
    for offset, cloned in enumerate(clones):
        self._element.insert(index + offset, cloned)
        inserted.append(HwpxOxmlParagraph(cloned, self))

    # An empty input is a no-op and must not mark the section as modified.
    if inserted:
        self._dirty = True
    return inserted
3737+
def copy_paragraph_range(self, start: int, end: int) -> list[ET.Element]:
    """Return deep-copied paragraph elements for the inclusive range.

    The copies come from ``_clone_paragraph_element``; the section itself is
    not modified.

    Raises:
        IndexError: If either bound is negative, either bound is not below
            the paragraph count, or *start* is greater than *end*.
    """

    paragraphs = self.paragraphs
    total = len(paragraphs)
    if not (0 <= start <= end < total):
        raise IndexError(f"문단 범위 {start}..{end}이(가) 유효하지 않습니다 (총 {total}개)")

    copies: list[ET.Element] = []
    for position in range(start, end + 1):
        copies.append(_clone_paragraph_element(paragraphs[position].element))
    return copies
3746+
36663747
def mark_dirty(self) -> None:
36673748
self._dirty = True
36683749

@@ -4633,6 +4714,42 @@ def remove_paragraph(
46334714
else:
46344715
paragraph.remove()
46354716

def copy_paragraph_range(
    self,
    start: int,
    end: int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> list[ET.Element]:
    """Return deep-copied paragraph elements for an inclusive range.

    The section is chosen in this order: the explicit *section*, then
    ``self._sections[section_index]`` when *section_index* is given, then
    the last section of the document. The actual copying is delegated to
    that section's ``copy_paragraph_range``.

    Raises:
        ValueError: If no section was resolved and the document has none.
    """

    target = section
    if target is None and section_index is not None:
        target = self._sections[section_index]
    if target is not None:
        return target.copy_paragraph_range(start, end)

    # Nothing was selected explicitly: fall back to the last section.
    if not self._sections:
        raise ValueError("document does not contain any sections")
    return self._sections[-1].copy_paragraph_range(start, end)
4734+
def insert_paragraphs(
    self,
    index: int,
    paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> list[HwpxOxmlParagraph]:
    """Insert copied paragraphs into the requested section.

    The section is chosen in this order: the explicit *section*, then
    ``self._sections[section_index]`` when *section_index* is given, then
    the last section of the document. Insertion itself is delegated to that
    section's ``insert_paragraphs``.

    Raises:
        ValueError: If no section was resolved and the document has none.
    """

    target = section
    if target is None and section_index is not None:
        target = self._sections[section_index]
    if target is not None:
        return target.insert_paragraphs(index, paragraphs)

    # Nothing was selected explicitly: fall back to the last section.
    if not self._sections:
        raise ValueError("document does not contain any sections")
    return self._sections[-1].insert_paragraphs(index, paragraphs)
4752+
46364753
# ------------------------------------------------------------------
46374754
# Section management
46384755
# ------------------------------------------------------------------

0 commit comments

Comments
 (0)