Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion src/hwpx/oxml/body.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
}

InlineMark = Union[GenericElement, "TrackChangeMark"]
RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan"]
RunChild = Union[GenericElement, "Control", "Table", "InlineObject", "TextSpan", "Tab"]
ParagraphChild = Union["Run", GenericElement]


Expand Down Expand Up @@ -105,6 +105,12 @@ class InlineObject:
children: List[GenericElement] = field(default_factory=list)


@dataclass(slots=True)
class Tab:
tag: str
attributes: Dict[str, str] = field(default_factory=dict)


@dataclass(slots=True)
class Table:
tag: str
Expand All @@ -120,6 +126,7 @@ class Run:
controls: List[Control] = field(default_factory=list)
tables: List[Table] = field(default_factory=list)
inline_objects: List[InlineObject] = field(default_factory=list)
tabs: List[Tab] = field(default_factory=list)
text_spans: List[TextSpan] = field(default_factory=list)
other_children: List[GenericElement] = field(default_factory=list)
attributes: Dict[str, str] = field(default_factory=dict)
Expand Down Expand Up @@ -227,6 +234,10 @@ def parse_table_element(node: etree._Element) -> Table:
)


def parse_tab_element(node: etree._Element) -> Tab:
    """Build a :class:`Tab` from *node*.

    The element's tag is stored as-is and its attributes are copied into a
    new dictionary, so the returned object does not share state with *node*.
    """
    copied_attributes = dict(node.attrib.items())
    return Tab(tag=node.tag, attributes=copied_attributes)


def parse_run_element(node: etree._Element) -> Run:
attributes = {key: value for key, value in node.attrib.items()}
char_pr_id_ref = parse_int(attributes.pop("charPrIDRef", None))
Expand All @@ -247,6 +258,10 @@ def parse_run_element(node: etree._Element) -> Run:
span = parse_text_span(child)
run.text_spans.append(span)
run.content.append(span)
elif name == "tab":
tab = parse_tab_element(child)
run.tabs.append(tab)
run.content.append(tab)
elif name == "tbl":
table = parse_table_element(child)
run.tables.append(table)
Expand Down Expand Up @@ -342,6 +357,10 @@ def _text_span_to_xml(span: TextSpan) -> etree._Element:
return node


def _tab_to_xml(tab: Tab) -> etree._Element:
    """Serialize *tab* into a new XML element.

    The element tag comes from ``_qualified_tag(tab.tag, "tab")`` and the
    attributes are passed as a copy, so the created element does not alias
    ``tab.attributes``.
    """
    element_tag = _qualified_tag(tab.tag, "tab")
    attribute_copy = dict(tab.attributes)
    return etree.Element(element_tag, attribute_copy)


def _control_to_xml(control: Control) -> etree._Element:
attrs = dict(control.attributes)
if control.control_type is not None:
Expand Down Expand Up @@ -376,6 +395,8 @@ def serialize_run(run: Run) -> etree._Element:
node.append(_text_span_to_xml(child))
elif isinstance(child, Control):
node.append(_control_to_xml(child))
elif isinstance(child, Tab):
node.append(_tab_to_xml(child))
elif isinstance(child, Table):
node.append(_table_to_xml(child))
elif isinstance(child, InlineObject):
Expand Down
152 changes: 134 additions & 18 deletions src/hwpx/oxml/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,45 @@ def _memo_id() -> str:
return str(uuid4().int & 0xFFFFFFFF)


def _refresh_copied_paragraph_subtree_ids(paragraph: ET.Element) -> None:
    """Assign fresh local identifiers inside a copied paragraph subtree.

    This is intentionally narrow. *paragraph* and all of its descendants are
    visited and modified in place:

    * every ``hp:p`` element (the copied paragraph and any nested paragraphs,
      for example inside table cells) gets its ``id`` set to a new value from
      ``_paragraph_id()``; nothing else is changed on those elements;
    * table/shape/header/footer style elements listed below that already
      carry an ``id`` attribute get a new value from ``_object_id()``;
    * any non-paragraph element that carries ``instId`` gets a new value from
      ``_object_id()``.

    Reference-style attributes such as ``borderFillIDRef`` are left untouched.
    """

    # Both lookups are loop-invariant, so build them once per call instead of
    # once per visited node.
    paragraph_tag = f"{_HP}p"
    object_tags = frozenset(
        f"{_HP}{local_name}"
        for local_name in (
            "tbl",
            "pic",
            "container",
            "ole",
            "equation",
            "textart",
            "video",
            "header",
            "footer",
        )
    )

    for node in paragraph.iter():
        if node.tag == paragraph_tag:
            node.set("id", _paragraph_id())
            # Paragraph elements only have their ``id`` refreshed.
            continue

        # Only replace an existing ``id``; never add one that was absent.
        if "id" in node.attrib and node.tag in object_tags:
            node.set("id", _object_id())

        if "instId" in node.attrib:
            node.set("instId", _object_id())


def _clone_paragraph_element(paragraph: ET.Element) -> ET.Element:
    """Deep-copy *paragraph* and give the copy fresh local identifiers.

    The source element is not modified; identifier refreshing is applied to
    the copy only, via ``_refresh_copied_paragraph_subtree_ids``.
    """

    duplicate = deepcopy(paragraph)
    _refresh_copied_paragraph_subtree_ids(duplicate)
    return duplicate


def _create_paragraph_element(
text: str,
*,
Expand Down Expand Up @@ -154,9 +193,7 @@ def _create_paragraph_element(

run = paragraph.makeelement(f"{_HP}run", run_attrs)
paragraph.append(run)
text_element = run.makeelement(f"{_HP}t", {})
run.append(text_element)
text_element.text = text
_append_text_with_tabs(run, text)
return paragraph


Expand Down Expand Up @@ -192,6 +229,20 @@ def _append_child(
return child


def _is_tab_control_element(node: ET.Element) -> bool:
    """Return ``True`` when *node* is an ``hp:ctrl`` element whose ``id`` is "tab".

    The ``id`` comparison is case-insensitive; a missing ``id`` is treated as
    an empty string.
    """
    if node.tag != f"{_HP}ctrl":
        return False
    control_id = node.get("id") or ""
    return control_id.lower() == "tab"


def _append_text_with_tabs(run: ET.Element, value: str) -> None:
    """Append *value* to *run* as ``hp:t`` elements separated by ``hp:tab``.

    *value* is split on tab characters. Each piece (including empty ones) is
    passed through ``_sanitize_text`` and appended as its own ``hp:t`` child,
    and an empty ``hp:tab`` child is appended between consecutive pieces.
    """
    pieces = value.split("\t")
    last_position = len(pieces) - 1
    for position, piece in enumerate(pieces):
        text_node = run.makeelement(f"{_HP}t", {})
        text_node.text = _sanitize_text(piece)
        run.append(text_node)
        # A tab separates pieces, so none follows the final piece.
        if position != last_position:
            tab_node = run.makeelement(f"{_HP}tab", {})
            run.append(tab_node)


def _normalize_length(value: str | None) -> str:
if value is None:
return ""
Expand Down Expand Up @@ -2097,9 +2148,7 @@ def add_paragraph(
run_attrs["charPrIDRef"] = "0"

run = _append_child(paragraph, f"{_HP}run", run_attrs)
t = run.makeelement(f"{_HP}t", {})
t.text = _sanitize_text(text)
run.append(t)
_append_text_with_tabs(run, text)

self.table.mark_dirty()
section = self.table.paragraph.section
Expand Down Expand Up @@ -2762,9 +2811,13 @@ def runs(self) -> list[HwpxOxmlRun]:
def text(self) -> str:
"""Return the concatenated textual content of this paragraph."""
texts: list[str] = []
for text_element in self.element.findall(f".//{_HP}t"):
if text_element.text:
texts.append(text_element.text)
for run in self._run_elements():
for child in run:
if child.tag == f"{_HP}t":
if child.text:
texts.append(child.text)
elif child.tag == f"{_HP}tab" or _is_tab_control_element(child):
texts.append("\t")
return "".join(texts)

@text.setter
Expand All @@ -2780,10 +2833,10 @@ def text(self, value: str) -> None:
# Identify first run — its charPrIDRef will be kept.
first_run = self._ensure_run()

# Remove <hp:t> from ALL runs.
# Remove existing text/tab nodes from all runs.
for run in runs:
for child in list(run):
if child.tag == f"{_HP}t":
if child.tag == f"{_HP}t" or child.tag == f"{_HP}tab" or _is_tab_control_element(child):
run.remove(child)

# Remove non-first runs that are now empty (only had text).
Expand All @@ -2794,10 +2847,8 @@ def text(self, value: str) -> None:
if len(list(run)) == 0:
self.element.remove(run)

# Write the new text into the first run.
text_element = first_run.makeelement(f"{_HP}t", {})
text_element.text = _sanitize_text(value)
first_run.append(text_element)
# Write the new text into the first run, preserving tabs as <hp:tab/>.
_append_text_with_tabs(first_run, value)
_clear_paragraph_layout_cache(self.element)
self.section.mark_dirty()

Expand Down Expand Up @@ -3668,14 +3719,43 @@ def add_paragraph(

run = paragraph.makeelement(f"{_HP}run", run_attrs)
paragraph.append(run)
text_element = run.makeelement(f"{_HP}t", {})
text_element.text = text
run.append(text_element)
_append_text_with_tabs(run, text)

self._element.append(paragraph)
self._dirty = True
return HwpxOxmlParagraph(paragraph, self)

def insert_paragraphs(
    self,
    index: int,
    paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
) -> list[HwpxOxmlParagraph]:
    """Insert copies of *paragraphs* starting at *index*.

    Each item may be a paragraph wrapper or a raw element. Every item is
    cloned with ``_clone_paragraph_element`` before insertion, so the
    sources are left where they are. The section is flagged dirty only when
    at least one paragraph was inserted.

    Returns:
        Wrappers for the newly inserted copies, in insertion order.

    Raises:
        IndexError: if *index* is negative or greater than the current
            number of paragraphs.
    """

    existing = self.paragraphs
    if not 0 <= index <= len(existing):
        raise IndexError(f"단락 인덱스 {index}이(가) 범위를 벗어났습니다 (총 {len(existing)}개)")

    wrappers: list[HwpxOxmlParagraph] = []
    for position, item in enumerate(paragraphs, start=index):
        if isinstance(item, HwpxOxmlParagraph):
            source = item.element
        else:
            source = item
        copy_element = _clone_paragraph_element(source)
        self._element.insert(position, copy_element)
        wrappers.append(HwpxOxmlParagraph(copy_element, self))

    if wrappers:
        self._dirty = True
    return wrappers

def copy_paragraph_range(self, start: int, end: int) -> list[ET.Element]:
    """Return cloned elements for paragraphs *start* through *end*, inclusive.

    The section itself is not modified; each returned element is produced by
    ``_clone_paragraph_element``.

    Raises:
        IndexError: if either bound is negative, either bound is not a valid
            paragraph index, or *start* is greater than *end*.
    """

    paragraphs = self.paragraphs
    total = len(paragraphs)
    if not (0 <= start <= end < total):
        raise IndexError(f"문단 범위 {start}..{end}이(가) 유효하지 않습니다 (총 {total}개)")

    copies: list[ET.Element] = []
    for position in range(start, end + 1):
        copies.append(_clone_paragraph_element(paragraphs[position].element))
    return copies

def mark_dirty(self) -> None:
self._dirty = True

Expand Down Expand Up @@ -4649,6 +4729,42 @@ def remove_paragraph(
else:
paragraph.remove()

def copy_paragraph_range(
    self,
    start: int,
    end: int,
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> list[ET.Element]:
    """Return deep-copied paragraph elements for an inclusive range.

    The work is delegated to the chosen section's ``copy_paragraph_range``.
    Section selection: an explicit *section* wins; otherwise
    *section_index* picks from ``self._sections``; otherwise the last
    section is used.

    Raises:
        ValueError: if no section was selected and the document has none.
    """

    if section is not None:
        return section.copy_paragraph_range(start, end)

    target = None if section_index is None else self._sections[section_index]
    if target is None:
        if not self._sections:
            raise ValueError("document does not contain any sections")
        target = self._sections[-1]
    return target.copy_paragraph_range(start, end)

def insert_paragraphs(
    self,
    index: int,
    paragraphs: Sequence[HwpxOxmlParagraph | ET.Element],
    *,
    section: HwpxOxmlSection | None = None,
    section_index: int | None = None,
) -> list[HwpxOxmlParagraph]:
    """Insert copied paragraphs into the requested section.

    The work is delegated to the chosen section's ``insert_paragraphs``.
    Section selection: an explicit *section* wins; otherwise
    *section_index* picks from ``self._sections``; otherwise the last
    section is used.

    Raises:
        ValueError: if no section was selected and the document has none.
    """

    if section is not None:
        return section.insert_paragraphs(index, paragraphs)

    target = None if section_index is None else self._sections[section_index]
    if target is None:
        if not self._sections:
            raise ValueError("document does not contain any sections")
        target = self._sections[-1]
    return target.insert_paragraphs(index, paragraphs)

# ------------------------------------------------------------------
# Section management
# ------------------------------------------------------------------
Expand Down
Loading
Loading