Skip to content
This repository was archived by the owner on Apr 9, 2026. It is now read-only.

Commit c2d2ebb

Browse files
committed
remove bytestream support due to earlier PR already adding support
1 parent c2cc99e commit c2d2ebb

3 files changed

Lines changed: 51 additions & 2 deletions

File tree

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Our project welcomes external contributions. If you have an itch, please feel
33
free to scratch it.
44

5-
To contribute code or documentation, please submit a [pull request](https://github.com/DS4SD/docling/pulls).
5+
To contribute code or documentation, please submit a [pull request](https://github.com/DS4SD/docling-haystack/pulls).
66

77
A good way to familiarize yourself with the codebase and contribution process is
88
to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/DS4SD/docling/issues).

docling_haystack/converter.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from docling.chunking import BaseChunk, BaseChunker, HybridChunker
1414
from docling.datamodel.document import DoclingDocument
1515
from docling.document_converter import DocumentConverter
16-
from haystack import Document, component
16+
from haystack import Document, component, default_from_dict, default_to_dict
1717

1818

1919
class ExportType(str, Enum):
@@ -139,3 +139,29 @@ def run(
139139
else:
140140
raise RuntimeError(f"Unexpected export type: {self._export_type}")
141141
return {"documents": documents}
142+
143+
def to_dict(self) -> dict[str, Any]:
    """
    Serialize the component to a dictionary for pipeline persistence.

    The stored ``convert_kwargs`` and ``md_export_kwargs`` are handed to
    ``default_to_dict`` as the component's init parameters.

    Returns:
        dict[str, Any]: A dictionary representation of the component
    """
    # NOTE(review): only these two settings are captured here; any other
    # constructor arguments are not part of the serialized form -- confirm
    # that this is intended.
    init_params = {
        "convert_kwargs": self._convert_kwargs,
        "md_export_kwargs": self._md_export_kwargs,
    }
    return default_to_dict(self, **init_params)
155+
156+
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "DoclingConverter":
    """
    Deserialize the component from a dictionary.

    The reconstruction is delegated entirely to ``default_from_dict``.

    Args:
        data: Dictionary representation of the component

    Returns:
        DoclingConverter: A new instance of the component
    """
    restored = default_from_dict(cls, data)
    return restored

test/test_converter.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,26 @@ def test_convert_markdown(monkeypatch):
8080
with open(EXPECTED_OUT_FILE) as f:
8181
exp_data = json.load(fp=f)
8282
assert exp_data == act_data
83+
84+
85+
def test_serialization_deserialization():
    """Round-trip a DoclingConverter through to_dict() and from_dict()."""
    expected_convert_kwargs = {"optimize_ocr": True}
    expected_placeholder = "[IMAGE]"

    # Pass fresh copies so the expectations above stay independent of
    # whatever the component does with its arguments.
    converter = DoclingConverter(
        convert_kwargs=dict(expected_convert_kwargs),
        md_export_kwargs={"image_placeholder": expected_placeholder},
    )

    # Serialization: both settings must appear under "init_parameters".
    serialized = converter.to_dict()
    assert "init_parameters" in serialized

    init_params = serialized["init_parameters"]
    assert init_params.get("convert_kwargs") == expected_convert_kwargs

    exported_md_kwargs = init_params.get("md_export_kwargs", {})
    assert exported_md_kwargs.get("image_placeholder") == expected_placeholder

    # Deserialization: the rebuilt component must carry the same settings.
    deserialized = DoclingConverter.from_dict(serialized)
    assert deserialized._convert_kwargs == expected_convert_kwargs
    assert (
        deserialized._md_export_kwargs.get("image_placeholder")
        == expected_placeholder
    )

0 commit comments

Comments
 (0)