Skip to content

Commit b41ebd9

Browse files
Merge pull request #5 from openize-com/muhammadumar-patch
Publishing the hotfix 25.6.1
2 parents f2a3819 + 9ebb2ed commit b41ebd9

File tree

10 files changed

+119
-30
lines changed

10 files changed

+119
-30
lines changed

packages/markitdown/README.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,47 @@ $env:CLAUDE_API_KEY = "your-claude-key"
100100
$env:GEMINI_API_KEY = "your-gemini-key"
101101
$env:MISTRAL_API_KEY = "your-mistral-key"
102102
```
103+
## Running Tests
104+
105+
To run unit tests for **Openize.MarkItDown**, follow these steps:
106+
107+
### 1. Navigate to the package directory
108+
109+
From the root of the repository, change into the package directory:
110+
111+
```bash
112+
cd packages/markitdown
113+
```
114+
115+
### 2. Install test dependencies
116+
117+
Make sure `pytest` and `pytest-mock` are installed:
118+
119+
```bash
120+
pip install pytest pytest-mock
121+
```
122+
123+
### 3. Run tests using `pytest`
124+
125+
To run all tests:
126+
127+
```bash
128+
pytest
129+
```
130+
131+
To run a specific test file:
132+
133+
```bash
134+
pytest tests/test.py
135+
```
136+
137+
### Tip
138+
139+
Use `-v` for more detailed test output:
140+
141+
```bash
142+
pytest -v
143+
```
103144

104145
## License
105146

packages/markitdown/pytest.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
[pytest]
2-
pythonpath = packages/markitdown/src
2+
pythonpath = src

packages/markitdown/setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
[metadata]
33
name = openize-markitdown-python
4-
version = 25.6.0
4+
version = 25.6.1
55

66
author = Openize
77
author_email = packages@openize.com

packages/markitdown/src/openize/markitdown/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from .processor import DocumentProcessor
99
from .converters import WordConverter, PDFConverter, ExcelConverter, PowerPointConverter
1010
from .factory import ConverterFactory
11-
from .llm_strategy import SaveLocally, InsertIntoLLM
11+
from .llm_strategy import SaveLocally, LLMFactory
1212
from .license_manager import LicenseManager
1313

1414
__all__ = [
@@ -19,6 +19,6 @@
1919
'PowerPointConverter',
2020
'ConverterFactory',
2121
'SaveLocally',
22-
'InsertIntoLLM',
22+
'LLMFactory',
2323
'LicenseManager',
2424
]

packages/markitdown/src/openize/markitdown/llm_strategy.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import os
3+
from anthropic import Anthropic
34
from abc import ABC, abstractmethod
45
import openai
56

@@ -55,32 +56,33 @@ def process(self, md_file):
5556
class ClaudeClient(LLMStrategy):
5657
def __init__(self):
5758
self.api_key = os.getenv("CLAUDE_API_KEY")
58-
self.model = os.getenv("CLAUDE_MODEL", "claude-v1")
59+
self.model = os.getenv("CLAUDE_MODEL", "claude-3-opus-20240229") # update to your model
5960

6061
if not self.api_key:
6162
raise ValueError("Missing Claude API key. Please set it in the environment.")
6263

63-
# Initialize Claude client here (replace with actual SDK code)
64-
# self.client = ClaudeAPIClient(api_key=self.api_key)
64+
self.client = Anthropic(api_key=self.api_key)
6565

6666
def process(self, md_file):
6767
try:
6868
with open(md_file, "r", encoding="utf-8") as file:
6969
content = file.read()
7070

71-
# Replace with actual Claude API call
72-
# response = self.client.complete(prompt=content, model=self.model)
73-
74-
# Dummy placeholder response
75-
response_text = f"Simulated Claude response for {md_file}"
71+
response = self.client.messages.create(
72+
model=self.model,
73+
max_tokens=1024,
74+
messages=[
75+
{"role": "user", "content": content}
76+
]
77+
)
7678

77-
logging.info(f"Claude Response for {md_file}: {response_text}")
79+
message = response.content[0].text if response.content else ""
80+
logging.info(f"Claude Response for {md_file}: {message}")
7881

7982
except FileNotFoundError:
8083
logging.error(f"Markdown file not found: {md_file}")
8184
except Exception as e:
8285
logging.exception(f"Unexpected error processing {md_file}: {e}")
83-
8486
class GeminiClient(LLMStrategy):
8587
def __init__(self):
8688
self.api_key = os.getenv("GEMINI_API_KEY")
Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import logging
22
from pathlib import Path
33
from factory import ConverterFactory
4-
from llm_strategy import SaveLocally, InsertIntoLLM
4+
from llm_strategy import SaveLocally, LLMFactory
55

66

77
class DocumentProcessor:
8-
def __init__(self, output_dir=Path("converted_md")):
8+
def __init__(self, output_dir=Path("converted_md"), llm_client_name="openai"):
99
self.output_dir = Path(output_dir)
1010
self.output_dir.mkdir(parents=True, exist_ok=True)
11+
self.llm_client_name = llm_client_name
1112

1213
def process_document(self, file_path, insert_into_llm=False):
1314
file_path = Path(file_path)
@@ -20,6 +21,12 @@ def process_document(self, file_path, insert_into_llm=False):
2021

2122
md_file = converter.convert_to_md(file_path, self.output_dir)
2223
if md_file:
23-
strategy = InsertIntoLLM() if insert_into_llm else SaveLocally()
24-
strategy.process(md_file)
25-
24+
try:
25+
strategy = (
26+
LLMFactory.get_llm(self.llm_client_name)
27+
if insert_into_llm
28+
else SaveLocally()
29+
)
30+
strategy.process(md_file)
31+
except ValueError as e:
32+
logging.error(f"Failed to initialize strategy: {e}")
13.1 KB
Binary file not shown.
18.4 KB
Loading
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Sample Markdown File
2+
3+
This is a test.

packages/markitdown/tests/test.py

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
from pathlib import Path
33
import os
44

5-
from ..src.openize.markitdown.converters import WordConverter, PDFConverter, ExcelConverter, PowerPointConverter
6-
from ..src.openize.markitdown.factory import ConverterFactory
7-
from ..src.openize.markitdown.llm_strategy import SaveLocally, LLMFactory, OpenAIClient, ClaudeClient
8-
from ..src.openize.markitdown.processor import DocumentProcessor
5+
from openize.markitdown.converters import WordConverter, PDFConverter, ExcelConverter, PowerPointConverter
6+
from openize.markitdown.factory import ConverterFactory
7+
from openize.markitdown.llm_strategy import SaveLocally, LLMFactory, OpenAIClient, ClaudeClient,MistralClient, GeminiClient
8+
from openize.markitdown.processor import DocumentProcessor
99

1010

1111
@pytest.fixture
@@ -60,14 +60,14 @@ def test_insert_into_llm_openai(mocker, sample_md_file):
6060
mocker.patch("openai.ChatCompletion.create", return_value={
6161
"choices": [{"message": {"content": "Mocked OpenAI Response"}}]
6262
})
63-
strategy = OpenAIClient(provider="openai")
63+
strategy = OpenAIClient()
6464
strategy.process(sample_md_file)
6565

6666
def test_insert_into_llm_claude(mocker, sample_md_file):
6767
mock_anthropic = mocker.patch("openize.markitdown.llm_strategy.Anthropic")
6868
mock_client = mock_anthropic.return_value
6969
mock_client.messages.create.return_value.content = "Mocked Claude Response"
70-
strategy = ClaudeClient(provider="claude")
70+
strategy = ClaudeClient()
7171
strategy.process(sample_md_file)
7272

7373

@@ -76,7 +76,7 @@ def test_insert_into_llm_claude(mocker, sample_md_file):
7676
def test_document_processor_local_conversion(mocker, sample_output_dir):
7777
mock_converter = mocker.patch("openize.markitdown.factory.ConverterFactory.get_converter", return_value=WordConverter())
7878
processor = DocumentProcessor(output_dir=sample_output_dir)
79-
processor.process_document("sample.docx", insert_into_llm=False)
79+
processor.process_document("test_input/sample.docx", insert_into_llm=False)
8080
output_file = sample_output_dir / "sample.md"
8181
assert output_file.exists()
8282

@@ -85,8 +85,8 @@ def test_document_processor_with_llm_openai(mocker, sample_output_dir):
8585
mocker.patch("openai.ChatCompletion.create", return_value={
8686
"choices": [{"message": {"content": "LLM Output"}}]
8787
})
88-
processor = DocumentProcessor(output_dir=sample_output_dir)
89-
processor.process_document("sample.docx", insert_into_llm=True, llm_provider="openai")
88+
processor = DocumentProcessor(output_dir=sample_output_dir, llm_client_name="openai")
89+
processor.process_document("test_input/sample.docx", insert_into_llm=True)
9090
output_file = sample_output_dir / "sample.md"
9191
assert output_file.exists()
9292

@@ -95,8 +95,44 @@ def test_document_processor_with_llm_claude(mocker, sample_output_dir):
9595
mock_anthropic = mocker.patch("openize.markitdown.llm_strategy.Anthropic")
9696
mock_client = mock_anthropic.return_value
9797
mock_client.messages.create.return_value.content = "LLM Claude Output"
98-
processor = DocumentProcessor(output_dir=sample_output_dir)
99-
processor.process_document("sample.docx", insert_into_llm=True, llm_provider="claude")
98+
processor = DocumentProcessor(output_dir=sample_output_dir, llm_client_name="claude")
99+
processor.process_document("test_input/sample.docx", insert_into_llm=True)
100100
output_file = sample_output_dir / "sample.md"
101101
assert output_file.exists()
102102

103+
def test_insert_into_llm_gemini(mocker, sample_md_file):
104+
mock_response = mocker.Mock()
105+
mock_response.raise_for_status.return_value = None
106+
mock_response.json.return_value = {
107+
"candidates": [
108+
{"content": {"parts": [{"text": "Mocked Gemini Response"}]}}
109+
]
110+
}
111+
112+
mocker.patch("requests.post", return_value=mock_response)
113+
mocker.patch.dict(os.environ, {
114+
"GEMINI_API_KEY": "dummy_key",
115+
"GEMINI_MODEL": "gemini-pro"
116+
})
117+
118+
client = GeminiClient()
119+
client.process(sample_md_file)
120+
def test_insert_into_llm_mistral(mocker, sample_md_file):
121+
mock_response = mocker.Mock()
122+
mock_response.raise_for_status.return_value = None
123+
mock_response.json.return_value = {
124+
"choices": [
125+
{"message": {"content": "Mocked Mistral Response"}}
126+
]
127+
}
128+
129+
mocker.patch("requests.post", return_value=mock_response)
130+
mocker.patch.dict(os.environ, {
131+
"MISTRAL_API_KEY": "dummy_key",
132+
"MISTRAL_MODEL": "mistral-medium"
133+
})
134+
135+
client = MistralClient()
136+
client.process(sample_md_file)
137+
138+

0 commit comments

Comments
 (0)