From 6743d4a165d47ae5013db3bbdfaa5218080c2d24 Mon Sep 17 00:00:00 2001 From: kimtth Date: Wed, 18 Mar 2026 20:29:42 +0900 Subject: [PATCH] fix (docs): correct typos --- .../src/markitdown_sample_plugin/_plugin.py | 4 ++-- .../markitdown-sample-plugin/tests/test_sample_plugin.py | 2 +- packages/markitdown/src/markitdown/_base_converter.py | 6 +++--- packages/markitdown/src/markitdown/_exceptions.py | 2 +- packages/markitdown/src/markitdown/_markitdown.py | 4 ++-- .../src/markitdown/converters/_doc_intel_converter.py | 2 +- .../markitdown/src/markitdown/converters/_docx_converter.py | 2 +- .../markitdown/src/markitdown/converters/_epub_converter.py | 2 +- .../src/markitdown/converters/_outlook_msg_converter.py | 2 +- .../src/markitdown/converters/_transcribe_audio.py | 2 +- packages/markitdown/tests/test_cli_vectors.py | 4 ++-- packages/markitdown/tests/test_module_misc.py | 4 ++-- 12 files changed, 18 insertions(+), 18 deletions(-) diff --git a/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/_plugin.py b/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/_plugin.py index 1ca00ccc5..f47bba6af 100644 --- a/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/_plugin.py +++ b/packages/markitdown-sample-plugin/src/markitdown_sample_plugin/_plugin.py @@ -33,7 +33,7 @@ def register_converters(markitdown: MarkItDown, **kwargs): class RtfConverter(DocumentConverter): """ - Converts an RTF file to in the simplest possible way. + Converts an RTF file in the simplest possible way. 
""" def accepts( @@ -60,7 +60,7 @@ def convert( stream_info: StreamInfo, **kwargs: Any, ) -> DocumentConverterResult: - # Read the file stream into an str using hte provided charset encoding, or using the system default + # Read the file stream into a str using the provided charset encoding, or using the system default encoding = stream_info.charset or locale.getpreferredencoding() stream_data = file_stream.read().decode(encoding) diff --git a/packages/markitdown-sample-plugin/tests/test_sample_plugin.py b/packages/markitdown-sample-plugin/tests/test_sample_plugin.py index 696824742..0ffaa663f 100644 --- a/packages/markitdown-sample-plugin/tests/test_sample_plugin.py +++ b/packages/markitdown-sample-plugin/tests/test_sample_plugin.py @@ -13,7 +13,7 @@ def test_converter() -> None: - """Tests the RTF converter dirctly.""" + """Tests the RTF converter directly.""" with open(os.path.join(TEST_FILES_DIR, "test.rtf"), "rb") as file_stream: converter = RtfConverter() result = converter.convert( diff --git a/packages/markitdown/src/markitdown/_base_converter.py b/packages/markitdown/src/markitdown/_base_converter.py index fa2b11145..50c88987b 100644 --- a/packages/markitdown/src/markitdown/_base_converter.py +++ b/packages/markitdown/src/markitdown/_base_converter.py @@ -51,7 +51,7 @@ def accepts( """ Return a quick determination on if the converter should attempt converting the document. This is primarily based `stream_info` (typically, `stream_info.mimetype`, `stream_info.extension`). - In cases where the data is retrieved via HTTP, the `steam_info.url` might also be referenced to + In cases where the data is retrieved via HTTP, the `stream_info.url` might also be referenced to make a determination (e.g., special converters for Wikipedia, YouTube etc). 
Finally, it is conceivable that the `stream_info.filename` might be used to in cases where the filename is well-known (e.g., `Dockerfile`, `Makefile`, etc) @@ -71,7 +71,7 @@ def accepts( Parameters: - file_stream: The file-like object to convert. Must support seek(), tell(), and read() methods. - - stream_info: The StreamInfo object containing metadata about the file (mimetype, extension, charset, set) + - stream_info: The StreamInfo object containing metadata about the file (mimetype, extension, charset, etc.) - kwargs: Additional keyword arguments for the converter. Returns: @@ -92,7 +92,7 @@ def convert( Parameters: - file_stream: The file-like object to convert. Must support seek(), tell(), and read() methods. - - stream_info: The StreamInfo object containing metadata about the file (mimetype, extension, charset, set) + - stream_info: The StreamInfo object containing metadata about the file (mimetype, extension, charset, etc.) - kwargs: Additional keyword arguments for the converter. Returns: diff --git a/packages/markitdown/src/markitdown/_exceptions.py b/packages/markitdown/src/markitdown/_exceptions.py index 2f87ac8b2..5f1d0cabc 100644 --- a/packages/markitdown/src/markitdown/_exceptions.py +++ b/packages/markitdown/src/markitdown/_exceptions.py @@ -41,7 +41,7 @@ class UnsupportedFormatException(MarkItDownException): class FailedConversionAttempt(object): """ - Represents an a single attempt to convert a file. + Represents a single attempt to convert a file. 
""" def __init__(self, converter: Any, exc_info: Optional[tuple] = None): diff --git a/packages/markitdown/src/markitdown/_markitdown.py b/packages/markitdown/src/markitdown/_markitdown.py index f342a614b..ad16c5054 100644 --- a/packages/markitdown/src/markitdown/_markitdown.py +++ b/packages/markitdown/src/markitdown/_markitdown.py @@ -580,7 +580,7 @@ def _convert( # Add the list of converters for nested processing _kwargs["_parent_converters"] = self._converters - # Add legaxy kwargs + # Add legacy kwargs if stream_info is not None: if stream_info.extension is not None: _kwargs["file_extension"] = stream_info.extension @@ -631,7 +631,7 @@ def _convert( ) def register_page_converter(self, converter: DocumentConverter) -> None: - """DEPRECATED: User register_converter instead.""" + """DEPRECATED: Use register_converter instead.""" warn( "register_page_converter is deprecated. Use register_converter instead.", DeprecationWarning, diff --git a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py index fd843f231..ef60b2226 100644 --- a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py +++ b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py @@ -207,7 +207,7 @@ def accepts( def _analysis_features(self, stream_info: StreamInfo) -> List[str]: """ Helper needed to determine which analysis features to use. 
- Certain document analysis features are not availiable for + Certain document analysis features are not available for office filetypes (.xlsx, .pptx, .html, .docx) """ mimetype = (stream_info.mimetype or "").lower() diff --git a/packages/markitdown/src/markitdown/converters/_docx_converter.py b/packages/markitdown/src/markitdown/converters/_docx_converter.py index 3975107b1..0f92b9e6a 100644 --- a/packages/markitdown/src/markitdown/converters/_docx_converter.py +++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py @@ -30,7 +30,7 @@ class DocxConverter(HtmlConverter): """ - Converts DOCX files to Markdown. Style information (e.g.m headings) and tables are preserved where possible. + Converts DOCX files to Markdown. Style information (e.g., headings) and tables are preserved where possible. """ def __init__(self): diff --git a/packages/markitdown/src/markitdown/converters/_epub_converter.py b/packages/markitdown/src/markitdown/converters/_epub_converter.py index 3be65b016..2fe93ef15 100644 --- a/packages/markitdown/src/markitdown/converters/_epub_converter.py +++ b/packages/markitdown/src/markitdown/converters/_epub_converter.py @@ -25,7 +25,7 @@ class EpubConverter(HtmlConverter): """ - Converts EPUB files to Markdown. Style information (e.g.m headings) and tables are preserved where possible. + Converts EPUB files to Markdown. Style information (e.g., headings) and tables are preserved where possible. 
""" def __init__(self): diff --git a/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py b/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py index d216beaea..7717f62d8 100644 --- a/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py +++ b/packages/markitdown/src/markitdown/converters/_outlook_msg_converter.py @@ -54,7 +54,7 @@ def accepts( finally: file_stream.seek(cur_pos) - # Brue force, check if it's an Outlook file + # Brute force, check if it's an Outlook file try: if olefile is not None: msg = olefile.OleFileIO(file_stream) diff --git a/packages/markitdown/src/markitdown/converters/_transcribe_audio.py b/packages/markitdown/src/markitdown/converters/_transcribe_audio.py index d558e4629..553e260f4 100644 --- a/packages/markitdown/src/markitdown/converters/_transcribe_audio.py +++ b/packages/markitdown/src/markitdown/converters/_transcribe_audio.py @@ -24,7 +24,7 @@ def transcribe_audio(file_stream: BinaryIO, *, audio_format: str = "wav") -> str # Check for installed dependencies if _dependency_exc_info is not None: raise MissingDependencyException( - "Speech transcription requires installing MarkItdown with the [audio-transcription] optional dependencies. E.g., `pip install markitdown[audio-transcription]` or `pip install markitdown[all]`" + "Speech transcription requires installing MarkItDown with the [audio-transcription] optional dependencies. 
E.g., `pip install markitdown[audio-transcription]` or `pip install markitdown[all]`" ) from _dependency_exc_info[ 1 ].with_traceback( # type: ignore[union-attr] diff --git a/packages/markitdown/tests/test_cli_vectors.py b/packages/markitdown/tests/test_cli_vectors.py index 5add530b1..5346304a0 100644 --- a/packages/markitdown/tests/test_cli_vectors.py +++ b/packages/markitdown/tests/test_cli_vectors.py @@ -27,7 +27,7 @@ TEST_FILES_URL = "https://raw.githubusercontent.com/microsoft/markitdown/refs/heads/main/packages/markitdown/tests/test_files" -# Prepare CLI test vectors (remove vectors that require mockig the url) +# Prepare CLI test vectors (remove vectors that require mocking the url) CLI_TEST_VECTORS: List[FileTestVector] = [] for test_vector in GENERAL_TEST_VECTORS: if test_vector.url is not None: @@ -96,7 +96,7 @@ def test_output_to_file(shared_tmp_dir, test_vector) -> None: @pytest.mark.parametrize("test_vector", CLI_TEST_VECTORS) def test_input_from_stdin_without_hints(shared_tmp_dir, test_vector) -> None: - """Test that the CLI readds from stdin correctly.""" + """Test that the CLI reads from stdin correctly.""" test_input = b"" with open(os.path.join(TEST_FILES_DIR, test_vector.filename), "rb") as stream: diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py index 8e3acc23d..75aa28b5e 100644 --- a/packages/markitdown/tests/test_module_misc.py +++ b/packages/markitdown/tests/test_module_misc.py @@ -126,7 +126,7 @@ def test_stream_info_operations() -> None: **{keyword: f"{keyword}.2"} ) - # Make sure the targted attribute is updated + # Make sure the targeted attribute is updated assert getattr(updated_stream_info, keyword) == f"{keyword}.2" # Make sure the other attributes are unchanged @@ -143,7 +143,7 @@ def test_stream_info_operations() -> None: StreamInfo(**{keyword: f"{keyword}.2"}) ) - # Make sure the targted attribute is updated + # Make sure the targeted attribute is updated assert 
getattr(updated_stream_info, keyword) == f"{keyword}.2" # Make sure the other attributes are unchanged