diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index e3b028e..f4487b6 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -856,6 +856,7 @@ class MarkItDown: requests_session: Optional[requests.Session] = None, mlm_client: Optional[Any] = None, mlm_model: Optional[Any] = None, + style_map: Optional[str] = None, ): if requests_session is None: self._requests_session = requests.Session() @@ -864,6 +865,7 @@ class MarkItDown: self._mlm_client = mlm_client self._mlm_model = mlm_model + self._style_map = style_map self._page_converters: List[DocumentConverter] = [] @@ -1038,6 +1040,9 @@ class MarkItDown: if "mlm_model" not in _kwargs and self._mlm_model is not None: _kwargs["mlm_model"] = self._mlm_model + if "style_map" not in _kwargs and self._style_map is not None: + _kwargs["style_map"] = self._style_map + # If we hit an error log it and keep trying try: res = converter.convert(local_path, **_kwargs) diff --git a/tests/test_markitdown.py b/tests/test_markitdown.py index e069813..1a75ec7 100644 --- a/tests/test_markitdown.py +++ b/tests/test_markitdown.py @@ -150,6 +150,15 @@ def test_markitdown_local() -> None: text_content = result.text_content.replace("\\", "") assert test_string in text_content + # Test DOCX processing, with comments and setting style_map on init + markitdown_with_style_map = MarkItDown(style_map="comment-reference => ") + result = markitdown_with_style_map.convert( + os.path.join(TEST_FILES_DIR, "test_with_comment.docx") + ) + for test_string in DOCX_COMMENT_TEST_STRINGS: + text_content = result.text_content.replace("\\", "") + assert test_string in text_content + # Test PPTX processing result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.pptx")) for test_string in PPTX_TEST_STRINGS: