diff --git a/packages/markitdown/src/markitdown/_markitdown.py b/packages/markitdown/src/markitdown/_markitdown.py index 9072951..ee588cf 100644 --- a/packages/markitdown/src/markitdown/_markitdown.py +++ b/packages/markitdown/src/markitdown/_markitdown.py @@ -230,7 +230,7 @@ class MarkItDown: ext = kwargs.get("file_extension") extensions = [ext] if ext is not None else [] - # TODO: Curently, there are some ongoing issues with puremagic's magic_stream function (incorrect guesses, unsupported file types, etc.) + # TODO: Currently, there are some ongoing issues with passing direct file objects to puremagic (incorrect guesses, unsupported file type errors, etc.) # Only use puremagic as a last resort if no extensions were provided if extensions == []: for g in self._guess_ext_magic(source=file_object): diff --git a/packages/markitdown/tests/test_markitdown.py b/packages/markitdown/tests/test_markitdown.py index adb9495..d04623b 100644 --- a/packages/markitdown/tests/test_markitdown.py +++ b/packages/markitdown/tests/test_markitdown.py @@ -335,11 +335,23 @@ def test_markitdown_local_objects() -> None: text_content = result.text_content.replace("\\", "") validate_strings(result, SERP_TEST_STRINGS, SERP_TEST_EXCLUDES) + # Test RSS processing + with open(os.path.join(TEST_FILES_DIR, "test_rss.xml"), "rb") as f: + result = markitdown.convert(f, file_extension=".xml") + text_content = result.text_content.replace("\\", "") + for test_string in RSS_TEST_STRINGS: + assert test_string in text_content + # Test MSG (Outlook email) processing with open(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"), "rb") as f: result = markitdown.convert(f, file_extension=".msg") validate_strings(result, MSG_TEST_STRINGS) + # Test JSON processing + with open(os.path.join(TEST_FILES_DIR, "test.json"), "rb") as f: + result = markitdown.convert(f, file_extension=".json") + validate_strings(result, JSON_TEST_STRINGS) + @pytest.mark.skipif( skip_exiftool,