added file obj unit tests for rss and json

This commit is contained in:
Kenny Zhang
2025-02-27 15:05:29 -05:00
parent db0c8acbaf
commit d9a92f7f06
2 changed files with 13 additions and 1 deletions

View File

@@ -230,7 +230,7 @@ class MarkItDown:
ext = kwargs.get("file_extension")
extensions = [ext] if ext is not None else []
# TODO: Curently, there are some ongoing issues with puremagic's magic_stream function (incorrect guesses, unsupported file types, etc.)
# TODO: Curently, there are some ongoing issues with passing direct file objects to puremagic (incorrect guesses, unsupported file type errors, etc.)
# Only use puremagic as a last resort if no extensions were provided
if extensions == []:
for g in self._guess_ext_magic(source=file_object):

View File

@@ -335,11 +335,23 @@ def test_markitdown_local_objects() -> None:
text_content = result.text_content.replace("\\", "")
validate_strings(result, SERP_TEST_STRINGS, SERP_TEST_EXCLUDES)
# Test RSS processing
with open(os.path.join(TEST_FILES_DIR, "test_rss.xml"), "rb") as f:
result = markitdown.convert(f, file_extension=".xml")
text_content = result.text_content.replace("\\", "")
for test_string in RSS_TEST_STRINGS:
assert test_string in text_content
# Test MSG (Outlook email) processing
with open(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"), "rb") as f:
result = markitdown.convert(f, file_extension=".msg")
validate_strings(result, MSG_TEST_STRINGS)
# Test JSON processing
with open(os.path.join(TEST_FILES_DIR, "test.json"), "rb") as f:
result = markitdown.convert(f, file_extension=".json")
validate_strings(result, JSON_TEST_STRINGS)
@pytest.mark.skipif(
skip_exiftool,