added file obj unit tests for rss and json
This commit is contained in:
@@ -230,7 +230,7 @@ class MarkItDown:
|
|||||||
ext = kwargs.get("file_extension")
|
ext = kwargs.get("file_extension")
|
||||||
extensions = [ext] if ext is not None else []
|
extensions = [ext] if ext is not None else []
|
||||||
|
|
||||||
# TODO: Currently, there are some ongoing issues with puremagic's magic_stream function (incorrect guesses, unsupported file types, etc.)
|
# TODO: Currently, there are some ongoing issues with passing direct file objects to puremagic (incorrect guesses, unsupported file type errors, etc.)
|
||||||
# Only use puremagic as a last resort if no extensions were provided
|
# Only use puremagic as a last resort if no extensions were provided
|
||||||
if extensions == []:
|
if extensions == []:
|
||||||
for g in self._guess_ext_magic(source=file_object):
|
for g in self._guess_ext_magic(source=file_object):
|
||||||
|
|||||||
@@ -335,11 +335,23 @@ def test_markitdown_local_objects() -> None:
|
|||||||
text_content = result.text_content.replace("\\", "")
|
text_content = result.text_content.replace("\\", "")
|
||||||
validate_strings(result, SERP_TEST_STRINGS, SERP_TEST_EXCLUDES)
|
validate_strings(result, SERP_TEST_STRINGS, SERP_TEST_EXCLUDES)
|
||||||
|
|
||||||
|
# Test RSS processing
|
||||||
|
with open(os.path.join(TEST_FILES_DIR, "test_rss.xml"), "rb") as f:
|
||||||
|
result = markitdown.convert(f, file_extension=".xml")
|
||||||
|
text_content = result.text_content.replace("\\", "")
|
||||||
|
for test_string in RSS_TEST_STRINGS:
|
||||||
|
assert test_string in text_content
|
||||||
|
|
||||||
# Test MSG (Outlook email) processing
|
# Test MSG (Outlook email) processing
|
||||||
with open(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"), "rb") as f:
|
with open(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"), "rb") as f:
|
||||||
result = markitdown.convert(f, file_extension=".msg")
|
result = markitdown.convert(f, file_extension=".msg")
|
||||||
validate_strings(result, MSG_TEST_STRINGS)
|
validate_strings(result, MSG_TEST_STRINGS)
|
||||||
|
|
||||||
|
# Test JSON processing
|
||||||
|
with open(os.path.join(TEST_FILES_DIR, "test.json"), "rb") as f:
|
||||||
|
result = markitdown.convert(f, file_extension=".json")
|
||||||
|
validate_strings(result, JSON_TEST_STRINGS)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
skip_exiftool,
|
skip_exiftool,
|
||||||
|
|||||||
Reference in New Issue
Block a user