Added core unit tests for file object support
This commit is contained in:
@@ -189,7 +189,7 @@ def test_markitdown_remote() -> None:
|
|||||||
# assert test_string in result.text_content
|
# assert test_string in result.text_content
|
||||||
|
|
||||||
|
|
||||||
def test_markitdown_local() -> None:
|
def test_markitdown_local_paths() -> None:
|
||||||
markitdown = MarkItDown()
|
markitdown = MarkItDown()
|
||||||
|
|
||||||
# Test XLSX processing
|
# Test XLSX processing
|
||||||
@@ -272,6 +272,75 @@ def test_markitdown_local() -> None:
|
|||||||
assert "# Test" in result.text_content
|
assert "# Test" in result.text_content
|
||||||
|
|
||||||
|
|
||||||
|
def test_markitdown_local_objects() -> None:
    """Check conversion of local fixtures passed to ``convert`` as open file objects.

    Each fixture under ``TEST_FILES_DIR`` is opened and the resulting file
    object is handed to ``MarkItDown.convert`` together with an explicit
    ``file_extension`` keyword (and, for the HTML fixtures, a ``url``
    keyword). The result is then checked against the expected-string
    constants for that fixture.
    """
    markitdown = MarkItDown()

    # Test XLSX processing
    with open(os.path.join(TEST_FILES_DIR, "test.xlsx"), "rb") as f:
        result = markitdown.convert(f, file_extension=".xlsx")
        validate_strings(result, XLSX_TEST_STRINGS)

    # Test XLS processing
    with open(os.path.join(TEST_FILES_DIR, "test.xls"), "rb") as f:
        result = markitdown.convert(f, file_extension=".xls")
        # Backslashes are stripped before matching (presumably Markdown
        # escapes -- confirm). The stripped text does not depend on the loop
        # variable, so it is computed once.
        text_content = result.text_content.replace("\\", "")
        for test_string in XLS_TEST_STRINGS:
            assert test_string in text_content

    # Test DOCX processing
    with open(os.path.join(TEST_FILES_DIR, "test.docx"), "rb") as f:
        result = markitdown.convert(f, file_extension=".docx")
        validate_strings(result, DOCX_TEST_STRINGS)

    # Test DOCX processing, with comments
    with open(os.path.join(TEST_FILES_DIR, "test_with_comment.docx"), "rb") as f:
        result = markitdown.convert(
            f,
            file_extension=".docx",
            style_map="comment-reference => ",
        )
        validate_strings(result, DOCX_COMMENT_TEST_STRINGS)

    # Test DOCX processing, with comments and setting style_map on init
    markitdown_with_style_map = MarkItDown(style_map="comment-reference => ")
    with open(os.path.join(TEST_FILES_DIR, "test_with_comment.docx"), "rb") as f:
        result = markitdown_with_style_map.convert(f, file_extension=".docx")
        validate_strings(result, DOCX_COMMENT_TEST_STRINGS)

    # Test PPTX processing
    with open(os.path.join(TEST_FILES_DIR, "test.pptx"), "rb") as f:
        result = markitdown.convert(f, file_extension=".pptx")
        validate_strings(result, PPTX_TEST_STRINGS)

    # Test HTML processing (HTML fixtures are opened in text mode as UTF-8)
    with open(
        os.path.join(TEST_FILES_DIR, "test_blog.html"), "rt", encoding="utf-8"
    ) as f:
        result = markitdown.convert(f, file_extension=".html", url=BLOG_TEST_URL)
        validate_strings(result, BLOG_TEST_STRINGS)

    # Test Wikipedia processing
    with open(
        os.path.join(TEST_FILES_DIR, "test_wikipedia.html"), "rt", encoding="utf-8"
    ) as f:
        result = markitdown.convert(f, file_extension=".html", url=WIKIPEDIA_TEST_URL)
        validate_strings(result, WIKIPEDIA_TEST_STRINGS, WIKIPEDIA_TEST_EXCLUDES)

    # Test Bing processing
    with open(
        os.path.join(TEST_FILES_DIR, "test_serp.html"), "rt", encoding="utf-8"
    ) as f:
        result = markitdown.convert(f, file_extension=".html", url=SERP_TEST_URL)
        validate_strings(result, SERP_TEST_STRINGS, SERP_TEST_EXCLUDES)

    # Test MSG (Outlook email) processing
    with open(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"), "rb") as f:
        result = markitdown.convert(f, file_extension=".msg")
        validate_strings(result, MSG_TEST_STRINGS)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(
|
@pytest.mark.skipif(
|
||||||
skip_exiftool,
|
skip_exiftool,
|
||||||
reason="do not run if exiftool is not installed",
|
reason="do not run if exiftool is not installed",
|
||||||
@@ -328,7 +397,7 @@ def test_markitdown_llm() -> None:
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
"""Runs this file's tests from the command line."""
|
"""Runs this file's tests from the command line."""
|
||||||
test_markitdown_remote()
|
test_markitdown_remote()
|
||||||
test_markitdown_local()
|
test_markitdown_local_paths()
|
||||||
test_markitdown_exiftool()
|
test_markitdown_exiftool()
|
||||||
# test_markitdown_llm()
|
# test_markitdown_llm()
|
||||||
print("All tests passed!")
|
print("All tests passed!")
|
||||||
|
|||||||
Reference in New Issue
Block a user