* Updated DocumentConverter interface * Updated all DocumentConverter classes * Added support for various new audio files. * Updated sample plugin to new DocumentConverter interface. * Updated project README with notes about changes, and use-cases. * Updated DocumentConverter documentation. * Move priority to outside DocumentConverter, allowing them to be reprioritized, and keeping the DocumentConverter interface simple. --------- Co-authored-by: Kenny Zhang <kzhang678@gmail.com>
120 lines
3.6 KiB
Python
120 lines
3.6 KiB
Python
#!/usr/bin/env python3 -m pytest
|
|
import os
|
|
import subprocess
|
|
import pytest
|
|
from markitdown import __version__
|
|
|
|
try:
|
|
from .test_markitdown import TEST_FILES_DIR, DOCX_TEST_STRINGS
|
|
except ImportError:
|
|
from test_markitdown import TEST_FILES_DIR, DOCX_TEST_STRINGS # type: ignore
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def shared_tmp_dir(tmp_path_factory):
|
|
return tmp_path_factory.mktemp("pytest_tmp")
|
|
|
|
|
|
def test_version(shared_tmp_dir) -> None:
|
|
result = subprocess.run(
|
|
["python", "-m", "markitdown", "--version"], capture_output=True, text=True
|
|
)
|
|
|
|
assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
|
|
assert __version__ in result.stdout, f"Version not found in output: {result.stdout}"
|
|
|
|
|
|
def test_invalid_flag(shared_tmp_dir) -> None:
|
|
result = subprocess.run(
|
|
["python", "-m", "markitdown", "--foobar"], capture_output=True, text=True
|
|
)
|
|
|
|
assert result.returncode != 0, f"CLI exited with error: {result.stderr}"
|
|
assert (
|
|
"unrecognized arguments" in result.stderr
|
|
), f"Expected 'unrecognized arguments' to appear in STDERR"
|
|
assert "SYNTAX" in result.stderr, f"Expected 'SYNTAX' to appear in STDERR"
|
|
|
|
|
|
def test_output_to_stdout(shared_tmp_dir) -> None:
|
|
# DOC X
|
|
result = subprocess.run(
|
|
["python", "-m", "markitdown", os.path.join(TEST_FILES_DIR, "test.docx")],
|
|
capture_output=True,
|
|
text=True,
|
|
)
|
|
|
|
assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
|
|
for test_string in DOCX_TEST_STRINGS:
|
|
assert (
|
|
test_string in result.stdout
|
|
), f"Expected string not found in output: {test_string}"
|
|
|
|
|
|
def test_output_to_file(shared_tmp_dir) -> None:
|
|
# DOC X, flag -o at the end
|
|
docx_output_file_1 = os.path.join(shared_tmp_dir, "test_docx_1.md")
|
|
result = subprocess.run(
|
|
[
|
|
"python",
|
|
"-m",
|
|
"markitdown",
|
|
os.path.join(TEST_FILES_DIR, "test.docx"),
|
|
"-o",
|
|
docx_output_file_1,
|
|
],
|
|
capture_output=True,
|
|
text=True,
|
|
)
|
|
|
|
assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
|
|
assert os.path.exists(
|
|
docx_output_file_1
|
|
), f"Output file not created: {docx_output_file_1}"
|
|
|
|
with open(docx_output_file_1, "r") as f:
|
|
output = f.read()
|
|
for test_string in DOCX_TEST_STRINGS:
|
|
assert (
|
|
test_string in output
|
|
), f"Expected string not found in output: {test_string}"
|
|
|
|
# DOC X, flag -o at the beginning
|
|
docx_output_file_2 = os.path.join(shared_tmp_dir, "test_docx_2.md")
|
|
result = subprocess.run(
|
|
[
|
|
"python",
|
|
"-m",
|
|
"markitdown",
|
|
"-o",
|
|
docx_output_file_2,
|
|
os.path.join(TEST_FILES_DIR, "test.docx"),
|
|
],
|
|
capture_output=True,
|
|
text=True,
|
|
)
|
|
|
|
assert result.returncode == 0, f"CLI exited with error: {result.stderr}"
|
|
assert os.path.exists(
|
|
docx_output_file_2
|
|
), f"Output file not created: {docx_output_file_2}"
|
|
|
|
with open(docx_output_file_2, "r") as f:
|
|
output = f.read()
|
|
for test_string in DOCX_TEST_STRINGS:
|
|
assert (
|
|
test_string in output
|
|
), f"Expected string not found in output: {test_string}"
|
|
|
|
|
|
if __name__ == "__main__":
|
|
"""Runs this file's tests from the command line."""
|
|
import tempfile
|
|
|
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
|
test_version(tmp_dir)
|
|
test_invalid_flag(tmp_dir)
|
|
test_output_to_stdout(tmp_dir)
|
|
test_output_to_file(tmp_dir)
|
|
print("All tests passed!")
|