feat: render math equations in .docx documents (#1160)

* feat: math equation rendering in .docx files
* fix: import fix on .docx preprocessing
* test: add test cases for docx equation rendering
* docs: add ThirdPartyNotices.md
* refactor: reformatted with black
This commit is contained in:
Sathindu
2025-03-28 18:36:38 -04:00
committed by GitHub
parent 9e067c42b6
commit 3fcd48cdfc
11 changed files with 1081 additions and 2 deletions

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3 -m pytest
import io
import os
import re
import shutil
import openai
import pytest
@@ -262,6 +263,19 @@ def test_docx_comments() -> None:
validate_strings(result, DOCX_COMMENT_TEST_STRINGS)
def test_docx_equations() -> None:
    """Check that converting ``equations.docx`` yields inline and block math.

    The converted text must contain the inline equation ``$m=1$`` and at
    least one block equation delimited by ``$$ ... $$``.
    """
    equations_path = os.path.join(TEST_FILES_DIR, "equations.docx")
    converted = MarkItDown().convert(equations_path)
    markdown = converted.text_content

    # The inline equation m=1 must appear wrapped in single dollar signs.
    assert "$m=1$" in markdown, "Inline equation $m=1$ not found"

    # At least one block equation wrapped in double dollar signs must exist.
    block_pattern = re.compile(r"\$\$(.+?)\$\$")
    assert block_pattern.search(markdown), "No block equations found in the document."
def test_input_as_strings() -> None:
markitdown = MarkItDown()