diff --git a/packages/markitdown/pyproject.toml b/packages/markitdown/pyproject.toml
index afb5d31..0d9bb61 100644
--- a/packages/markitdown/pyproject.toml
+++ b/packages/markitdown/pyproject.toml
@@ -36,7 +36,7 @@ dependencies = [
 [project.optional-dependencies]
 all = [
   "python-pptx",
-  "mammoth",
+  "mammoth~=1.10.0",
   "pandas",
   "openpyxl",
   "xlrd",
diff --git a/packages/markitdown/src/markitdown/__about__.py b/packages/markitdown/src/markitdown/__about__.py
index d2336ca..4441f3f 100644
--- a/packages/markitdown/src/markitdown/__about__.py
+++ b/packages/markitdown/src/markitdown/__about__.py
@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2024-present Adam Fourney
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.1.2"
+__version__ = "0.1.3"
diff --git a/packages/markitdown/src/markitdown/converters/_docx_converter.py b/packages/markitdown/src/markitdown/converters/_docx_converter.py
index 69c1ea8..9cb2cbd 100644
--- a/packages/markitdown/src/markitdown/converters/_docx_converter.py
+++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py
@@ -1,4 +1,6 @@
 import sys
+import io
+from warnings import warn
 
 from typing import BinaryIO, Any
 
@@ -13,6 +15,14 @@ from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE
 _dependency_exc_info = None
 try:
     import mammoth
+    import mammoth.docx.files
+
+    def mammoth_files_open(self, uri):
+        warn("DOCX: processing of r:link resources (e.g., linked images) is disabled.")
+        return io.BytesIO(b"")
+
+    mammoth.docx.files.Files.open = mammoth_files_open
+
 except ImportError:
     # Preserve the error and stack trace for later
     _dependency_exc_info = sys.exc_info()