diff --git a/packages/markitdown/src/markitdown/_markitdown.py b/packages/markitdown/src/markitdown/_markitdown.py
index 04015d7..9aaa4cf 100644
--- a/packages/markitdown/src/markitdown/_markitdown.py
+++ b/packages/markitdown/src/markitdown/_markitdown.py
@@ -3,6 +3,7 @@ import mimetypes
 import os
 import re
 import sys
+import shutil
 import tempfile
 import warnings
 import traceback
@@ -138,9 +139,33 @@ class MarkItDown:
         self._llm_model = kwargs.get("llm_model")
         self._exiftool_path = kwargs.get("exiftool_path")
         self._style_map = kwargs.get("style_map")
+
         if self._exiftool_path is None:
             self._exiftool_path = os.getenv("EXIFTOOL_PATH")
 
+            # Still none? Check well-known paths
+            if self._exiftool_path is None:
+                candidate = shutil.which("exiftool")
+                if candidate:
+                    candidate = os.path.abspath(candidate)
+                    # Only adopt the discovered binary when its directory is
+                    # exactly one of the directories listed below.
+                    if any(
+                        d == os.path.dirname(candidate)
+                        for d in [
+                            "/usr/bin",
+                            "/usr/local/bin",
+                            "/opt",
+                            "/opt/bin",
+                            "/opt/local/bin",
+                            "/opt/homebrew/bin",
+                            "C:\\Windows\\System32",
+                            "C:\\Program Files",
+                            "C:\\Program Files (x86)",
+                        ]
+                    ):
+                        self._exiftool_path = candidate
+
         # Register converters for successful browsing operations
         # Later registrations are tried first / take higher priority than earlier registrations
         # To this end, the most specific converters should appear below the most generic converters
diff --git a/packages/markitdown/src/markitdown/converters/_exiftool.py b/packages/markitdown/src/markitdown/converters/_exiftool.py
index 5a316f0..43c100f 100644
--- a/packages/markitdown/src/markitdown/converters/_exiftool.py
+++ b/packages/markitdown/src/markitdown/converters/_exiftool.py
@@ -5,26 +5,16 @@ import sys
 import shutil
 import os
 import warnings
-from typing import BinaryIO, Optional, Any
+from typing import BinaryIO, Any, Union
 
 
 def exiftool_metadata(
-    file_stream: BinaryIO, *, exiftool_path: Optional[str] = None
+    file_stream: BinaryIO,
+    *,
+    exiftool_path: Union[str, None],
 ) -> Any:  # Need a better type for json data
-    # Check if we have a valid pointer to exiftool
+    # Nothing to do
     if not exiftool_path:
-        which_exiftool = shutil.which("exiftool")
-        if which_exiftool:
-            warnings.warn(
-                f"""Implicit discovery of 'exiftool' is disabled. If you would like to continue to use exiftool in MarkItDown, please set the exiftool_path parameter in the MarkItDown consructor. E.g.,
-
-    md = MarkItDown(exiftool_path="{which_exiftool}")
-
-This warning will be removed in future releases.
-""",
-                DeprecationWarning,
-            )
-        # Nothing to do
         return {}
 
     # Run exiftool
diff --git a/packages/markitdown/tests/test_markitdown.py b/packages/markitdown/tests/test_markitdown.py
index 8c34da0..25e5e33 100644
--- a/packages/markitdown/tests/test_markitdown.py
+++ b/packages/markitdown/tests/test_markitdown.py
@@ -7,8 +7,6 @@ import openai
 import pytest
 import requests
 
-import warnings
-
 from markitdown import (
     MarkItDown,
     UnsupportedFormatException,
@@ -517,18 +515,5 @@ def test_exceptions() -> None:
     reason="do not run if exiftool is not installed",
 )
 def test_markitdown_exiftool() -> None:
-    # Test the automatic discovery of exiftool throws a warning
-    # and is disabled
-    try:
-        warnings.simplefilter("default")
-        with warnings.catch_warnings(record=True) as w:
-            markitdown = MarkItDown()
-            result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg"))
-            assert len(w) == 1
-            assert w[0].category is DeprecationWarning
-            assert result.text_content.strip() == ""
-    finally:
-        warnings.resetwarnings()
-
     which_exiftool = shutil.which("exiftool")
     assert which_exiftool is not None