Fix exiftool in well-known paths. (#1106)

This commit is contained in:
afourney
2025-03-07 21:47:20 -08:00
committed by GitHub
parent 515fa854bf
commit 99d8e562db
3 changed files with 27 additions and 30 deletions

View File

@@ -3,6 +3,7 @@ import mimetypes
import os
import re
import sys
import shutil
import tempfile
import warnings
import traceback
@@ -138,9 +139,30 @@ class MarkItDown:
self._llm_model = kwargs.get("llm_model")
self._exiftool_path = kwargs.get("exiftool_path")
self._style_map = kwargs.get("style_map")

# Resolve the exiftool location in priority order:
#   1. the explicit `exiftool_path` keyword argument (read above),
#   2. the EXIFTOOL_PATH environment variable,
#   3. an `exiftool` found on PATH, but only when it sits directly in one
#      of the well-known directories listed below.
if self._exiftool_path is None:
    self._exiftool_path = os.getenv("EXIFTOOL_PATH")

# Still none? Check well-known paths
if self._exiftool_path is None:
    candidate = shutil.which("exiftool")
    if candidate:
        candidate = os.path.abspath(candidate)

        # Every entry must end with its own comma: adjacent string
        # literals are concatenated by Python into a single (bogus) path.
        well_known_dirs = (
            "/usr/bin",
            "/usr/local/bin",
            "/opt",
            "/opt/bin",
            "/opt/local/bin",
            "/opt/homebrew/bin",
            "C:\\Windows\\System32",
            "C:\\Program Files",
            "C:\\Program Files (x86)",
        )

        # normcase() is a no-op on POSIX; on Windows it lower-cases the
        # path and unifies separators, so a PATH entry spelled e.g.
        # "C:\WINDOWS\system32" still matches the entry above.
        candidate_dir = os.path.normcase(os.path.dirname(candidate))
        if any(candidate_dir == os.path.normcase(d) for d in well_known_dirs):
            # Keep the absolute path as returned by which(); only the
            # comparison is case-normalised.
            self._exiftool_path = candidate
# Register converters for successful browsing operations
# Later registrations are tried first / take higher priority than earlier registrations
# To this end, the most specific converters should appear below the most generic converters

View File

@@ -5,26 +5,16 @@ import sys
import shutil
import os
import warnings
from typing import BinaryIO, Optional, Any
from typing import BinaryIO, Any, Union
def exiftool_metadata(
file_stream: BinaryIO, *, exiftool_path: Optional[str] = None
file_stream: BinaryIO,
*,
exiftool_path: Union[str, None],
) -> Any: # Need a better type for json data
# Check if we have a valid pointer to exiftool
# Nothing to do
if not exiftool_path:
which_exiftool = shutil.which("exiftool")
if which_exiftool:
warnings.warn(
f"""Implicit discovery of 'exiftool' is disabled. If you would like to continue to use exiftool in MarkItDown, please set the exiftool_path parameter in the MarkItDown consructor. E.g.,
md = MarkItDown(exiftool_path="{which_exiftool}")
This warning will be removed in future releases.
""",
DeprecationWarning,
)
# Nothing to do
return {}
# Run exiftool

View File

@@ -7,8 +7,6 @@ import openai
import pytest
import requests
import warnings
from markitdown import (
MarkItDown,
UnsupportedFormatException,
@@ -517,19 +515,6 @@ def test_exceptions() -> None:
reason="do not run if exiftool is not installed",
)
def test_markitdown_exiftool() -> None:
# Test the automatic discovery of exiftool throws a warning
# and is disabled
try:
warnings.simplefilter("default")
with warnings.catch_warnings(record=True) as w:
markitdown = MarkItDown()
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg"))
assert len(w) == 1
assert w[0].category is DeprecationWarning
assert result.text_content.strip() == ""
finally:
warnings.resetwarnings()
which_exiftool = shutil.which("exiftool")
assert which_exiftool is not None