Fix exiftool in well-known paths. (#1106)
This commit is contained in:
@@ -3,6 +3,7 @@ import mimetypes
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
import warnings
|
import warnings
|
||||||
import traceback
|
import traceback
|
||||||
@@ -138,9 +139,30 @@ class MarkItDown:
|
|||||||
self._llm_model = kwargs.get("llm_model")
|
self._llm_model = kwargs.get("llm_model")
|
||||||
self._exiftool_path = kwargs.get("exiftool_path")
|
self._exiftool_path = kwargs.get("exiftool_path")
|
||||||
self._style_map = kwargs.get("style_map")
|
self._style_map = kwargs.get("style_map")
|
||||||
|
|
||||||
if self._exiftool_path is None:
|
if self._exiftool_path is None:
|
||||||
self._exiftool_path = os.getenv("EXIFTOOL_PATH")
|
self._exiftool_path = os.getenv("EXIFTOOL_PATH")
|
||||||
|
|
||||||
|
# Still none? Check well-known paths
|
||||||
|
if self._exiftool_path is None:
|
||||||
|
candidate = shutil.which("exiftool")
|
||||||
|
if candidate:
|
||||||
|
candidate = os.path.abspath(candidate)
|
||||||
|
if any(
|
||||||
|
d == os.path.dirname(candidate)
|
||||||
|
for d in [
|
||||||
|
"/usr/bin",
|
||||||
|
"/usr/local/bin",
|
||||||
|
"/opt",
|
||||||
|
"/opt/bin",
|
||||||
|
"/opt/local/bin",
|
||||||
|
"/opt/homebrew/bin",
|
||||||
|
"C:\\Windows\\System32",
|
||||||
|
"C:\\Program Files",
|
||||||
|
"C:\\Program Files (x86)",
|
||||||
|
]
|
||||||
|
):
|
||||||
|
self._exiftool_path = candidate
|
||||||
|
|
||||||
# Register converters for successful browsing operations
|
# Register converters for successful browsing operations
|
||||||
# Later registrations are tried first / take higher priority than earlier registrations
|
# Later registrations are tried first / take higher priority than earlier registrations
|
||||||
# To this end, the most specific converters should appear below the most generic converters
|
# To this end, the most specific converters should appear below the most generic converters
|
||||||
|
|||||||
@@ -5,26 +5,16 @@ import sys
|
|||||||
import shutil
|
import shutil
|
||||||
import os
|
import os
|
||||||
import warnings
|
import warnings
|
||||||
from typing import BinaryIO, Optional, Any
|
from typing import BinaryIO, Any, Union
|
||||||
|
|
||||||
|
|
||||||
def exiftool_metadata(
|
def exiftool_metadata(
|
||||||
file_stream: BinaryIO, *, exiftool_path: Optional[str] = None
|
file_stream: BinaryIO,
|
||||||
|
*,
|
||||||
|
exiftool_path: Union[str, None],
|
||||||
) -> Any: # Need a better type for json data
|
) -> Any: # Need a better type for json data
|
||||||
# Check if we have a valid pointer to exiftool
|
# Nothing to do
|
||||||
if not exiftool_path:
|
if not exiftool_path:
|
||||||
which_exiftool = shutil.which("exiftool")
|
|
||||||
if which_exiftool:
|
|
||||||
warnings.warn(
|
|
||||||
f"""Implicit discovery of 'exiftool' is disabled. If you would like to continue to use exiftool in MarkItDown, please set the exiftool_path parameter in the MarkItDown consructor. E.g.,
|
|
||||||
|
|
||||||
md = MarkItDown(exiftool_path="{which_exiftool}")
|
|
||||||
|
|
||||||
This warning will be removed in future releases.
|
|
||||||
""",
|
|
||||||
DeprecationWarning,
|
|
||||||
)
|
|
||||||
# Nothing to do
|
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
# Run exiftool
|
# Run exiftool
|
||||||
|
|||||||
@@ -7,8 +7,6 @@ import openai
|
|||||||
import pytest
|
import pytest
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
import warnings
|
|
||||||
|
|
||||||
from markitdown import (
|
from markitdown import (
|
||||||
MarkItDown,
|
MarkItDown,
|
||||||
UnsupportedFormatException,
|
UnsupportedFormatException,
|
||||||
@@ -517,19 +515,6 @@ def test_exceptions() -> None:
|
|||||||
reason="do not run if exiftool is not installed",
|
reason="do not run if exiftool is not installed",
|
||||||
)
|
)
|
||||||
def test_markitdown_exiftool() -> None:
|
def test_markitdown_exiftool() -> None:
|
||||||
# Test the automatic discovery of exiftool throws a warning
|
|
||||||
# and is disabled
|
|
||||||
try:
|
|
||||||
warnings.simplefilter("default")
|
|
||||||
with warnings.catch_warnings(record=True) as w:
|
|
||||||
markitdown = MarkItDown()
|
|
||||||
result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg"))
|
|
||||||
assert len(w) == 1
|
|
||||||
assert w[0].category is DeprecationWarning
|
|
||||||
assert result.text_content.strip() == ""
|
|
||||||
finally:
|
|
||||||
warnings.resetwarnings()
|
|
||||||
|
|
||||||
which_exiftool = shutil.which("exiftool")
|
which_exiftool = shutil.which("exiftool")
|
||||||
assert which_exiftool is not None
|
assert which_exiftool is not None
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user