diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..f59ec20 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..492ad8a --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.13-alpine + +USER root + +# Runtime dependency +RUN apk add --no-cache ffmpeg + +RUN pip install markitdown + +# Default USERID and GROUPID +ARG USERID=10000 +ARG GROUPID=10000 + +USER $USERID:$GROUPID + +ENTRYPOINT [ "markitdown" ] diff --git a/README.md b/README.md index 6c72d0d..7079dbf 100644 --- a/README.md +++ b/README.md @@ -59,24 +59,24 @@ You can pipe content to standard input by omitting the argument: cat path-to-file.pdf | markitdown ``` +You can also configure markitdown to use Large Language Models to describe images. To do so, you must provide the `llm_client` and `llm_model` parameters to the `MarkItDown` object, according to your specific client. -You can also configure markitdown to use Large Language Models to describe images. To do so you must provide `mlm_client` and `mlm_model` parameters to MarkItDown object, according to your specific client. ```python from markitdown import MarkItDown from openai import OpenAI client = OpenAI() -md = MarkItDown(mlm_client=client, mlm_model="gpt-4o") +md = MarkItDown(llm_client=client, llm_model="gpt-4o") result = md.convert("example.jpg") print(result.text_content) ``` -The prompt of describing images can be customized by providing `mlm_prompt` parameter. +You can also use the project as a Docker image: -```python -# ... -result = md.convert("example.jpg", mlm_prompt="Customized prompt") +```sh +docker build -t markitdown:latest . 
+docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md ``` ## Contributing @@ -95,15 +95,18 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio ### Running Tests -To run the tests for this project, use the following command: +To run tests, install `hatch` using `pip` or other methods as described [here](https://hatch.pypa.io/dev/install). ```sh +pip install hatch hatch shell hatch test ``` ### Running Pre-commit Checks +Please run the pre-commit checks before submitting a PR. + ```sh pre-commit run --all-files ``` diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index cfbb243..a7fb28a 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -793,7 +793,7 @@ class Mp3Converter(WavConverter): class ImageConverter(MediaConverter): """ - Converts images to markdown via extraction of metadata (if `exiftool` is installed), OCR (if `easyocr` is installed), and description via a multimodal LLM (if an mlm_client is configured). + Converts images to markdown via extraction of metadata (if `exiftool` is installed), OCR (if `easyocr` is installed), and description via a multimodal LLM (if an llm_client is configured). 
""" def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: @@ -823,17 +823,17 @@ class ImageConverter(MediaConverter): md_content += f"{f}: {metadata[f]}\n" # Try describing the image with GPTV - mlm_client = kwargs.get("mlm_client") - mlm_model = kwargs.get("mlm_model") - if mlm_client is not None and mlm_model is not None: + llm_client = kwargs.get("llm_client") + llm_model = kwargs.get("llm_model") + if llm_client is not None and llm_model is not None: md_content += ( "\n# Description:\n" - + self._get_mlm_description( + + self._get_llm_description( local_path, extension, - mlm_client, - mlm_model, - prompt=kwargs.get("mlm_prompt"), + llm_client, + llm_model, + prompt=kwargs.get("llm_prompt"), ).strip() + "\n" ) @@ -843,11 +843,11 @@ class ImageConverter(MediaConverter): text_content=md_content, ) - def _get_mlm_description(self, local_path, extension, client, model, prompt=None): + def _get_llm_description(self, local_path, extension, client, model, prompt=None): if prompt is None or prompt.strip() == "": prompt = "Write a detailed caption for this image." 
- sys.stderr.write(f"MLM Prompt:\n{prompt}\n") + sys.stderr.write(f"llm Prompt:\n{prompt}\n") data_uri = "" with open(local_path, "rb") as image_file: @@ -1009,8 +1009,8 @@ class MarkItDown: def __init__( self, requests_session: Optional[requests.Session] = None, - mlm_client: Optional[Any] = None, - mlm_model: Optional[Any] = None, + llm_client: Optional[Any] = None, + llm_model: Optional[Any] = None, style_map: Optional[str] = None, ): if requests_session is None: @@ -1018,8 +1018,8 @@ class MarkItDown: else: self._requests_session = requests_session - self._mlm_client = mlm_client - self._mlm_model = mlm_model + self._llm_client = llm_client + self._llm_model = llm_model self._style_map = style_map self._page_converters: List[DocumentConverter] = [] @@ -1190,11 +1190,12 @@ class MarkItDown: _kwargs.update({"file_extension": ext}) # Copy any additional global options - if "mlm_client" not in _kwargs and self._mlm_client is not None: - _kwargs["mlm_client"] = self._mlm_client + if "llm_client" not in _kwargs and self._llm_client is not None: + _kwargs["llm_client"] = self._llm_client + + if "llm_model" not in _kwargs and self._llm_model is not None: + _kwargs["llm_model"] = self._llm_model - if "mlm_model" not in _kwargs and self._mlm_model is not None: - _kwargs["mlm_model"] = self._mlm_model # Add the list of converters for nested processing _kwargs["_parent_converters"] = self._page_converters @@ -1236,8 +1237,7 @@ class MarkItDown: if ext == "": return # if ext not in extensions: - if True: - extensions.append(ext) + extensions.append(ext) def _guess_ext_magic(self, path): """Use puremagic (a Python implementation of libmagic) to guess a file's extension based on the first few bytes."""