Clean up README.md

Bump version (#1095)
Bump version. (#1075)
2025-03-05 21:35:08 -08:00 · 2025-03-05 21:30:56 -08:00 · 2025-02-28 07:30:46 -08:00 · 2025-02-28 00:57:41 -08:00 · 2025-02-10 16:01:17 -08:00
4 changed files with 24 additions and 42 deletions
--- a/README.md
+++ b/README.md
@@ -87,42 +87,6 @@ print(result.text_content)
 docker build -t markitdown:latest .
 docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
 ```
-<details>
-    
-<summary>Batch Processing Multiple Files</summary>
-
-This example shows how to convert multiple files to markdown format in a single run. The script processes all supported files in a directory and creates corresponding markdown files.
-
-
-```python convert.py
-from markitdown import MarkItDown
-from openai import OpenAI
-import os
-client = OpenAI(api_key="your-api-key-here")
-md = MarkItDown(llm_client=client, llm_model="gpt-4o-2024-11-20")
-supported_extensions = ('.pptx', '.docx', '.pdf', '.jpg', '.jpeg', '.png')
-files_to_convert = [f for f in os.listdir('.') if f.lower().endswith(supported_extensions)]
-for file in files_to_convert:
-    print(f"\nConverting {file}...")
-    try:
-        md_file = os.path.splitext(file)[0] + '.md'
-        result = md.convert(file)
-        with open(md_file, 'w') as f:
-            f.write(result.text_content)
-        
-        print(f"Successfully converted {file} to {md_file}")
-    except Exception as e:
-        print(f"Error converting {file}: {str(e)}")
-
-print("\nAll conversions completed!")
-```
-2. Place the script in the same directory as your files
-3. Install required packages: like openai
-4. Run script ```bash python convert.py ```
-
-Note that original files will remain unchanged and new markdown files are created with the same base name.
-
-</details>
   
 ## Contributing

--- a/src/markitdown/about.py
+++ b/src/markitdown/about.py
@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.0.1a3"
+__version__ = "0.0.1"
--- a/src/markitdown/main.py
+++ b/src/markitdown/main.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: MIT
 import argparse
 import sys
+import shutil
 from textwrap import dedent
 from .__about__ import __version__
 from ._markitdown import MarkItDown, DocumentConverterResult
@@ -74,6 +75,8 @@ def main():
    parser.add_argument("filename", nargs="?")
    args = parser.parse_args()

+    which_exiftool = shutil.which("exiftool")
+
    if args.use_docintel:
        if args.endpoint is None:
            raise ValueError(
@@ -81,9 +84,11 @@ def main():
            )
        elif args.filename is None:
            raise ValueError("Filename is required when using Document Intelligence.")
-        markitdown = MarkItDown(docintel_endpoint=args.endpoint)
+        markitdown = MarkItDown(
+            exiftool_path=which_exiftool, docintel_endpoint=args.endpoint
+        )
    else:
-        markitdown = MarkItDown()
+        markitdown = MarkItDown(exiftool_path=which_exiftool)

    if args.filename is None:
        result = markitdown.convert_stream(sys.stdin.buffer)
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -91,7 +91,14 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
        # Explicitly cast options to the expected type if necessary
        super().__init__(**options)

-    def convert_hn(self, n: int, el: Any, text: str, convert_as_inline: bool) -> str:
+    def convert_hn(
+        self,
+        n: int,
+        el: Any,
+        text: str,
+        convert_as_inline: Optional[bool] = False,
+        **kwargs,
+    ) -> str:
        """Same as usual, but be sure to start with a new line"""
        if not convert_as_inline:
            if not re.search(r"^\n", text):
@@ -99,7 +106,9 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):

        return super().convert_hn(n, el, text, convert_as_inline)  # type: ignore

-    def convert_a(self, el: Any, text: str, convert_as_inline: bool):
+    def convert_a(
+        self, el: Any, text: str, convert_as_inline: Optional[bool] = False, **kwargs
+    ):
        """Same as usual converter, but removes Javascript links and escapes URIs."""
        prefix, suffix, text = markdownify.chomp(text)  # type: ignore
        if not text:
@@ -135,7 +144,9 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
            else text
        )

-    def convert_img(self, el: Any, text: str, convert_as_inline: bool) -> str:
+    def convert_img(
+        self, el: Any, text: str, convert_as_inline: Optional[bool] = False, **kwargs
+    ) -> str:
        """Same as usual converter, but removes data URIs"""

        alt = el.attrs.get("alt", None) or ""
@@ -1752,6 +1763,8 @@ class MarkItDown:
        ext = ext.strip()
        if ext == "":
            return
+        if ext in extensions:
+            return
        # if ext not in extensions:
        extensions.append(ext)
Author	SHA1	Message	Date
Adam Fourney	8eaf5a1da9	Clean up README.md	2025-03-05 21:35:08 -08:00
afourney	38c924793c	Bump version (#1095)	2025-03-05 21:30:56 -08:00
afourney	b9526d5e47	Bump version. (#1075)	2025-02-28 07:30:46 -08:00
Hieu Lam	519fe172aa	Unable to convert HTML to Markdown (#1072) * feat: issue where inherited function from `markdownify.MarkdownConverter` doesn't have `current_tags` leading to error using `kwargs`, also set default value for `convert_as_inline`	2025-02-28 00:57:41 -08:00
Adam Fourney	abe9752438	Bumped version	2025-02-10 16:01:17 -08:00