diff --git a/src/markitdown/__main__.py b/src/markitdown/__main__.py index d213363..fa68a0d 100644 --- a/src/markitdown/__main__.py +++ b/src/markitdown/__main__.py @@ -84,7 +84,9 @@ def main(): ) elif args.filename is None: raise ValueError("Filename is required when using Document Intelligence.") - markitdown = MarkItDown(exiftool_path=which_exiftool, docintel_endpoint=args.endpoint) + markitdown = MarkItDown( + exiftool_path=which_exiftool, docintel_endpoint=args.endpoint + ) else: markitdown = MarkItDown(exiftool_path=which_exiftool) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index e68b099..48cdfa6 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -91,7 +91,14 @@ class _CustomMarkdownify(markdownify.MarkdownConverter): # Explicitly cast options to the expected type if necessary super().__init__(**options) - def convert_hn(self, n: int, el: Any, text: str, convert_as_inline: bool) -> str: + def convert_hn( + self, + n: int, + el: Any, + text: str, + convert_as_inline: Optional[bool] = False, + **kwargs, + ) -> str: """Same as usual, but be sure to start with a new line""" if not convert_as_inline: if not re.search(r"^\n", text): @@ -99,7 +106,9 @@ class _CustomMarkdownify(markdownify.MarkdownConverter): return super().convert_hn(n, el, text, convert_as_inline) # type: ignore - def convert_a(self, el: Any, text: str, convert_as_inline: bool): + def convert_a( + self, el: Any, text: str, convert_as_inline: Optional[bool] = False, **kwargs + ): """Same as usual converter, but removes Javascript links and escapes URIs.""" prefix, suffix, text = markdownify.chomp(text) # type: ignore if not text: @@ -135,7 +144,9 @@ class _CustomMarkdownify(markdownify.MarkdownConverter): else text ) - def convert_img(self, el: Any, text: str, convert_as_inline: bool) -> str: + def convert_img( + self, el: Any, text: str, convert_as_inline: Optional[bool] = False, **kwargs + ) -> str: """Same as usual converter, but removes data URIs""" alt = el.attrs.get("alt", None) or "" @@ -1752,6 +1763,8 @@ class MarkItDown: ext = ext.strip() if ext == "": return + if ext in extensions: + return # if ext not in extensions: extensions.append(ext)