From 59eb60f8cba78103dd0004ec68785587d02d4f2a Mon Sep 17 00:00:00 2001
From: Yuzhong Zhang <141388234+BetterAndBetterII@users.noreply.github.com>
Date: Wed, 27 Aug 2025 06:20:17 +0800
Subject: [PATCH] fix docx parse error(\n in alt) (#1163)

---
 packages/markitdown/src/markitdown/converters/_markdownify.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/packages/markitdown/src/markitdown/converters/_markdownify.py b/packages/markitdown/src/markitdown/converters/_markdownify.py
index ee78541..e6a0dee 100644
--- a/packages/markitdown/src/markitdown/converters/_markdownify.py
+++ b/packages/markitdown/src/markitdown/converters/_markdownify.py
@@ -95,6 +95,8 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
         src = el.attrs.get("src", None) or el.attrs.get("data-src", None) or ""
         title = el.attrs.get("title", None) or ""
         title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
+        # Remove all line breaks from alt
+        alt = alt.replace("\n", " ")
         if (
             convert_as_inline
             and el.parent.name not in self.options["keep_inline_images_in"]