From 7bea2672a05f5877acb8690b20222593dab13788 Mon Sep 17 00:00:00 2001 From: ZeyuTeng96 <96521059+ZeyuTeng96@users.noreply.github.com> Date: Sun, 9 Feb 2025 12:28:35 +0800 Subject: [PATCH] remove leading and trailing \n for HtmlConverter (#262) --- src/markitdown/_markitdown.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index ae6a7b4..6f40547 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -236,6 +236,9 @@ class HtmlConverter(DocumentConverter): assert isinstance(webpage_text, str) + # remove leading and trailing \n + webpage_text = webpage_text.strip() + return DocumentConverterResult( title=None if soup.title is None else soup.title.string, text_content=webpage_text,