remove leading and trailing \n for HtmlConverter (#262)

This commit is contained in:
ZeyuTeng96
2025-02-09 12:28:35 +08:00
committed by GitHub
parent bf6a15e9b5
commit 7bea2672a0

View File

@@ -236,6 +236,9 @@ class HtmlConverter(DocumentConverter):
assert isinstance(webpage_text, str)
# strip all leading and trailing whitespace (not just \n)
webpage_text = webpage_text.strip()
return DocumentConverterResult(
title=None if soup.title is None else soup.title.string,
text_content=webpage_text,