Merge branch 'main' into main

This commit is contained in:
gagb
2024-12-16 13:51:39 -08:00
committed by GitHub
5 changed files with 41 additions and 4 deletions

View File

@@ -27,6 +27,7 @@ import pptx
import puremagic
import requests
from bs4 import BeautifulSoup
+from charset_normalizer import from_path
# Optional Transcription support
try:
@@ -162,9 +163,7 @@ class PlainTextConverter(DocumentConverter):
elif "text/" not in content_type.lower():
return None
-text_content = ""
-with open(local_path, "rt", encoding="utf-8") as fh:
-text_content = fh.read()
+text_content = str(from_path(local_path).best())
return DocumentConverterResult(
title=None,
text_content=text_content,