Don't have ZipConverter accept OOXML files. Extracting an Office document as a generic zip archive will never yield a good result. (#1078)

This commit is contained in:
afourney
2025-02-28 09:54:19 -08:00
committed by GitHub
parent 9a19fdd134
commit 9182923375

View File

@@ -77,6 +77,10 @@ class ZipConverter(DocumentConverter):
try:
# Extract the zip file safely
with zipfile.ZipFile(local_path, "r") as zipObj:
# Bail if we discover it's an Office OOXML file
if "[Content_Types].xml" in zipObj.namelist():
return None
# Safeguard against path traversal
for member in zipObj.namelist():
member_path = os.path.normpath(os.path.join(extraction_dir, member))