modify ext guesser
This commit is contained in:
@@ -10,7 +10,7 @@ from typing import Any, List, Optional, Union
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from warnings import warn
|
from warnings import warn
|
||||||
from io import BufferedIOBase, TextIOBase
|
from io import BufferedIOBase, TextIOBase, BytesIO
|
||||||
|
|
||||||
# File-format detection
|
# File-format detection
|
||||||
import puremagic
|
import puremagic
|
||||||
@@ -416,7 +416,7 @@ class MarkItDown:
|
|||||||
"""Use puremagic (a Python implementation of libmagic) to guess a file's extension based on the first few bytes."""
|
"""Use puremagic (a Python implementation of libmagic) to guess a file's extension based on the first few bytes."""
|
||||||
# Use puremagic to guess
|
# Use puremagic to guess
|
||||||
try:
|
try:
|
||||||
guesses = None
|
guesses = []
|
||||||
|
|
||||||
# Guess extensions for filepaths
|
# Guess extensions for filepaths
|
||||||
if isinstance(source, str):
|
if isinstance(source, str):
|
||||||
@@ -440,8 +440,9 @@ class MarkItDown:
|
|||||||
except puremagic.main.PureError:
|
except puremagic.main.PureError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Guess extensions for file objects
|
# Guess extensions for file objects. Note that puremagic's magic_stream function requires a BytesIO-like file source
|
||||||
elif isinstance(source, BufferedIOBase) or isinstance(source, TextIOBase):
|
# TODO: Figure out how to guess extensions for TextIO-like file sources (manually converting to BytesIO does not currently work)
|
||||||
|
elif isinstance(source, BufferedIOBase):
|
||||||
guesses = puremagic.magic_stream(source)
|
guesses = puremagic.magic_stream(source)
|
||||||
|
|
||||||
extensions = list()
|
extensions = list()
|
||||||
|
|||||||
Reference in New Issue
Block a user