72 lines
1.8 KiB
Python
72 lines
1.8 KiB
Python
import locale
|
|
from typing import BinaryIO, Any
|
|
from striprtf.striprtf import rtf_to_text
|
|
|
|
from markitdown import (
|
|
MarkItDown,
|
|
DocumentConverter,
|
|
DocumentConverterResult,
|
|
StreamInfo,
|
|
)
|
|
|
|
|
|
# The version of the plugin interface that this plugin uses.
__plugin_interface_version__ = 1

# A stream is accepted when its (lower-cased) MIME type starts with one of
# these prefixes.
ACCEPTED_MIME_TYPE_PREFIXES = ["text/rtf", "application/rtf"]

# A stream is accepted when its (lower-cased) file extension is in this list.
ACCEPTED_FILE_EXTENSIONS = [
    ".rtf",
]
|
|
|
|
|
|
def register_converters(markitdown: MarkItDown, **kwargs):
    """
    Plugin entry point, called while a MarkItDown instance is being
    constructed so that plugins can register the converters they provide.

    Args:
        markitdown: The MarkItDown instance to attach converters to.
        **kwargs: Accepted for interface compatibility; not used here.
    """
    # This plugin contributes exactly one converter: the RTF converter.
    rtf_converter = RtfConverter()
    markitdown.register_converter(rtf_converter)
|
|
|
|
|
|
class RtfConverter(DocumentConverter):
    """
    Converts an RTF file in the simplest possible way: the stream is decoded
    to text and passed through ``rtf_to_text``.
    """

    def accepts(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,
    ) -> bool:
        """
        Report whether this converter handles the described stream.

        Only ``stream_info`` is consulted; ``file_stream`` and ``kwargs`` are
        not read. Returns True when the lower-cased extension is listed in
        ACCEPTED_FILE_EXTENSIONS, or when the lower-cased MIME type starts
        with any entry of ACCEPTED_MIME_TYPE_PREFIXES; False otherwise.
        """
        # Missing (None/empty) values are normalised to "" before comparison.
        mime = (stream_info.mimetype or "").lower()
        ext = (stream_info.extension or "").lower()

        # str.startswith accepts a tuple and matches if any prefix matches.
        return ext in ACCEPTED_FILE_EXTENSIONS or mime.startswith(
            tuple(ACCEPTED_MIME_TYPE_PREFIXES)
        )

    def convert(
        self,
        file_stream: BinaryIO,
        stream_info: StreamInfo,
        **kwargs: Any,
    ) -> DocumentConverterResult:
        """
        Read the whole stream, decode it, and strip the RTF markup.

        The bytes are decoded with ``stream_info.charset`` when it is set,
        otherwise with the system's preferred encoding. The decoded text is
        passed to ``rtf_to_text`` and the output is returned as the result's
        ``markdown`` with no title.
        """
        # Pick the declared charset, falling back to the system default.
        charset = stream_info.charset or locale.getpreferredencoding()

        # Decode the entire stream into a str in one go.
        raw_bytes = file_stream.read()
        rtf_source = raw_bytes.decode(charset)

        plain_text = rtf_to_text(rtf_source)
        return DocumentConverterResult(title=None, markdown=plain_text)
|