Cleanup and refactor, in preparation for plugin support. (#318)
* Work started moving converters to individual files. * Significant cleanup and refactor. * Moved everything to a packages subfolder. * Added sample plugin. * Added instructions to the README.md. * Bumped version, and added a note about compatibility.
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
# Version string of this package; re-exported from the package's __init__.
__version__ = "0.0.1a2"
|
||||
@@ -0,0 +1,13 @@
|
||||
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from ._plugin import __plugin_interface_version__, register_converters, RtfConverter
|
||||
from .__about__ import __version__
|
||||
|
||||
# Names exported by `from <package> import *`: the package version, the plugin
# interface version, the plugin entry point, and the converter class itself.
__all__ = [
    "__version__",
    "__plugin_interface_version__",
    "register_converters",
    "RtfConverter",
]
|
||||
@@ -0,0 +1,39 @@
|
||||
from typing import Union
|
||||
from striprtf.striprtf import rtf_to_text
|
||||
|
||||
from markitdown import MarkItDown, DocumentConverter, DocumentConverterResult
|
||||
|
||||
# The version of the plugin interface that this plugin uses
__plugin_interface_version__ = 1
|
||||
|
||||
|
||||
def register_converters(markitdown: MarkItDown, **kwargs):
    """
    Plugin entry point, called during construction of MarkItDown instances
    to register converters provided by plugins.

    Any extra keyword arguments are accepted and ignored.
    """
    # This plugin contributes exactly one converter: a fresh RtfConverter.
    register = markitdown.register_converter
    register(RtfConverter())
|
||||
|
||||
|
||||
class RtfConverter(DocumentConverter):
    """
    Converts an RTF file to plain text in the simplest possible way.
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        """
        Convert the RTF file at ``local_path`` to text.

        The decision to handle the file is based solely on the
        ``file_extension`` keyword argument, compared case-insensitively
        against ``.rtf``.

        Returns:
            ``None`` when the extension is missing or is not ``.rtf``;
            otherwise a ``DocumentConverterResult`` with ``title=None`` and
            ``text_content`` set to the output of ``rtf_to_text``.
        """
        # Bail if not an RTF. Using `or ""` (rather than a .get() default)
        # also covers an explicit file_extension=None, which would otherwise
        # raise AttributeError on .lower().
        extension = kwargs.get("file_extension") or ""
        if extension.lower() != ".rtf":
            return None

        # Read the RTF file
        # NOTE(review): no explicit encoding is given, so decoding relies on
        # the platform default — confirm this is acceptable for all inputs.
        with open(local_path, "r") as f:
            rtf = f.read()

        # Return the result
        return DocumentConverterResult(
            title=None,
            text_content=rtf_to_text(rtf),
        )
|
||||
Reference in New Issue
Block a user