From 997c7af53c6cf8b1b4a58344c95c3cb181ba6f1c Mon Sep 17 00:00:00 2001
From: Adam Fourney
Date: Thu, 14 Nov 2024 07:50:21 -0800
Subject: [PATCH] Added a simple CLI.

---
Review revision: main() now returns an exit status (2 on a usage error, 0 on
success) and the __main__ guard passes it to sys.exit(); the usage text names
the optional FILENAME argument. The blob id on the "index" line of
src/markitdown/__main__.py predates this revision.

 pyproject.toml             |  3 +++
 src/markitdown/__main__.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 src/markitdown/__main__.py

diff --git a/pyproject.toml b/pyproject.toml
index f5ffdb3..d1dd737 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -49,6 +49,9 @@ Source = "https://github.com/microsoft/markitdown"
 [tool.hatch.version]
 path = "src/markitdown/__about__.py"
 
+[project.scripts]
+markitdown = "markitdown.__main__:main"
+
 [tool.hatch.envs.types]
 extra-dependencies = [
   "mypy>=1.0.0",
diff --git a/src/markitdown/__main__.py b/src/markitdown/__main__.py
new file mode 100644
index 0000000..6c8a672
--- /dev/null
+++ b/src/markitdown/__main__.py
@@ -0,0 +1,55 @@
+# SPDX-FileCopyrightText: 2024-present Adam Fourney
+#
+# SPDX-License-Identifier: MIT
+"""Command-line entry point: convert a file (or stdin) to Markdown on stdout."""
+import sys
+from ._markitdown import MarkItDown
+
+# Shown on stderr when the command line cannot be interpreted.
+_USAGE = """\
+SYNTAX:
+
+    markitdown [FILENAME]
+    If FILENAME is empty, markitdown reads from stdin.
+
+EXAMPLE:
+
+    markitdown example.pdf
+
+    OR
+
+    cat example.pdf | markitdown
+
+    OR
+
+    markitdown < example.pdf
+"""
+
+
+def main():
+    """Convert the input named on the command line and print its text content.
+
+    With no arguments the document is read from ``sys.stdin.buffer``; with one
+    argument that argument is passed to ``MarkItDown.convert``.
+
+    Returns:
+        0 after a successful conversion, or 2 when more than one argument was
+        given (the usage text is written to stderr in that case).
+    """
+    args = sys.argv[1:]
+    if len(args) > 1:
+        # A usage error must not look like success to shells and pipelines.
+        sys.stderr.write(_USAGE)
+        return 2
+
+    markitdown = MarkItDown()
+    if args:
+        result = markitdown.convert(args[0])
+    else:
+        result = markitdown.convert_stream(sys.stdin.buffer)
+    print(result.text_content)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())