From 33638f1fe6d380a888f7d9bf5119dba965fba3c3 Mon Sep 17 00:00:00 2001 From: kevinbabou Date: Sun, 15 Dec 2024 17:38:28 -0800 Subject: [PATCH 1/3] feature: add argument parsing and setup.py file for cli tool capability --- setup.py | 31 +++++++++++++++++++++++++++++++ src/markitdown/__main__.py | 33 +++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 setup.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..7d3e311 --- /dev/null +++ b/setup.py @@ -0,0 +1,31 @@ +from setuptools import setup, find_packages + +setup( + name='markitdown', + version='0.1.0', + package_dir={'': 'src'}, + packages=find_packages(where='src'), + install_requires=[ + 'mammoth', + 'markdownify', + 'pandas', + 'pdfminer.six', + 'python-pptx', + 'puremagic', + 'requests', + 'beautifulsoup4', + 'pydub', + 'SpeechRecognition', + 'youtube_transcript_api', + ], + entry_points={ + 'console_scripts': [ + 'markitdown=markitdown.__main__:main', + ], + }, + author='Adam Fourney', + author_email='adamfo@microsoft.com', + description='Convert various file formats to markdown', + license='MIT', + python_requires='>=3.6', +) \ No newline at end of file diff --git a/src/markitdown/__main__.py b/src/markitdown/__main__.py index 6c8a672..a2fafb2 100644 --- a/src/markitdown/__main__.py +++ b/src/markitdown/__main__.py @@ -2,21 +2,15 @@ # # SPDX-License-Identifier: MIT import sys +import argparse from ._markitdown import MarkItDown def main(): - if len(sys.argv) == 1: - markitdown = MarkItDown() - result = markitdown.convert_stream(sys.stdin.buffer) - print(result.text_content) - elif len(sys.argv) == 2: - markitdown = MarkItDown() - result = markitdown.convert(sys.argv[1]) - print(result.text_content) - else: - sys.stderr.write( - """ + parser = argparse.ArgumentParser( + description='Convert various file formats to markdown.', + formatter_class=argparse.RawDescriptionHelpFormatter, + usage=""" SYNTAX: markitdown @@ -34,9 +28,20 @@ EXAMPLE: markitdown < example.pdf """.strip() - + "\n" - ) + ) + + parser.add_argument('filename', nargs='?') + args = parser.parse_args() + + if args.filename is None: + markitdown = MarkItDown() + result = markitdown.convert_stream(sys.stdin.buffer) + print(result.text_content) + else: + markitdown = MarkItDown() + result = markitdown.convert(args.filename) + print(result.text_content) if __name__ == "__main__": - main() + main() \ No newline at end of file From 87846cf5f8797dcc54d2c34d1ac56bb666d99d24 Mon Sep 17 00:00:00 2001 From: kevinbabou Date: Mon, 16 Dec 2024 16:27:59 -0800 Subject: [PATCH 2/3] rm setup.py --- setup.py | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 setup.py diff --git a/setup.py b/setup.py deleted file mode 100644 index 7d3e311..0000000 --- a/setup.py +++ /dev/null @@ -1,31 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name='markitdown', - version='0.1.0', - package_dir={'': 'src'}, - packages=find_packages(where='src'), - install_requires=[ - 'mammoth', - 'markdownify', - 'pandas', - 'pdfminer.six', - 'python-pptx', - 'puremagic', - 'requests', - 'beautifulsoup4', - 'pydub', - 'SpeechRecognition', - 'youtube_transcript_api', - ], - entry_points={ - 'console_scripts': [ - 'markitdown=markitdown.__main__:main', - ], - }, - author='Adam Fourney', - author_email='adamfo@microsoft.com', - description='Convert various file formats to markdown', - license='MIT', - python_requires='>=3.6', -) \ No newline at end of file From ad29122592ec44c68c571ef991a9a6082e9fef36 Mon Sep 17 00:00:00 2001 From: gagb Date: Mon, 16 Dec 2024 18:09:48 -0800 Subject: [PATCH 3/3] run precommit --- src/markitdown/__main__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/markitdown/__main__.py b/src/markitdown/__main__.py index a2fafb2..2d53173 100644 --- a/src/markitdown/__main__.py +++ b/src/markitdown/__main__.py @@ -8,7 +8,7 @@ from ._markitdown import MarkItDown def main(): parser = argparse.ArgumentParser( - description='Convert various file formats to markdown.', + description="Convert various file formats to markdown.", formatter_class=argparse.RawDescriptionHelpFormatter, usage=""" SYNTAX: @@ -27,10 +27,10 @@ EXAMPLE: OR markitdown < example.pdf -""".strip() +""".strip(), ) - parser.add_argument('filename', nargs='?') + parser.add_argument("filename", nargs="?") args = parser.parse_args() if args.filename is None: @@ -44,4 +44,4 @@ EXAMPLE: if __name__ == "__main__": - main() \ No newline at end of file + main()