fix: support -o param to avoid encoding issues (#116)
* perf: cli supports -o param
* doc: update README

---------

Co-authored-by: gagb <gagb@users.noreply.github.com>
This commit is contained in:
@@ -29,6 +29,12 @@ To install MarkItDown, use pip: `pip install markitdown`. Alternatively, you can
|
|||||||
markitdown path-to-file.pdf > document.md
|
markitdown path-to-file.pdf > document.md
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Or use `-o` to specify the output file:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
markitdown path-to-file.pdf -o document.md
|
||||||
|
```
|
||||||
|
|
||||||
You can also pipe content:
|
You can also pipe content:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
from ._markitdown import MarkItDown
|
from ._markitdown import MarkItDown, DocumentConverterResult
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -29,20 +29,42 @@ def main():
|
|||||||
OR
|
OR
|
||||||
|
|
||||||
markitdown < example.pdf
|
markitdown < example.pdf
|
||||||
|
|
||||||
|
OR to save to a file use
|
||||||
|
|
||||||
|
markitdown example.pdf -o example.md
|
||||||
|
|
||||||
|
OR
|
||||||
|
|
||||||
|
markitdown example.pdf > example.md
|
||||||
"""
|
"""
|
||||||
).strip(),
|
).strip(),
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument("filename", nargs="?")
|
parser.add_argument("filename", nargs="?")
|
||||||
|
parser.add_argument(
|
||||||
|
"-o",
|
||||||
|
"--output",
|
||||||
|
help="Output file name. If not provided, output is written to stdout.",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.filename is None:
|
if args.filename is None:
|
||||||
markitdown = MarkItDown()
|
markitdown = MarkItDown()
|
||||||
result = markitdown.convert_stream(sys.stdin.buffer)
|
result = markitdown.convert_stream(sys.stdin.buffer)
|
||||||
print(result.text_content)
|
_handle_output(args, result)
|
||||||
else:
|
else:
|
||||||
markitdown = MarkItDown()
|
markitdown = MarkItDown()
|
||||||
result = markitdown.convert(args.filename)
|
result = markitdown.convert(args.filename)
|
||||||
|
_handle_output(args, result)
|
||||||
|
|
||||||
|
|
||||||
|
def _handle_output(args, result: DocumentConverterResult):
|
||||||
|
"""Handle output to stdout or file"""
|
||||||
|
if args.output:
|
||||||
|
with open(args.output, "w", encoding="utf-8") as f:
|
||||||
|
f.write(result.text_content)
|
||||||
|
else:
|
||||||
print(result.text_content)
|
print(result.text_content)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user