Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8eaf5a1da9 | ||
|
|
38c924793c | ||
|
|
b9526d5e47 |
36
README.md
36
README.md
@@ -87,42 +87,6 @@ print(result.text_content)
|
|||||||
docker build -t markitdown:latest .
|
docker build -t markitdown:latest .
|
||||||
docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
|
docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md
|
||||||
```
|
```
|
||||||
<details>
|
|
||||||
|
|
||||||
<summary>Batch Processing Multiple Files</summary>
|
|
||||||
|
|
||||||
This example shows how to convert multiple files to markdown format in a single run. The script processes all supported files in a directory and creates corresponding markdown files.
|
|
||||||
|
|
||||||
|
|
||||||
```python convert.py
|
|
||||||
from markitdown import MarkItDown
|
|
||||||
from openai import OpenAI
|
|
||||||
import os
|
|
||||||
client = OpenAI(api_key="your-api-key-here")
|
|
||||||
md = MarkItDown(llm_client=client, llm_model="gpt-4o-2024-11-20")
|
|
||||||
supported_extensions = ('.pptx', '.docx', '.pdf', '.jpg', '.jpeg', '.png')
|
|
||||||
files_to_convert = [f for f in os.listdir('.') if f.lower().endswith(supported_extensions)]
|
|
||||||
for file in files_to_convert:
|
|
||||||
print(f"\nConverting {file}...")
|
|
||||||
try:
|
|
||||||
md_file = os.path.splitext(file)[0] + '.md'
|
|
||||||
result = md.convert(file)
|
|
||||||
with open(md_file, 'w') as f:
|
|
||||||
f.write(result.text_content)
|
|
||||||
|
|
||||||
print(f"Successfully converted {file} to {md_file}")
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error converting {file}: {str(e)}")
|
|
||||||
|
|
||||||
print("\nAll conversions completed!")
|
|
||||||
```
|
|
||||||
2. Place the script in the same directory as your files
|
|
||||||
3. Install the required packages (for example, `openai`)
|
|
||||||
4. Run the script: `python convert.py`
|
|
||||||
|
|
||||||
Note that the original files remain unchanged; new Markdown files are created alongside them with the same base name.
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
|
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
__version__ = "0.0.1a4"
|
__version__ = "0.0.1"
|
||||||
|
|||||||
Reference in New Issue
Block a user