diff --git a/README.md b/README.md index 6dc096c..7040311 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,43 @@ print(result.text_content) docker build -t markitdown:latest . docker run --rm -i markitdown:latest < ~/your-file.pdf > output.md ``` +
+ +Batch Processing Multiple Files +This example shows how to convert multiple files to markdown format in a single run. The script processes all supported files in a directory and creates corresponding markdown files. + + +```python convert.py +from markitdown import MarkItDown +from openai import OpenAI +import os +client = OpenAI(api_key="your-api-key-here") +md = MarkItDown(llm_client=client, llm_model="gpt-4o-2024-11-20") +supported_extensions = ('.pptx', '.docx', '.pdf', '.jpg', '.jpeg', '.png') +files_to_convert = [f for f in os.listdir('.') if f.lower().endswith(supported_extensions)] +for file in files_to_convert: + print(f"\nConverting {file}...") + try: + md_file = os.path.splitext(file)[0] + '.md' + result = md.convert(file) + with open(md_file, 'w') as f: + f.write(result.text_content) + + print(f"Successfully converted {file} to {md_file}") + except Exception as e: + print(f"Error converting {file}: {str(e)}") + +print("\nAll conversions completed!") +``` +2. Place the script in the same directory as your files +3. Install required packages: like openai +4. Run script ```bash python convert.py ``` + +Note that original files will remain unchanged and new markdown files are created with the same base name. + +
+ ## Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a