Cleanup and refactor, in preparation for plugin support. (#318)
* Work started moving converters to individual files.
* Significant cleanup and refactor.
* Moved everything to a packages subfolder.
* Added sample plugin.
* Added instructions to the README.md
* Bumped version, and added a note about compatibility.
96  packages/markitdown-sample-plugin/README.md  Normal file
@@ -0,0 +1,96 @@
# MarkItDown Sample Plugin

[PyPI](https://pypi.org/project/markitdown/)
[AutoGen](https://github.com/microsoft/autogen)

This project shows how to create a sample plugin for MarkItDown. The most important parts are as follows:

First, implement your custom DocumentConverter:

```python
from typing import Union

from striprtf.striprtf import rtf_to_text

from markitdown import DocumentConverter, DocumentConverterResult


class RtfConverter(DocumentConverter):
    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not an RTF file
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".rtf":
            return None

        # Implement the conversion logic here; this sample reads the file
        # and extracts its text with striprtf
        with open(local_path, "r") as f:
            rtf = f.read()

        # Return the result
        return DocumentConverterResult(
            title=None,
            text_content=rtf_to_text(rtf),
        )
```

Next, make sure your package implements and exports the following:

```python
from markitdown import MarkItDown

# The version of the plugin interface that this plugin uses.
# The only supported version is 1 for now.
__plugin_interface_version__ = 1


# The main entrypoint for the plugin. This is called each time MarkItDown instances are created.
def register_converters(markitdown: MarkItDown, **kwargs):
    """
    Called during construction of MarkItDown instances to register converters provided by plugins.
    """

    # Simply create and attach an RtfConverter instance
    markitdown.register_converter(RtfConverter())
```

Finally, create an entrypoint in the `pyproject.toml` file:

```toml
[project.entry-points."markitdown.plugin"]
sample_plugin = "markitdown_sample_plugin"
```

Here, the key `sample_plugin` can be any name, but should ideally be the name of the plugin. The value is the fully qualified name of the package implementing the plugin. MarkItDown scans this entry-point group to discover installed plugins, as sketched below.

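How MarkItDown consumes this group is not shown here, but standard `importlib.metadata` entry-point discovery would look roughly like the following (a minimal sketch; the `load()` call and the use of `__plugin_interface_version__` are assumptions based on the interface described above):

```python
from importlib.metadata import entry_points

# Sketch: enumerate installed plugins registered under the
# "markitdown.plugin" group (MarkItDown's actual loading code may differ).
for ep in entry_points(group="markitdown.plugin"):
    plugin = ep.load()  # imports, e.g., the markitdown_sample_plugin package
    print(ep.name, getattr(plugin, "__plugin_interface_version__", None))
```
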
## Installation

To use the plugin with MarkItDown, it must be installed. To install the plugin from the current directory, use:

```bash
pip install -e .
```

Once the plugin package is installed, verify that it is available to MarkItDown by running:

```bash
markitdown --list-plugins
```

To use the plugin for a conversion, use the `--use-plugins` flag. For example, to convert a PDF:

```bash
markitdown --use-plugins path-to-file.pdf
```

In Python, plugins can be enabled as follows:

```python
from markitdown import MarkItDown

md = MarkItDown(enable_plugins=True)
result = md.convert("path-to-file.pdf")
print(result.text_content)
```

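Alternatively, a converter can be attached to an instance by hand, using the same `register_converter` call the plugin itself uses; a minimal sketch, assuming the sample plugin package is importable:

```python
from markitdown import MarkItDown
from markitdown_sample_plugin import RtfConverter

# Register the converter directly, without entry-point discovery.
md = MarkItDown()
md.register_converter(RtfConverter())
result = md.convert("path-to-file.rtf")
print(result.text_content)
```
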
## Trademarks

This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
trademarks or logos is subject to and must follow
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
Any use of third-party trademarks or logos is subject to those third parties' policies.
70  packages/markitdown-sample-plugin/pyproject.toml  Normal file
@@ -0,0 +1,70 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "markitdown-sample-plugin"
dynamic = ["version"]
description = 'A sample plugin for the "markitdown" library.'
readme = "README.md"
requires-python = ">=3.10"
license = "MIT"
keywords = []
authors = [
  { name = "Adam Fourney", email = "adamfo@microsoft.com" },
]
classifiers = [
  "Development Status :: 4 - Beta",
  "Programming Language :: Python",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Programming Language :: Python :: Implementation :: CPython",
  "Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
  "markitdown",
  "striprtf",
]

[project.urls]
Documentation = "https://github.com/microsoft/markitdown#readme"
Issues = "https://github.com/microsoft/markitdown/issues"
Source = "https://github.com/microsoft/markitdown"

[tool.hatch.version]
path = "src/markitdown_sample_plugin/__about__.py"

# IMPORTANT: MarkItDown will look for this entry point to find the plugin.
[project.entry-points."markitdown.plugin"]
sample_plugin = "markitdown_sample_plugin"

[tool.hatch.envs.types]
extra-dependencies = [
  "mypy>=1.0.0",
]

[tool.hatch.envs.types.scripts]
check = "mypy --install-types --non-interactive {args:src/markitdown_sample_plugin tests}"

[tool.coverage.run]
source_pkgs = ["markitdown-sample-plugin", "tests"]
branch = true
parallel = true
omit = [
  "src/markitdown_sample_plugin/__about__.py",
]

[tool.coverage.paths]
markitdown-sample-plugin = ["src/markitdown_sample_plugin", "*/markitdown-sample-plugin/src/markitdown_sample_plugin"]
tests = ["tests", "*/markitdown-sample-plugin/tests"]

[tool.coverage.report]
exclude_lines = [
  "no cov",
  "if __name__ == .__main__.:",
  "if TYPE_CHECKING:",
]

[tool.hatch.build.targets.sdist]
only-include = ["src/markitdown_sample_plugin"]
4  packages/markitdown-sample-plugin/src/markitdown_sample_plugin/__about__.py  Normal file
@@ -0,0 +1,4 @@
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
#
# SPDX-License-Identifier: MIT
__version__ = "0.0.1a2"
13  packages/markitdown-sample-plugin/src/markitdown_sample_plugin/__init__.py  Normal file
@@ -0,0 +1,13 @@
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
#
# SPDX-License-Identifier: MIT

from ._plugin import __plugin_interface_version__, register_converters, RtfConverter
from .__about__ import __version__

__all__ = [
    "__version__",
    "__plugin_interface_version__",
    "register_converters",
    "RtfConverter",
]
39  packages/markitdown-sample-plugin/src/markitdown_sample_plugin/_plugin.py  Normal file
@@ -0,0 +1,39 @@
from typing import Union
from striprtf.striprtf import rtf_to_text

from markitdown import MarkItDown, DocumentConverter, DocumentConverterResult

__plugin_interface_version__ = (
    1  # The version of the plugin interface that this plugin uses
)


def register_converters(markitdown: MarkItDown, **kwargs):
    """
    Called during construction of MarkItDown instances to register converters provided by plugins.
    """

    # Simply create and attach an RtfConverter instance
    markitdown.register_converter(RtfConverter())


class RtfConverter(DocumentConverter):
    """
    Converts an RTF file in the simplest possible way.
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not an RTF file
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".rtf":
            return None

        # Read the RTF file
        with open(local_path, "r") as f:
            rtf = f.read()

        # Return the result
        return DocumentConverterResult(
            title=None,
            text_content=rtf_to_text(rtf),
        )
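A hypothetical test for this converter, exercising it directly against the bundled fixture (not part of this excerpt; the expected substring is taken from the fixture's visible text):

```python
from markitdown_sample_plugin import RtfConverter


def test_rtf_converter():
    converter = RtfConverter()

    # Files without an .rtf extension should be declined.
    assert converter.convert("tests/test_files/test.rtf", file_extension=".docx") is None

    # The fixture's body text should survive the striprtf conversion.
    result = converter.convert("tests/test_files/test.rtf", file_extension=".rtf")
    assert result is not None
    assert "MarkItDown sample plugin" in result.text_content
```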
3  packages/markitdown-sample-plugin/tests/__init__.py  Normal file
@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
#
# SPDX-License-Identifier: MIT
251  packages/markitdown-sample-plugin/tests/test_files/test.rtf  Executable file
@@ -0,0 +1,251 @@
[251-line raw RTF fixture: font and color tables, style sheets, list and table style definitions, and embedded theme/datastore hex payloads. The document's visible text is the title "This is a Sample RTF File" and the body "It is included to test if the MarkItDown sample plugin can correctly convert RTF files."]
|
||||
5b31897bdb01feffffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff00000000000000000000000000000000000000000000000000000000
|
||||
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000
|
||||
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff000000000000000000000000000000000000000000000000
|
||||
0000000000000000000000000000000000000000000000000105000000000000}}
|
||||
@@ -0,0 +1,40 @@
#!/usr/bin/env python3 -m pytest
import os
import pytest

from markitdown import MarkItDown
from markitdown_sample_plugin import RtfConverter

TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "test_files")

RTF_TEST_STRINGS = {
    "This is a Sample RTF File",
    "It is included to test if the MarkItDown sample plugin can correctly convert RTF files.",
}


def test_converter() -> None:
    """Tests the RTF converter directly."""
    converter = RtfConverter()
    result = converter.convert(
        os.path.join(TEST_FILES_DIR, "test.rtf"), file_extension=".rtf"
    )

    for test_string in RTF_TEST_STRINGS:
        assert test_string in result.text_content


def test_markitdown() -> None:
    """Tests that MarkItDown correctly loads the plugin."""
    md = MarkItDown(enable_plugins=True)
    result = md.convert(os.path.join(TEST_FILES_DIR, "test.rtf"))

    for test_string in RTF_TEST_STRINGS:
        assert test_string in result.text_content


if __name__ == "__main__":
    """Runs this file's tests from the command line."""
    test_converter()
    test_markitdown()
    print("All tests passed.")
52
packages/markitdown/README.md
Normal file
@@ -0,0 +1,52 @@
# MarkItDown

> [!IMPORTANT]
> MarkItDown is a Python package and command-line utility for converting various files to Markdown (e.g., for indexing, text analysis, etc.).
>
> For more information, and full documentation, see the project [README.md](https://github.com/microsoft/markitdown) on GitHub.

## Installation

From PyPI:

```bash
pip install markitdown
```

From source:

```bash
git clone git@github.com:microsoft/markitdown.git
cd markitdown
pip install -e packages/markitdown
```

## Usage

### Command-Line

```bash
markitdown path-to-file.pdf > document.md
```

### Python API

```python
from markitdown import MarkItDown

md = MarkItDown()
result = md.convert("test.xlsx")
print(result.text_content)
```
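The same `convert` call also accepts `http:`, `https:`, and `file:` URLs (dispatched to `convert_url`; see `_markitdown.py` later in this commit). A minimal sketch, using a placeholder URL:

```python
from markitdown import MarkItDown

md = MarkItDown()
# Placeholder URL; any reachable webpage or downloadable document works.
result = md.convert("https://example.com/page.html")
print(result.text_content)
```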
### More Information

For more information, and full documentation, see the project [README.md](https://github.com/microsoft/markitdown) on GitHub.

## Trademarks

This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
trademarks or logos is subject to and must follow
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
Any use of third-party trademarks or logos are subject to those third-party's policies.
87
packages/markitdown/pyproject.toml
Normal file
@@ -0,0 +1,87 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "markitdown"
dynamic = ["version"]
description = 'Utility tool for converting various files to Markdown'
readme = "README.md"
requires-python = ">=3.10"
license = "MIT"
keywords = []
authors = [
  { name = "Adam Fourney", email = "adamfo@microsoft.com" },
]
classifiers = [
  "Development Status :: 4 - Beta",
  "Programming Language :: Python",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.12",
  "Programming Language :: Python :: 3.13",
  "Programming Language :: Python :: Implementation :: CPython",
  "Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
  "beautifulsoup4",
  "requests",
  "mammoth",
  "markdownify",
  "numpy",
  "python-pptx",
  "pandas",
  "openpyxl",
  "xlrd",
  "pdfminer.six",
  "puremagic",
  "pydub",
  "olefile",
  "youtube-transcript-api",
  "SpeechRecognition",
  "pathvalidate",
  "charset-normalizer",
  "openai",
  "azure-ai-documentintelligence",
  "azure-identity"
]

[project.urls]
Documentation = "https://github.com/microsoft/markitdown#readme"
Issues = "https://github.com/microsoft/markitdown/issues"
Source = "https://github.com/microsoft/markitdown"

[tool.hatch.version]
path = "src/markitdown/__about__.py"

[project.scripts]
markitdown = "markitdown.__main__:main"

[tool.hatch.envs.types]
extra-dependencies = [
  "mypy>=1.0.0",
]
[tool.hatch.envs.types.scripts]
check = "mypy --install-types --non-interactive {args:src/markitdown tests}"

[tool.coverage.run]
source_pkgs = ["markitdown", "tests"]
branch = true
parallel = true
omit = [
  "src/markitdown/__about__.py",
]

[tool.coverage.paths]
markitdown = ["src/markitdown", "*/markitdown/src/markitdown"]
tests = ["tests", "*/markitdown/tests"]

[tool.coverage.report]
exclude_lines = [
  "no cov",
  "if __name__ == .__main__.:",
  "if TYPE_CHECKING:",
]

[tool.hatch.build.targets.sdist]
only-include = ["src/markitdown"]
4
packages/markitdown/src/markitdown/__about__.py
Normal file
@@ -0,0 +1,4 @@
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
#
# SPDX-License-Identifier: MIT
__version__ = "0.0.2a1"
22
packages/markitdown/src/markitdown/__init__.py
Normal file
@@ -0,0 +1,22 @@
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
#
# SPDX-License-Identifier: MIT

from ._markitdown import MarkItDown
from ._exceptions import (
    MarkItDownException,
    ConverterPrerequisiteException,
    FileConversionException,
    UnsupportedFormatException,
)
from .converters import DocumentConverter, DocumentConverterResult

__all__ = [
    "MarkItDown",
    "DocumentConverter",
    "DocumentConverterResult",
    "MarkItDownException",
    "ConverterPrerequisiteException",
    "FileConversionException",
    "UnsupportedFormatException",
]
139
packages/markitdown/src/markitdown/__main__.py
Normal file
@@ -0,0 +1,139 @@
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
#
# SPDX-License-Identifier: MIT
import argparse
import sys
from textwrap import dedent
from importlib.metadata import entry_points
from .__about__ import __version__
from ._markitdown import MarkItDown, DocumentConverterResult


def main():
    parser = argparse.ArgumentParser(
        description="Convert various file formats to markdown.",
        prog="markitdown",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=dedent(
            """
            SYNTAX:

                markitdown <OPTIONAL: FILENAME>
                If FILENAME is empty, markitdown reads from stdin.

            EXAMPLE:

                markitdown example.pdf

                OR

                cat example.pdf | markitdown

                OR

                markitdown < example.pdf

                OR to save to a file use

                markitdown example.pdf -o example.md

                OR

                markitdown example.pdf > example.md
            """
        ).strip(),
    )

    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"%(prog)s {__version__}",
        help="show the version number and exit",
    )

    parser.add_argument(
        "-o",
        "--output",
        help="Output file name. If not provided, output is written to stdout.",
    )

    parser.add_argument(
        "-d",
        "--use-docintel",
        action="store_true",
        help="Use Document Intelligence to extract text instead of offline conversion. Requires a valid Document Intelligence Endpoint.",
    )

    parser.add_argument(
        "-e",
        "--endpoint",
        type=str,
        help="Document Intelligence Endpoint. Required if using Document Intelligence.",
    )

    parser.add_argument(
        "-p",
        "--use-plugins",
        action="store_true",
        help="Use 3rd-party plugins to convert files. Use --list-plugins to see installed plugins.",
    )

    parser.add_argument(
        "--list-plugins",
        action="store_true",
        help="List installed 3rd-party plugins. Plugins are loaded when using the -p or --use-plugins option.",
    )

    parser.add_argument("filename", nargs="?")
    args = parser.parse_args()

    if args.list_plugins:
        # List installed plugins, then exit
        print("Installed MarkItDown 3rd-party Plugins:\n")
        plugin_entry_points = list(entry_points(group="markitdown.plugin"))
        if len(plugin_entry_points) == 0:
            print("  * No 3rd-party plugins installed.")
            print(
                "\nFind plugins by searching for the hashtag #markitdown-plugin on GitHub.\n"
            )
        else:
            for entry_point in plugin_entry_points:
                print(f"  * {entry_point.name:<16}\t(package: {entry_point.value})")
            print(
                "\nUse the -p (or --use-plugins) option to enable 3rd-party plugins.\n"
            )
        sys.exit(0)

    if args.use_docintel:
        if args.endpoint is None:
            raise ValueError(
                "Document Intelligence Endpoint is required when using Document Intelligence."
            )
        elif args.filename is None:
            raise ValueError("Filename is required when using Document Intelligence.")
        markitdown = MarkItDown(
            enable_plugins=args.use_plugins, docintel_endpoint=args.endpoint
        )
    else:
        markitdown = MarkItDown(enable_plugins=args.use_plugins)

    if args.filename is None:
        result = markitdown.convert_stream(sys.stdin.buffer)
    else:
        result = markitdown.convert(args.filename)

    _handle_output(args, result)


def _handle_output(args, result: DocumentConverterResult):
    """Handle output to stdout or file"""
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(result.text_content)
    else:
        print(result.text_content)


if __name__ == "__main__":
    main()
37
packages/markitdown/src/markitdown/_exceptions.py
Normal file
@@ -0,0 +1,37 @@
class MarkItDownException(BaseException):
    """
    Base exception class for MarkItDown.
    """

    pass


class ConverterPrerequisiteException(MarkItDownException):
    """
    Thrown when instantiating a DocumentConverter in cases where
    a required library or dependency is not installed, an API key
    is not set, or some other prerequisite is not met.

    This is not necessarily a fatal error. If thrown during
    MarkItDown's plugin loading phase, the converter will simply be
    skipped, and a warning will be issued.
    """

    pass


class FileConversionException(MarkItDownException):
    """
    Thrown when a suitable converter was found, but the conversion
    process fails for any reason.
    """

    pass


class UnsupportedFormatException(MarkItDownException):
    """
    Thrown when no suitable converter was found for the given file.
    """

    pass
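Since `convert` surfaces failures as these exception types (see `_markitdown.py` below), callers can branch on them. A minimal sketch, using a hypothetical input file:

```python
from markitdown import (
    MarkItDown,
    FileConversionException,
    UnsupportedFormatException,
)

md = MarkItDown()
try:
    result = md.convert("example.xyz")  # hypothetical file
    print(result.text_content)
except UnsupportedFormatException:
    # No registered converter accepted any of the file's candidate extensions
    print("No converter available for this format.")
except FileConversionException as e:
    # A converter matched, but the conversion itself failed
    print(f"Conversion failed: {e}")
```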
440
packages/markitdown/src/markitdown/_markitdown.py
Normal file
@@ -0,0 +1,440 @@
import copy
import mimetypes
import os
import re
import tempfile
import warnings
import traceback
from importlib.metadata import entry_points
from typing import Any, List, Optional, Union
from pathlib import Path
from urllib.parse import urlparse
from warnings import warn

# File-format detection
import puremagic
import requests

from .converters import (
    DocumentConverter,
    DocumentConverterResult,
    PlainTextConverter,
    HtmlConverter,
    RssConverter,
    WikipediaConverter,
    YouTubeConverter,
    IpynbConverter,
    BingSerpConverter,
    PdfConverter,
    DocxConverter,
    XlsxConverter,
    XlsConverter,
    PptxConverter,
    ImageConverter,
    WavConverter,
    Mp3Converter,
    OutlookMsgConverter,
    ZipConverter,
    DocumentIntelligenceConverter,
)

from ._exceptions import (
    FileConversionException,
    UnsupportedFormatException,
    ConverterPrerequisiteException,
)

# Override mimetype for csv to fix issue on windows
mimetypes.add_type("text/csv", ".csv")

PRIORITY_SPECIFIC_FILE_FORMAT = 0.0
PRIORITY_GENERIC_FILE_FORMAT = 10.0


_plugins: Union[None, List[Any]] = None


def _load_plugins() -> Union[None, List[Any]]:
    """Lazily load plugins, exiting early if already loaded."""
    global _plugins

    # Skip if we've already loaded plugins
    if _plugins is not None:
        return _plugins

    # Load plugins
    _plugins = []
    for entry_point in entry_points(group="markitdown.plugin"):
        try:
            _plugins.append(entry_point.load())
        except Exception:
            tb = traceback.format_exc()
            warn(f"Plugin '{entry_point.name}' failed to load ... skipping:\n{tb}")

    return _plugins


class MarkItDown:
    """(In preview) An extremely simple text-based document reader, suitable for LLM use.
    This reader will convert common file-types or webpages to Markdown."""

    def __init__(
        self,
        *,
        enable_builtins: Union[None, bool] = None,
        enable_plugins: Union[None, bool] = None,
        **kwargs,
    ):
        self._builtins_enabled = False
        self._plugins_enabled = False

        requests_session = kwargs.get("requests_session")
        if requests_session is None:
            self._requests_session = requests.Session()
        else:
            self._requests_session = requests_session

        # TODO - remove these (see enable_builtins)
        self._llm_client = None
        self._llm_model = None
        self._exiftool_path = None
        self._style_map = None

        # Register the converters
        self._page_converters: List[DocumentConverter] = []

        if (
            enable_builtins is None or enable_builtins
        ):  # Default to True when not specified
            self.enable_builtins(**kwargs)

        if enable_plugins:
            self.enable_plugins(**kwargs)

    def enable_builtins(self, **kwargs) -> None:
        """
        Enable and register built-in converters.
        Built-in converters are enabled by default.
        This method should only be called once, if built-ins were initially disabled.
        """
        if not self._builtins_enabled:
            # TODO: Move these into converter constructors
            self._llm_client = kwargs.get("llm_client")
            self._llm_model = kwargs.get("llm_model")
            self._exiftool_path = kwargs.get("exiftool_path")
            self._style_map = kwargs.get("style_map")

            # Register converters for successful browsing operations
            # Later registrations are tried first / take higher priority than earlier registrations
            # To this end, the most specific converters should appear below the most generic converters
            self.register_converter(PlainTextConverter())
            self.register_converter(ZipConverter())
            self.register_converter(HtmlConverter())
            self.register_converter(RssConverter())
            self.register_converter(WikipediaConverter())
            self.register_converter(YouTubeConverter())
            self.register_converter(BingSerpConverter())
            self.register_converter(DocxConverter())
            self.register_converter(XlsxConverter())
            self.register_converter(XlsConverter())
            self.register_converter(PptxConverter())
            self.register_converter(WavConverter())
            self.register_converter(Mp3Converter())
            self.register_converter(ImageConverter())
            self.register_converter(IpynbConverter())
            self.register_converter(PdfConverter())
            self.register_converter(OutlookMsgConverter())

            # Register Document Intelligence converter at the top of the stack if endpoint is provided
            docintel_endpoint = kwargs.get("docintel_endpoint")
            if docintel_endpoint is not None:
                self.register_converter(
                    DocumentIntelligenceConverter(endpoint=docintel_endpoint)
                )

            self._builtins_enabled = True
        else:
            warn("Built-in converters are already enabled.", RuntimeWarning)

    def enable_plugins(self, **kwargs) -> None:
        """
        Enable and register converters provided by plugins.
        Plugins are disabled by default.
        This method should only be called once, if plugins were initially disabled.
        """
        if not self._plugins_enabled:
            # Load plugins
            for plugin in _load_plugins():
                try:
                    plugin.register_converters(self, **kwargs)
                except Exception:
                    tb = traceback.format_exc()
                    warn(f"Plugin '{plugin}' failed to register converters:\n{tb}")
            self._plugins_enabled = True
        else:
            warn("Plugin converters are already enabled.", RuntimeWarning)

    def convert(
        self, source: Union[str, requests.Response, Path], **kwargs: Any
    ) -> DocumentConverterResult:  # TODO: deal with kwargs
        """
        Args:
            - source: a path (as a string or a pathlib.Path object), a URL string, or a requests.Response object
            - extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
        """

        # Local path or url
        if isinstance(source, str):
            if (
                source.startswith("http://")
                or source.startswith("https://")
                or source.startswith("file://")
            ):
                return self.convert_url(source, **kwargs)
            else:
                return self.convert_local(source, **kwargs)
        # Request response
        elif isinstance(source, requests.Response):
            return self.convert_response(source, **kwargs)
        elif isinstance(source, Path):
            return self.convert_local(source, **kwargs)

    def convert_local(
        self, path: Union[str, Path], **kwargs: Any
    ) -> DocumentConverterResult:  # TODO: deal with kwargs
        if isinstance(path, Path):
            path = str(path)
        # Prepare a list of extensions to try (in order of priority)
        ext = kwargs.get("file_extension")
        extensions = [ext] if ext is not None else []

        # Get extension alternatives from the path and puremagic
        base, ext = os.path.splitext(path)
        self._append_ext(extensions, ext)

        for g in self._guess_ext_magic(path):
            self._append_ext(extensions, g)

        # Convert
        return self._convert(path, extensions, **kwargs)

    # TODO what should stream's type be?
    def convert_stream(
        self, stream: Any, **kwargs: Any
    ) -> DocumentConverterResult:  # TODO: deal with kwargs
        # Prepare a list of extensions to try (in order of priority)
        ext = kwargs.get("file_extension")
        extensions = [ext] if ext is not None else []

        # Save the file locally to a temporary file. It will be deleted before this method exits
        handle, temp_path = tempfile.mkstemp()
        fh = os.fdopen(handle, "wb")
        result = None
        try:
            # Write to the temporary file
            content = stream.read()
            if isinstance(content, str):
                fh.write(content.encode("utf-8"))
            else:
                fh.write(content)
            fh.close()

            # Use puremagic to check for more extension options
            for g in self._guess_ext_magic(temp_path):
                self._append_ext(extensions, g)

            # Convert
            result = self._convert(temp_path, extensions, **kwargs)
        # Clean up
        finally:
            try:
                fh.close()
            except Exception:
                pass
            os.unlink(temp_path)

        return result

    def convert_url(
        self, url: str, **kwargs: Any
    ) -> DocumentConverterResult:  # TODO: fix kwargs type
        # Send a HTTP request to the URL
        response = self._requests_session.get(url, stream=True)
        response.raise_for_status()
        return self.convert_response(response, **kwargs)

    def convert_response(
        self, response: requests.Response, **kwargs: Any
    ) -> DocumentConverterResult:  # TODO fix kwargs type
        # Prepare a list of extensions to try (in order of priority)
        ext = kwargs.get("file_extension")
        extensions = [ext] if ext is not None else []

        # Guess from the mimetype
        content_type = response.headers.get("content-type", "").split(";")[0]
        self._append_ext(extensions, mimetypes.guess_extension(content_type))

        # Read the content disposition if there is one
        content_disposition = response.headers.get("content-disposition", "")
        m = re.search(r"filename=([^;]+)", content_disposition)
        if m:
            base, ext = os.path.splitext(m.group(1).strip("\"'"))
            self._append_ext(extensions, ext)

        # Read the extension from the path
        base, ext = os.path.splitext(urlparse(response.url).path)
        self._append_ext(extensions, ext)

        # Save the file locally to a temporary file. It will be deleted before this method exits
        handle, temp_path = tempfile.mkstemp()
        fh = os.fdopen(handle, "wb")
        result = None
        try:
            # Download the file
            for chunk in response.iter_content(chunk_size=512):
                fh.write(chunk)
            fh.close()

            # Use puremagic to check for more extension options
            for g in self._guess_ext_magic(temp_path):
                self._append_ext(extensions, g)

            # Convert
            result = self._convert(temp_path, extensions, url=response.url, **kwargs)
        # Clean up
        finally:
            try:
                fh.close()
            except Exception:
                pass
            os.unlink(temp_path)

        return result

    def _convert(
        self, local_path: str, extensions: List[Union[str, None]], **kwargs
    ) -> DocumentConverterResult:
        error_trace = ""

        # Create a copy of the page_converters list, sorted by priority.
        # We do this with each call to _convert because the priority of converters may change between calls.
        # The sort is guaranteed to be stable, so converters with the same priority will remain in the same order.
        sorted_converters = sorted(self._page_converters, key=lambda x: x.priority)

        for ext in extensions + [None]:  # Try last with no extension
            for converter in sorted_converters:
                _kwargs = copy.deepcopy(kwargs)

                # Overwrite file_extension appropriately
                if ext is None:
                    if "file_extension" in _kwargs:
                        del _kwargs["file_extension"]
                else:
                    _kwargs.update({"file_extension": ext})

                # Copy any additional global options
                if "llm_client" not in _kwargs and self._llm_client is not None:
                    _kwargs["llm_client"] = self._llm_client

                if "llm_model" not in _kwargs and self._llm_model is not None:
                    _kwargs["llm_model"] = self._llm_model

                if "style_map" not in _kwargs and self._style_map is not None:
                    _kwargs["style_map"] = self._style_map

                if "exiftool_path" not in _kwargs and self._exiftool_path is not None:
                    _kwargs["exiftool_path"] = self._exiftool_path

                # Add the list of converters for nested processing
                _kwargs["_parent_converters"] = self._page_converters

                # If we hit an error log it and keep trying
                # try:
                if True:
                    res = converter.convert(local_path, **_kwargs)
                # except Exception:
                #     error_trace = ("\n\n" + traceback.format_exc()).strip()

                if res is not None:
                    # Normalize the content
                    res.text_content = "\n".join(
                        [line.rstrip() for line in re.split(r"\r?\n", res.text_content)]
                    )
                    res.text_content = re.sub(r"\n{3,}", "\n\n", res.text_content)

                    # Todo
                    return res

        # If we got this far without success, report any exceptions
        if len(error_trace) > 0:
            raise FileConversionException(
                f"Could not convert '{local_path}' to Markdown. File type was recognized as {extensions}. While converting the file, the following error was encountered:\n\n{error_trace}"
            )

        # Nothing can handle it!
        raise UnsupportedFormatException(
            f"Could not convert '{local_path}' to Markdown. The formats {extensions} are not supported."
        )

    def _append_ext(self, extensions, ext):
        """Append a unique non-None, non-empty extension to a list of extensions."""
        if ext is None:
            return
        ext = ext.strip()
        if ext == "":
            return
        # if ext not in extensions:
        extensions.append(ext)

    def _guess_ext_magic(self, path):
        """Use puremagic (a Python implementation of libmagic) to guess a file's extension based on the first few bytes."""
        # Use puremagic to guess
        try:
            guesses = puremagic.magic_file(path)

            # Fix for: https://github.com/microsoft/markitdown/issues/222
            # If there are no guesses, then try again after trimming leading ASCII whitespaces.
            # ASCII whitespace characters are those byte values in the sequence b' \t\n\r\x0b\f'
            # (space, tab, newline, carriage return, vertical tab, form feed).
            if len(guesses) == 0:
                with open(path, "rb") as file:
                    while True:
                        char = file.read(1)
                        if not char:  # End of file
                            break
                        if not char.isspace():
                            file.seek(file.tell() - 1)
                            break
                    try:
                        guesses = puremagic.magic_stream(file)
                    except puremagic.main.PureError:
                        pass

            extensions = list()
            for g in guesses:
                ext = g.extension.strip()
                if len(ext) > 0:
                    if not ext.startswith("."):
                        ext = "." + ext
                    if ext not in extensions:
                        extensions.append(ext)
            return extensions
        except FileNotFoundError:
            pass
        except IsADirectoryError:
            pass
        except PermissionError:
            pass
        return []

    def register_page_converter(self, converter: DocumentConverter) -> None:
        """DEPRECATED: Use register_converter instead."""
        warn(
            "register_page_converter is deprecated. Use register_converter instead.",
            DeprecationWarning,
        )
        self.register_converter(converter)

    def register_converter(self, converter: DocumentConverter) -> None:
        """Register a page text converter."""
        self._page_converters.insert(0, converter)
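Because `enable_builtins` is now a constructor flag, an instance can be restricted to a hand-picked converter set. A minimal sketch (not part of this commit) that registers only the built-in `PdfConverter`:

```python
from markitdown import MarkItDown
from markitdown.converters import PdfConverter

# Start with no converters registered, then attach exactly one.
md = MarkItDown(enable_builtins=False)
md.register_converter(PdfConverter())

result = md.convert("example.pdf")  # hypothetical file
print(result.text_content)
```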
45
packages/markitdown/src/markitdown/converters/__init__.py
Normal file
@@ -0,0 +1,45 @@
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
#
# SPDX-License-Identifier: MIT

from ._base import DocumentConverter, DocumentConverterResult
from ._plain_text_converter import PlainTextConverter
from ._html_converter import HtmlConverter
from ._rss_converter import RssConverter
from ._wikipedia_converter import WikipediaConverter
from ._youtube_converter import YouTubeConverter
from ._ipynb_converter import IpynbConverter
from ._bing_serp_converter import BingSerpConverter
from ._pdf_converter import PdfConverter
from ._docx_converter import DocxConverter
from ._xlsx_converter import XlsxConverter, XlsConverter
from ._pptx_converter import PptxConverter
from ._image_converter import ImageConverter
from ._wav_converter import WavConverter
from ._mp3_converter import Mp3Converter
from ._outlook_msg_converter import OutlookMsgConverter
from ._zip_converter import ZipConverter
from ._doc_intel_converter import DocumentIntelligenceConverter

__all__ = [
    "DocumentConverter",
    "DocumentConverterResult",
    "PlainTextConverter",
    "HtmlConverter",
    "RssConverter",
    "WikipediaConverter",
    "YouTubeConverter",
    "IpynbConverter",
    "BingSerpConverter",
    "PdfConverter",
    "DocxConverter",
    "XlsxConverter",
    "XlsConverter",
    "PptxConverter",
    "ImageConverter",
    "WavConverter",
    "Mp3Converter",
    "OutlookMsgConverter",
    "ZipConverter",
    "DocumentIntelligenceConverter",
]
34
packages/markitdown/src/markitdown/converters/_base.py
Normal file
@@ -0,0 +1,34 @@
from typing import Any, Union


class DocumentConverterResult:
    """The result of converting a document to text."""

    def __init__(self, title: Union[str, None] = None, text_content: str = ""):
        self.title: Union[str, None] = title
        self.text_content: str = text_content


class DocumentConverter:
    """Abstract superclass of all DocumentConverters."""

    def __init__(self, priority: float = 0.0):
        self._priority = priority

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        raise NotImplementedError("Subclasses must implement this method")

    @property
    def priority(self) -> float:
        """Priority of the converter in markitdown's converter list. Lower priority values are tried first, so specific converters should use lower values than generic ones."""
        return self._priority

    @priority.setter
    def priority(self, value: float):
        self._priority = value

    @priority.deleter
    def priority(self):
        raise AttributeError("Cannot delete the priority attribute")
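Since `_convert` sorts converters ascending by `priority` (see `_markitdown.py` above), a catch-all converter can be deferred until everything else has been tried by giving it a large value. A minimal sketch with a hypothetical converter:

```python
from typing import Any, Union

from markitdown import DocumentConverter, DocumentConverterResult


class LastResortConverter(DocumentConverter):
    """Hypothetical converter tried only after all lower-priority converters."""

    def __init__(self):
        # 10.0 is PRIORITY_GENERIC_FILE_FORMAT in _markitdown.py;
        # anything larger runs later still.
        super().__init__(priority=100.0)

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        return DocumentConverterResult(text_content="(unconvertible file)")
```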
81
packages/markitdown/src/markitdown/converters/_bing_serp_converter.py
Normal file
@@ -0,0 +1,81 @@
# type: ignore
import base64
import binascii
import re

from typing import Union
from urllib.parse import parse_qs, urlparse
from bs4 import BeautifulSoup

from ._base import DocumentConverter, DocumentConverterResult
from ._markdownify import _CustomMarkdownify


class BingSerpConverter(DocumentConverter):
    """
    Handle Bing results pages (only the organic search results).
    NOTE: It is better to use the Bing API.
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a Bing SERP
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".html", ".htm"]:
            return None
        url = kwargs.get("url", "")
        if not re.search(r"^https://www\.bing\.com/search\?q=", url):
            return None

        # Parse the query parameters
        parsed_params = parse_qs(urlparse(url).query)
        query = parsed_params.get("q", [""])[0]

        # Parse the file
        soup = None
        with open(local_path, "rt", encoding="utf-8") as fh:
            soup = BeautifulSoup(fh.read(), "html.parser")

        # Clean up some formatting
        for tptt in soup.find_all(class_="tptt"):
            if hasattr(tptt, "string") and tptt.string:
                tptt.string += " "
        for slug in soup.find_all(class_="algoSlug_icon"):
            slug.extract()

        # Parse the algorithmic results
        _markdownify = _CustomMarkdownify()
        results = list()
        for result in soup.find_all(class_="b_algo"):
            # Rewrite redirect urls
            for a in result.find_all("a", href=True):
                parsed_href = urlparse(a["href"])
                qs = parse_qs(parsed_href.query)

                # The destination is contained in the u parameter,
                # but appears to be base64 encoded, with some prefix
                if "u" in qs:
                    u = (
                        qs["u"][0][2:].strip() + "=="
                    )  # Python 3 doesn't care about extra padding

                    try:
                        # RFC 4648 "Base64URL" variant, which uses "-" and "_"
                        a["href"] = base64.b64decode(u, altchars="-_").decode("utf-8")
                    except UnicodeDecodeError:
                        pass
                    except binascii.Error:
                        pass

            # Convert to markdown
            md_result = _markdownify.convert_soup(result).strip()
            lines = [line.strip() for line in re.split(r"\n+", md_result)]
            results.append("\n".join([line for line in lines if len(line) > 0]))

        webpage_text = (
            f"## A Bing search for '{query}' found the following results:\n\n"
            + "\n\n".join(results)
        )

        return DocumentConverterResult(
            title=None if soup.title is None else soup.title.string,
            text_content=webpage_text,
        )
85
packages/markitdown/src/markitdown/converters/_doc_intel_converter.py
Normal file
@@ -0,0 +1,85 @@
import re

from typing import Any, Union

# Azure imports
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import (
    AnalyzeDocumentRequest,
    AnalyzeResult,
    DocumentAnalysisFeature,
)
from azure.identity import DefaultAzureCredential

from ._base import DocumentConverter, DocumentConverterResult


# TODO: currently, there is a bug in the document intelligence SDK with importing the "ContentFormat" enum.
# This constant is a temporary fix until the bug is resolved.
CONTENT_FORMAT = "markdown"


class DocumentIntelligenceConverter(DocumentConverter):
    """Specialized DocumentConverter that uses Document Intelligence to extract text from documents."""

    def __init__(
        self,
        endpoint: str,
        api_version: str = "2024-07-31-preview",
    ):
        self.endpoint = endpoint
        self.api_version = api_version
        self.doc_intel_client = DocumentIntelligenceClient(
            endpoint=self.endpoint,
            api_version=self.api_version,
            credential=DefaultAzureCredential(),
        )

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        # Bail if extension is not supported by Document Intelligence
        extension = kwargs.get("file_extension", "")
        docintel_extensions = [
            ".pdf",
            ".docx",
            ".xlsx",
            ".pptx",
            ".html",
            ".jpeg",
            ".jpg",
            ".png",
            ".bmp",
            ".tiff",
            ".heif",
        ]
        if extension.lower() not in docintel_extensions:
            return None

        # Get the bytestring for the local path
        with open(local_path, "rb") as f:
            file_bytes = f.read()

        # Certain document analysis features are not available for some file types (.xlsx, .pptx, .html)
        if extension.lower() in [".xlsx", ".pptx", ".html"]:
            analysis_features = []
        else:
            analysis_features = [
                DocumentAnalysisFeature.FORMULAS,  # enable formula extraction
                DocumentAnalysisFeature.OCR_HIGH_RESOLUTION,  # enable high resolution OCR
                DocumentAnalysisFeature.STYLE_FONT,  # enable font style extraction
            ]

        # Extract the text using Azure Document Intelligence
        poller = self.doc_intel_client.begin_analyze_document(
            model_id="prebuilt-layout",
            body=AnalyzeDocumentRequest(bytes_source=file_bytes),
            features=analysis_features,
            output_content_format=CONTENT_FORMAT,  # TODO: replace with "ContentFormat.MARKDOWN" when the bug is fixed
        )
        result: AnalyzeResult = poller.result()

        # Remove comments from the markdown content generated by Doc Intelligence and return the markdown string
        markdown_text = re.sub(r"<!--.*?-->", "", result.content, flags=re.DOTALL)
        return DocumentConverterResult(
            title=None,
            text_content=markdown_text,
        )
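Since `enable_builtins` forwards a `docintel_endpoint` keyword to this converter (see `_markitdown.py` above), enabling it is a one-liner. A minimal sketch with a placeholder endpoint (credentials must be resolvable by `DefaultAzureCredential`):

```python
from markitdown import MarkItDown

# Placeholder endpoint; substitute a real Document Intelligence resource.
md = MarkItDown(docintel_endpoint="https://<resource-name>.cognitiveservices.azure.com/")
result = md.convert("example.pdf")  # hypothetical file
print(result.text_content)
```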
31
packages/markitdown/src/markitdown/converters/_docx_converter.py
Normal file
@@ -0,0 +1,31 @@
from typing import Union

import mammoth

from ._base import (
    DocumentConverterResult,
)

from ._html_converter import HtmlConverter


class DocxConverter(HtmlConverter):
    """
    Converts DOCX files to Markdown. Style information (e.g., headings) and tables are preserved where possible.
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a DOCX
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".docx":
            return None

        result = None
        with open(local_path, "rb") as docx_file:
            style_map = kwargs.get("style_map", None)

            result = mammoth.convert_to_html(docx_file, style_map=style_map)
            html_content = result.value
            result = self._convert(html_content)

        return result
51
packages/markitdown/src/markitdown/converters/_html_converter.py
Normal file
@@ -0,0 +1,51 @@
from typing import Any, Union
from bs4 import BeautifulSoup

from ._base import DocumentConverter, DocumentConverterResult
from ._markdownify import _CustomMarkdownify


class HtmlConverter(DocumentConverter):
    """Anything with content type text/html"""

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        # Bail if not html
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".html", ".htm"]:
            return None

        result = None
        with open(local_path, "rt", encoding="utf-8") as fh:
            result = self._convert(fh.read())

        return result

    def _convert(self, html_content: str) -> Union[None, DocumentConverterResult]:
        """Helper function that converts an HTML string."""

        # Parse the string
        soup = BeautifulSoup(html_content, "html.parser")

        # Remove javascript and style blocks
        for script in soup(["script", "style"]):
            script.extract()

        # Print only the main content
        body_elm = soup.find("body")
        webpage_text = ""
        if body_elm:
            webpage_text = _CustomMarkdownify().convert_soup(body_elm)
        else:
            webpage_text = _CustomMarkdownify().convert_soup(soup)

        assert isinstance(webpage_text, str)

        # remove leading and trailing \n
        webpage_text = webpage_text.strip()

        return DocumentConverterResult(
            title=None if soup.title is None else soup.title.string,
            text_content=webpage_text,
        )
87
packages/markitdown/src/markitdown/converters/_image_converter.py
Normal file
@@ -0,0 +1,87 @@
import base64
import mimetypes

from typing import Union
from ._base import DocumentConverterResult
from ._media_converter import MediaConverter


class ImageConverter(MediaConverter):
    """
    Converts images to markdown via extraction of metadata (if `exiftool` is installed), OCR (if `easyocr` is installed), and description via a multimodal LLM (if an llm_client is configured).
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not an image
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".jpg", ".jpeg", ".png"]:
            return None

        md_content = ""

        # Add metadata
        metadata = self._get_metadata(local_path, kwargs.get("exiftool_path"))

        if metadata:
            for f in [
                "ImageSize",
                "Title",
                "Caption",
                "Description",
                "Keywords",
                "Artist",
                "Author",
                "DateTimeOriginal",
                "CreateDate",
                "GPSPosition",
            ]:
                if f in metadata:
                    md_content += f"{f}: {metadata[f]}\n"

        # Try describing the image with GPTV
        llm_client = kwargs.get("llm_client")
        llm_model = kwargs.get("llm_model")
        if llm_client is not None and llm_model is not None:
            md_content += (
                "\n# Description:\n"
                + self._get_llm_description(
                    local_path,
                    extension,
                    llm_client,
                    llm_model,
                    prompt=kwargs.get("llm_prompt"),
                ).strip()
                + "\n"
            )

        return DocumentConverterResult(
            title=None,
            text_content=md_content,
        )

    def _get_llm_description(self, local_path, extension, client, model, prompt=None):
        if prompt is None or prompt.strip() == "":
            prompt = "Write a detailed caption for this image."

        data_uri = ""
        with open(local_path, "rb") as image_file:
            content_type, encoding = mimetypes.guess_type("_dummy" + extension)
            if content_type is None:
                content_type = "image/jpeg"
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")
            data_uri = f"data:{content_type};base64,{image_base64}"

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": data_uri,
                        },
                    },
                ],
            }
        ]

        response = client.chat.completions.create(model=model, messages=messages)
        return response.choices[0].message.content
70
packages/markitdown/src/markitdown/converters/_ipynb_converter.py
Normal file
@@ -0,0 +1,70 @@
import json
from typing import Any, Union

from ._base import (
    DocumentConverter,
    DocumentConverterResult,
)

from .._exceptions import FileConversionException


class IpynbConverter(DocumentConverter):
    """Converts Jupyter Notebook (.ipynb) files to Markdown."""

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        # Bail if not ipynb
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".ipynb":
            return None

        # Parse and convert the notebook
        result = None
        with open(local_path, "rt", encoding="utf-8") as fh:
            notebook_content = json.load(fh)
            result = self._convert(notebook_content)

        return result

    def _convert(self, notebook_content: dict) -> Union[None, DocumentConverterResult]:
        """Helper function that converts notebook JSON content to Markdown."""
        try:
            md_output = []
            title = None

            for cell in notebook_content.get("cells", []):
                cell_type = cell.get("cell_type", "")
                source_lines = cell.get("source", [])

                if cell_type == "markdown":
                    md_output.append("".join(source_lines))

                    # Extract the first # heading as title if not already found
                    if title is None:
                        for line in source_lines:
                            if line.startswith("# "):
                                title = line.lstrip("# ").strip()
                                break

                elif cell_type == "code":
                    # Code cells are wrapped in Markdown code blocks
                    md_output.append(f"```python\n{''.join(source_lines)}\n```")
                elif cell_type == "raw":
                    md_output.append(f"```\n{''.join(source_lines)}\n```")

            md_text = "\n\n".join(md_output)

            # Check for title in notebook metadata
            title = notebook_content.get("metadata", {}).get("title", title)

            return DocumentConverterResult(
                title=title,
                text_content=md_text,
            )

        except Exception as e:
            raise FileConversionException(
                f"Error converting .ipynb file: {str(e)}"
            ) from e
87
packages/markitdown/src/markitdown/converters/_markdownify.py
Normal file
@@ -0,0 +1,87 @@
import re
import markdownify

from typing import Any
from urllib.parse import quote, unquote, urlparse, urlunparse


class _CustomMarkdownify(markdownify.MarkdownConverter):
    """
    A custom version of markdownify's MarkdownConverter. Changes include:

    - Altering the default heading style to use '#', '##', etc.
    - Removing javascript hyperlinks.
    - Truncating images with large data:uri sources.
    - Ensuring URIs are properly escaped, and do not conflict with Markdown syntax
    """

    def __init__(self, **options: Any):
        options["heading_style"] = options.get("heading_style", markdownify.ATX)
        # Explicitly cast options to the expected type if necessary
        super().__init__(**options)

    def convert_hn(self, n: int, el: Any, text: str, convert_as_inline: bool) -> str:
        """Same as usual, but be sure to start with a new line"""
        if not convert_as_inline:
            if not re.search(r"^\n", text):
                return "\n" + super().convert_hn(n, el, text, convert_as_inline)  # type: ignore

        return super().convert_hn(n, el, text, convert_as_inline)  # type: ignore

    def convert_a(self, el: Any, text: str, convert_as_inline: bool):
        """Same as usual converter, but removes Javascript links and escapes URIs."""
        prefix, suffix, text = markdownify.chomp(text)  # type: ignore
        if not text:
            return ""
        href = el.get("href")
        title = el.get("title")

        # Escape URIs and skip non-http or file schemes
        if href:
            try:
                parsed_url = urlparse(href)  # type: ignore
                if parsed_url.scheme and parsed_url.scheme.lower() not in ["http", "https", "file"]:  # type: ignore
                    return "%s%s%s" % (prefix, text, suffix)
                href = urlunparse(parsed_url._replace(path=quote(unquote(parsed_url.path))))  # type: ignore
            except ValueError:  # It's not clear if this ever gets thrown
                return "%s%s%s" % (prefix, text, suffix)

        # For the replacement see #29: text nodes underscores are escaped
        if (
            self.options["autolinks"]
            and text.replace(r"\_", "_") == href
            and not title
            and not self.options["default_title"]
        ):
            # Shortcut syntax
            return "<%s>" % href
        if self.options["default_title"] and not title:
            title = href
        title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
        return (
            "%s[%s](%s%s)%s" % (prefix, text, href, title_part, suffix)
            if href
            else text
        )

    def convert_img(self, el: Any, text: str, convert_as_inline: bool) -> str:
        """Same as usual converter, but removes data URIs"""

        alt = el.attrs.get("alt", None) or ""
        src = el.attrs.get("src", None) or ""
        title = el.attrs.get("title", None) or ""
        title_part = ' "%s"' % title.replace('"', r"\"") if title else ""
        if (
            convert_as_inline
            and el.parent.name not in self.options["keep_inline_images_in"]
        ):
            return alt

        # Remove dataURIs
        if src.startswith("data:"):
            src = src.split(",")[0] + "..."

        return "![%s](%s%s)" % (alt, src, title_part)

    def convert_soup(self, soup: Any) -> str:
        return super().convert_soup(soup)  # type: ignore
36
packages/markitdown/src/markitdown/converters/_media_converter.py
Normal file
@@ -0,0 +1,36 @@
import subprocess
import shutil
import json
from warnings import warn

from ._base import DocumentConverter


class MediaConverter(DocumentConverter):
    """
    Abstract class for multi-modal media (e.g., images and audio)
    """

    def _get_metadata(self, local_path, exiftool_path=None):
        if not exiftool_path:
            which_exiftool = shutil.which("exiftool")
            if which_exiftool:
                warn(
                    f"""Implicit discovery of 'exiftool' is disabled. If you would like to continue to use exiftool in MarkItDown, please set the exiftool_path parameter in the MarkItDown constructor. E.g.,

    md = MarkItDown(exiftool_path="{which_exiftool}")

This warning will be removed in future releases.
""",
                    DeprecationWarning,
                )

            return None
        else:
            try:
                result = subprocess.run(
                    [exiftool_path, "-json", local_path], capture_output=True, text=True
                ).stdout
                return json.loads(result)[0]
            except Exception:
                return None
84
packages/markitdown/src/markitdown/converters/_mp3_converter.py
Normal file
@@ -0,0 +1,84 @@
import os
import tempfile
from typing import Union
from ._base import DocumentConverterResult
from ._wav_converter import WavConverter
from warnings import resetwarnings, catch_warnings

# Optional Transcription support
IS_AUDIO_TRANSCRIPTION_CAPABLE = False
try:
    # Using warnings' catch_warnings to catch
    # pydub's warning of ffmpeg or avconv missing
    with catch_warnings(record=True) as w:
        import pydub

        if w:
            raise ModuleNotFoundError
    import speech_recognition as sr

    IS_AUDIO_TRANSCRIPTION_CAPABLE = True
except ModuleNotFoundError:
    pass
finally:
    resetwarnings()


class Mp3Converter(WavConverter):
    """
    Converts MP3 files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` AND `pydub` are installed).
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not an MP3
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".mp3":
            return None

        md_content = ""

        # Add metadata
        metadata = self._get_metadata(local_path, kwargs.get("exiftool_path"))
        if metadata:
            for f in [
                "Title",
                "Artist",
                "Author",
                "Band",
                "Album",
                "Genre",
                "Track",
                "DateTimeOriginal",
                "CreateDate",
                "Duration",
            ]:
                if f in metadata:
                    md_content += f"{f}: {metadata[f]}\n"

        # Transcribe
        if IS_AUDIO_TRANSCRIPTION_CAPABLE:
            handle, temp_path = tempfile.mkstemp(suffix=".wav")
            os.close(handle)
            try:
                sound = pydub.AudioSegment.from_mp3(local_path)
                sound.export(temp_path, format="wav")

                _args = dict()
                _args.update(kwargs)
                _args["file_extension"] = ".wav"

                try:
                    transcript = super()._transcribe_audio(temp_path).strip()
                    md_content += "\n\n### Audio Transcript:\n" + (
                        "[No speech detected]" if transcript == "" else transcript
                    )
                except Exception:
                    md_content += "\n\n### Audio Transcript:\nError. Could not transcribe this audio."

            finally:
                os.unlink(temp_path)

        # Return the result
        return DocumentConverterResult(
            title=None,
            text_content=md_content.strip(),
        )
@@ -0,0 +1,76 @@
import olefile

from typing import Any, Union
from ._base import (  # FileConversionException is assumed to live alongside the base classes
    DocumentConverter,
    DocumentConverterResult,
    FileConversionException,
)


class OutlookMsgConverter(DocumentConverter):
    """Converts Outlook .msg files to markdown by extracting email metadata and content.

    Uses the olefile package to parse the .msg file structure and extract:
    - Email headers (From, To, Subject)
    - Email body content
    """

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        # Bail if not a MSG file
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".msg":
            return None

        try:
            msg = olefile.OleFileIO(local_path)
            # Extract email metadata
            md_content = "# Email Message\n\n"

            # Get headers
            headers = {
                "From": self._get_stream_data(msg, "__substg1.0_0C1F001F"),
                "To": self._get_stream_data(msg, "__substg1.0_0E04001F"),
                "Subject": self._get_stream_data(msg, "__substg1.0_0037001F"),
            }

            # Add headers to markdown
            for key, value in headers.items():
                if value:
                    md_content += f"**{key}:** {value}\n"

            md_content += "\n## Content\n\n"

            # Get email body
            body = self._get_stream_data(msg, "__substg1.0_1000001F")
            if body:
                md_content += body

            msg.close()

            return DocumentConverterResult(
                title=headers.get("Subject"), text_content=md_content.strip()
            )

        except Exception as e:
            raise FileConversionException(
                f"Could not convert MSG file '{local_path}': {str(e)}"
            )

    def _get_stream_data(
        self, msg: olefile.OleFileIO, stream_path: str
    ) -> Union[str, None]:
        """Helper to safely extract and decode stream data from the MSG file."""
        try:
            if msg.exists(stream_path):
                data = msg.openstream(stream_path).read()
                # Try UTF-16 first (common for .msg files)
                try:
                    return data.decode("utf-16-le").strip()
                except UnicodeDecodeError:
                    # Fall back to UTF-8
                    try:
                        return data.decode("utf-8").strip()
                    except UnicodeDecodeError:
                        # Last resort - ignore errors
                        return data.decode("utf-8", errors="ignore").strip()
        except Exception:
            pass
        return None
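The `__substg1.0_*` stream names follow MAPI's property-stream convention: the suffix is a four-hex-digit property tag plus a four-hex-digit type code, where `001F` denotes a Unicode (UTF-16LE) string. A small sketch of the mapping used above (tag meanings per the MAPI property documentation; treat the list as illustrative, not exhaustive):

```python
# MAPI property streams inside a .msg OLE container are named:
#   __substg1.0_<TAG><TYPE>   with TYPE 001F = PT_UNICODE (UTF-16LE string)
KNOWN_TAGS = {
    "0C1F": "Sender email address",
    "0E04": "Display To",
    "0037": "Subject",
    "1000": "Body",
}

def stream_name(tag: str, prop_type: str = "001F") -> str:
    return f"__substg1.0_{tag}{prop_type}"

print(stream_name("0037"))  # __substg1.0_0037001F -> the Subject stream
```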
@@ -0,0 +1,21 @@
import pdfminer
import pdfminer.high_level

from typing import Union
from ._base import DocumentConverter, DocumentConverterResult


class PdfConverter(DocumentConverter):
    """
    Converts PDFs to Markdown. Most style information is ignored, so the results are essentially plain-text.
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a PDF
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".pdf":
            return None

        return DocumentConverterResult(
            title=None,
            text_content=pdfminer.high_level.extract_text(local_path),
        )
@@ -0,0 +1,33 @@
import mimetypes

from charset_normalizer import from_path
from typing import Any, Union

from ._base import DocumentConverter, DocumentConverterResult


class PlainTextConverter(DocumentConverter):
    """Anything with content type text/plain"""

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        # Guess the content type from any file extension that might be around
        content_type, _ = mimetypes.guess_type(
            "__placeholder" + kwargs.get("file_extension", "")
        )

        # Only accept text files
        if content_type is None:
            return None
        elif all(
            not content_type.lower().startswith(type_prefix)
            for type_prefix in ["text/", "application/json"]
        ):
            return None

        text_content = str(from_path(local_path).best())
        return DocumentConverterResult(
            title=None,
            text_content=text_content,
        )
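The `"__placeholder"` prefix is just a dummy file stem so that `mimetypes.guess_type` can classify a bare extension. For example (exact results depend on the platform's MIME tables, so treat these as typical values):

```python
import mimetypes

print(mimetypes.guess_type("__placeholder.json"))  # typically ('application/json', None) -> accepted
print(mimetypes.guess_type("__placeholder.csv"))   # typically ('text/csv', None) -> accepted
print(mimetypes.guess_type("__placeholder.xyz"))   # (None, None) -> rejected
```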
180
packages/markitdown/src/markitdown/converters/_pptx_converter.py
Normal file
@@ -0,0 +1,180 @@
import base64
import pptx
import re
import html

from typing import Union

from ._base import DocumentConverterResult, DocumentConverter
from ._html_converter import HtmlConverter


class PptxConverter(HtmlConverter):
    """
    Converts PPTX files to Markdown. Supports headings, tables and images with alt text.
    """

    def _get_llm_description(
        self, llm_client, llm_model, image_blob, content_type, prompt=None
    ):
        if prompt is None or prompt.strip() == "":
            prompt = "Write a detailed alt text for this image with less than 50 words."

        image_base64 = base64.b64encode(image_blob).decode("utf-8")
        data_uri = f"data:{content_type};base64,{image_base64}"

        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": data_uri,
                        },
                    },
                    {"type": "text", "text": prompt},
                ],
            }
        ]

        response = llm_client.chat.completions.create(
            model=llm_model, messages=messages
        )
        return response.choices[0].message.content

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a PPTX
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".pptx":
            return None

        md_content = ""

        presentation = pptx.Presentation(local_path)
        slide_num = 0
        for slide in presentation.slides:
            slide_num += 1

            md_content += f"\n\n<!-- Slide number: {slide_num} -->\n"

            title = slide.shapes.title
            for shape in slide.shapes:
                # Pictures
                if self._is_picture(shape):
                    # https://github.com/scanny/python-pptx/pull/512#issuecomment-1713100069

                    llm_description = None
                    alt_text = None

                    llm_client = kwargs.get("llm_client")
                    llm_model = kwargs.get("llm_model")
                    if llm_client is not None and llm_model is not None:
                        try:
                            llm_description = self._get_llm_description(
                                llm_client,
                                llm_model,
                                shape.image.blob,
                                shape.image.content_type,
                            )
                        except Exception:
                            # Unable to describe with LLM
                            pass

                    if not llm_description:
                        try:
                            alt_text = shape._element._nvXxPr.cNvPr.attrib.get(
                                "descr", ""
                            )
                        except Exception:
                            # Unable to get alt text
                            pass

                    # A placeholder name
                    filename = re.sub(r"\W", "", shape.name) + ".jpg"
                    md_content += (
                        "\n!["
                        + (llm_description if llm_description else alt_text)
                        + "]("
                        + filename
                        + ")\n"
                    )

                # Tables
                if self._is_table(shape):
                    html_table = "<html><body><table>"
                    first_row = True
                    for row in shape.table.rows:
                        html_table += "<tr>"
                        for cell in row.cells:
                            if first_row:
                                html_table += "<th>" + html.escape(cell.text) + "</th>"
                            else:
                                html_table += "<td>" + html.escape(cell.text) + "</td>"
                        html_table += "</tr>"
                        first_row = False
                    html_table += "</table></body></html>"
                    md_content += (
                        "\n" + self._convert(html_table).text_content.strip() + "\n"
                    )

                # Charts
                if shape.has_chart:
                    md_content += self._convert_chart_to_markdown(shape.chart)

                # Text areas
                elif shape.has_text_frame:
                    if shape == title:
                        md_content += "# " + shape.text.lstrip() + "\n"
                    else:
                        md_content += shape.text + "\n"

            md_content = md_content.strip()

            if slide.has_notes_slide:
                md_content += "\n\n### Notes:\n"
                notes_frame = slide.notes_slide.notes_text_frame
                if notes_frame is not None:
                    md_content += notes_frame.text
                md_content = md_content.strip()

        return DocumentConverterResult(
            title=None,
            text_content=md_content.strip(),
        )

    def _is_picture(self, shape):
        if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.PICTURE:
            return True
        if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.PLACEHOLDER:
            if hasattr(shape, "image"):
                return True
        return False

    def _is_table(self, shape):
        if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.TABLE:
            return True
        return False

    def _convert_chart_to_markdown(self, chart):
        md = "\n\n### Chart"
        if chart.has_title:
            md += f": {chart.chart_title.text_frame.text}"
        md += "\n\n"
        data = []
        category_names = [c.label for c in chart.plots[0].categories]
        series_names = [s.name for s in chart.series]
        data.append(["Category"] + series_names)

        for idx, category in enumerate(category_names):
            row = [category]
            for series in chart.series:
                row.append(series.values[idx])
            data.append(row)

        markdown_table = []
        for row in data:
            markdown_table.append("| " + " | ".join(map(str, row)) + " |")
        header = markdown_table[0]
        separator = "|" + "|".join(["---"] * len(data[0])) + "|"
        return md + "\n".join([header, separator] + markdown_table[1:])
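`_convert_chart_to_markdown` flattens the chart's categories and series into a pipe table. A runnable sketch of the same table construction, using made-up data:

```python
# Hypothetical chart: two series ("East", "West") over two categories.
data = [["Category", "East", "West"], ["2003", 10, 20], ["2004", 15, 25]]
rows = ["| " + " | ".join(map(str, r)) + " |" for r in data]
separator = "|" + "|".join(["---"] * len(data[0])) + "|"
print("\n".join([rows[0], separator] + rows[1:]))
# | Category | East | West |
# |---|---|---|
# | 2003 | 10 | 20 |
# | 2004 | 15 | 25 |
```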
143
packages/markitdown/src/markitdown/converters/_rss_converter.py
Normal file
@@ -0,0 +1,143 @@
import traceback

from xml.dom import minidom
from typing import Union
from bs4 import BeautifulSoup

from ._markdownify import _CustomMarkdownify
from ._base import DocumentConverter, DocumentConverterResult


class RssConverter(DocumentConverter):
    """Convert RSS / Atom type to markdown"""

    def convert(
        self, local_path: str, **kwargs
    ) -> Union[None, DocumentConverterResult]:
        # Bail if not RSS type
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".xml", ".rss", ".atom"]:
            return None
        try:
            doc = minidom.parse(local_path)
        except BaseException as _:
            return None
        result = None
        if doc.getElementsByTagName("rss"):
            # An RSS feed must have a root element of <rss>
            result = self._parse_rss_type(doc)
        elif doc.getElementsByTagName("feed"):
            root = doc.getElementsByTagName("feed")[0]
            if root.getElementsByTagName("entry"):
                # An Atom feed must have a root element of <feed> and at least one <entry>
                result = self._parse_atom_type(doc)
            else:
                return None
        else:
            # not rss or atom
            return None

        return result

    def _parse_atom_type(
        self, doc: minidom.Document
    ) -> Union[None, DocumentConverterResult]:
        """Parse the type of an Atom feed.

        Returns None if the feed type is not recognized or something goes wrong.
        """
        try:
            root = doc.getElementsByTagName("feed")[0]
            title = self._get_data_by_tag_name(root, "title")
            subtitle = self._get_data_by_tag_name(root, "subtitle")
            entries = root.getElementsByTagName("entry")
            md_text = f"# {title}\n"
            if subtitle:
                md_text += f"{subtitle}\n"
            for entry in entries:
                entry_title = self._get_data_by_tag_name(entry, "title")
                entry_summary = self._get_data_by_tag_name(entry, "summary")
                entry_updated = self._get_data_by_tag_name(entry, "updated")
                entry_content = self._get_data_by_tag_name(entry, "content")

                if entry_title:
                    md_text += f"\n## {entry_title}\n"
                if entry_updated:
                    md_text += f"Updated on: {entry_updated}\n"
                if entry_summary:
                    md_text += self._parse_content(entry_summary)
                if entry_content:
                    md_text += self._parse_content(entry_content)

            return DocumentConverterResult(
                title=title,
                text_content=md_text,
            )
        except BaseException as _:
            return None

    def _parse_rss_type(
        self, doc: minidom.Document
    ) -> Union[None, DocumentConverterResult]:
        """Parse the type of an RSS feed.

        Returns None if the feed type is not recognized or something goes wrong.
        """
        try:
            root = doc.getElementsByTagName("rss")[0]
            channel = root.getElementsByTagName("channel")
            if not channel:
                return None
            channel = channel[0]
            channel_title = self._get_data_by_tag_name(channel, "title")
            channel_description = self._get_data_by_tag_name(channel, "description")
            items = channel.getElementsByTagName("item")
            md_text = ""
            if channel_title:
                md_text += f"# {channel_title}\n"
            if channel_description:
                md_text += f"{channel_description}\n"
            if not items:
                items = []
            for item in items:
                title = self._get_data_by_tag_name(item, "title")
                description = self._get_data_by_tag_name(item, "description")
                pubDate = self._get_data_by_tag_name(item, "pubDate")
                content = self._get_data_by_tag_name(item, "content:encoded")

                if title:
                    md_text += f"\n## {title}\n"
                if pubDate:
                    md_text += f"Published on: {pubDate}\n"
                if description:
                    md_text += self._parse_content(description)
                if content:
                    md_text += self._parse_content(content)

            return DocumentConverterResult(
                title=channel_title,
                text_content=md_text,
            )
        except BaseException as _:
            print(traceback.format_exc())
            return None

    def _parse_content(self, content: str) -> str:
        """Parse the content of an RSS feed item"""
        try:
            # using bs4 because many RSS feeds have HTML-styled content
            soup = BeautifulSoup(content, "html.parser")
            return _CustomMarkdownify().convert_soup(soup)
        except BaseException as _:
            return content

    def _get_data_by_tag_name(
        self, element: minidom.Element, tag_name: str
    ) -> Union[str, None]:
        """Get data from first child element with the given tag name.
        Returns None when no such element is found.
        """
        nodes = element.getElementsByTagName(tag_name)
        if not nodes:
            return None
        fc = nodes[0].firstChild
        if fc:
            return fc.data
        return None
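For a quick sanity check of the `<rss>` branch, a minimal sketch with an inline feed (the content is made up):

```python
from xml.dom import minidom

xml = """<rss version="2.0"><channel>
  <title>Example Feed</title>
  <item><title>First post</title><pubDate>Mon, 01 Jan 2024</pubDate></item>
</channel></rss>"""

doc = minidom.parseString(xml)
channel = doc.getElementsByTagName("rss")[0].getElementsByTagName("channel")[0]
print(channel.getElementsByTagName("title")[0].firstChild.data)  # Example Feed
```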
@@ -0,0 +1,67 @@
from typing import Union
from ._base import DocumentConverterResult
from ._media_converter import MediaConverter

# Optional Transcription support
IS_AUDIO_TRANSCRIPTION_CAPABLE = False
try:
    import speech_recognition as sr

    IS_AUDIO_TRANSCRIPTION_CAPABLE = True
except ModuleNotFoundError:
    pass


class WavConverter(MediaConverter):
    """
    Converts WAV files to markdown via extraction of metadata (if `exiftool` is installed), and speech transcription (if `speech_recognition` is installed).
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not a WAV
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".wav":
            return None

        md_content = ""

        # Add metadata
        metadata = self._get_metadata(local_path, kwargs.get("exiftool_path"))
        if metadata:
            for f in [
                "Title",
                "Artist",
                "Author",
                "Band",
                "Album",
                "Genre",
                "Track",
                "DateTimeOriginal",
                "CreateDate",
                "Duration",
            ]:
                if f in metadata:
                    md_content += f"{f}: {metadata[f]}\n"

        # Transcribe
        if IS_AUDIO_TRANSCRIPTION_CAPABLE:
            try:
                transcript = self._transcribe_audio(local_path)
                md_content += "\n\n### Audio Transcript:\n" + (
                    "[No speech detected]" if transcript == "" else transcript
                )
            except Exception:
                md_content += (
                    "\n\n### Audio Transcript:\nError. Could not transcribe this audio."
                )

        return DocumentConverterResult(
            title=None,
            text_content=md_content.strip(),
        )

    def _transcribe_audio(self, local_path) -> str:
        recognizer = sr.Recognizer()
        with sr.AudioFile(local_path) as source:
            audio = recognizer.record(source)
            return recognizer.recognize_google(audio).strip()
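One detail hidden by the broad `except Exception` above: `recognize_google` raises `sr.UnknownValueError` when it cannot make out any speech and `sr.RequestError` when the web API fails, so the converter reports both as a transcription error. A sketch that distinguishes them (assumes `speech_recognition` is installed; the file path is illustrative):

```python
import speech_recognition as sr

recognizer = sr.Recognizer()
with sr.AudioFile("example.wav") as source:  # illustrative path
    audio = recognizer.record(source)
try:
    print(recognizer.recognize_google(audio))
except sr.UnknownValueError:
    print("[No speech detected]")
except sr.RequestError as e:
    print(f"Transcription service unavailable: {e}")
```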
@@ -0,0 +1,56 @@
import re

from typing import Any, Union
from bs4 import BeautifulSoup

from ._base import DocumentConverter, DocumentConverterResult
from ._markdownify import _CustomMarkdownify


class WikipediaConverter(DocumentConverter):
    """Handle Wikipedia pages separately, focusing only on the main document content."""

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        # Bail if not Wikipedia
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".html", ".htm"]:
            return None
        url = kwargs.get("url", "")
        if not re.search(r"^https?:\/\/[a-zA-Z]{2,3}\.wikipedia\.org\/", url):
            return None

        # Parse the file
        soup = None
        with open(local_path, "rt", encoding="utf-8") as fh:
            soup = BeautifulSoup(fh.read(), "html.parser")

        # Remove javascript and style blocks
        for script in soup(["script", "style"]):
            script.extract()

        # Print only the main content
        body_elm = soup.find("div", {"id": "mw-content-text"})
        title_elm = soup.find("span", {"class": "mw-page-title-main"})

        webpage_text = ""
        main_title = None if soup.title is None else soup.title.string

        if body_elm:
            # What's the title
            if title_elm and len(title_elm) > 0:
                main_title = title_elm.string  # type: ignore
                assert isinstance(main_title, str)

            # Convert the page
            webpage_text = f"# {main_title}\n\n" + _CustomMarkdownify().convert_soup(
                body_elm
            )
        else:
            webpage_text = _CustomMarkdownify().convert_soup(soup)

        return DocumentConverterResult(
            title=main_title,
            text_content=webpage_text,
        )
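The URL gate only matches language-subdomain Wikipedia links; everything else falls through to the generic HTML converter. For example:

```python
import re

pattern = r"^https?:\/\/[a-zA-Z]{2,3}\.wikipedia\.org\/"
print(bool(re.search(pattern, "https://en.wikipedia.org/wiki/Microsoft")))  # True
print(bool(re.search(pattern, "https://wikipedia.org/wiki/Microsoft")))     # False (no language subdomain)
print(bool(re.search(pattern, "https://example.com/wiki/Microsoft")))       # False
```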
@@ -0,0 +1,54 @@
from typing import Union

import pandas as pd

from ._base import DocumentConverterResult
from ._html_converter import HtmlConverter


class XlsxConverter(HtmlConverter):
    """
    Converts XLSX files to Markdown, with each sheet presented as a separate Markdown table.
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not an XLSX
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".xlsx":
            return None

        sheets = pd.read_excel(local_path, sheet_name=None, engine="openpyxl")
        md_content = ""
        for s in sheets:
            md_content += f"## {s}\n"
            html_content = sheets[s].to_html(index=False)
            md_content += self._convert(html_content).text_content.strip() + "\n\n"

        return DocumentConverterResult(
            title=None,
            text_content=md_content.strip(),
        )


class XlsConverter(HtmlConverter):
    """
    Converts XLS files to Markdown, with each sheet presented as a separate Markdown table.
    """

    def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
        # Bail if not an XLS
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".xls":
            return None

        sheets = pd.read_excel(local_path, sheet_name=None, engine="xlrd")
        md_content = ""
        for s in sheets:
            md_content += f"## {s}\n"
            html_content = sheets[s].to_html(index=False)
            md_content += self._convert(html_content).text_content.strip() + "\n\n"

        return DocumentConverterResult(
            title=None,
            text_content=md_content.strip(),
        )
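Passing `sheet_name=None` makes `pandas.read_excel` return a dict of `{sheet name: DataFrame}`, which is why the loop iterates over sheet names. A minimal sketch of the same HTML round trip (the workbook path is illustrative):

```python
import pandas as pd

sheets = pd.read_excel("example.xlsx", sheet_name=None, engine="openpyxl")  # illustrative path
for name, frame in sheets.items():
    print(f"## {name}")
    print(frame.to_html(index=False))  # HtmlConverter then reduces this HTML to a Markdown table
```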
@@ -0,0 +1,148 @@
import json
import re

from typing import Any, Union, Dict, List
from urllib.parse import parse_qs, urlparse
from bs4 import BeautifulSoup

from ._base import DocumentConverter, DocumentConverterResult


# Optional YouTube transcription support
IS_YOUTUBE_TRANSCRIPT_CAPABLE = False
try:
    from youtube_transcript_api import YouTubeTranscriptApi

    IS_YOUTUBE_TRANSCRIPT_CAPABLE = True
except ModuleNotFoundError:
    pass


class YouTubeConverter(DocumentConverter):
    """Handle YouTube specially, focusing on the video title, description, and transcript."""

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        # Bail if not YouTube
        extension = kwargs.get("file_extension", "")
        if extension.lower() not in [".html", ".htm"]:
            return None
        url = kwargs.get("url", "")
        if not url.startswith("https://www.youtube.com/watch?"):
            return None

        # Parse the file
        soup = None
        with open(local_path, "rt", encoding="utf-8") as fh:
            soup = BeautifulSoup(fh.read(), "html.parser")

        # Read the meta tags
        assert soup.title is not None and soup.title.string is not None
        metadata: Dict[str, str] = {"title": soup.title.string}
        for meta in soup(["meta"]):
            for a in meta.attrs:
                if a in ["itemprop", "property", "name"]:
                    metadata[meta[a]] = meta.get("content", "")
                    break

        # We can also try to read the full description. This is more prone to breaking, since it reaches into the page implementation
        try:
            for script in soup(["script"]):
                content = script.text
                if "ytInitialData" in content:
                    lines = re.split(r"\r?\n", content)
                    obj_start = lines[0].find("{")
                    obj_end = lines[0].rfind("}")
                    if obj_start >= 0 and obj_end >= 0:
                        data = json.loads(lines[0][obj_start : obj_end + 1])
                        attrdesc = self._findKey(data, "attributedDescriptionBodyText")  # type: ignore
                        if attrdesc:
                            metadata["description"] = str(attrdesc["content"])
                    break
        except Exception:
            pass

        # Start preparing the page
        webpage_text = "# YouTube\n"

        title = self._get(metadata, ["title", "og:title", "name"])  # type: ignore
        assert isinstance(title, str)

        if title:
            webpage_text += f"\n## {title}\n"

        stats = ""
        views = self._get(metadata, ["interactionCount"])  # type: ignore
        if views:
            stats += f"- **Views:** {views}\n"

        keywords = self._get(metadata, ["keywords"])  # type: ignore
        if keywords:
            stats += f"- **Keywords:** {keywords}\n"

        runtime = self._get(metadata, ["duration"])  # type: ignore
        if runtime:
            stats += f"- **Runtime:** {runtime}\n"

        if len(stats) > 0:
            webpage_text += f"\n### Video Metadata\n{stats}\n"

        description = self._get(metadata, ["description", "og:description"])  # type: ignore
        if description:
            webpage_text += f"\n### Description\n{description}\n"

        if IS_YOUTUBE_TRANSCRIPT_CAPABLE:
            transcript_text = ""
            parsed_url = urlparse(url)  # type: ignore
            params = parse_qs(parsed_url.query)  # type: ignore
            if "v" in params:
                assert isinstance(params["v"][0], str)
                video_id = str(params["v"][0])
                try:
                    youtube_transcript_languages = kwargs.get(
                        "youtube_transcript_languages", ("en",)
                    )
                    # Must be a single transcript.
                    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=youtube_transcript_languages)  # type: ignore
                    transcript_text = " ".join([part["text"] for part in transcript])  # type: ignore
                    # Alternative formatting:
                    # formatter = TextFormatter()
                    # formatter.format_transcript(transcript)
                except Exception:
                    pass
            if transcript_text:
                webpage_text += f"\n### Transcript\n{transcript_text}\n"

        title = title if title else soup.title.string
        assert isinstance(title, str)

        return DocumentConverterResult(
            title=title,
            text_content=webpage_text,
        )

    def _get(
        self,
        metadata: Dict[str, str],
        keys: List[str],
        default: Union[str, None] = None,
    ) -> Union[str, None]:
        for k in keys:
            if k in metadata:
                return metadata[k]
        return default

    def _findKey(self, json: Any, key: str) -> Union[str, None]:  # TODO: Fix json type
        if isinstance(json, list):
            for elm in json:
                ret = self._findKey(elm, key)
                if ret is not None:
                    return ret
        elif isinstance(json, dict):
            for k in json:
                if k == key:
                    return json[k]
                else:
                    ret = self._findKey(json[k], key)
                    if ret is not None:
                        return ret
        return None
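`_findKey` is a depth-first search over the parsed `ytInitialData` blob, returning the first value found under the given key at any nesting depth. A toy illustration of the same traversal:

```python
# Toy illustration of the depth-first key search used on ytInitialData.
data = {"a": [{"b": 1}, {"attributedDescriptionBodyText": {"content": "hello"}}]}

def find_key(obj, key):
    if isinstance(obj, list):
        for elm in obj:
            ret = find_key(elm, key)
            if ret is not None:
                return ret
    elif isinstance(obj, dict):
        for k, v in obj.items():
            if k == key:
                return v
            ret = find_key(v, key)
            if ret is not None:
                return ret
    return None

print(find_key(data, "attributedDescriptionBodyText"))  # {'content': 'hello'}
```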
135
packages/markitdown/src/markitdown/converters/_zip_converter.py
Normal file
@@ -0,0 +1,135 @@
import os
import zipfile
import shutil
from typing import Any, Union

from ._base import DocumentConverter, DocumentConverterResult


class ZipConverter(DocumentConverter):
    """Converts ZIP files to markdown by extracting and converting all contained files.

    The converter extracts the ZIP contents to a temporary directory, processes each file
    using appropriate converters based on file extensions, and then combines the results
    into a single markdown document. The temporary directory is cleaned up after processing.

    Example output format:
    ```markdown
    Content from the zip file `example.zip`:

    ## File: docs/readme.txt

    This is the content of readme.txt
    Multiple lines are preserved

    ## File: images/example.jpg

    ImageSize: 1920x1080
    DateTimeOriginal: 2024-02-15 14:30:00
    Description: A beautiful landscape photo

    ## File: data/report.xlsx

    ## Sheet1
    | Column1 | Column2 | Column3 |
    |---------|---------|---------|
    | data1   | data2   | data3   |
    | data4   | data5   | data6   |
    ```

    Key features:
    - Maintains original file structure in headings
    - Processes nested files recursively
    - Uses appropriate converters for each file type
    - Preserves formatting of converted content
    - Cleans up temporary files after processing
    """

    def convert(
        self, local_path: str, **kwargs: Any
    ) -> Union[None, DocumentConverterResult]:
        # Bail if not a ZIP
        extension = kwargs.get("file_extension", "")
        if extension.lower() != ".zip":
            return None

        # Get parent converters list if available
        parent_converters = kwargs.get("_parent_converters", [])
        if not parent_converters:
            return DocumentConverterResult(
                title=None,
                text_content=f"[ERROR] No converters available to process zip contents from: {local_path}",
            )

        extracted_zip_folder_name = (
            f"extracted_{os.path.basename(local_path).replace('.zip', '_zip')}"
        )
        extraction_dir = os.path.normpath(
            os.path.join(os.path.dirname(local_path), extracted_zip_folder_name)
        )
        md_content = f"Content from the zip file `{os.path.basename(local_path)}`:\n\n"

        try:
            # Extract the zip file safely
            with zipfile.ZipFile(local_path, "r") as zipObj:
                # Safeguard against path traversal
                for member in zipObj.namelist():
                    member_path = os.path.normpath(os.path.join(extraction_dir, member))
                    if (
                        not os.path.commonprefix([extraction_dir, member_path])
                        == extraction_dir
                    ):
                        raise ValueError(
                            f"Path traversal detected in zip file: {member}"
                        )

                # Extract all files safely
                zipObj.extractall(path=extraction_dir)

            # Process each extracted file
            for root, dirs, files in os.walk(extraction_dir):
                for name in files:
                    file_path = os.path.join(root, name)
                    relative_path = os.path.relpath(file_path, extraction_dir)

                    # Get file extension
                    _, file_extension = os.path.splitext(name)

                    # Update kwargs for the file
                    file_kwargs = kwargs.copy()
                    file_kwargs["file_extension"] = file_extension
                    file_kwargs["_parent_converters"] = parent_converters

                    # Try converting the file using available converters
                    for converter in parent_converters:
                        # Skip the zip converter to avoid infinite recursion
                        if isinstance(converter, ZipConverter):
                            continue

                        result = converter.convert(file_path, **file_kwargs)
                        if result is not None:
                            md_content += f"\n## File: {relative_path}\n\n"
                            md_content += result.text_content + "\n\n"
                            break

            # Clean up extracted files if specified
            if kwargs.get("cleanup_extracted", True):
                shutil.rmtree(extraction_dir)

            return DocumentConverterResult(title=None, text_content=md_content.strip())

        except zipfile.BadZipFile:
            return DocumentConverterResult(
                title=None,
                text_content=f"[ERROR] Invalid or corrupted zip file: {local_path}",
            )
        except ValueError as ve:
            return DocumentConverterResult(
                title=None,
                text_content=f"[ERROR] Security error in zip file {local_path}: {str(ve)}",
            )
        except Exception as e:
            return DocumentConverterResult(
                title=None,
                text_content=f"[ERROR] Failed to process zip file {local_path}: {str(e)}",
            )
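A caveat on the traversal guard above: `os.path.commonprefix` compares strings character by character, so a sibling directory whose name merely starts with the extraction directory's name shares the prefix and passes the check; `os.path.commonpath` compares whole path components and does not. A sketch of the difference (paths are illustrative):

```python
import os

extraction_dir = "/tmp/extracted_foo_zip"
member_path = os.path.normpath(os.path.join(extraction_dir, "../extracted_foo_zip_evil/x"))

# Character-wise prefix comparison (as used above) accepts the sibling directory:
print(os.path.commonprefix([extraction_dir, member_path]) == extraction_dir)  # True
# Component-wise comparison rejects it:
print(os.path.commonpath([extraction_dir, member_path]) == extraction_dir)    # False
```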
0
packages/markitdown/src/markitdown/py.typed
Normal file
3
packages/markitdown/tests/__init__.py
Normal file
@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2024-present Adam Fourney <adamfo@microsoft.com>
#
# SPDX-License-Identifier: MIT
BIN
packages/markitdown/tests/test_files/test.docx
Normal file
Binary file not shown.
BIN
packages/markitdown/tests/test_files/test.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 463 KiB
10
packages/markitdown/tests/test_files/test.json
Normal file
@@ -0,0 +1,10 @@
{
    "key1": "string_value",
    "key2": 1234,
    "key3": [
        "list_value1",
        "list_value2"
    ],
    "5b64c88c-b3c3-4510-bcb8-da0b200602d8": "uuid_key",
    "uuid_value": "9700dc99-6685-40b4-9a3a-5e406dcb37f3"
}
BIN
packages/markitdown/tests/test_files/test.pptx
Normal file
Binary file not shown.
BIN
packages/markitdown/tests/test_files/test.xls
Normal file
Binary file not shown.
BIN
packages/markitdown/tests/test_files/test.xlsx
Normal file
Binary file not shown.
69
packages/markitdown/tests/test_files/test_blog.html
Normal file
File diff suppressed because one or more lines are too long
BIN
packages/markitdown/tests/test_files/test_files.zip
Normal file
Binary file not shown.
BIN
packages/markitdown/tests/test_files/test_llm.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 145 KiB
4
packages/markitdown/tests/test_files/test_mskanji.csv
Normal file
@@ -0,0 +1,4 @@
名前,年齢,住所
佐藤太郎,30,東京
三木英子,25,大阪
髙橋淳,35,名古屋
89
packages/markitdown/tests/test_files/test_notebook.ipynb
Normal file
@@ -0,0 +1,89 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0f61db80",
   "metadata": {},
   "source": [
    "# Test Notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "3f2a5bbd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "markitdown\n"
     ]
    }
   ],
   "source": [
    "print('markitdown')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b9c0468",
   "metadata": {},
   "source": [
    "## Code Cell Below"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "37d8088a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "42\n"
     ]
    }
   ],
   "source": [
    "# comment in code\n",
    "print(42)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2e3177bd",
   "metadata": {},
   "source": [
    "End\n",
    "\n",
    "---"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  },
  "title": "Test Notebook Title"
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
BIN
packages/markitdown/tests/test_files/test_outlook_msg.msg
Normal file
Binary file not shown.
1
packages/markitdown/tests/test_files/test_rss.xml
Normal file
File diff suppressed because one or more lines are too long
147
packages/markitdown/tests/test_files/test_serp.html
Normal file
File diff suppressed because one or more lines are too long
2232
packages/markitdown/tests/test_files/test_wikipedia.html
Normal file
File diff suppressed because one or more lines are too long
BIN
packages/markitdown/tests/test_files/test_with_comment.docx
Normal file
Binary file not shown.
334
packages/markitdown/tests/test_markitdown.py
Normal file
@@ -0,0 +1,334 @@
#!/usr/bin/env python3 -m pytest
import io
import os
import shutil

import pytest
import requests

from warnings import catch_warnings, resetwarnings

from markitdown import MarkItDown

skip_remote = (
    True if os.environ.get("GITHUB_ACTIONS") else False
)  # Don't run these tests in CI


# Don't run the llm tests without a key and the client library
skip_llm = False if os.environ.get("OPENAI_API_KEY") else True
try:
    import openai
except ModuleNotFoundError:
    skip_llm = True

# Skip exiftool tests if not installed
skip_exiftool = shutil.which("exiftool") is None

TEST_FILES_DIR = os.path.join(os.path.dirname(__file__), "test_files")

JPG_TEST_EXIFTOOL = {
    "Author": "AutoGen Authors",
    "Title": "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
    "Description": "AutoGen enables diverse LLM-based applications",
    "ImageSize": "1615x1967",
    "DateTimeOriginal": "2024:03:14 22:10:00",
}

PDF_TEST_URL = "https://arxiv.org/pdf/2308.08155v2.pdf"
PDF_TEST_STRINGS = [
    "While there is contemporaneous exploration of multi-agent approaches"
]

YOUTUBE_TEST_URL = "https://www.youtube.com/watch?v=V2qZ_lgxTzg"
YOUTUBE_TEST_STRINGS = [
    "## AutoGen FULL Tutorial with Python (Step-By-Step)",
    "This is an intermediate tutorial for installing and using AutoGen locally",
    "PT15M4S",
    "the model we're going to be using today is GPT 3.5 turbo",  # From the transcript
]

XLSX_TEST_STRINGS = [
    "## 09060124-b5e7-4717-9d07-3c046eb",
    "6ff4173b-42a5-4784-9b19-f49caff4d93d",
    "affc7dad-52dc-4b98-9b5d-51e65d8a8ad0",
]

XLS_TEST_STRINGS = [
    "## 09060124-b5e7-4717-9d07-3c046eb",
    "6ff4173b-42a5-4784-9b19-f49caff4d93d",
    "affc7dad-52dc-4b98-9b5d-51e65d8a8ad0",
]

DOCX_TEST_STRINGS = [
    "314b0a30-5b04-470b-b9f7-eed2c2bec74a",
    "49e168b7-d2ae-407f-a055-2167576f39a1",
    "## d666f1f7-46cb-42bd-9a39-9a39cf2a509f",
    "# Abstract",
    "# Introduction",
    "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
]

MSG_TEST_STRINGS = [
    "# Email Message",
    "**From:** test.sender@example.com",
    "**To:** test.recipient@example.com",
    "**Subject:** Test Email Message",
    "## Content",
    "This is the body of the test email message",
]

DOCX_COMMENT_TEST_STRINGS = [
    "314b0a30-5b04-470b-b9f7-eed2c2bec74a",
    "49e168b7-d2ae-407f-a055-2167576f39a1",
    "## d666f1f7-46cb-42bd-9a39-9a39cf2a509f",
    "# Abstract",
    "# Introduction",
    "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
    "This is a test comment. 12df-321a",
    "Yet another comment in the doc. 55yiyi-asd09",
]

PPTX_TEST_STRINGS = [
    "2cdda5c8-e50e-4db4-b5f0-9722a649f455",
    "04191ea8-5c73-4215-a1d3-1cfb43aaaf12",
    "44bf7d06-5e7a-4a40-a2e1-a2e42ef28c8a",
    "1b92870d-e3b5-4e65-8153-919f4ff45592",
    "AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
    "a3f6004b-6f4f-4ea8-bee3-3741f4dc385f",  # chart title
    "2003",  # chart value
]

BLOG_TEST_URL = "https://microsoft.github.io/autogen/blog/2023/04/21/LLM-tuning-math"
BLOG_TEST_STRINGS = [
    "Large language models (LLMs) are powerful tools that can generate natural language texts for various applications, such as chatbots, summarization, translation, and more. GPT-4 is currently the state of the art LLM in the world. Is model selection irrelevant? What about inference parameters?",
    "an example where high cost can easily prevent a generic complex",
]


RSS_TEST_STRINGS = [
    "The Official Microsoft Blog",
    "In the case of AI, it is absolutely true that the industry is moving incredibly fast",
]


WIKIPEDIA_TEST_URL = "https://en.wikipedia.org/wiki/Microsoft"
WIKIPEDIA_TEST_STRINGS = [
    "Microsoft entered the operating system (OS) business in 1980 with its own version of [Unix]",
    'Microsoft was founded by [Bill Gates](/wiki/Bill_Gates "Bill Gates")',
]
WIKIPEDIA_TEST_EXCLUDES = [
    "You are encouraged to create an account and log in",
    "154 languages",
    "move to sidebar",
]

SERP_TEST_URL = "https://www.bing.com/search?q=microsoft+wikipedia"
SERP_TEST_STRINGS = [
    "](https://en.wikipedia.org/wiki/Microsoft",
    "Microsoft Corporation is **an American multinational corporation and technology company headquartered** in Redmond",
    "1995–2007: Foray into the Web, Windows 95, Windows XP, and Xbox",
]
SERP_TEST_EXCLUDES = [
    "https://www.bing.com/ck/a?!&&p=",
    "data:image/svg+xml,%3Csvg%20width%3D",
]

CSV_CP932_TEST_STRINGS = [
    "名前,年齢,住所",
    "佐藤太郎,30,東京",
    "三木英子,25,大阪",
    "髙橋淳,35,名古屋",
]

LLM_TEST_STRINGS = [
    "5bda1dd6",
]

JSON_TEST_STRINGS = [
    "5b64c88c-b3c3-4510-bcb8-da0b200602d8",
    "9700dc99-6685-40b4-9a3a-5e406dcb37f3",
]


# --- Helper Functions ---
def validate_strings(result, expected_strings, exclude_strings=None):
    """Validate presence or absence of specific strings."""
    text_content = result.text_content.replace("\\", "")
    for string in expected_strings:
        assert string in text_content
    if exclude_strings:
        for string in exclude_strings:
            assert string not in text_content


@pytest.mark.skipif(
    skip_remote,
    reason="do not run tests that query external urls",
)
def test_markitdown_remote() -> None:
    markitdown = MarkItDown()

    # By URL
    result = markitdown.convert(PDF_TEST_URL)
    for test_string in PDF_TEST_STRINGS:
        assert test_string in result.text_content

    # By stream
    response = requests.get(PDF_TEST_URL)
    result = markitdown.convert_stream(
        io.BytesIO(response.content), file_extension=".pdf", url=PDF_TEST_URL
    )
    for test_string in PDF_TEST_STRINGS:
        assert test_string in result.text_content

    # Youtube
    # TODO: This test randomly fails for some reason. Haven't been able to repro it yet. Disabling until I can debug the issue
    # result = markitdown.convert(YOUTUBE_TEST_URL)
    # for test_string in YOUTUBE_TEST_STRINGS:
    #     assert test_string in result.text_content


def test_markitdown_local() -> None:
    markitdown = MarkItDown()

    # Test XLSX processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.xlsx"))
    validate_strings(result, XLSX_TEST_STRINGS)

    # Test XLS processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.xls"))
    for test_string in XLS_TEST_STRINGS:
        text_content = result.text_content.replace("\\", "")
        assert test_string in text_content

    # Test DOCX processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.docx"))
    validate_strings(result, DOCX_TEST_STRINGS)

    # Test DOCX processing, with comments
    result = markitdown.convert(
        os.path.join(TEST_FILES_DIR, "test_with_comment.docx"),
        style_map="comment-reference => ",
    )
    validate_strings(result, DOCX_COMMENT_TEST_STRINGS)

    # Test DOCX processing, with comments and setting style_map on init
    markitdown_with_style_map = MarkItDown(style_map="comment-reference => ")
    result = markitdown_with_style_map.convert(
        os.path.join(TEST_FILES_DIR, "test_with_comment.docx")
    )
    validate_strings(result, DOCX_COMMENT_TEST_STRINGS)

    # Test PPTX processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.pptx"))
    validate_strings(result, PPTX_TEST_STRINGS)

    # Test HTML processing
    result = markitdown.convert(
        os.path.join(TEST_FILES_DIR, "test_blog.html"), url=BLOG_TEST_URL
    )
    validate_strings(result, BLOG_TEST_STRINGS)

    # Test ZIP file processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_files.zip"))
    validate_strings(result, XLSX_TEST_STRINGS)

    # Test Wikipedia processing
    result = markitdown.convert(
        os.path.join(TEST_FILES_DIR, "test_wikipedia.html"), url=WIKIPEDIA_TEST_URL
    )
    text_content = result.text_content.replace("\\", "")
    validate_strings(result, WIKIPEDIA_TEST_STRINGS, WIKIPEDIA_TEST_EXCLUDES)

    # Test Bing processing
    result = markitdown.convert(
        os.path.join(TEST_FILES_DIR, "test_serp.html"), url=SERP_TEST_URL
    )
    text_content = result.text_content.replace("\\", "")
    validate_strings(result, SERP_TEST_STRINGS, SERP_TEST_EXCLUDES)

    # Test RSS processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_rss.xml"))
    text_content = result.text_content.replace("\\", "")
    for test_string in RSS_TEST_STRINGS:
        assert test_string in text_content

    ## Test non-UTF-8 encoding
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_mskanji.csv"))
    validate_strings(result, CSV_CP932_TEST_STRINGS)

    # Test MSG (Outlook email) processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_outlook_msg.msg"))
    validate_strings(result, MSG_TEST_STRINGS)

    # Test JSON processing
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.json"))
    validate_strings(result, JSON_TEST_STRINGS)

    # Test input with leading blank characters
    input_data = b" \n\n\n<html><body><h1>Test</h1></body></html>"
    result = markitdown.convert_stream(io.BytesIO(input_data))
    assert "# Test" in result.text_content


@pytest.mark.skipif(
    skip_exiftool,
    reason="do not run if exiftool is not installed",
)
def test_markitdown_exiftool() -> None:
    # Test that the automatic discovery of exiftool throws a warning
    # and is disabled
    try:
        with catch_warnings(record=True) as w:
            markitdown = MarkItDown()
            result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg"))
            assert len(w) == 1
            assert w[0].category is DeprecationWarning
            assert result.text_content.strip() == ""
    finally:
        resetwarnings()

    # Test explicitly setting the location of exiftool
    which_exiftool = shutil.which("exiftool")
    markitdown = MarkItDown(exiftool_path=which_exiftool)
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg"))
    for key in JPG_TEST_EXIFTOOL:
        target = f"{key}: {JPG_TEST_EXIFTOOL[key]}"
        assert target in result.text_content

    # Test setting the exiftool path through an environment variable
    os.environ["EXIFTOOL_PATH"] = which_exiftool
    markitdown = MarkItDown()
    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test.jpg"))
    for key in JPG_TEST_EXIFTOOL:
        target = f"{key}: {JPG_TEST_EXIFTOOL[key]}"
        assert target in result.text_content


@pytest.mark.skipif(
    skip_llm,
    reason="do not run llm tests without a key",
)
def test_markitdown_llm() -> None:
    client = openai.OpenAI()
    markitdown = MarkItDown(llm_client=client, llm_model="gpt-4o")

    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_llm.jpg"))

    for test_string in LLM_TEST_STRINGS:
        assert test_string in result.text_content

    # This is not super precise. It would also accept "red square", "blue circle",
    # "the square is not blue", etc. But it's sufficient for this test.
    for test_string in ["red", "circle", "blue", "square"]:
        assert test_string in result.text_content.lower()


if __name__ == "__main__":
    """Runs this file's tests from the command line."""
    # test_markitdown_remote()
    # test_markitdown_local()
    test_markitdown_exiftool()
    # test_markitdown_deprecation()
    # test_markitdown_llm()