diff --git a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py index 835345a..ed8aabf 100644 --- a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py +++ b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py @@ -1,4 +1,5 @@ from typing import Any, Union +import re # Azure imports from azure.ai.documentintelligence import DocumentIntelligenceClient @@ -36,6 +37,7 @@ class DocumentIntelligenceConverter(DocumentConverter): api_version=self.api_version, credential=DefaultAzureCredential(), ) + self._priority = priority def convert( self, local_path: str, **kwargs: Any @@ -62,8 +64,8 @@ class DocumentIntelligenceConverter(DocumentConverter): with open(local_path, "rb") as f: file_bytes = f.read() - # Certain document analysis features are not availiable for filetypes (.xlsx, .pptx, .html) - if extension.lower() in [".xlsx", ".pptx", ".html"]: + # Certain document analysis features are not availiable for office filetypes (.xlsx, .pptx, .html, .docx) + if extension.lower() in [".xlsx", ".pptx", ".html", ".docx"]: analysis_features = [] else: analysis_features = [