Doc Intelligence fixes for refactored code (#325)

* added priority flag to doc intel converter constructor
* fixed analysis features bug for docx
This commit is contained in:
KennyZhang1
2025-02-11 19:01:46 -05:00
committed by GitHub
parent 935da9976c
commit 97eeed5f32

View File

@@ -1,4 +1,5 @@
from typing import Any, Union
import re
# Azure imports
from azure.ai.documentintelligence import DocumentIntelligenceClient
@@ -36,6 +37,7 @@ class DocumentIntelligenceConverter(DocumentConverter):
api_version=self.api_version,
credential=DefaultAzureCredential(),
)
self._priority = priority
def convert(
self, local_path: str, **kwargs: Any
@@ -62,8 +64,8 @@ class DocumentIntelligenceConverter(DocumentConverter):
with open(local_path, "rb") as f:
file_bytes = f.read()
# Certain document analysis features are not available for filetypes (.xlsx, .pptx, .html)
if extension.lower() in [".xlsx", ".pptx", ".html"]:
# Certain document analysis features are not available for office filetypes (.xlsx, .pptx, .html, .docx)
if extension.lower() in [".xlsx", ".pptx", ".html", ".docx"]:
analysis_features = []
else:
analysis_features = [