Doc Intelligence fixes for refactored code (#325)
* added priority flag to doc intel converter constructor * fixed analysis features bug for docx
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from typing import Any, Union
|
||||
import re
|
||||
|
||||
# Azure imports
|
||||
from azure.ai.documentintelligence import DocumentIntelligenceClient
|
||||
@@ -36,6 +37,7 @@ class DocumentIntelligenceConverter(DocumentConverter):
|
||||
api_version=self.api_version,
|
||||
credential=DefaultAzureCredential(),
|
||||
)
|
||||
self._priority = priority
|
||||
|
||||
def convert(
|
||||
self, local_path: str, **kwargs: Any
|
||||
@@ -62,8 +64,8 @@ class DocumentIntelligenceConverter(DocumentConverter):
|
||||
with open(local_path, "rb") as f:
|
||||
file_bytes = f.read()
|
||||
|
||||
# Certain document analysis features are not available for some filetypes (.xlsx, .pptx, .html)
|
||||
if extension.lower() in [".xlsx", ".pptx", ".html"]:
|
||||
# Certain document analysis features are not available for Office and HTML filetypes (.xlsx, .pptx, .html, .docx)
|
||||
if extension.lower() in [".xlsx", ".pptx", ".html", ".docx"]:
|
||||
analysis_features = []
|
||||
else:
|
||||
analysis_features = [
|
||||
|
||||
Reference in New Issue
Block a user