Doc Intelligence fixes for refactored code (#325)
* added priority flag to doc intel converter constructor
* fixed analysis features bug for docx
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
from typing import Any, Union
|
from typing import Any, Union
|
||||||
|
import re
|
||||||
|
|
||||||
# Azure imports
|
# Azure imports
|
||||||
from azure.ai.documentintelligence import DocumentIntelligenceClient
|
from azure.ai.documentintelligence import DocumentIntelligenceClient
|
||||||
@@ -36,6 +37,7 @@ class DocumentIntelligenceConverter(DocumentConverter):
|
|||||||
api_version=self.api_version,
|
api_version=self.api_version,
|
||||||
credential=DefaultAzureCredential(),
|
credential=DefaultAzureCredential(),
|
||||||
)
|
)
|
||||||
|
self._priority = priority
|
||||||
|
|
||||||
def convert(
|
def convert(
|
||||||
self, local_path: str, **kwargs: Any
|
self, local_path: str, **kwargs: Any
|
||||||
@@ -62,8 +64,8 @@ class DocumentIntelligenceConverter(DocumentConverter):
|
|||||||
with open(local_path, "rb") as f:
|
with open(local_path, "rb") as f:
|
||||||
file_bytes = f.read()
|
file_bytes = f.read()
|
||||||
|
|
||||||
# Certain document analysis features are not available for filetypes (.xlsx, .pptx, .html)
|
# Certain document analysis features are not available for office filetypes (.xlsx, .pptx, .html, .docx)
|
||||||
if extension.lower() in [".xlsx", ".pptx", ".html"]:
|
if extension.lower() in [".xlsx", ".pptx", ".html", ".docx"]:
|
||||||
analysis_features = []
|
analysis_features = []
|
||||||
else:
|
else:
|
||||||
analysis_features = [
|
analysis_features = [
|
||||||
|
|||||||
Reference in New Issue
Block a user