feat(pptx): support image description with LLM for pptx files (#306)
This commit is contained in:
@@ -787,6 +787,35 @@ class PptxConverter(HtmlConverter):
|
|||||||
Converts PPTX files to Markdown. Supports heading, tables and images with alt text.
|
Converts PPTX files to Markdown. Supports heading, tables and images with alt text.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def _get_llm_description(
    self, llm_client, llm_model, image_blob, content_type, prompt=None
):
    """Ask an LLM to describe an image and return the text it produces.

    The image is sent inline as a base64 ``data:`` URI in a single user
    message, followed by the text prompt.

    Args:
        llm_client: Object exposing ``chat.completions.create(model=...,
            messages=...)``.
        llm_model: Model identifier forwarded unchanged to the client.
        image_blob: Raw image bytes.
        content_type: MIME type of the image (e.g. ``"image/png"``). When
            missing or empty, ``"application/octet-stream"`` is used.
        prompt: Optional instruction for the model. ``None`` or a
            whitespace-only string selects the built-in alt-text prompt.

    Returns:
        The ``content`` of the first choice's message in the response.

    Raises:
        Whatever the client raises; errors are not handled here.
    """
    if prompt is None or prompt.strip() == "":
        prompt = "Write a detailed alt text for this image with less than 50 words."

    # Without this guard a missing type would be rendered literally as
    # "data:None;base64,..." which is not a valid data URI.
    if not content_type:
        content_type = "application/octet-stream"

    image_base64 = base64.b64encode(image_blob).decode("utf-8")
    data_uri = f"data:{content_type};base64,{image_base64}"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": data_uri,
                    },
                },
                {"type": "text", "text": prompt},
            ],
        }
    ]

    response = llm_client.chat.completions.create(
        model=llm_model, messages=messages
    )
    return response.choices[0].message.content
|
||||||
|
|
||||||
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
|
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
|
||||||
# Bail if not a PPTX
|
# Bail if not a PPTX
|
||||||
extension = kwargs.get("file_extension", "")
|
extension = kwargs.get("file_extension", "")
|
||||||
@@ -807,17 +836,38 @@ class PptxConverter(HtmlConverter):
|
|||||||
# Pictures
|
# Pictures
|
||||||
if self._is_picture(shape):
|
if self._is_picture(shape):
|
||||||
# https://github.com/scanny/python-pptx/pull/512#issuecomment-1713100069
|
# https://github.com/scanny/python-pptx/pull/512#issuecomment-1713100069
|
||||||
alt_text = ""
|
|
||||||
try:
|
llm_description = None
|
||||||
alt_text = shape._element._nvXxPr.cNvPr.attrib.get("descr", "")
|
alt_text = None
|
||||||
except Exception:
|
|
||||||
pass
|
llm_client = kwargs.get("llm_client")
|
||||||
|
llm_model = kwargs.get("llm_model")
|
||||||
|
if llm_client is not None and llm_model is not None:
|
||||||
|
try:
|
||||||
|
llm_description = self._get_llm_description(
|
||||||
|
llm_client,
|
||||||
|
llm_model,
|
||||||
|
shape.image.blob,
|
||||||
|
shape.image.content_type,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
# Unable to describe with LLM
|
||||||
|
pass
|
||||||
|
|
||||||
|
if not llm_description:
|
||||||
|
try:
|
||||||
|
alt_text = shape._element._nvXxPr.cNvPr.attrib.get(
|
||||||
|
"descr", ""
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
# Unable to get alt text
|
||||||
|
pass
|
||||||
|
|
||||||
# A placeholder name
|
# A placeholder name
|
||||||
filename = re.sub(r"\W", "", shape.name) + ".jpg"
|
filename = re.sub(r"\W", "", shape.name) + ".jpg"
|
||||||
md_content += (
|
md_content += (
|
||||||
"\n\n"
|
+ ")\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user