Add PPTX chart support

This commit is contained in:
sakasegawa
2024-12-15 20:42:55 +09:00
parent 81e3f24acd
commit 93130b5ba5
3 changed files with 34 additions and 0 deletions

View File

@@ -582,6 +582,11 @@ class PptxConverter(HtmlConverter):
"\n" + self._convert(html_table).text_content.strip() + "\n"
)
# Charts
if shape.has_chart:
md_content += self._convert_chart_to_markdown(shape.chart)
# Text areas
elif shape.has_text_frame:
if shape == title:
@@ -616,6 +621,33 @@ class PptxConverter(HtmlConverter):
return True
return False
def _is_chart(self, shape):
    """Report whether ``shape`` is a chart, judged by its ``shape_type``.

    Args:
        shape: Object exposing a ``shape_type`` attribute.

    Returns:
        ``True`` when ``shape.shape_type`` equals ``MSO_SHAPE_TYPE.CHART``,
        otherwise ``False``.
    """
    chart_type = pptx.enum.shapes.MSO_SHAPE_TYPE.CHART
    return shape.shape_type == chart_type
def _convert_chart_to_markdown(self, chart):
    """Render a chart as a Markdown ``### Chart`` heading followed by a table.

    The table has a leading ``Category`` column and one column per series,
    with one row per category of the chart's first plot.

    Args:
        chart: Object exposing ``has_title``, ``chart_title``, ``plots`` and
            ``series`` (presumably a python-pptx ``Chart`` -- confirm against
            the caller).

    Returns:
        A Markdown string that starts with a blank line and ends with a
        newline. When the first plot cannot be read, the heading is followed
        by ``[unsupported chart]`` instead of a table.
    """

    def as_cell(value):
        # A missing data point becomes an empty cell rather than "None".
        if value is None:
            return ""
        # An unescaped pipe or a line break would split the table row.
        text = str(value).replace("|", "\\|")
        return " ".join(text.splitlines())

    md = "\n\n### Chart"
    if chart.has_title:
        md += f": {chart.chart_title.text_frame.text}"
    md += "\n\n"

    try:
        categories = [c.label for c in chart.plots[0].categories]
    except (IndexError, ValueError):
        # IndexError: the chart has no plots at all.
        # ValueError: NOTE(review) python-pptx is understood to raise this for
        # plot types it does not model -- confirm against its docs.
        # Either way, degrade gracefully instead of aborting the conversion.
        return md + "[unsupported chart]\n"

    # Read the series and their values once, not once per category.
    series_list = list(chart.series)
    columns = [list(series.values) for series in series_list]

    rows = [["Category"] + [series.name for series in series_list]]
    for idx, category in enumerate(categories):
        # A series may hold fewer points than there are categories; pad the
        # gap with None (rendered as an empty cell) instead of raising.
        points = [col[idx] if idx < len(col) else None for col in columns]
        rows.append([category] + points)

    lines = ["| " + " | ".join(map(as_cell, row)) + " |" for row in rows]
    separator = "|" + "|".join(["---"] * len(rows[0])) + "|"
    lines.insert(1, separator)

    # End with a newline so that text concatenated after the table does not
    # become part of its last row.
    return md + "\n".join(lines) + "\n"
class MediaConverter(DocumentConverter):
"""

BIN
tests/test_files/test.pptx Executable file → Normal file

Binary file not shown.

View File

@@ -57,6 +57,8 @@ PPTX_TEST_STRINGS = [
"44bf7d06-5e7a-4a40-a2e1-a2e42ef28c8a",
"1b92870d-e3b5-4e65-8153-919f4ff45592",
"AutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation",
"a3f6004b-6f4f-4ea8-bee3-3741f4dc385f", # chart title
"2003", # chart value
]
BLOG_TEST_URL = "https://microsoft.github.io/autogen/blog/2023/04/21/LLM-tuning-math"