From 17365654c93c0002971cca7b171eafb8ef6e39e1 Mon Sep 17 00:00:00 2001 From: Richard Ye <33409792+richardye101@users.noreply.github.com> Date: Tue, 26 Aug 2025 18:28:17 -0400 Subject: [PATCH] Handle PPTX shapes where position is None (#1161) * Handle shapes where position is None * Fix recursion error and place shapes without coordinates at the front --- .../src/markitdown/converters/_pptx_converter.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/packages/markitdown/src/markitdown/converters/_pptx_converter.py b/packages/markitdown/src/markitdown/converters/_pptx_converter.py index 087da32..360f177 100644 --- a/packages/markitdown/src/markitdown/converters/_pptx_converter.py +++ b/packages/markitdown/src/markitdown/converters/_pptx_converter.py @@ -168,11 +168,23 @@ class PptxConverter(DocumentConverter): # Group Shapes if shape.shape_type == pptx.enum.shapes.MSO_SHAPE_TYPE.GROUP: - sorted_shapes = sorted(shape.shapes, key=attrgetter("top", "left")) + sorted_shapes = sorted( + shape.shapes, + key=lambda x: ( + float("-inf") if not x.top else x.top, + float("-inf") if not x.left else x.left, + ), + ) for subshape in sorted_shapes: get_shape_content(subshape, **kwargs) - sorted_shapes = sorted(slide.shapes, key=attrgetter("top", "left")) + sorted_shapes = sorted( + slide.shapes, + key=lambda x: ( + float("-inf") if not x.top else x.top, + float("-inf") if not x.left else x.left, + ), + ) for shape in sorted_shapes: get_shape_content(shape, **kwargs)