diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 96997cf..645d230 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -344,8 +344,11 @@ class YouTubeConverter(DocumentConverter): assert isinstance(params["v"][0], str) video_id = str(params["v"][0]) try: + youtube_transcript_languages = kwargs.get( + "youtube_transcript_languages", ("en",) + ) # Must be a single transcript. - transcript = YouTubeTranscriptApi.get_transcript(video_id) # type: ignore + transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=youtube_transcript_languages) # type: ignore transcript_text = " ".join([part["text"] for part in transcript]) # type: ignore # Alternative formatting: # formatter = TextFormatter() @@ -1003,7 +1006,7 @@ class MarkItDown: self._append_ext(extensions, g) # Convert - result = self._convert(temp_path, extensions, url=response.url) + result = self._convert(temp_path, extensions, url=response.url, **kwargs) # Clean up finally: try: