fix: Implement retry logic for YouTube transcript fetching and fix URL decoding issue (#1035)

* fix: add error handling, refactor _findKey to use json.items()

* fix: improve metadata and description extraction logic

* fix: improve YouTube transcript extraction reliability

* fix: implement retry logic for YouTube transcript fetching and fix URL decoding issue

* fix(readme): add YouTube URLs to the list of inputs markitdown supports
This commit is contained in:
Nima Akbarzadeh
2025-02-28 08:17:54 +01:00
committed by GitHub
parent a87fbf01ee
commit a394cc7c27
3 changed files with 93 additions and 55 deletions

View File

@@ -184,9 +184,9 @@ def test_markitdown_remote() -> None:
# Youtube
# TODO: This test randomly fails for some reason. Haven't been able to repro it yet. Disabling until I can debug the issue
# result = markitdown.convert(YOUTUBE_TEST_URL)
# for test_string in YOUTUBE_TEST_STRINGS:
# assert test_string in result.text_content
result = markitdown.convert(YOUTUBE_TEST_URL)
for test_string in YOUTUBE_TEST_STRINGS:
assert test_string in result.text_content
def test_markitdown_local() -> None: