From 8f16f32d530da4cfe28777671d9818c30434f9bf Mon Sep 17 00:00:00 2001 From: gagb Date: Thu, 12 Dec 2024 23:10:23 +0000 Subject: [PATCH] Add tests --- pyproject.toml | 1 + src/markitdown/_markitdown.py | 29 ++++++++++++++++++++++++++++- tests/test_markitdown.py | 17 +++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d1dd737..b6a87c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "youtube-transcript-api", "SpeechRecognition", "pathvalidate", + "pygithub" ] [project.urls] diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index a7a2891..141caa9 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -50,7 +50,7 @@ try: IS_GITHUB_ISSUE_CAPABLE = True except ModuleNotFoundError: - pass + IS_GITHUB_ISSUE_CAPABLE = False class _CustomMarkdownify(markdownify.MarkdownConverter): @@ -1111,6 +1111,33 @@ class MarkItDown: def convert_github_issue( self, issue_url: str, github_token: str ) -> DocumentConverterResult: + """ + Convert a GitHub issue to a markdown document. + + Args: + issue_url (str): The URL of the GitHub issue to convert. + github_token (str): A GitHub token with access to the repository. + + Returns: + DocumentConverterResult: The result containing the issue title and markdown content. + + Raises: + ImportError: If the PyGithub library is not installed. + ValueError: If the provided URL is not a valid GitHub issue URL. + + Example: + # Example markdown format + # Issue Title + + Issue body content... + + **State:** open + **Created at:** 2023-10-01 12:34:56 + **Updated at:** 2023-10-02 12:34:56 + **Comments:** + - user1 (2023-10-01 13:00:00): Comment content... + - user2 (2023-10-01 14:00:00): Another comment... + """ if not IS_GITHUB_ISSUE_CAPABLE: raise ImportError( "PyGithub is not installed. Please install it to use this feature." 
diff --git a/tests/test_markitdown.py b/tests/test_markitdown.py
index 94fd886..ee63fa2 100644
--- a/tests/test_markitdown.py
+++ b/tests/test_markitdown.py
@@ -87,6 +87,9 @@ SERP_TEST_EXCLUDES = [
     "data:image/svg+xml,%3Csvg%20width%3D",
 ]
 
+GITHUB_ISSUE_URL = "https://github.com/microsoft/autogen/issues/1421"
+GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
+
 
 @pytest.mark.skipif(
     skip_remote,
@@ -179,8 +182,22 @@ def test_markitdown_exiftool() -> None:
         assert target in result.text_content
 
 
+@pytest.mark.skipif(
+    not GITHUB_TOKEN,
+    reason="GitHub token not provided",
+)
+def test_markitdown_github_issue() -> None:
+    markitdown = MarkItDown()
+    result = markitdown.convert_github_issue(GITHUB_ISSUE_URL, GITHUB_TOKEN)
+    assert "User-Defined Functions" in result.text_content
+    assert "closed" in result.text_content
+    assert "Comments:" in result.text_content
+
+
 if __name__ == "__main__":
     """Runs this file's tests from the command line."""
     test_markitdown_remote()
     test_markitdown_local()
     test_markitdown_exiftool()
+    if GITHUB_TOKEN:  # a direct call bypasses the skipif mark, so re-check here
+        test_markitdown_github_issue()