Add support for GH prs as well
This commit is contained in:
@@ -846,22 +846,25 @@ class ImageConverter(MediaConverter):
|
|||||||
|
|
||||||
|
|
||||||
class GitHubIssueConverter(DocumentConverter):
|
class GitHubIssueConverter(DocumentConverter):
|
||||||
"""Converts GitHub issues to Markdown."""
|
"""Converts GitHub issues and pull requests to Markdown."""
|
||||||
|
|
||||||
def convert(self, issue_url, github_token) -> Union[None, DocumentConverterResult]:
|
def convert(self, github_url, github_token) -> Union[None, DocumentConverterResult]:
|
||||||
# Bail if not a valid GitHub issue URL
|
# Bail if not a valid GitHub issue or pull request URL
|
||||||
if issue_url:
|
if github_url:
|
||||||
parsed_url = urlparse(issue_url)
|
parsed_url = urlparse(github_url)
|
||||||
path_parts = parsed_url.path.strip("/").split("/")
|
path_parts = parsed_url.path.strip("/").split("/")
|
||||||
if len(path_parts) < 4 or path_parts[2] != "issues":
|
if len(path_parts) < 4 or path_parts[2] not in ["issues", "pull"]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if not github_token:
|
if not github_token:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"GitHub token is not set. Cannot convert GitHub issue."
|
"GitHub token is not set. Cannot convert GitHub issue or pull request."
|
||||||
)
|
)
|
||||||
|
|
||||||
return self._convert_github_issue(issue_url, github_token)
|
if path_parts[2] == "issues":
|
||||||
|
return self._convert_github_issue(github_url, github_token)
|
||||||
|
elif path_parts[2] == "pull":
|
||||||
|
return self._convert_github_pr(github_url, github_token)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -914,6 +917,55 @@ class GitHubIssueConverter(DocumentConverter):
|
|||||||
text_content=markdown_content,
|
text_content=markdown_content,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _convert_github_pr(
|
||||||
|
self, pr_url: str, github_token: str
|
||||||
|
) -> DocumentConverterResult:
|
||||||
|
"""
|
||||||
|
Convert a GitHub pull request to a markdown document.
|
||||||
|
Args:
|
||||||
|
pr_url (str): The URL of the GitHub pull request to convert.
|
||||||
|
github_token (str): A GitHub token with access to the repository.
|
||||||
|
Returns:
|
||||||
|
DocumentConverterResult: The result containing the pull request title and markdown content.
|
||||||
|
Raises:
|
||||||
|
ImportError: If the PyGithub library is not installed.
|
||||||
|
ValueError: If the provided URL is not a valid GitHub pull request URL.
|
||||||
|
"""
|
||||||
|
if not IS_GITHUB_ISSUE_CAPABLE:
|
||||||
|
raise ImportError(
|
||||||
|
"PyGithub is not installed. Please install it to use this feature."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse the pull request URL
|
||||||
|
parsed_url = urlparse(pr_url)
|
||||||
|
path_parts = parsed_url.path.strip("/").split("/")
|
||||||
|
if len(path_parts) < 4 or path_parts[2] != "pull":
|
||||||
|
raise ValueError("Invalid GitHub pull request URL")
|
||||||
|
|
||||||
|
owner, repo, _, pr_number = path_parts[:4]
|
||||||
|
|
||||||
|
# Authenticate with GitHub
|
||||||
|
g = Github(github_token)
|
||||||
|
repo = g.get_repo(f"{owner}/{repo}")
|
||||||
|
pr = repo.get_pull(int(pr_number))
|
||||||
|
|
||||||
|
# Convert pull request details to markdown
|
||||||
|
markdown_content = f"# {pr.title}\n\n{pr.body}\n\n"
|
||||||
|
markdown_content += f"**State:** {pr.state}\n"
|
||||||
|
markdown_content += f"**Created at:** {pr.created_at}\n"
|
||||||
|
markdown_content += f"**Updated at:** {pr.updated_at}\n"
|
||||||
|
markdown_content += f"**Comments:**\n"
|
||||||
|
|
||||||
|
for comment in pr.get_issue_comments():
|
||||||
|
markdown_content += (
|
||||||
|
f"- {comment.user.login} ({comment.created_at}): {comment.body}\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
return DocumentConverterResult(
|
||||||
|
title=pr.title,
|
||||||
|
text_content=markdown_content,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FileConversionException(BaseException):
|
class FileConversionException(BaseException):
|
||||||
pass
|
pass
|
||||||
@@ -984,16 +1036,18 @@ class MarkItDown:
|
|||||||
def convert_url(
|
def convert_url(
|
||||||
self, url: str, **kwargs: Any
|
self, url: str, **kwargs: Any
|
||||||
) -> DocumentConverterResult: # TODO: fix kwargs type
|
) -> DocumentConverterResult: # TODO: fix kwargs type
|
||||||
# Handle GitHub issue URLs directly
|
# Handle GitHub issue and pull request URLs directly
|
||||||
parsed_url = urlparse(url)
|
parsed_url = urlparse(url)
|
||||||
if parsed_url.hostname == "github.com" and "/issues/" in parsed_url.path:
|
if parsed_url.hostname == "github.com" and any(
|
||||||
|
x in parsed_url.path for x in ["/issues/", "/pull/"]
|
||||||
|
):
|
||||||
github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN"))
|
github_token = kwargs.get("github_token", os.getenv("GITHUB_TOKEN"))
|
||||||
if not github_token:
|
if not github_token:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"GitHub token is required for GitHub issue conversion."
|
"GitHub token is required for GitHub issue or pull request conversion."
|
||||||
)
|
)
|
||||||
return GitHubIssueConverter().convert(
|
return GitHubIssueConverter().convert(
|
||||||
issue_url=url, github_token=github_token
|
github_url=url, github_token=github_token
|
||||||
)
|
)
|
||||||
|
|
||||||
# Send a HTTP request to the URL
|
# Send a HTTP request to the URL
|
||||||
|
|||||||
@@ -88,6 +88,7 @@ SERP_TEST_EXCLUDES = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
GITHUB_ISSUE_URL = "https://github.com/microsoft/autogen/issues/1421"
|
GITHUB_ISSUE_URL = "https://github.com/microsoft/autogen/issues/1421"
|
||||||
|
GITHUB_PR_URL = "https://github.com/microsoft/autogen/pull/194"
|
||||||
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
|
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
|
||||||
|
|
||||||
|
|
||||||
@@ -195,9 +196,21 @@ def test_markitdown_github_issue() -> None:
|
|||||||
assert "Comments:" in result.text_content
|
assert "Comments:" in result.text_content
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
not GITHUB_TOKEN,
|
||||||
|
reason="GitHub token not provided",
|
||||||
|
)
|
||||||
|
def test_markitdown_github_pr() -> None:
|
||||||
|
markitdown = MarkItDown()
|
||||||
|
result = markitdown.convert(GITHUB_PR_URL, github_token=GITHUB_TOKEN)
|
||||||
|
print(result.text_content)
|
||||||
|
assert "faq" in result.text_content
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
"""Runs this file's tests from the command line."""
|
"""Runs this file's tests from the command line."""
|
||||||
test_markitdown_remote()
|
test_markitdown_remote()
|
||||||
test_markitdown_local()
|
test_markitdown_local()
|
||||||
test_markitdown_exiftool()
|
test_markitdown_exiftool()
|
||||||
test_markitdown_github_issue()
|
test_markitdown_github_issue()
|
||||||
|
test_markitdown_github_pr()
|
||||||
|
|||||||
Reference in New Issue
Block a user