All-Hands-AI · neubig · Oct 22, 2024 · Oct 20, 2024 · Oct 20, 2024 · Oct 20, 2024
diff --git a/openhands_resolver/github_issue.py b/openhands_resolver/github_issue.py
@@ -1,4 +1,5 @@
 from pydantic import BaseModel
+from typing import Optional
 
 
 class GithubIssue(BaseModel):
@@ -7,3 +8,6 @@ class GithubIssue(BaseModel):
     number: int
     title: str
     body: str
+    closing_issues: Optional[list[str]] = []
+    review_comments: Optional[list[str]] = []
+    head_branch: Optional[str] = None
diff --git a/openhands_resolver/prompts/resolve/basic-followup.jinja b/openhands_resolver/prompts/resolve/basic-followup.jinja
@@ -0,0 +1,17 @@
+The current code is an attempt at fixing a set of issues. The code is not satisfactory and follow up feedback have been provided to address this.
+Please update the code based on the feedback for the repository in /workspace.
+An environment has been set up for you to start working. You may assume all necessary tools are installed.
+
+Issues addressed 
+{{ issues }}
+
+# Feedback chain
+{{ body }}
+
+IMPORTANT: You should ONLY interact with the environment provided to you AND NEVER ASK FOR HUMAN HELP.
+You SHOULD INCLUDE PROPER INDENTATION in your edit commands.{% if repo_instruction %}
+
+Some basic information about this repository:
+{{ repo_instruction }}{% endif %}
+
+When you think you have fixed the issue through code changes, please run the following command: <execute_bash> exit </execute_bash>.
diff --git a/openhands_resolver/resolve_issues.py b/openhands_resolver/resolve_issues.py
@@ -13,6 +13,7 @@
 import pathlib
 import subprocess
 import jinja2
+import json
 
 from tqdm import tqdm
 
@@ -197,9 +198,16 @@ def get_instruction(
     issue: GithubIssue,
     prompt_template: str,
     repo_instruction: str | None = None,
+    issue_type: str | None = None
 ):  # Prepare instruction
     template = jinja2.Template(prompt_template)
-    instruction = template.render(body=issue.body, repo_instruction=repo_instruction)
+    if issue_type == None:
+        instruction = template.render(body=issue.body, repo_instruction=repo_instruction)
+    else:
+        issues_context = json.dumps(issue.closing_issues, indent=4)
+        comment_chain = json.dumps(issue.review_comments, indent=4)
+        instruction = template.render(issues=issues_context, body=comment_chain, repo_instruction=repo_instruction)
+
     return instruction
 
 
@@ -254,6 +262,7 @@ async def process_issue(
     prompt_template: str,
     repo_instruction: str | None = None,
     reset_logger: bool = True,
+    issue_type: str | None = None
 ) -> ResolverOutput:
 
     # Setup the logger properly, so you can run multi-processing to parallelize processing
@@ -263,7 +272,11 @@ async def process_issue(
     else:
         logger.info(f'Starting fixing issue {issue.number}.')
 
-    workspace_base = os.path.join(output_dir, "workspace", f"issue_{issue.number}")
+    if issue_type == None:
+        workspace_base = os.path.join(output_dir, "workspace", f"issue_{issue.number}")
+    else:
+        workspace_base = os.path.join(output_dir, "workspace", f"issue_{issue_type}_{issue.number}")
+
     # Get the absolute path of the workspace base
     workspace_base = os.path.abspath(workspace_base)
     # write the repo to the workspace
@@ -292,7 +305,7 @@ async def process_issue(
     runtime = create_runtime(config, sid=f"{issue.number}")
     initialize_runtime(runtime)
 
-    instruction = get_instruction(issue, prompt_template, repo_instruction)
+    instruction = get_instruction(issue, prompt_template, repo_instruction, issue_type)
 
     # Here's how you can run the agent (similar to the `main` function) and get the final task state
     state: State | None = await run_controller(
@@ -332,9 +345,115 @@ async def process_issue(
     )
     return output
 
+def run_github_graphql_query(query: str, variables: dict, token: str) -> dict:
+
+    url = "https://api.github.com/graphql"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json"
+    }
+
+    response = requests.post(url, json={"query": query, "variables": variables}, headers=headers)
+    response.raise_for_status()  # Raise an error for bad requests (4xx or 5xx)
+
+    return response.json()
+
+
+def download_pr_metadata(owner: str, repo: str, token: str, pull_number: int):
+
+    """
+        Run a GraphQL query against the GitHub API for information on 
+            1. unresolved review comments
+            2. referenced issues the pull request would close
+
+        Args:
+            query: The GraphQL query as a string.
+            variables: A dictionary of variables for the query.
+            token: Your GitHub personal access token.
+
+        Returns:
+            The JSON response from the GitHub API.
+    """
+    # Using graphql as REST API doesn't indicate resolved status for review comments
+    # TODO: grabbing the first 10 issues, 100 review threads, and 100 coments; add pagination to retrieve all
+    query = """
+            query($owner: String!, $repo: String!, $pr: Int!) {
+                repository(owner: $owner, name: $repo) {
+                    pullRequest(number: $pr) {
+                        closingIssuesReferences(first: 10) {
+                            edges {
+                                node {
+                                    body
+                                }
+                            }
+                        }
+                        url
+                        reviewThreads(first: 100) {
+                            edges{
+                                node{
+                                    isResolved
+                                    comments(first: 100) {
+                                        totalCount
+                                        nodes {
+                                            body
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        """
+
+
+
+    variables = {
+        "owner": owner,
+        "repo": repo,
+        "pr": pull_number
+    }
+
+    url = "https://api.github.com/graphql"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json"
+    }
+
+    response = requests.post(url, json={"query": query, "variables": variables}, headers=headers)
+    response.raise_for_status()
+    response = response.json()
+
+    # Parse the response to get closing issue references and unresolved review comments
+    pr_data = response.get("data", {}).get("repository", {}).get("pullRequest", {})
+
+    # Get closing issues
+    closing_issues = pr_data.get("closingIssuesReferences", {}).get("edges", [])
+    closing_issues_bodies = [issue["node"]["body"] for issue in closing_issues]
+
+    # Get unresolved review comments
+    unresolved_comments = []
+    review_threads = pr_data.get("reviewThreads", {}).get("edges", [])
+    for thread in review_threads:
+        node = thread.get("node", {})
+        if not node.get("isResolved", True):  # Check if the review thread is unresolved
+            comments = node.get("comments", {}).get("nodes", [])
+            message = ""
+            for i, comment in enumerate(comments):
+                if i == len(comments) - 1:  # Check if it's the last comment in the thread
+                    if len(comments) > 1:
+                        message += "---\n"  # Add "---" before the last message if there's more than one comment
+                    message += "latest feedback:\n" + comment["body"] + "\n"
+                else:
+                    message += comment["body"] + "\n"  # Add each comment in a new line
+            unresolved_comments.append(message)
+
+    return closing_issues_bodies, unresolved_comments
+
+
 
 def download_issues_from_github(
-    owner: str, repo: str, token: str
+    owner: str, repo: str, token: str, issue_type: str | None
 ) -> list[GithubIssue]:
     """Download issues from Github.
 
@@ -346,7 +465,12 @@ def download_issues_from_github(
     Returns:
         List of Github issues.
     """
-    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
+
+    if issue_type == None:
+        url = f"https://api.github.com/repos/{owner}/{repo}/issues"
+    else:
+        url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
+
     headers = {
         "Authorization": f"token {token}",
         "Accept": "application/vnd.github.v3+json",
@@ -377,18 +501,36 @@ def download_issues_from_github(
                 f"Skipping issue {issue} as it is missing number, title, or body."
             )
             continue
-        # Skip pull requests
-        if "pull_request" in issue:
+
+        # Skip pull requests only for regular issues
+        if issue_type == None and "pull_request" in issue:
             continue
-        converted_issues.append(
-            GithubIssue(
-                owner=owner,
-                repo=repo,
-                number=issue["number"],
-                title=issue["title"],
-                body=issue["body"],
-            )
-        )
+
+
+
+        if issue_type == "pr":
+            closing_issues, unresolved_comments = download_pr_metadata(owner, repo, token, issue["number"])
+            head_branch = issue["head"]["ref"]
+            issue_details = GithubIssue(
+                                owner=owner,
+                                repo=repo,
+                                number=issue["number"],
+                                title=issue["title"],
+                                body=issue["body"],
+                                closing_issues=closing_issues,
+                                review_comments=unresolved_comments,
+                                head_branch=head_branch
+                            )
+        else:
+            issue_details = GithubIssue(
+                                owner=owner,
+                                repo=repo,
+                                number=issue["number"],
+                                title=issue["title"],
+                                body=issue["body"],
+                            )
+
+        converted_issues.append(issue_details)
     return converted_issues
 
 
@@ -421,6 +563,7 @@ async def resolve_issues(
     prompt_template: str,  # Add this parameter
     repo_instruction: str | None,
     issue_numbers: list[int] | None,
+    issue_type: str | None,
 ) -> None:
     """Resolve github issues.
 
@@ -440,8 +583,9 @@ async def resolve_issues(
 
     # Load dataset
     issues: list[GithubIssue] = download_issues_from_github(
-        owner, repo, token
+        owner, repo, token, issue_type=issue_type
     )
+
     if issue_numbers is not None:
         issues = [issue for issue in issues if issue.number in issue_numbers]
         logger.info(f"Limiting resolving to issues {issue_numbers}.")
@@ -529,6 +673,17 @@ async def resolve_issues(
         # Replace the ProcessPoolExecutor with asyncio.gather
         tasks = []
         for issue in issues:
+
+            # checkout to pr branch
+            if issue_type == "pr":
+                logger.info(f"Checking out to PR branch {issue.head_branch} for issue {issue.number}")
+                subprocess.check_call(
+                    ["git", "fetch", "origin", f"pull/{issue.number}/head"],
+                    cwd=repo_dir,
+                )
+
+
+
             task = update_progress(
                 process_issue(
                     issue,
@@ -540,6 +695,7 @@ async def resolve_issues(
                     prompt_template,
                     repo_instruction,
                     bool(num_workers > 1),
+                    issue_type=issue_type
                 ),
                 output_fp,
                 pbar,
@@ -656,6 +812,14 @@ def main():
         default=None,
         help="Path to the repository instruction file in text format.",
     )
+
+    parser.add_argument(
+        "--issue-type",
+        type=str,
+        default=None,
+        help="Type of issue to resolve, either open issue or pr comments.",
+    )
+
     my_args = parser.parse_args()
 
     runtime_container_image = my_args.runtime_container_image
@@ -681,13 +845,6 @@ def main():
         base_url=my_args.llm_base_url or os.environ.get("LLM_BASE_URL", None),
     )
 
-    # Read the prompt template
-    prompt_file = my_args.prompt_file
-    if prompt_file is None:
-        prompt_file = os.path.join(os.path.dirname(__file__), "prompts/resolve/basic-with-tests.jinja")
-    with open(prompt_file, 'r') as f:
-        prompt_template = f.read()
-
     repo_instruction = None
     if my_args.repo_instruction_file:
         with open(my_args.repo_instruction_file, 'r') as f:
@@ -697,6 +854,23 @@ def main():
     if my_args.issue_numbers:
         issue_numbers = [int(number) for number in my_args.issue_numbers.split(",")]
 
+    issue_type = None
+    if my_args.issue_type:
+        issue_type = my_args.issue_type
+        if issue_type != "pr":
+            raise ValueError("Set issue-type to 'pr' or leave empty")
+
+
+    # Read the prompt template
+    prompt_file = my_args.prompt_file
+    if prompt_file is None:
+        if issue_type == None:
+            prompt_file = os.path.join(os.path.dirname(__file__), "prompts/resolve/basic-with-tests.jinja")
+        else:
+            prompt_file = os.path.join(os.path.dirname(__file__), "prompts/resolve/basic-followup.jinja") 
+    with open(prompt_file, 'r') as f:
+        prompt_template = f.read()
+
     asyncio.run(
         resolve_issues(
             owner=owner,
@@ -712,6 +886,7 @@ def main():
             prompt_template=prompt_template,  # Pass the prompt template
             repo_instruction=repo_instruction,
             issue_numbers=issue_numbers,
+            issue_type=issue_type
         )
     )