From dd7d44eae74bdfdb46fdd6402a7bb9bba493d6fe Mon Sep 17 00:00:00 2001 From: Lawrence Wang Date: Thu, 2 May 2024 16:22:50 -0400 Subject: [PATCH 1/3] Add handling for API Rate Limiting --- issue_metrics.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/issue_metrics.py b/issue_metrics.py index ec85210..77e02e0 100644 --- a/issue_metrics.py +++ b/issue_metrics.py @@ -23,6 +23,7 @@ import shutil import sys from typing import List, Union +from time import sleep import github3 from auth import auth_to_github, get_github_app_installation_token @@ -62,19 +63,38 @@ def search_issues( Returns: List[github3.search.IssueSearchResult]: A list of issues that match the search query. """ + + def wait_for_GH_api_refresh(iterator): + # Rate Limit Handling: API only allows 30 requests per minute + while iterator.ratelimit_remaining < 5: + print( + "Github API Rate Limit Low, waiting 1 minute to refresh" + ) + sleep(65) + + ISSUES_PER_PAGE = 100 + print("Searching for issues...") - issues_iterator = github_connection.search_issues(search_query, per_page=100) + issues_iterator = github_connection.search_issues( + search_query, per_page=ISSUES_PER_PAGE + ) + wait_for_GH_api_refresh(issues_iterator) + - # Print the issue titles issues = [] repos_and_owners_string = "" for item in owners_and_repositories: repos_and_owners_string += f"{item['owner']}/{item['repository']} " + # Print the issue titles try: - for issue in issues_iterator: + for idx, issue in enumerate(issues_iterator, 1): print(issue.title) # type: ignore issues.append(issue) + + # requests are sent once per page of issues + if idx % ISSUES_PER_PAGE == 0: + wait_for_GH_api_refresh(issues_iterator) except github3.exceptions.ForbiddenError: print( f"You do not have permission to view a repository from: '{repos_and_owners_string}'; Check your API Token." From bdfd787ec9013e5681ad1263f51fbc4ebffb7796 Mon Sep 17 00:00:00 2001 From: Lawrence Wang Date: Thu, 2 May 2024 16:52:19 -0400 Subject: [PATCH 2/3] update lint and test --- issue_metrics.py | 21 ++++++++++----------- test_issue_metrics.py | 11 +++++++++-- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/issue_metrics.py b/issue_metrics.py index 77e02e0..e1ff563 100644 --- a/issue_metrics.py +++ b/issue_metrics.py @@ -22,10 +22,11 @@ import shutil import sys -from typing import List, Union from time import sleep +from typing import List, Union import github3 +import github3.structs from auth import auth_to_github, get_github_app_installation_token from classes import IssueWithMetrics from config import EnvVars, get_env_vars @@ -64,22 +65,19 @@ def search_issues( List[github3.search.IssueSearchResult]: A list of issues that match the search query. """ - def wait_for_GH_api_refresh(iterator): + def wait_for_api_refresh(iterator: github3.structs.SearchIterator): # Rate Limit Handling: API only allows 30 requests per minute while iterator.ratelimit_remaining < 5: - print( - "Github API Rate Limit Low, waiting 1 minute to refresh" - ) + print("Github API Rate Limit Low, waiting 1 minute to refresh") sleep(65) - ISSUES_PER_PAGE = 100 + issues_per_page = 100 print("Searching for issues...") issues_iterator = github_connection.search_issues( - search_query, per_page=ISSUES_PER_PAGE + search_query, per_page=issues_per_page ) - wait_for_GH_api_refresh(issues_iterator) - + wait_for_api_refresh(issues_iterator) issues = [] repos_and_owners_string = "" @@ -93,8 +91,9 @@ def wait_for_GH_api_refresh(iterator): issues.append(issue) # requests are sent once per page of issues - if idx % ISSUES_PER_PAGE == 0: - wait_for_GH_api_refresh(issues_iterator) + if idx % issues_per_page == 0: + wait_for_api_refresh(issues_iterator) + except github3.exceptions.ForbiddenError: print( f"You do not have permission to view a repository from: '{repos_and_owners_string}'; Check your API Token." diff --git a/test_issue_metrics.py b/test_issue_metrics.py index 1d41d3b..a0bb3e9 100644 --- a/test_issue_metrics.py +++ b/test_issue_metrics.py @@ -42,13 +42,20 @@ class TestSearchIssues(unittest.TestCase): def test_search_issues(self): """Test that search_issues returns the correct issues.""" + # Set up the mock GitHub connection object - mock_connection = MagicMock() mock_issues = [ MagicMock(title="Issue 1"), MagicMock(title="Issue 2"), ] - mock_connection.search_issues.return_value = mock_issues + + # simulating github3.structs.SearchIterator return value + mock_search_result = MagicMock() + mock_search_result.__iter__.return_value = iter(mock_issues) + mock_search_result.ratelimit_remaining = 30 + + mock_connection = MagicMock() + mock_connection.search_issues.return_value = mock_search_result # Call search_issues and check that it returns the correct issues repo_with_owner = {"owner": "owner1", "repository": "repo1"} From f222ed7e041aef97b0a7a22b945e9fc546f0529d Mon Sep 17 00:00:00 2001 From: Lawrence Wang Date: Thu, 2 May 2024 17:09:35 -0400 Subject: [PATCH 3/3] Add exponential sleep increase --- issue_metrics.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/issue_metrics.py b/issue_metrics.py index e1ff563..be8327a 100644 --- a/issue_metrics.py +++ b/issue_metrics.py @@ -65,11 +65,24 @@ def search_issues( List[github3.search.IssueSearchResult]: A list of issues that match the search query. """ + # Rate Limit Handling: API only allows 30 requests per minute def wait_for_api_refresh(iterator: github3.structs.SearchIterator): - # Rate Limit Handling: API only allows 30 requests per minute + max_retries = 5 + retry_count = 0 + sleep_time = 70 + while iterator.ratelimit_remaining < 5: - print("Github API Rate Limit Low, waiting 1 minute to refresh") - sleep(65) + if retry_count >= max_retries: + raise RuntimeError("Exceeded maximum retries for API rate limit") + + print( + f"GitHub API Rate Limit Low, waiting {sleep_time} seconds to refresh." + ) + sleep(sleep_time) + + # Exponentially increase the sleep time for the next retry + sleep_time *= 2 + retry_count += 1 issues_per_page = 100