From 5030b5f5b3764e4f1739f3bca047a27d77c70f0b Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Mon, 30 Oct 2023 13:48:33 -0400 Subject: [PATCH 1/3] fix: respect GitHub rate limiting Previously, the client would repeatedly attempt requests until getting actually rate limited by GitHub. Signed-off-by: Will Murphy --- src/vunnel/providers/github/parser.py | 18 ++++++++- tests/unit/providers/github/test_github.py | 44 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/vunnel/providers/github/parser.py b/src/vunnel/providers/github/parser.py index 636040a1..2fcca712 100644 --- a/src/vunnel/providers/github/parser.py +++ b/src/vunnel/providers/github/parser.py @@ -19,6 +19,7 @@ import datetime import logging import os +import time from decimal import Decimal, DecimalException import requests @@ -42,6 +43,9 @@ "SWIFT": "swift", } +GITHUB_RATE_LIMIT_REMAINING_HEADER = "x-ratelimit-remaining" +GITHUB_RATE_LIMIT_RESET_HEADER = "x-ratelimit-reset" + class Parser: def __init__( # noqa: PLR0913 @@ -207,9 +211,21 @@ def get_query(token, query, timeout=125, api_url="https://api.github.com/graphql logger = logging.getLogger("get-query") headers = {"Authorization": f"token {token}"} - logger.debug(f"downloading github advisories from {api_url}") + logger.info(f"downloading github advisories from {api_url}") response = requests.post(api_url, json={"query": query}, timeout=timeout, headers=headers) + if GITHUB_RATE_LIMIT_REMAINING_HEADER in response.headers and GITHUB_RATE_LIMIT_RESET_HEADER in response.headers: + remaining = int(response.headers[GITHUB_RATE_LIMIT_REMAINING_HEADER]) + # reset time is the time in UNIX Epoch Seconds at which + # the rate limit will reset. + reset_time = int(response.headers[GITHUB_RATE_LIMIT_RESET_HEADER]) + logger.debug(f"github rate limit has {remaining} requests left {reset_time}") + if remaining < 10: + current_time = int(time.time()) + sleep_time = reset_time - current_time + # note that the rate limit resets 1x / hour, so this could be a long time + logger.info(f"sleeping for {sleep_time} seconds to allow GitHub rate limit to reset") + time.sleep(sleep_time) response.raise_for_status() if response.status_code == 200: return response.json() diff --git a/tests/unit/providers/github/test_github.py b/tests/unit/providers/github/test_github.py index a1b73749..488b6a03 100644 --- a/tests/unit/providers/github/test_github.py +++ b/tests/unit/providers/github/test_github.py @@ -1,6 +1,8 @@ from __future__ import annotations import pytest +import time +from unittest.mock import patch, Mock from vunnel import result, workspace from vunnel.providers.github import Config, Provider, parser from vunnel.utils import fdb as db @@ -447,6 +449,48 @@ def test_provider_schema(helpers, fake_get_query, advisories): assert workspace.result_schemas_valid(require_entries=True) +@patch("time.sleep") +@patch("requests.post") +def test_provider_respects_github_rate_limit(mock_post, mock_sleep): + response = Mock() + in_five_seconds = int(time.time()) + response.headers = {"x-ratelimit-remaining": 9, "x-ratelimit-reset": in_five_seconds} + response.status_code = 200 + mock_post.return_value = response + + def mock_json(): + return "{}" + + def mock_raise_for_status(): + pass + + response.json = mock_json + response.raise_for_status = mock_raise_for_status + parser.get_query("some-token", "some-query") + mock_sleep.assert_called_once() + + +@patch("time.sleep") +@patch("requests.post") +def test_provider_respects_github_rate_limit(mock_post, mock_sleep): + response = Mock() + in_five_seconds = int(time.time()) + response.headers = {"x-ratelimit-remaining": 11, "x-ratelimit-reset": in_five_seconds} + response.status_code = 200 + mock_post.return_value = response + + def mock_json(): + return "{}" + + def mock_raise_for_status(): + pass + + response.json = mock_json + response.raise_for_status = mock_raise_for_status + parser.get_query("some-token", "some-query") + mock_sleep.assert_not_called() + + def test_provider_via_snapshot(helpers, fake_get_query, advisories): fake_get_query([advisories(), advisories(has_next_page=True)]) workspace = helpers.provider_workspace_helper(name=Provider.name()) From c115823613ab672c67e8a9defab1115cd0b36f4e Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Mon, 30 Oct 2023 14:03:59 -0400 Subject: [PATCH 2/3] fix unit test typo Signed-off-by: Will Murphy --- tests/unit/providers/github/test_github.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/providers/github/test_github.py b/tests/unit/providers/github/test_github.py index 488b6a03..d946e9f0 100644 --- a/tests/unit/providers/github/test_github.py +++ b/tests/unit/providers/github/test_github.py @@ -453,7 +453,7 @@ def test_provider_schema(helpers, fake_get_query, advisories): @patch("requests.post") def test_provider_respects_github_rate_limit(mock_post, mock_sleep): response = Mock() - in_five_seconds = int(time.time()) + in_five_seconds = int(time.time()) + 5 response.headers = {"x-ratelimit-remaining": 9, "x-ratelimit-reset": in_five_seconds} response.status_code = 200 mock_post.return_value = response @@ -474,7 +474,7 @@ def mock_raise_for_status(): @patch("requests.post") def test_provider_respects_github_rate_limit(mock_post, mock_sleep): response = Mock() - in_five_seconds = int(time.time()) + in_five_seconds = int(time.time()) + 5 response.headers = {"x-ratelimit-remaining": 11, "x-ratelimit-reset": in_five_seconds} response.status_code = 200 mock_post.return_value = response From 11d11e22aa99b145c99c0d0eaa92761aaab716da Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Mon, 30 Oct 2023 17:24:19 -0400 Subject: [PATCH 3/3] never sleep for more than 1 hour Signed-off-by: Will Murphy --- src/vunnel/providers/github/parser.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/vunnel/providers/github/parser.py b/src/vunnel/providers/github/parser.py index 2fcca712..c2af7732 100644 --- a/src/vunnel/providers/github/parser.py +++ b/src/vunnel/providers/github/parser.py @@ -224,8 +224,13 @@ def get_query(token, query, timeout=125, api_url="https://api.github.com/graphql current_time = int(time.time()) sleep_time = reset_time - current_time # note that the rate limit resets 1x / hour, so this could be a long time - logger.info(f"sleeping for {sleep_time} seconds to allow GitHub rate limit to reset") - time.sleep(sleep_time) + if sleep_time > 1 and sleep_time < 3600: # never sleep for more than 1 hour + logger.info(f"sleeping for {sleep_time} seconds to allow GitHub rate limit to reset") + time.sleep(sleep_time) + elif sleep_time > 3600: + raise Exception( + f"github rate limit exhaused and not expected to reset for {sleep_time} seconds. Try again later.", + ) response.raise_for_status() if response.status_code == 200: return response.json()