diff --git a/src/vunnel/providers/github/parser.py b/src/vunnel/providers/github/parser.py index 9e0241c2..797ca4af 100644 --- a/src/vunnel/providers/github/parser.py +++ b/src/vunnel/providers/github/parser.py @@ -19,6 +19,7 @@ import datetime import logging import os +import time from decimal import Decimal, DecimalException import requests @@ -42,6 +43,9 @@ "SWIFT": "swift", } +GITHUB_RATE_LIMIT_REMAINING_HEADER = "x-ratelimit-remaining" +GITHUB_RATE_LIMIT_RESET_HEADER = "x-ratelimit-reset" + class Parser: def __init__( # noqa: PLR0913 @@ -207,9 +211,26 @@ def get_query(token, query, timeout=125, api_url="https://api.github.com/graphql logger = logging.getLogger("get-query") headers = {"Authorization": f"token {token}"} - logger.debug(f"downloading github advisories from {api_url}") + logger.info(f"downloading github advisories from {api_url}") response = requests.post(api_url, json={"query": query}, timeout=timeout, headers=headers) + if GITHUB_RATE_LIMIT_REMAINING_HEADER in response.headers and GITHUB_RATE_LIMIT_RESET_HEADER in response.headers: + remaining = int(response.headers[GITHUB_RATE_LIMIT_REMAINING_HEADER]) + # reset time is the time in UNIX Epoch Seconds at which + # the rate limit will reset. + reset_time = int(response.headers[GITHUB_RATE_LIMIT_RESET_HEADER]) + logger.debug(f"github rate limit has {remaining} requests left {reset_time}") + if remaining < 10: + current_time = int(time.time()) + sleep_time = reset_time - current_time + # note that the rate limit resets 1x / hour, so this could be a long time + if sleep_time > 1 and sleep_time < 3600: # never sleep for more than 1 hour + logger.info(f"sleeping for {sleep_time} seconds to allow GitHub rate limit to reset") + time.sleep(sleep_time) + elif sleep_time > 3600: + raise Exception( + f"github rate limit exhaused and not expected to reset for {sleep_time} seconds. Try again later.", + ) response.raise_for_status() if response.status_code == 200: return response.json() diff --git a/tests/unit/providers/github/test_github.py b/tests/unit/providers/github/test_github.py index d0a8a252..43ceb5d9 100644 --- a/tests/unit/providers/github/test_github.py +++ b/tests/unit/providers/github/test_github.py @@ -1,6 +1,8 @@ from __future__ import annotations import pytest +import time +from unittest.mock import patch, Mock from vunnel import result, workspace from vunnel.providers.github import Config, Provider, parser from vunnel.utils import fdb as db @@ -447,6 +449,48 @@ def test_provider_schema(helpers, fake_get_query, advisories): assert workspace.result_schemas_valid(require_entries=True) +@patch("time.sleep") +@patch("requests.post") +def test_provider_respects_github_rate_limit(mock_post, mock_sleep): + response = Mock() + in_five_seconds = int(time.time()) + 5 + response.headers = {"x-ratelimit-remaining": 9, "x-ratelimit-reset": in_five_seconds} + response.status_code = 200 + mock_post.return_value = response + + def mock_json(): + return "{}" + + def mock_raise_for_status(): + pass + + response.json = mock_json + response.raise_for_status = mock_raise_for_status + parser.get_query("some-token", "some-query") + mock_sleep.assert_called_once() + + +@patch("time.sleep") +@patch("requests.post") +def test_provider_respects_github_rate_limit(mock_post, mock_sleep): + response = Mock() + in_five_seconds = int(time.time()) + 5 + response.headers = {"x-ratelimit-remaining": 11, "x-ratelimit-reset": in_five_seconds} + response.status_code = 200 + mock_post.return_value = response + + def mock_json(): + return "{}" + + def mock_raise_for_status(): + pass + + response.json = mock_json + response.raise_for_status = mock_raise_for_status + parser.get_query("some-token", "some-query") + mock_sleep.assert_not_called() + + def test_provider_via_snapshot(helpers, fake_get_query, advisories): fake_get_query([advisories(), advisories(has_next_page=True)]) workspace = helpers.provider_workspace_helper(name=Provider.name())