From 5030b5f5b3764e4f1739f3bca047a27d77c70f0b Mon Sep 17 00:00:00 2001
From: Will Murphy <will.murphy@anchore.com>
Date: Mon, 30 Oct 2023 13:48:33 -0400
Subject: [PATCH 1/3] fix: respect GitHub rate limiting

Previously, the client would repeatedly attempt requests until getting
actually rate limited by GitHub.

Signed-off-by: Will Murphy <will.murphy@anchore.com>
---
 src/vunnel/providers/github/parser.py      | 18 ++++++++-
 tests/unit/providers/github/test_github.py | 44 ++++++++++++++++++++++
 2 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/src/vunnel/providers/github/parser.py b/src/vunnel/providers/github/parser.py
index 636040a1..2fcca712 100644
--- a/src/vunnel/providers/github/parser.py
+++ b/src/vunnel/providers/github/parser.py
@@ -19,6 +19,7 @@
 import datetime
 import logging
 import os
+import time
 from decimal import Decimal, DecimalException
 
 import requests
@@ -42,6 +43,9 @@
     "SWIFT": "swift",
 }
 
+GITHUB_RATE_LIMIT_REMAINING_HEADER = "x-ratelimit-remaining"
+GITHUB_RATE_LIMIT_RESET_HEADER = "x-ratelimit-reset"
+
 
 class Parser:
     def __init__(  # noqa: PLR0913
@@ -207,9 +211,21 @@ def get_query(token, query, timeout=125, api_url="https://api.github.com/graphql
     logger = logging.getLogger("get-query")
 
     headers = {"Authorization": f"token {token}"}
-    logger.debug(f"downloading github advisories from {api_url}")
+    logger.info(f"downloading github advisories from {api_url}")
 
     response = requests.post(api_url, json={"query": query}, timeout=timeout, headers=headers)
+    if GITHUB_RATE_LIMIT_REMAINING_HEADER in response.headers and GITHUB_RATE_LIMIT_RESET_HEADER in response.headers:
+        remaining = int(response.headers[GITHUB_RATE_LIMIT_REMAINING_HEADER])
+        # reset time is the time in UNIX Epoch Seconds at which
+        # the rate limit will reset.
+        reset_time = int(response.headers[GITHUB_RATE_LIMIT_RESET_HEADER])
+        logger.debug(f"github rate limit has {remaining} requests left {reset_time}")
+        if remaining < 10:
+            current_time = int(time.time())
+            sleep_time = reset_time - current_time
+            # note that the rate limit resets 1x / hour, so this could be a long time
+            logger.info(f"sleeping for {sleep_time} seconds to allow GitHub rate limit to reset")
+            time.sleep(sleep_time)
     response.raise_for_status()
     if response.status_code == 200:
         return response.json()
diff --git a/tests/unit/providers/github/test_github.py b/tests/unit/providers/github/test_github.py
index a1b73749..488b6a03 100644
--- a/tests/unit/providers/github/test_github.py
+++ b/tests/unit/providers/github/test_github.py
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import pytest
+import time
+from unittest.mock import patch, Mock
 from vunnel import result, workspace
 from vunnel.providers.github import Config, Provider, parser
 from vunnel.utils import fdb as db
@@ -447,6 +449,48 @@ def test_provider_schema(helpers, fake_get_query, advisories):
     assert workspace.result_schemas_valid(require_entries=True)
 
 
+@patch("time.sleep")
+@patch("requests.post")
+def test_provider_respects_github_rate_limit(mock_post, mock_sleep):
+    response = Mock()
+    in_five_seconds = int(time.time())
+    response.headers = {"x-ratelimit-remaining": 9, "x-ratelimit-reset": in_five_seconds}
+    response.status_code = 200
+    mock_post.return_value = response
+
+    def mock_json():
+        return "{}"
+
+    def mock_raise_for_status():
+        pass
+
+    response.json = mock_json
+    response.raise_for_status = mock_raise_for_status
+    parser.get_query("some-token", "some-query")
+    mock_sleep.assert_called_once()
+
+
+@patch("time.sleep")
+@patch("requests.post")
+def test_provider_respects_github_rate_limit(mock_post, mock_sleep):
+    response = Mock()
+    in_five_seconds = int(time.time())
+    response.headers = {"x-ratelimit-remaining": 11, "x-ratelimit-reset": in_five_seconds}
+    response.status_code = 200
+    mock_post.return_value = response
+
+    def mock_json():
+        return "{}"
+
+    def mock_raise_for_status():
+        pass
+
+    response.json = mock_json
+    response.raise_for_status = mock_raise_for_status
+    parser.get_query("some-token", "some-query")
+    mock_sleep.assert_not_called()
+
+
 def test_provider_via_snapshot(helpers, fake_get_query, advisories):
     fake_get_query([advisories(), advisories(has_next_page=True)])
     workspace = helpers.provider_workspace_helper(name=Provider.name())

From c115823613ab672c67e8a9defab1115cd0b36f4e Mon Sep 17 00:00:00 2001
From: Will Murphy <will.murphy@anchore.com>
Date: Mon, 30 Oct 2023 14:03:59 -0400
Subject: [PATCH 2/3] fix unit test typo

Signed-off-by: Will Murphy <will.murphy@anchore.com>
---
 tests/unit/providers/github/test_github.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/unit/providers/github/test_github.py b/tests/unit/providers/github/test_github.py
index 488b6a03..d946e9f0 100644
--- a/tests/unit/providers/github/test_github.py
+++ b/tests/unit/providers/github/test_github.py
@@ -453,7 +453,7 @@ def test_provider_schema(helpers, fake_get_query, advisories):
 @patch("requests.post")
 def test_provider_respects_github_rate_limit(mock_post, mock_sleep):
     response = Mock()
-    in_five_seconds = int(time.time())
+    in_five_seconds = int(time.time()) + 5
     response.headers = {"x-ratelimit-remaining": 9, "x-ratelimit-reset": in_five_seconds}
     response.status_code = 200
     mock_post.return_value = response
@@ -474,7 +474,7 @@ def mock_raise_for_status():
 @patch("requests.post")
 def test_provider_respects_github_rate_limit(mock_post, mock_sleep):
     response = Mock()
-    in_five_seconds = int(time.time())
+    in_five_seconds = int(time.time()) + 5
     response.headers = {"x-ratelimit-remaining": 11, "x-ratelimit-reset": in_five_seconds}
     response.status_code = 200
     mock_post.return_value = response

From 11d11e22aa99b145c99c0d0eaa92761aaab716da Mon Sep 17 00:00:00 2001
From: Will Murphy <will.murphy@anchore.com>
Date: Mon, 30 Oct 2023 17:24:19 -0400
Subject: [PATCH 3/3] never sleep for more than 1 hour

Signed-off-by: Will Murphy <will.murphy@anchore.com>
---
 src/vunnel/providers/github/parser.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/vunnel/providers/github/parser.py b/src/vunnel/providers/github/parser.py
index 2fcca712..c2af7732 100644
--- a/src/vunnel/providers/github/parser.py
+++ b/src/vunnel/providers/github/parser.py
@@ -224,8 +224,13 @@ def get_query(token, query, timeout=125, api_url="https://api.github.com/graphql
             current_time = int(time.time())
             sleep_time = reset_time - current_time
             # note that the rate limit resets 1x / hour, so this could be a long time
-            logger.info(f"sleeping for {sleep_time} seconds to allow GitHub rate limit to reset")
-            time.sleep(sleep_time)
+            if sleep_time > 1 and sleep_time < 3600:  # never sleep for more than 1 hour
+                logger.info(f"sleeping for {sleep_time} seconds to allow GitHub rate limit to reset")
+                time.sleep(sleep_time)
+            elif sleep_time > 3600:
+                raise Exception(
+                    f"github rate limit exhaused and not expected to reset for {sleep_time} seconds. Try again later.",
+                )
     response.raise_for_status()
     if response.status_code == 200:
         return response.json()