From 4d01b3e259d6af3ae6dcd67eb2baf00d871d8d20 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Mon, 30 Oct 2023 09:48:07 -0400 Subject: [PATCH 01/16] log network requests in more places Every time the application uses requests.get, issue a log message at INFO level containing the requested URL. Signed-off-by: Will Murphy --- src/vunnel/providers/alpine/parser.py | 2 ++ src/vunnel/providers/amazon/parser.py | 2 +- src/vunnel/providers/debian/parser.py | 2 +- src/vunnel/providers/mariner/parser.py | 1 + src/vunnel/providers/nvd/api.py | 1 + src/vunnel/providers/rhel/parser.py | 2 +- tests/quality/configure.py | 8 +++----- 7 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/vunnel/providers/alpine/parser.py b/src/vunnel/providers/alpine/parser.py index f8ca51ad..ff6cd82a 100644 --- a/src/vunnel/providers/alpine/parser.py +++ b/src/vunnel/providers/alpine/parser.py @@ -141,6 +141,7 @@ def _download(self): # noqa: C901, PLR0912 @utils.retry_with_backoff() def _download_metadata_url(self) -> requests.Response: + self.logger.info(f"downloading metadata from {self.metadata_url}") r = requests.get(self.metadata_url, timeout=self.download_timeout) r.raise_for_status() return r @@ -148,6 +149,7 @@ def _download_metadata_url(self) -> requests.Response: @utils.retry_with_backoff() def _download_url(self, url) -> requests.Response: self._urls.add(url) + self.logger.info(f"downloading {url}") r = requests.get(url, stream=True, timeout=self.download_timeout) r.raise_for_status() return r diff --git a/src/vunnel/providers/amazon/parser.py b/src/vunnel/providers/amazon/parser.py index 69a0de0d..5dbe3d1b 100644 --- a/src/vunnel/providers/amazon/parser.py +++ b/src/vunnel/providers/amazon/parser.py @@ -100,7 +100,7 @@ def _get_alas_html(self, alas_url, alas_file, skip_if_exists=True): return content # noqa: RET504 try: - self.logger.debug(f"downloading ALAS from {alas_url}") + self.logger.info(f"downloading ALAS from {alas_url}") r = requests.get(alas_url, timeout=self.download_timeout) if r.status_code == 200: content = r.text diff --git a/src/vunnel/providers/debian/parser.py b/src/vunnel/providers/debian/parser.py index 5c218988..6be0f962 100644 --- a/src/vunnel/providers/debian/parser.py +++ b/src/vunnel/providers/debian/parser.py @@ -66,7 +66,7 @@ def _download_json(self): :return: """ try: - self.logger.info(f"downloading debian security tracker data from {self._dsa_url_}") + self.logger.info(f"downloading debian security tracker data from {self._json_url_}") r = requests.get(self._json_url_, timeout=self.download_timeout) if r.status_code != 200: diff --git a/src/vunnel/providers/mariner/parser.py b/src/vunnel/providers/mariner/parser.py index 5736ba42..15e2d70f 100644 --- a/src/vunnel/providers/mariner/parser.py +++ b/src/vunnel/providers/mariner/parser.py @@ -191,6 +191,7 @@ def _download(self) -> list[str]: def _download_version(self, version: str) -> str: filename = MARINER_URL_FILENAME.format(version) url = MARINER_URL_BASE.format(filename) + self.logger.info(f"downloading mariner file from {url}") r = requests.get(url, timeout=self.download_timeout) destination = os.path.join(self.workspace.input_path, filename) with open(destination, "wb") as writer: diff --git a/src/vunnel/providers/nvd/api.py b/src/vunnel/providers/nvd/api.py index 24e05872..d0e54804 100644 --- a/src/vunnel/providers/nvd/api.py +++ b/src/vunnel/providers/nvd/api.py @@ -152,6 +152,7 @@ def _request(self, url: str, parameters: dict[str, str], headers: dict[str, str] # (e.g. 
prevent pubStartDate=2002-01-01T00%3A00%3A00 , want pubStartDate=2002-01-01T00:00:00) payload_str = urllib.parse.urlencode(parameters, safe=":") + self.logger.info(f"downloading {url}") response = requests.get(url, params=payload_str, headers=headers, timeout=self.timeout) response.encoding = "utf-8" response.raise_for_status() diff --git a/src/vunnel/providers/rhel/parser.py b/src/vunnel/providers/rhel/parser.py index d88a1475..1a89077c 100644 --- a/src/vunnel/providers/rhel/parser.py +++ b/src/vunnel/providers/rhel/parser.py @@ -264,7 +264,7 @@ def _sync_cves(self, skip_if_exists=False, do_full_sync=True): # noqa @utils.retry_with_backoff() def _download_entity(self, url, destination): - self.logger.trace(f"downloading {url}") + self.logger.info(f"downloading {url}") r = requests.get(url, timeout=self.download_timeout) if r.status_code == 200: diff --git a/tests/quality/configure.py b/tests/quality/configure.py index 29280000..25118797 100644 --- a/tests/quality/configure.py +++ b/tests/quality/configure.py @@ -500,11 +500,9 @@ def _install_grype_db(input: str): repo_url = f"https://github.com/{repo_user_and_name}" if input == "latest": - version = ( - requests.get("https://github.com/anchore/grype-db/releases/latest", headers={"Accept": "application/json"}) - .json() - .get("tag_name", "") - ) + latest_url = "https://github.com/anchore/grype-db/releases/latest" + logging.info(f"fetching latest release from {latest_url}") + version = requests.get(latest_url, headers={"Accept": "application/json"}).json().get("tag_name", "") logging.info(f"latest released grype-db version is {version!r}") elif is_semver: From dced672ea593f33ef8d90a8d906d5b5b4c26e26b Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Tue, 31 Oct 2023 11:29:03 -0400 Subject: [PATCH 02/16] start migrating to central http provider Signed-off-by: Will Murphy --- src/vunnel/providers/amazon/parser.py | 13 +++----- src/vunnel/providers/mariner/parser.py | 9 ++---- src/vunnel/utils/http.py | 44 ++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 15 deletions(-) create mode 100644 src/vunnel/utils/http.py diff --git a/src/vunnel/providers/amazon/parser.py b/src/vunnel/providers/amazon/parser.py index bd6f4735..5dffa94a 100644 --- a/src/vunnel/providers/amazon/parser.py +++ b/src/vunnel/providers/amazon/parser.py @@ -10,7 +10,7 @@ import requests from vunnel import utils -from vunnel.utils import rpm +from vunnel.utils import rpm, http namespace = "amzn" @@ -48,17 +48,12 @@ def __init__(self, workspace, download_timeout=125, security_advisories=None, lo logger = logging.getLogger(self.__class__.__name__) self.logger = logger - @utils.retry_with_backoff() def _download_rss(self, rss_url, rss_file): try: - self.logger.info(f"downloading amazon security advisory from {rss_url}") self.urls.append(rss_url) - r = requests.get(rss_url, timeout=self.download_timeout) - if r.status_code == 200: - with open(rss_file, "w", encoding="utf-8") as fp: - fp.write(r.text) - else: - raise Exception(f"GET {rss_url} failed with HTTP error {r.status_code}") + r = http.get(rss_url, self.logger, timeout=self.download_timeout) + with open(rss_file, "w", encoding="utf-8") as fp: + fp.write(r.text) except Exception: self.logger.exception("error downloading amazon linux vulnerability feeds") raise diff --git a/src/vunnel/providers/mariner/parser.py b/src/vunnel/providers/mariner/parser.py index 15e2d70f..811e07f6 100644 --- a/src/vunnel/providers/mariner/parser.py +++ b/src/vunnel/providers/mariner/parser.py @@ -3,13 +3,12 @@ import os from typing 
import TYPE_CHECKING, Any -import requests from lxml import etree from xsdata.formats.dataclass.parsers import XmlParser from xsdata.formats.dataclass.parsers.config import ParserConfig from vunnel.providers.mariner.model import Definition, RpminfoObject, RpminfoState, RpminfoTest -from vunnel.utils import retry_with_backoff +from vunnel.utils.http import get from vunnel.utils.vulnerability import FixedIn, Vulnerability if TYPE_CHECKING: @@ -187,15 +186,13 @@ def __init__(self, workspace: Workspace, download_timeout: int, allow_versions: def _download(self) -> list[str]: return [self._download_version(v) for v in self.allow_versions] - @retry_with_backoff() def _download_version(self, version: str) -> str: filename = MARINER_URL_FILENAME.format(version) url = MARINER_URL_BASE.format(filename) - self.logger.info(f"downloading mariner file from {url}") - r = requests.get(url, timeout=self.download_timeout) + response = get(url, self.logger, timeout=self.download_timeout) destination = os.path.join(self.workspace.input_path, filename) with open(destination, "wb") as writer: - writer.write(r.content) + writer.write(response.content) self._urls.add(url) return destination diff --git a/src/vunnel/utils/http.py b/src/vunnel/utils/http.py new file mode 100644 index 00000000..f079a06b --- /dev/null +++ b/src/vunnel/utils/http.py @@ -0,0 +1,44 @@ +import logging +import time + +import requests + +DEFAULT_TIMEOUT = 30 + + +def get( + url, + logger: logging.Logger, + retries: int = 5, + backoff_in_seconds: int = 3, + timeout: int = DEFAULT_TIMEOUT, + **kwargs, +) -> requests.Response: + logger.debug(f"attempting get request on {url}") + last_exception = None + for attempt in range(retries): + if last_exception: + time.sleep(backoff_in_seconds) + try: + response = requests.get(url, timeout=timeout, **kwargs) + response.raise_for_status() + return response + except requests.exceptions.HTTPError as e: + last_exception = e + will_retry = "" + if attempt + 1 < retries: + will_retry = f" (will retry in {backoff_in_seconds} seconds) " + # HTTPError includes the attempted request, so don't include it redundantly here + logger.warning(f"attempt {attempt + 1} of {retries}{will_retry} failed: {e}") + except Exception as e: + last_exception = e + will_retry = "" + if attempt + 1 < retries: + will_retry = f" (will retry in {backoff_in_seconds} seconds) " + # this is an unexpected exception type, so include the attempted request in case the + # message from the unexpected exception doesn't. 
+ logger.warning(f"attempt {attempt + 1} of {retries}{will_retry}: unexpected exception during GET {url}: {e}") + if isinstance(last_exception, BaseException): + logger.error(f"last retry of GET {url} failed with {last_exception}") + raise last_exception + raise Exception("unreachable") From 724583ff2763619a1825a67846c9cd516aca5e20 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Tue, 31 Oct 2023 13:30:12 -0400 Subject: [PATCH 03/16] add sles provider Signed-off-by: Will Murphy --- src/vunnel/providers/sles/parser.py | 14 +++----------- src/vunnel/utils/http.py | 2 +- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/src/vunnel/providers/sles/parser.py b/src/vunnel/providers/sles/parser.py index 805a50f3..2f4a7402 100644 --- a/src/vunnel/providers/sles/parser.py +++ b/src/vunnel/providers/sles/parser.py @@ -26,6 +26,7 @@ iter_parse_vulnerability_file, ) from vunnel.utils.vulnerability import CVSS, CVSSBaseMetrics, FixedIn, Vulnerability +from vunnel.utils import http if TYPE_CHECKING: from vunnel.workspace import Workspace @@ -73,7 +74,6 @@ def __init__( # this is pretty odd, but there are classmethods that need logging Parser.logger = logger - @utils.retry_with_backoff() def _download(self, major_version: str) -> str: if not os.path.exists(self.oval_dir_path): self.logger.debug(f"creating workspace for OVAL source data at {self.oval_dir_path}") @@ -83,20 +83,12 @@ def _download(self, major_version: str) -> str: download_url = self.__oval_url__.format(major_version) self.urls.append(download_url) - self.logger.info( + self.logger.debug( "downloading OVAL file for SLES %s from %s", major_version, download_url, ) - r = requests.get(download_url, stream=True, timeout=self.download_timeout) - if r.status_code != 200: - self.logger.error( - "GET %s failed with HTTP %s. 
Unable to download OVAL file for SLES %s", - download_url, - r.status_code, - major_version, - ) - r.raise_for_status() + r = http.get(download_url, self.logger, stream=True, timeout=self.download_timeout) with open(oval_file_path, "wb") as fp: for chunk in r.iter_content(chunk_size=1024): diff --git a/src/vunnel/utils/http.py b/src/vunnel/utils/http.py index f079a06b..a20de93f 100644 --- a/src/vunnel/utils/http.py +++ b/src/vunnel/utils/http.py @@ -29,7 +29,7 @@ def get( if attempt + 1 < retries: will_retry = f" (will retry in {backoff_in_seconds} seconds) " # HTTPError includes the attempted request, so don't include it redundantly here - logger.warning(f"attempt {attempt + 1} of {retries}{will_retry} failed: {e}") + logger.warning(f"attempt {attempt + 1} of {retries} failed:{will_retry}{e}") except Exception as e: last_exception = e will_retry = "" From 32444e78ddf64499c6396a63b7b0eb292ef9ebc6 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Tue, 31 Oct 2023 13:37:04 -0400 Subject: [PATCH 04/16] fix mariner unit tests Signed-off-by: Will Murphy --- tests/unit/providers/mariner/test_mariner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/providers/mariner/test_mariner.py b/tests/unit/providers/mariner/test_mariner.py index 0917cbce..a3605367 100644 --- a/tests/unit/providers/mariner/test_mariner.py +++ b/tests/unit/providers/mariner/test_mariner.py @@ -5,7 +5,7 @@ import pytest from pytest_unordered import unordered -from vunnel import result, workspace +from vunnel import result, workspace, utils from vunnel.providers.mariner import Config, Provider, parser from vunnel.providers.mariner.parser import MarinerXmlFile from vunnel.utils.vulnerability import Vulnerability, FixedIn @@ -91,7 +91,7 @@ def disable_get_requests(monkeypatch): def disabled(*args, **kwargs): raise RuntimeError("requests disabled but HTTP GET attempted") - monkeypatch.setattr(parser.requests, "get", disabled) + monkeypatch.setattr(utils.http, "get", disabled) def test_provider_schema(helpers, disable_get_requests, monkeypatch): From bac4ad1cbf50ed38be8c36c3d1ef61ee128c6f08 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 07:01:38 -0400 Subject: [PATCH 05/16] unit tests for new http utility Signed-off-by: Will Murphy --- src/vunnel/utils/http.py | 2 +- tests/unit/utils/test_http.py | 64 +++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 tests/unit/utils/test_http.py diff --git a/src/vunnel/utils/http.py b/src/vunnel/utils/http.py index a20de93f..30ea3800 100644 --- a/src/vunnel/utils/http.py +++ b/src/vunnel/utils/http.py @@ -16,7 +16,7 @@ def get( ) -> requests.Response: logger.debug(f"attempting get request on {url}") last_exception = None - for attempt in range(retries): + for attempt in range(retries + 1): if last_exception: time.sleep(backoff_in_seconds) try: diff --git a/tests/unit/utils/test_http.py b/tests/unit/utils/test_http.py new file mode 100644 index 00000000..17ecc19a --- /dev/null +++ b/tests/unit/utils/test_http.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import logging +import pytest +import requests +from unittest.mock import patch, MagicMock +from vunnel.utils import http + + +class TestGetRequests: + @pytest.fixture() + def mock_logger(self): + logger = logging.getLogger("test-http-utils") + return MagicMock(logger, autospec=True) + + @pytest.fixture() + def error_response(self): + mock_response = MagicMock() + mock_response.raise_for_status = MagicMock() + 
mock_response.raise_for_status.side_effect = requests.HTTPError("HTTP ERROR") + return mock_response + + @pytest.fixture() + def success_response(self): + response = MagicMock() + response.raise_for_status = MagicMock() + response.raise_for_status.side_effect = None + response.status_code = 200 + return response + + @patch("time.sleep") + @patch("requests.get") + def test_raises_when_out_of_retries(self, mock_requests, mock_sleep, mock_logger, error_response): + mock_requests.side_effect = [Exception("could not attempt request"), error_response, error_response] + with pytest.raises(requests.HTTPError): + http.get("http://example.com/some-path", mock_logger, retries=2, backoff_in_seconds=3) + mock_sleep.assert_called_with(3) + mock_sleep.assert_called_with(3) + mock_logger.error.assert_called() + + @patch("time.sleep") + @patch("requests.get") + def test_succeeds_if_retries_succeed(self, mock_requests, mock_sleep, mock_logger, error_response, success_response): + mock_requests.side_effect = [error_response, success_response] + http.get("http://example.com/some-path", mock_logger, retries=1, backoff_in_seconds=22) + mock_sleep.assert_called_with(22) + mock_logger.warning.assert_called() + mock_logger.error.assert_not_called() + mock_requests.assert_called_with("http://example.com/some-path", timeout=http.DEFAULT_TIMEOUT) + + @patch("requests.get") + def test_timeout_is_passed_in(self, mock_requests, mock_logger): + http.get("http://example.com/some-path", mock_logger, timeout=12345) + mock_requests.assert_called_with("http://example.com/some-path", timeout=12345) + + @patch("time.sleep") + @patch("requests.get") + def test_it_logs_the_url_on_failure(self, mock_requests, mock_sleep, mock_logger, error_response): + mock_requests.side_effect = [error_response, error_response, error_response] + url = "http://example.com/some-path" + with pytest.raises(requests.HTTPError): + http.get(url, mock_logger, retries=2) + + assert url in mock_logger.error.call_args.args[0] From 8c5c329673bd6cf446456ff17146df357da36b42 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 07:11:29 -0400 Subject: [PATCH 06/16] test sleep amount, logging Signed-off-by: Will Murphy --- tests/unit/utils/test_http.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/unit/utils/test_http.py b/tests/unit/utils/test_http.py index 17ecc19a..6e5291e5 100644 --- a/tests/unit/utils/test_http.py +++ b/tests/unit/utils/test_http.py @@ -3,7 +3,7 @@ import logging import pytest import requests -from unittest.mock import patch, MagicMock +from unittest.mock import patch, MagicMock, call from vunnel.utils import http @@ -34,8 +34,6 @@ def test_raises_when_out_of_retries(self, mock_requests, mock_sleep, mock_logger mock_requests.side_effect = [Exception("could not attempt request"), error_response, error_response] with pytest.raises(requests.HTTPError): http.get("http://example.com/some-path", mock_logger, retries=2, backoff_in_seconds=3) - mock_sleep.assert_called_with(3) - mock_sleep.assert_called_with(3) mock_logger.error.assert_called() @patch("time.sleep") @@ -53,6 +51,13 @@ def test_timeout_is_passed_in(self, mock_requests, mock_logger): http.get("http://example.com/some-path", mock_logger, timeout=12345) mock_requests.assert_called_with("http://example.com/some-path", timeout=12345) + @patch("time.sleep") + @patch("requests.get") + def test_sleeps_right_amount_between_retries(self, mock_requests, mock_sleep, mock_logger, error_response, success_response): + 
mock_requests.side_effect = [error_response, error_response, error_response, success_response] + http.get("http://example.com/some-path", mock_logger, backoff_in_seconds=123, retries=3) + assert mock_sleep.call_args_list == [call(123), call(123), call(123)] + @patch("time.sleep") @patch("requests.get") def test_it_logs_the_url_on_failure(self, mock_requests, mock_sleep, mock_logger, error_response): @@ -62,3 +67,10 @@ def test_it_logs_the_url_on_failure(self, mock_requests, mock_sleep, mock_logger http.get(url, mock_logger, retries=2) assert url in mock_logger.error.call_args.args[0] + + @patch("time.sleep") + @patch("requests.get") + def test_it_log_warns_errors(self, mock_requests, mock_sleep, mock_logger, error_response, success_response): + mock_requests.side_effect = [error_response, success_response] + http.get("http://example.com/some-path", mock_logger, retries=1) + assert "HTTP ERROR" in mock_logger.warning.call_args.args[0] From 27c8db6ad0c26a466e6216fbf9b6dc3224abf454 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 07:36:48 -0400 Subject: [PATCH 07/16] annotate helpr with more types Signed-off-by: Will Murphy --- src/vunnel/utils/http.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/vunnel/utils/http.py b/src/vunnel/utils/http.py index 30ea3800..b6f839ed 100644 --- a/src/vunnel/utils/http.py +++ b/src/vunnel/utils/http.py @@ -1,5 +1,6 @@ import logging import time +from typing import Any import requests @@ -7,15 +8,15 @@ def get( - url, + url: str, logger: logging.Logger, retries: int = 5, backoff_in_seconds: int = 3, timeout: int = DEFAULT_TIMEOUT, - **kwargs, + **kwargs: Any, ) -> requests.Response: logger.debug(f"attempting get request on {url}") - last_exception = None + last_exception: Exception | None = None for attempt in range(retries + 1): if last_exception: time.sleep(backoff_in_seconds) From 6364bb1133564871e71e1560c3936eb8f179bb03 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 07:39:46 -0400 Subject: [PATCH 08/16] organize some imports Signed-off-by: Will Murphy --- src/vunnel/providers/amazon/parser.py | 2 +- src/vunnel/providers/sles/parser.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/vunnel/providers/amazon/parser.py b/src/vunnel/providers/amazon/parser.py index 5dffa94a..1639dada 100644 --- a/src/vunnel/providers/amazon/parser.py +++ b/src/vunnel/providers/amazon/parser.py @@ -10,7 +10,7 @@ import requests from vunnel import utils -from vunnel.utils import rpm, http +from vunnel.utils import http, rpm namespace = "amzn" diff --git a/src/vunnel/providers/sles/parser.py b/src/vunnel/providers/sles/parser.py index 2f4a7402..878f1d50 100644 --- a/src/vunnel/providers/sles/parser.py +++ b/src/vunnel/providers/sles/parser.py @@ -8,11 +8,10 @@ from decimal import Decimal, DecimalException from typing import TYPE_CHECKING -import requests from cvss import CVSS3 from cvss.exceptions import CVSS3MalformedError -from vunnel import utils +from vunnel.utils import http from vunnel.utils.oval_v2 import ( ArtifactParser, Impact, @@ -26,7 +25,6 @@ iter_parse_vulnerability_file, ) from vunnel.utils.vulnerability import CVSS, CVSSBaseMetrics, FixedIn, Vulnerability -from vunnel.utils import http if TYPE_CHECKING: from vunnel.workspace import Workspace From ec2aeabe97a904b3de0f2f6edc83cb0c173b29f7 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 10:32:55 -0400 Subject: [PATCH 09/16] dont change every provider at once Signed-off-by: Will Murphy --- 
src/vunnel/providers/alpine/parser.py | 2 -- src/vunnel/providers/amazon/parser.py | 15 ++++++++++----- src/vunnel/providers/debian/parser.py | 2 +- src/vunnel/providers/mariner/parser.py | 8 +++++--- src/vunnel/providers/nvd/api.py | 1 - src/vunnel/providers/rhel/parser.py | 2 +- src/vunnel/providers/sles/parser.py | 16 +++++++++++++--- 7 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/vunnel/providers/alpine/parser.py b/src/vunnel/providers/alpine/parser.py index ff6cd82a..f8ca51ad 100644 --- a/src/vunnel/providers/alpine/parser.py +++ b/src/vunnel/providers/alpine/parser.py @@ -141,7 +141,6 @@ def _download(self): # noqa: C901, PLR0912 @utils.retry_with_backoff() def _download_metadata_url(self) -> requests.Response: - self.logger.info(f"downloading metadata from {self.metadata_url}") r = requests.get(self.metadata_url, timeout=self.download_timeout) r.raise_for_status() return r @@ -149,7 +148,6 @@ def _download_metadata_url(self) -> requests.Response: @utils.retry_with_backoff() def _download_url(self, url) -> requests.Response: self._urls.add(url) - self.logger.info(f"downloading {url}") r = requests.get(url, stream=True, timeout=self.download_timeout) r.raise_for_status() return r diff --git a/src/vunnel/providers/amazon/parser.py b/src/vunnel/providers/amazon/parser.py index 1639dada..fbbcd9b5 100644 --- a/src/vunnel/providers/amazon/parser.py +++ b/src/vunnel/providers/amazon/parser.py @@ -10,7 +10,7 @@ import requests from vunnel import utils -from vunnel.utils import http, rpm +from vunnel.utils import rpm namespace = "amzn" @@ -48,12 +48,17 @@ def __init__(self, workspace, download_timeout=125, security_advisories=None, lo logger = logging.getLogger(self.__class__.__name__) self.logger = logger + @utils.retry_with_backoff() def _download_rss(self, rss_url, rss_file): try: + self.logger.info(f"downloading amazon security advisory from {rss_url}") self.urls.append(rss_url) - r = http.get(rss_url, self.logger, timeout=self.download_timeout) - with open(rss_file, "w", encoding="utf-8") as fp: - fp.write(r.text) + r = requests.get(rss_url, timeout=self.download_timeout) + if r.status_code == 200: + with open(rss_file, "w", encoding="utf-8") as fp: + fp.write(r.text) + else: + raise Exception(f"GET {rss_url} failed with HTTP error {r.status_code}") except Exception: self.logger.exception("error downloading amazon linux vulnerability feeds") raise @@ -95,7 +100,7 @@ def _get_alas_html(self, alas_url, alas_file, skip_if_exists=True): return content # noqa: RET504 try: - self.logger.info(f"downloading ALAS from {alas_url}") + self.logger.debug(f"downloading ALAS from {alas_url}") r = requests.get(alas_url, timeout=self.download_timeout) if r.status_code == 200: content = r.text diff --git a/src/vunnel/providers/debian/parser.py b/src/vunnel/providers/debian/parser.py index 0a1ea61f..b9ea0809 100644 --- a/src/vunnel/providers/debian/parser.py +++ b/src/vunnel/providers/debian/parser.py @@ -66,7 +66,7 @@ def _download_json(self): :return: """ try: - self.logger.info(f"downloading debian security tracker data from {self._json_url_}") + self.logger.info(f"downloading debian security tracker data from {self._dsa_url_}") r = requests.get(self._json_url_, timeout=self.download_timeout) if r.status_code != 200: diff --git a/src/vunnel/providers/mariner/parser.py b/src/vunnel/providers/mariner/parser.py index 811e07f6..5736ba42 100644 --- a/src/vunnel/providers/mariner/parser.py +++ b/src/vunnel/providers/mariner/parser.py @@ -3,12 +3,13 @@ import os from typing import 
TYPE_CHECKING, Any +import requests from lxml import etree from xsdata.formats.dataclass.parsers import XmlParser from xsdata.formats.dataclass.parsers.config import ParserConfig from vunnel.providers.mariner.model import Definition, RpminfoObject, RpminfoState, RpminfoTest -from vunnel.utils.http import get +from vunnel.utils import retry_with_backoff from vunnel.utils.vulnerability import FixedIn, Vulnerability if TYPE_CHECKING: @@ -186,13 +187,14 @@ def __init__(self, workspace: Workspace, download_timeout: int, allow_versions: def _download(self) -> list[str]: return [self._download_version(v) for v in self.allow_versions] + @retry_with_backoff() def _download_version(self, version: str) -> str: filename = MARINER_URL_FILENAME.format(version) url = MARINER_URL_BASE.format(filename) - response = get(url, self.logger, timeout=self.download_timeout) + r = requests.get(url, timeout=self.download_timeout) destination = os.path.join(self.workspace.input_path, filename) with open(destination, "wb") as writer: - writer.write(response.content) + writer.write(r.content) self._urls.add(url) return destination diff --git a/src/vunnel/providers/nvd/api.py b/src/vunnel/providers/nvd/api.py index d0e54804..24e05872 100644 --- a/src/vunnel/providers/nvd/api.py +++ b/src/vunnel/providers/nvd/api.py @@ -152,7 +152,6 @@ def _request(self, url: str, parameters: dict[str, str], headers: dict[str, str] # (e.g. prevent pubStartDate=2002-01-01T00%3A00%3A00 , want pubStartDate=2002-01-01T00:00:00) payload_str = urllib.parse.urlencode(parameters, safe=":") - self.logger.info(f"downloading {url}") response = requests.get(url, params=payload_str, headers=headers, timeout=self.timeout) response.encoding = "utf-8" response.raise_for_status() diff --git a/src/vunnel/providers/rhel/parser.py b/src/vunnel/providers/rhel/parser.py index 725dfed7..d33d5278 100644 --- a/src/vunnel/providers/rhel/parser.py +++ b/src/vunnel/providers/rhel/parser.py @@ -264,7 +264,7 @@ def _sync_cves(self, skip_if_exists=False, do_full_sync=True): # noqa: PLR0915, @utils.retry_with_backoff() def _download_entity(self, url, destination): - self.logger.info(f"downloading {url}") + self.logger.trace(f"downloading {url}") r = requests.get(url, timeout=self.download_timeout) if r.status_code == 200: diff --git a/src/vunnel/providers/sles/parser.py b/src/vunnel/providers/sles/parser.py index 878f1d50..805a50f3 100644 --- a/src/vunnel/providers/sles/parser.py +++ b/src/vunnel/providers/sles/parser.py @@ -8,10 +8,11 @@ from decimal import Decimal, DecimalException from typing import TYPE_CHECKING +import requests from cvss import CVSS3 from cvss.exceptions import CVSS3MalformedError -from vunnel.utils import http +from vunnel import utils from vunnel.utils.oval_v2 import ( ArtifactParser, Impact, @@ -72,6 +73,7 @@ def __init__( # this is pretty odd, but there are classmethods that need logging Parser.logger = logger + @utils.retry_with_backoff() def _download(self, major_version: str) -> str: if not os.path.exists(self.oval_dir_path): self.logger.debug(f"creating workspace for OVAL source data at {self.oval_dir_path}") @@ -81,12 +83,20 @@ def _download(self, major_version: str) -> str: download_url = self.__oval_url__.format(major_version) self.urls.append(download_url) - self.logger.debug( + self.logger.info( "downloading OVAL file for SLES %s from %s", major_version, download_url, ) - r = http.get(download_url, self.logger, stream=True, timeout=self.download_timeout) + r = requests.get(download_url, stream=True, timeout=self.download_timeout) + 
if r.status_code != 200: + self.logger.error( + "GET %s failed with HTTP %s. Unable to download OVAL file for SLES %s", + download_url, + r.status_code, + major_version, + ) + r.raise_for_status() with open(oval_file_path, "wb") as fp: for chunk in r.iter_content(chunk_size=1024): From 7af4ee78bbcecb1d834cd88a1c0523cf30bf56a2 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 10:34:45 -0400 Subject: [PATCH 10/16] update mariner parser to use new http helper Signed-off-by: Will Murphy --- src/vunnel/providers/mariner/parser.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/vunnel/providers/mariner/parser.py b/src/vunnel/providers/mariner/parser.py index 5736ba42..678f3432 100644 --- a/src/vunnel/providers/mariner/parser.py +++ b/src/vunnel/providers/mariner/parser.py @@ -3,13 +3,12 @@ import os from typing import TYPE_CHECKING, Any -import requests from lxml import etree from xsdata.formats.dataclass.parsers import XmlParser from xsdata.formats.dataclass.parsers.config import ParserConfig from vunnel.providers.mariner.model import Definition, RpminfoObject, RpminfoState, RpminfoTest -from vunnel.utils import retry_with_backoff +from vunnel.utils import http from vunnel.utils.vulnerability import FixedIn, Vulnerability if TYPE_CHECKING: @@ -187,11 +186,10 @@ def __init__(self, workspace: Workspace, download_timeout: int, allow_versions: def _download(self) -> list[str]: return [self._download_version(v) for v in self.allow_versions] - @retry_with_backoff() def _download_version(self, version: str) -> str: filename = MARINER_URL_FILENAME.format(version) url = MARINER_URL_BASE.format(filename) - r = requests.get(url, timeout=self.download_timeout) + r = http.get(url, self.logger, timeout=self.download_timeout) destination = os.path.join(self.workspace.input_path, filename) with open(destination, "wb") as writer: writer.write(r.content) From 6494605b9f865a55a1f07cdff761f444a9219661 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 10:37:49 -0400 Subject: [PATCH 11/16] dont change configure yet Signed-off-by: Will Murphy --- tests/quality/configure.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/quality/configure.py b/tests/quality/configure.py index 1782991c..2aa7b577 100644 --- a/tests/quality/configure.py +++ b/tests/quality/configure.py @@ -499,9 +499,11 @@ def _install_grype_db(input: str): repo_url = f"https://github.com/{repo_user_and_name}" if input == "latest": - latest_url = "https://github.com/anchore/grype-db/releases/latest" - logging.info(f"fetching latest release from {latest_url}") - version = requests.get(latest_url, headers={"Accept": "application/json"}).json().get("tag_name", "") + version = ( + requests.get("https://github.com/anchore/grype-db/releases/latest", headers={"Accept": "application/json"}) + .json() + .get("tag_name", "") + ) logging.info(f"latest released grype-db version is {version!r}") elif is_semver: From 5397bfb7a2458a9fc5fd9f455b18d548586719de Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 13:14:36 -0400 Subject: [PATCH 12/16] use central disable get requests fixture Signed-off-by: Will Murphy --- src/vunnel/utils/http.py | 4 ++-- tests/conftest.py | 10 ++++++++++ tests/unit/providers/mariner/test_mariner.py | 8 -------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/vunnel/utils/http.py b/src/vunnel/utils/http.py index b6f839ed..5ee6a746 100644 --- a/src/vunnel/utils/http.py +++ b/src/vunnel/utils/http.py @@ -15,7 +15,7 @@ def 
get( timeout: int = DEFAULT_TIMEOUT, **kwargs: Any, ) -> requests.Response: - logger.debug(f"attempting get request on {url}") + logger.debug(f"http GET {url}") last_exception: Exception | None = None for attempt in range(retries + 1): if last_exception: @@ -39,7 +39,7 @@ def get( # this is an unexpected exception type, so include the attempted request in case the # message from the unexpected exception doesn't. logger.warning(f"attempt {attempt + 1} of {retries}{will_retry}: unexpected exception during GET {url}: {e}") - if isinstance(last_exception, BaseException): + if last_exception: logger.error(f"last retry of GET {url} failed with {last_exception}") raise last_exception raise Exception("unreachable") diff --git a/tests/conftest.py b/tests/conftest.py index 2e934cd3..da0b4da7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -207,3 +207,13 @@ def apply(d: str, name: str = ""): return path return apply + + +@pytest.fixture() +def disable_get_requests(monkeypatch): + def disabled(*args, **kwargs): + raise RuntimeError("disabled!") + + from vunnel import utils + + return monkeypatch.setattr(utils.http, "get", disabled) diff --git a/tests/unit/providers/mariner/test_mariner.py b/tests/unit/providers/mariner/test_mariner.py index a3605367..1fd03045 100644 --- a/tests/unit/providers/mariner/test_mariner.py +++ b/tests/unit/providers/mariner/test_mariner.py @@ -86,14 +86,6 @@ def test_parse(tmpdir, helpers, input_file, expected): assert vulnerabilities == expected -@pytest.fixture() -def disable_get_requests(monkeypatch): - def disabled(*args, **kwargs): - raise RuntimeError("requests disabled but HTTP GET attempted") - - monkeypatch.setattr(utils.http, "get", disabled) - - def test_provider_schema(helpers, disable_get_requests, monkeypatch): workspace = helpers.provider_workspace_helper(name=Provider.name()) From 422da3578f9d1cf9c78ff93a340e125a88b1ef2a Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 13:42:57 -0400 Subject: [PATCH 13/16] fix when retry message is logged Signed-off-by: Will Murphy --- src/vunnel/utils/http.py | 4 ++-- tests/unit/utils/test_http.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/vunnel/utils/http.py b/src/vunnel/utils/http.py index 5ee6a746..1c1ccc4b 100644 --- a/src/vunnel/utils/http.py +++ b/src/vunnel/utils/http.py @@ -27,14 +27,14 @@ def get( except requests.exceptions.HTTPError as e: last_exception = e will_retry = "" - if attempt + 1 < retries: + if attempt < retries: will_retry = f" (will retry in {backoff_in_seconds} seconds) " # HTTPError includes the attempted request, so don't include it redundantly here logger.warning(f"attempt {attempt + 1} of {retries} failed:{will_retry}{e}") except Exception as e: last_exception = e will_retry = "" - if attempt + 1 < retries: + if attempt < retries: will_retry = f" (will retry in {backoff_in_seconds} seconds) " # this is an unexpected exception type, so include the attempted request in case the # message from the unexpected exception doesn't. 
diff --git a/tests/unit/utils/test_http.py b/tests/unit/utils/test_http.py index 6e5291e5..75105d64 100644 --- a/tests/unit/utils/test_http.py +++ b/tests/unit/utils/test_http.py @@ -72,5 +72,6 @@ def test_it_logs_the_url_on_failure(self, mock_requests, mock_sleep, mock_logger @patch("requests.get") def test_it_log_warns_errors(self, mock_requests, mock_sleep, mock_logger, error_response, success_response): mock_requests.side_effect = [error_response, success_response] - http.get("http://example.com/some-path", mock_logger, retries=1) + http.get("http://example.com/some-path", mock_logger, retries=1, backoff_in_seconds=33) assert "HTTP ERROR" in mock_logger.warning.call_args.args[0] + assert "will retry in 33 seconds" in mock_logger.warning.call_args.args[0] From 60e3ac7d78769325f091034af85173ae90a8db01 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 13:55:45 -0400 Subject: [PATCH 14/16] better error for disabled Signed-off-by: Will Murphy --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index da0b4da7..cde2bf51 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -212,7 +212,7 @@ def apply(d: str, name: str = ""): @pytest.fixture() def disable_get_requests(monkeypatch): def disabled(*args, **kwargs): - raise RuntimeError("disabled!") + raise RuntimeError("requests disabled but HTTP GET attempted") from vunnel import utils From f0e56e2515c538f6ceae79d0c76531624a745bc0 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 11:05:44 -0400 Subject: [PATCH 15/16] use new http wrapper in amazon provider Signed-off-by: Will Murphy --- src/vunnel/providers/amazon/parser.py | 29 +++++++--------------- tests/unit/providers/amazon/test_amazon.py | 3 ++- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/vunnel/providers/amazon/parser.py b/src/vunnel/providers/amazon/parser.py index fbbcd9b5..95399fb5 100644 --- a/src/vunnel/providers/amazon/parser.py +++ b/src/vunnel/providers/amazon/parser.py @@ -7,10 +7,8 @@ from html.parser import HTMLParser import defusedxml.ElementTree as ET -import requests -from vunnel import utils -from vunnel.utils import rpm +from vunnel.utils import http, rpm namespace = "amzn" @@ -48,17 +46,13 @@ def __init__(self, workspace, download_timeout=125, security_advisories=None, lo logger = logging.getLogger(self.__class__.__name__) self.logger = logger - @utils.retry_with_backoff() def _download_rss(self, rss_url, rss_file): try: self.logger.info(f"downloading amazon security advisory from {rss_url}") self.urls.append(rss_url) - r = requests.get(rss_url, timeout=self.download_timeout) - if r.status_code == 200: - with open(rss_file, "w", encoding="utf-8") as fp: - fp.write(r.text) - else: - raise Exception(f"GET {rss_url} failed with HTTP error {r.status_code}") + r = http.get(rss_url, self.logger, timeout=self.download_timeout) + with open(rss_file, "w", encoding="utf-8") as fp: + fp.write(r.text) except Exception: self.logger.exception("error downloading amazon linux vulnerability feeds") raise @@ -91,23 +85,18 @@ def _parse_rss(self, file_path): return sorted(alas_summaries) - @utils.retry_with_backoff() def _get_alas_html(self, alas_url, alas_file, skip_if_exists=True): if skip_if_exists and os.path.exists(alas_file): # read alas from disk if its available self.logger.debug(f"loading existing ALAS from {alas_file}") with open(alas_file, encoding="utf-8") as fp: content = fp.read() return content # noqa: RET504 - try: - 
self.logger.debug(f"downloading ALAS from {alas_url}") - r = requests.get(alas_url, timeout=self.download_timeout) - if r.status_code == 200: - content = r.text - with open(alas_file, "w", encoding="utf-8") as fp: - fp.write(content) - return content - raise Exception(f"GET {alas_url} failed with HTTP error {r.status_code}") + r = http.get(alas_url, self.logger, timeout=self.download_timeout) + content = r.text + with open(alas_file, "w", encoding="utf-8") as fp: + fp.write(content) + return content except Exception: self.logger.exception(f"error downloading data from {alas_url}") raise diff --git a/tests/unit/providers/amazon/test_amazon.py b/tests/unit/providers/amazon/test_amazon.py index 4437df6d..f0ed1c59 100644 --- a/tests/unit/providers/amazon/test_amazon.py +++ b/tests/unit/providers/amazon/test_amazon.py @@ -4,6 +4,7 @@ import pytest from vunnel import result, workspace +from vunnel.utils import http from vunnel.providers.amazon import Config, Provider, parser @@ -81,7 +82,7 @@ def disable_get_requests(monkeypatch): def disabled(*args, **kwargs): raise RuntimeError("requests disabled but HTTP GET attempted") - monkeypatch.setattr(parser.requests, "get", disabled) + monkeypatch.setattr(http, "get", disabled) def test_provider_schema(helpers, disable_get_requests, monkeypatch): From 1cf74896e9046b16ab363e50522b6cbf6dd1dec1 Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Wed, 1 Nov 2023 14:27:29 -0400 Subject: [PATCH 16/16] use disable_get_request from conftest Signed-off-by: Will Murphy --- tests/unit/providers/amazon/test_amazon.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/unit/providers/amazon/test_amazon.py b/tests/unit/providers/amazon/test_amazon.py index f0ed1c59..a9ffd3d9 100644 --- a/tests/unit/providers/amazon/test_amazon.py +++ b/tests/unit/providers/amazon/test_amazon.py @@ -77,14 +77,6 @@ def test_get_pkg_name_version(self): assert a == b -@pytest.fixture() -def disable_get_requests(monkeypatch): - def disabled(*args, **kwargs): - raise RuntimeError("requests disabled but HTTP GET attempted") - - monkeypatch.setattr(http, "get", disabled) - - def test_provider_schema(helpers, disable_get_requests, monkeypatch): workspace = helpers.provider_workspace_helper(name=Provider.name())