From 5db2443f73c1419ebd21d964ebce56c53a489e3e Mon Sep 17 00:00:00 2001 From: ricolin Date: Tue, 5 Nov 2024 11:47:45 +0800 Subject: [PATCH] Make openstack retry more configurable --- staffeln/common/openstack.py | 52 +++++++++++----------- staffeln/conf/conductor.py | 30 ++++++++++++- staffeln/tests/common/test_openstacksdk.py | 22 ++++++++- 3 files changed, 76 insertions(+), 28 deletions(-) diff --git a/staffeln/common/openstack.py b/staffeln/common/openstack.py index b757fe9..96c8495 100644 --- a/staffeln/common/openstack.py +++ b/staffeln/common/openstack.py @@ -22,11 +22,13 @@ class RetryHTTPError(tenacity.retry_if_exception): def __init__(self): def is_http_error(exception): - # Make sure we don't retry on 404, as not found could be an - # expected status. + # Make sure we don't retry on codes in skip list (default: 404), + # as not found could be an expected status. + skip_codes = CONF.openstack.skip_retry_codes.replace( + ' ', '').split(',') result = ( isinstance(exception, exceptions.HttpException - ) and exception.status_code != 404 + ) and str(exception.status_code) not in skip_codes ) if result: LOG.debug( @@ -60,9 +62,9 @@ def set_project(self, project): # user @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_user_id(self): user_name = self.conn.config.auth["username"] @@ -82,9 +84,9 @@ def get_user_id(self): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_role_assignments(self, project_id, user_id=None): filters = {"project": project_id} @@ -94,18 +96,18 @@ def get_role_assignments(self, project_id, user_id=None): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_user(self, user_id): return self.conn.get_user(name_or_id=user_id) @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_project_member_emails(self, project_id): members = self.get_role_assignments(project_id) @@ -125,18 +127,18 @@ def get_project_member_emails(self, project_id): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_projects(self): return self.conn.list_projects() @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_servers(self, project_id=None, all_projects=True, details=True): if project_id is not None: @@ -152,18 +154,18 @@ def get_servers(self, project_id=None, all_projects=True, details=True): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_volume(self, uuid, project_id): return self.conn.get_volume_by_id(uuid) @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_backup(self, uuid, project_id=None): try: @@ -190,9 +192,9 @@ def create_backup( @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def delete_backup(self, uuid, project_id=None, force=False): # Note(Alex): v3 is not supporting force delete? @@ -209,9 +211,9 @@ def delete_backup(self, uuid, project_id=None, force=False): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_backup_quota(self, project_id): # quota = conn.get_volume_quotas(project_id) @@ -220,9 +222,9 @@ def get_backup_quota(self, project_id): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout), + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout), ) def get_backup_gigabytes_quota(self, project_id): # quota = conn.get_volume_quotas(project_id) diff --git a/staffeln/conf/conductor.py b/staffeln/conf/conductor.py index 46f41d3..4cdbfc7 100755 --- a/staffeln/conf/conductor.py +++ b/staffeln/conf/conductor.py @@ -13,6 +13,14 @@ "to define Conductor's configuration." ), ) +openstack_group = cfg.OptGroup( + "openstack", + title="OpenStack Options", + help=_( + "Options under this group are used " + "to define OpneStack related configuration." + ), +) backup_opts = [ cfg.IntOpt( @@ -77,11 +85,29 @@ min=0, help=_("Number of incremental backups between full backups."), ), +] + +openstack_opts = [ cfg.IntOpt( "retry_timeout", default=300, min=1, - help=_("The timeout for retry, the unit is one second."), + help=_("The timeout for retry OpenStackSDK HTTP exceptions, " + "the unit is one second."), + ), + cfg.IntOpt( + "max_retry_interval", + default=30, + min=0, + help=_("Max time interval for retry OpenStackSDK HTTP exceptions, " + "the unit is one second."), + ), + cfg.StrOpt( + "skip_retry_codes", + default="404,", + help=_("A comma separated string that provides a list of HTTP codes " + "to skip retry on for OpenStackSDK HTTP " + "exception. Default only `404` is skipped.") ), ] @@ -150,6 +176,7 @@ def register_opts(conf): conf.register_group(conductor_group) conf.register_opts(backup_opts, group=conductor_group) conf.register_opts(rotation_opts, group=conductor_group) + conf.register_opts(openstack_opts, group=openstack_group) conf.register_opts(coordination_opts, group=coordination_group) @@ -157,5 +184,6 @@ def list_opts(): return { "DEFAULT": rotation_opts, conductor_group: backup_opts, + openstack_group: openstack_opts, coordination_group: coordination_opts, } diff --git a/staffeln/tests/common/test_openstacksdk.py b/staffeln/tests/common/test_openstacksdk.py index d4d8827..5a0ac2d 100644 --- a/staffeln/tests/common/test_openstacksdk.py +++ b/staffeln/tests/common/test_openstacksdk.py @@ -7,6 +7,7 @@ import tenacity from staffeln.common import openstack as s_openstack +from staffeln import conf from staffeln.tests import base @@ -46,7 +47,8 @@ def setUp(self): self.fake_role_assignment2 = mock.MagicMock(user={"id": "bar"}) def _test_http_error( - self, m_func, retry_func, status_code, call_count=1, **kwargs + self, m_func, retry_func, status_code, call_count=1, + **kwargs ): m_func.side_effect = openstack_exc.HttpException( http_status=status_code @@ -57,7 +59,9 @@ def _test_http_error( **kwargs, ) self.assertEqual(status_code, exc.status_code) - if status_code != 404: + skip_retry_codes = conf.CONF.openstack.skip_retry_codes.replace( + ' ', '').split(',') + if str(status_code) not in skip_retry_codes: if call_count == 1: self.m_sleep.assert_called_once_with(1.0) else: @@ -84,6 +88,20 @@ def test_get_servers(self): def test_get_servers_non_http_error(self): self._test_non_http_error(self.m_c.compute.servers, "get_servers") + def test_get_servers_conf_skip_http_error(self): + conf.CONF.set_override('skip_retry_codes', '403,', 'openstack') + self._test_http_error( + self.m_c.compute.servers, "get_servers", status_code=403 + ) + self.assertEqual('403,', conf.CONF.openstack.skip_retry_codes) + + def test_get_servers_conf_skip_http_error_not_hit(self): + conf.CONF.set_override('skip_retry_codes', '403,', 'openstack') + self._test_http_error( + self.m_c.compute.servers, "get_servers", status_code=404 + ) + self.assertEqual('403,', conf.CONF.openstack.skip_retry_codes) + def test_get_servers_404_http_error(self): self._test_http_error( self.m_c.compute.servers, "get_servers", status_code=404