From 8d5c50f0ff9d482288e71b67470ec2eaafc65b21 Mon Sep 17 00:00:00 2001 From: ricolin Date: Tue, 5 Nov 2024 12:00:48 +0800 Subject: [PATCH] Make openstack retry more configurable --- staffeln/common/openstack.py | 36 ++++++++++++---------- staffeln/conf/conductor.py | 36 ++++++++++++++++++++-- staffeln/tests/common/test_openstacksdk.py | 19 +++++++++++- 3 files changed, 71 insertions(+), 20 deletions(-) diff --git a/staffeln/common/openstack.py b/staffeln/common/openstack.py index 68ed953..3ac0b64 100644 --- a/staffeln/common/openstack.py +++ b/staffeln/common/openstack.py @@ -16,10 +16,12 @@ class RetryHTTPError(tenacity.retry_if_exception): def __init__(self): def is_http_error(exception): - # Make sure we don't retry on 404, as not found could be an - # expected status. + # Make sure we don't retry on codes in skip list (default: 404), + # as not found could be an expected status. + skip_codes = CONF.openstack.skip_retry_codes.replace( + ' ', '').split(',') result = (isinstance(exception, exceptions.HttpException) and - exception.status_code != 404) + str(exception.status_code) not in skip_codes) if result: LOG.debug(f"Getting HttpException {exception} (status " f"code: {exception.status_code}), " @@ -48,9 +50,9 @@ def set_project(self, project): # user @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout)) + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout)) def get_user_id(self): user_name = self.conn.config.auth["username"] if "user_domain_id" in self.conn.config.auth: @@ -65,17 +67,17 @@ def get_user_id(self): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout)) + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout)) def get_projects(self): return self.conn.list_projects() @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout)) + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout)) def get_servers(self, project_id=None, all_projects=True, details=True): if project_id is not None: return self.conn.compute.servers( @@ -86,17 +88,17 @@ def get_servers(self, project_id=None, all_projects=True, details=True): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout)) + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout)) def get_volume(self, uuid, project_id): return self.conn.get_volume_by_id(uuid) @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout)) + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout)) def get_backup(self, uuid, project_id=None): # return conn.block_storage.get_backup( # project_id=project_id, backup_id=uuid, @@ -119,9 +121,9 @@ def create_backup(self, volume_id, project_id, force=True, wait=False): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout)) + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout)) def delete_backup(self, uuid, project_id=None, force=False): # Note(Alex): v3 is not supporting force delete? # conn.block_storage.delete_backup( @@ -135,9 +137,9 @@ def delete_backup(self, uuid, project_id=None, force=False): @tenacity.retry( retry=RetryHTTPError(), - wait=tenacity.wait_exponential(max=30), + wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval), reraise=True, - stop=tenacity.stop_after_delay(CONF.conductor.retry_timeout)) + stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout)) def get_backup_quota(self, project_id): # quota = conn.get_volume_quotas(project_id) quota = self._get_volume_quotas(project_id) diff --git a/staffeln/conf/conductor.py b/staffeln/conf/conductor.py index 34429f2..5d6c036 100755 --- a/staffeln/conf/conductor.py +++ b/staffeln/conf/conductor.py @@ -8,6 +8,15 @@ help=_("Options under this group are used " "to define Conductor's configuration."), ) +openstack_group = cfg.OptGroup( + "openstack", + title="OpenStack Options", + help=_( + "Options under this group are used " + "to define OpneStack related configuration." + ), +) + backup_opts = [ cfg.IntOpt( "backup_workers", @@ -39,11 +48,29 @@ "backup_metadata_key", help=_("The key string of metadata the VM, which requres back up, has"), ), +] + +openstack_opts = [ cfg.IntOpt( "retry_timeout", default=300, min=1, - help=_("The timeout for retry, the unit is one second."), + help=_("The timeout for retry OpenStackSDK HTTP exceptions, " + "the unit is one second."), + ), + cfg.IntOpt( + "max_retry_interval", + default=30, + min=0, + help=_("Max time interval for retry OpenStackSDK HTTP exceptions, " + "the unit is one second."), + ), + cfg.StrOpt( + "skip_retry_codes", + default="404,", + help=_("A comma separated string that provides a list of HTTP codes " + "to skip retry on for OpenStackSDK HTTP " + "exception. Default only `404` is skipped."), ), ] @@ -91,7 +118,12 @@ def register_opts(conf): conf.register_group(conductor_group) conf.register_opts(backup_opts, group=conductor_group) conf.register_opts(rotation_opts, group=conductor_group) + conf.register_opts(openstack_opts, group=openstack_group) def list_opts(): - return {"DEFAULT": rotation_opts, conductor_group: backup_opts} + return { + "DEFAULT": rotation_opts, + conductor_group: backup_opts, + openstack_group: openstack_opts, + } diff --git a/staffeln/tests/common/test_openstacksdk.py b/staffeln/tests/common/test_openstacksdk.py index b69739c..d3dc99c 100644 --- a/staffeln/tests/common/test_openstacksdk.py +++ b/staffeln/tests/common/test_openstacksdk.py @@ -7,6 +7,7 @@ import tenacity from staffeln.common import openstack as s_openstack +from staffeln import conf from staffeln.tests import base @@ -53,7 +54,9 @@ def _test_http_error( **kwargs, ) self.assertEqual(status_code, exc.status_code) - if status_code != 404: + skip_retry_codes = conf.CONF.openstack.skip_retry_codes.replace( + ' ', '').split(',') + if str(status_code) not in skip_retry_codes: if call_count == 1: self.m_sleep.assert_called_once_with(1.0) else: @@ -77,6 +80,20 @@ def test_get_servers(self): details=True, all_projects=True ) + def test_get_servers_conf_skip_http_error(self): + conf.CONF.set_override('skip_retry_codes', '403,', 'openstack') + self._test_http_error( + self.m_c.compute.servers, "get_servers", status_code=403 + ) + self.assertEqual('403,', conf.CONF.openstack.skip_retry_codes) + + def test_get_servers_conf_skip_http_error_not_hit(self): + conf.CONF.set_override('skip_retry_codes', '403,', 'openstack') + self._test_http_error( + self.m_c.compute.servers, "get_servers", status_code=404 + ) + self.assertEqual('403,', conf.CONF.openstack.skip_retry_codes) + def test_get_servers_non_http_error(self): self._test_non_http_error(self.m_c.compute.servers, "get_servers")