Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CLOUDOPS-569] Allow retry on openstack HttpException #127

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ parse
tooz # Apache-2.0
sherlock>=0.4.1 # MIT
kubernetes # Apache-2.0
tenacity
94 changes: 94 additions & 0 deletions staffeln/common/openstack.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,42 @@
from __future__ import annotations

import tenacity
from openstack import exceptions, proxy
from oslo_log import log

from staffeln import conf
from staffeln.common import auth
from staffeln.i18n import _

CONF = conf.CONF
LOG = log.getLogger(__name__)


class RetryHTTPError(tenacity.retry_if_exception):
    """Retry strategy that retries on an ``HttpException`` whose status code
    is not listed in ``CONF.openstack.skip_retry_codes``.
    """

    def __init__(self):
        def is_http_error(exception):
            """Return True when ``exception`` should trigger another attempt."""
            if not isinstance(exception, exceptions.HttpException):
                return False
            # Make sure we don't retry on codes in skip list (default: [404]),
            # as not found could be an expected status. The option is looked
            # up on every call rather than once at construction time.
            skip_codes = CONF.openstack.skip_retry_codes
            # The status code is compared in its string form.
            if str(exception.status_code) in skip_codes:
                return False
            # Lazy %-style arguments: the message is only rendered when the
            # debug level is actually enabled.
            LOG.debug(
                "Getting HttpException %s (status code: %s), "
                "retry till timeout...",
                exception,
                exception.status_code,
            )
            return True

        super().__init__(predicate=is_http_error)


class OpenstackSDK:
def __init__(self):
self.conn_list = {}
Expand All @@ -26,6 +54,12 @@ def set_project(self, project):
self.conn = self.conn_list[project_id]

# user
@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_user_id(self):
user_name = self.conn.config.auth["username"]
if "user_domain_id" in self.conn.config.auth:
Expand All @@ -38,15 +72,33 @@ def get_user_id(self):
user = self.conn.get_user(name_or_id=user_name)
return user.id

@tenacity.retry(
    retry=RetryHTTPError(),
    wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
    reraise=True,
    stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_role_assignments(self, project_id, user_id=None):
    """List role assignments for a project, optionally narrowed to a user.

    The call is retried according to ``RetryHTTPError``; the wait and stop
    settings are taken from ``CONF.openstack`` when the decorator is
    evaluated.

    :param project_id: value passed as the ``project`` filter.
    :param user_id: when truthy, also passed as the ``user`` filter.
    :returns: whatever ``self.conn.list_role_assignments`` returns.
    """
    query = {"project": project_id}
    if user_id:
        # Only restrict by user when one was actually supplied.
        query["user"] = user_id
    return self.conn.list_role_assignments(filters=query)

@tenacity.retry(
    retry=RetryHTTPError(),
    wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
    reraise=True,
    stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_user(self, user_id):
    """Look up a user through the current connection.

    The call is retried according to ``RetryHTTPError``; the wait and stop
    settings are taken from ``CONF.openstack`` when the decorator is
    evaluated.

    :param user_id: forwarded as ``name_or_id`` to ``self.conn.get_user``.
    :returns: whatever ``self.conn.get_user`` returns.
    """
    user = self.conn.get_user(name_or_id=user_id)
    return user

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_project_member_emails(self, project_id):
members = self.get_role_assignments(project_id)
emails = []
Expand All @@ -63,9 +115,21 @@ def get_project_member_emails(self, project_id):
emails.append(user.email)
return emails

@tenacity.retry(
    retry=RetryHTTPError(),
    wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
    reraise=True,
    stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_projects(self):
    """Return the result of ``self.conn.list_projects()``.

    The call is retried according to ``RetryHTTPError``; the wait and stop
    settings are taken from ``CONF.openstack`` when the decorator is
    evaluated.
    """
    projects = self.conn.list_projects()
    return projects

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_servers(self, project_id=None, all_projects=True, details=True):
if project_id is not None:
return self.conn.compute.servers(
Expand All @@ -76,9 +140,21 @@ def get_servers(self, project_id=None, all_projects=True, details=True):
else:
return self.conn.compute.servers(details=details, all_projects=all_projects)

@tenacity.retry(
    retry=RetryHTTPError(),
    wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
    reraise=True,
    stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_volume(self, uuid, project_id):
    """Fetch a volume by its id through the current connection.

    The call is retried according to ``RetryHTTPError``; the wait and stop
    settings are taken from ``CONF.openstack`` when the decorator is
    evaluated.

    :param uuid: id passed to ``self.conn.get_volume_by_id``.
    :param project_id: accepted but not used by this method.
    :returns: whatever ``self.conn.get_volume_by_id`` returns.
    """
    volume = self.conn.get_volume_by_id(uuid)
    return volume

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup(self, uuid, project_id=None):
try:
return self.conn.get_volume_backup(uuid)
Expand All @@ -102,6 +178,12 @@ def create_backup(
incremental=incremental,
)

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def delete_backup(self, uuid, project_id=None, force=False):
# Note(Alex): v3 is not supporting force delete?
# conn.block_storage.delete_backup(
Expand All @@ -115,11 +197,23 @@ def delete_backup(self, uuid, project_id=None, force=False):
except exceptions.ResourceNotFound:
return None

@tenacity.retry(
    retry=RetryHTTPError(),
    wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
    reraise=True,
    stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup_quota(self, project_id):
    """Return the ``backups`` attribute of the project's volume quotas.

    The call is retried according to ``RetryHTTPError``; the wait and stop
    settings are taken from ``CONF.openstack`` when the decorator is
    evaluated.

    :param project_id: project passed to ``self._get_volume_quotas``.
    """
    # Goes through the local _get_volume_quotas helper instead of
    # conn.get_volume_quotas(project_id).
    return self._get_volume_quotas(project_id).backups

@tenacity.retry(
retry=RetryHTTPError(),
wait=tenacity.wait_exponential(max=CONF.openstack.max_retry_interval),
reraise=True,
stop=tenacity.stop_after_delay(CONF.openstack.retry_timeout),
)
def get_backup_gigabytes_quota(self, project_id):
# quota = conn.get_volume_quotas(project_id)
quota = self._get_volume_quotas(project_id)
Expand Down
40 changes: 40 additions & 0 deletions staffeln/conf/conductor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
title="Conductor Options",
help=_("Options under this group are used " "to define Conductor's configuration."),
)
# Option group holding the settings read as ``CONF.openstack.*``.
openstack_group = cfg.OptGroup(
    "openstack",
    title="OpenStack Options",
    help=_(
        "Options under this group are used "
        "to define OpenStack related configuration."
    ),
)

backup_opts = [
cfg.IntOpt(
Expand Down Expand Up @@ -74,6 +82,36 @@
),
]

# Options registered under the ``openstack`` group.
openstack_opts = [
    # Integer option, default 300, minimum 1.
    cfg.IntOpt(
        "retry_timeout",
        default=300,
        min=1,
        help=_(
            "The timeout for retry OpenStackSDK HTTP "
            "exceptions, the unit is one second."
        ),
    ),
    # Integer option, default 30, minimum 0.
    cfg.IntOpt(
        "max_retry_interval",
        default=30,
        min=0,
        help=_(
            "Max time interval for retry OpenStackSDK HTTP "
            "exceptions, the unit is one second."
        ),
    ),
    # List option; the default contains the single entry "404".
    cfg.ListOpt(
        "skip_retry_codes",
        default=["404"],
        help=_(
            "A list of HTTP codes to skip retry on for "
            "OpenStackSDK HTTP exception."
        ),
    ),
]

rotation_opts = [
cfg.IntOpt(
"rotation_workers",
Expand Down Expand Up @@ -138,12 +176,14 @@ def register_opts(conf):
conf.register_group(conductor_group)
conf.register_opts(backup_opts, group=conductor_group)
conf.register_opts(rotation_opts, group=conductor_group)
conf.register_opts(openstack_opts, group=openstack_group)
conf.register_opts(coordination_opts, group=coordination_group)


def list_opts():
    """Return a dict mapping each option group to its list of options."""
    # NOTE(review): rotation_opts is listed under "DEFAULT" here, while
    # register_opts registers it under conductor_group - confirm intent.
    opts_by_group = {"DEFAULT": rotation_opts}
    opts_by_group[conductor_group] = backup_opts
    opts_by_group[openstack_group] = openstack_opts
    opts_by_group[coordination_group] = coordination_opts
    return opts_by_group
2 changes: 2 additions & 0 deletions staffeln/tests/common/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright (c) 2024 VEXXHOST, Inc.
# SPDX-License-Identifier: Apache-2.0
13 changes: 6 additions & 7 deletions staffeln/tests/common/test_openstacksdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,7 @@ def _test_http_error(self, m_func, retry_func, status_code, call_count=1, **kwar
**kwargs,
)
self.assertEqual(status_code, exc.status_code)
skip_retry_codes = conf.CONF.openstack.skip_retry_codes.replace(" ", "").split(
","
)
skip_retry_codes = conf.CONF.openstack.skip_retry_codes
if str(status_code) not in skip_retry_codes:
if call_count == 1:
self.m_sleep.assert_called_once_with(1.0)
Expand All @@ -83,17 +81,18 @@ def test_get_servers_non_http_error(self):
self._test_non_http_error(self.m_c.compute.servers, "get_servers")

def test_get_servers_conf_skip_http_error(self):
conf.CONF.set_override("skip_retry_codes", "403,", "openstack")
conf.CONF.set_override("skip_retry_codes", [403], "openstack")
self._test_http_error(self.m_c.compute.servers, "get_servers", status_code=403)
self.assertEqual("403,", conf.CONF.openstack.skip_retry_codes)
self.assertEqual(["403"], conf.CONF.openstack.skip_retry_codes)

def test_get_servers_conf_skip_http_error_not_hit(self):
conf.CONF.set_override("skip_retry_codes", "403,", "openstack")
conf.CONF.set_override("skip_retry_codes", [403], "openstack")
self._test_http_error(self.m_c.compute.servers, "get_servers", status_code=404)
self.assertEqual("403,", conf.CONF.openstack.skip_retry_codes)
self.assertEqual(["403"], conf.CONF.openstack.skip_retry_codes)

def test_get_servers_404_http_error(self):
self._test_http_error(self.m_c.compute.servers, "get_servers", status_code=404)
self.assertEqual(["404"], conf.CONF.openstack.skip_retry_codes)

def test_get_servers_500_http_error(self):
self._test_http_error(self.m_c.compute.servers, "get_servers", status_code=500)
Expand Down
1 change: 1 addition & 0 deletions test-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ oslotest>=1.10.0 # Apache-2.0
stestr>=1.0.0 # Apache-2.0
testtools>=1.4.0 # MIT
pre-commit
tenacity
3 changes: 2 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ install_commands =

[testenv:{py3,py38,py39,py310}]
basepython = python3
deps = -r{toxinidir}/test-requirements.txt
deps = -r{toxinidir}/requirements.txt
-r{toxinidir}/test-requirements.txt
Comment on lines +25 to +26
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is necessary for this change.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need the tenacity and openstack modules in the py3 tests.

commands = stestr run --slowest {posargs}

[testenv:cover]
Expand Down
Loading