Skip to content

Commit

Permalink
Merge branch 'main' into CLOUDOPS-548
Browse files Browse the repository at this point in the history
  • Loading branch information
ricolin authored Oct 25, 2024
2 parents bc735f7 + 6a68711 commit db784d7
Show file tree
Hide file tree
Showing 17 changed files with 617 additions and 130 deletions.
40 changes: 40 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
name: build

concurrency:
group: ${{ github.head_ref || github.run_id }}
cancel-in-progress: true

on:
workflow_dispatch:
push:
branches:
- 'main'
tags:
- 'v*'
pull_request:
branches:
- 'main'

jobs:
image:
runs-on: ubuntu-latest
steps:
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3
- uses: actions/checkout@v4
- uses: docker/metadata-action@v5
id: meta
with:
images: ghcr.io/vexxhost/staffeln
- uses: docker/login-action@v3
if: github.event_name != 'pull_request'
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- uses: docker/build-push-action@v5
with:
context: .
push: ${{ github.event_name != 'pull_request' }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
3 changes: 2 additions & 1 deletion .github/workflows/linters.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: linters
on: push
on: push

jobs:
super-lint:
Expand All @@ -11,5 +11,6 @@ jobs:
DEFAULT_BRANCH: main
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
VALIDATE_ALL_CODEBASE: true
VALIDATE_DOCKERFILE_HADOLINT: false
VALIDATE_PYTHON_MYPY: false
VALIDATE_JSCPD: false
3 changes: 3 additions & 0 deletions .hadolint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
ignored:
- DL3020
12 changes: 12 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# syntax=docker/dockerfile:1.5

FROM python:3.10 AS builder
RUN python3 -m venv /venv
ENV PATH=/venv/bin:$PATH
ADD . /src
RUN --mount=type=cache,target=/root/.cache \
pip install /src

FROM python:3.10-slim AS runtime
ENV PATH=/venv/bin:$PATH
COPY --from=builder /venv /venv
370 changes: 268 additions & 102 deletions README.md

Large diffs are not rendered by default.

21 changes: 13 additions & 8 deletions hack/stack.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ else
fi

# Create DevStack configuration file

sudo mkdir /etc/staffeln
sudo chown -R "${USER}". /etc/staffeln
cat <<EOF > /opt/stack/local.conf
Expand Down Expand Up @@ -49,22 +50,26 @@ EOF

# Create staffeln configuration file
cat <<EOF > /etc/staffeln/staffeln.conf
[DEFAULT]
debug = True
[conductor]
backup_workers = 1
rotation_workers = 1
backup_service_period = 1200
retention_service_period = 1200
backup_cycle_timout = 5min
retention_time = 2w3d
backup_metadata_key="__automated_backup"
backup_metadata_key="__staffeln_backup"
retention_metadata_key="__staffeln_retention"
full_backup_depth = 4
[database]
backend = sqlalchemy
connection = "mysql+pymysql://staffeln:password@localhost:3306/staffeln"
tooz_connection = "mysql://staffeln:password@localhost:3306/staffeln"
mysql_engine = InnoDB
[coordination]
backend_url = "file:///tmp/staffeln_locks"
EOF

# Create staffeln database
Expand All @@ -77,9 +82,9 @@ pip install -U setuptools pip
"${HOME}"/.local/bin/pip3 install -e .

# Start staffeln conductor
staffeln-db-manage create_schema
#staffeln-db-manage upgrade head
set +x
source /opt/stack/openrc admin admin
set -x
staffeln-conductor &
"${HOME}"/.local/bin/staffeln-db-manage --config-file /etc/staffeln/staffeln.conf create_schema
#staffeln-db-manage upgrade head

echo You can fetch authroize with command: source /opt/stack/openrc admin admin
echo You can now run staffeln conductor with: "${HOME}"/.local/bin/staffeln-conductor --config-file /etc/staffeln/staffeln.conf
echo You can now run staffeln api with: "${HOME}"/.local/bin/staffeln-api --config-file /etc/staffeln/staffeln.conf
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ openstacksdk>0.28.0
pymysql
parse
tooz # Apache-2.0
tenacity
sherlock>=0.4.1 # MIT
kubernetes # Apache-2.0
tenacity
1 change: 1 addition & 0 deletions staffeln/common/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
BACKUP_INIT = 4
BACKUP_FAILED = 3
BACKUP_COMPLETED = 2
BACKUP_WIP = 1
Expand Down
157 changes: 148 additions & 9 deletions staffeln/common/lock.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
import staffeln.conf
import errno
import glob
import os
import re
import sys
import uuid
from typing import Optional # noqa: H301

import sherlock
from oslo_log import log
from oslo_utils import uuidutils
from staffeln import conf, exception
from tooz import coordination

CONF = staffeln.conf.CONF
CONF = conf.CONF
LOG = log.getLogger(__name__)


class LockManager(object):
def __init__(self, node_id=None):
self.db_url = CONF.database.tooz_connection
self.node_id = uuidutils.generate_uuid() if node_id is None else node_id
# get_coordinator(backend_url, member_id)
self.coordinator = coordination.get_coordinator(self.db_url, node_id)
def __init__(self):
backend_url = CONF.coordination.backend_url
# This is for now using to check if any backend_url setup
# for tooz backends as K8s should not need one.any
self.coordinator = COORDINATOR if backend_url else K8SCOORDINATOR

def __enter__(self):
self.coordinator.start()
Expand All @@ -23,19 +31,150 @@ def __exit__(self, exc_type, exc_val, exc_tb):


class Lock(object):
def __init__(self, lock_manager, lock_name):
def __init__(self, lock_manager, lock_name, remove_lock=False):
self.lock_manager = lock_manager
self.lock_name = lock_name
self.lock = None
self.acquired = False
self.remove_lock = remove_lock

def __enter__(self):
self.lock = self.lock_manager.coordinator.get_lock(self.lock_name)
self.acquired = self.lock.acquire(blocking=False)
if not self.acquired:
LOG.debug(f"Failed to lock for {self.lock_name}")
else:
LOG.debug(f"acquired lock for {self.lock_name}")
return self

def __exit__(self, exc_type, exc_val, exc_tb):
if self.acquired:
self.lock.release()
LOG.debug(f"released lock for {self.lock_name}")
if self.remove_lock:
self.lock_manager.coordinator.remove_lock(self.lock_name)
LOG.debug(f"removed lock file (if any) for {self.lock_name}")


class Coordinator(object):
"""Tooz coordination wrapper.
Coordination member id is created from concatenated
`prefix` and `agent_id` parameters.
:param str agent_id: Agent identifier
:param str prefix: Used to provide member identifier with a
meaningful prefix.
"""

def __init__(self, agent_id: Optional[str] = None, prefix: str = ""):
self.coordinator = None
self.agent_id = agent_id or str(uuid.uuid4())
self.started = False
self.prefix = prefix
self._file_path = None

def _get_file_path(self, backend_url):
if backend_url.startswith("file://"):
path = backend_url[7:]
# Copied from TooZ's _normalize_path to get the same path they use
if sys.platform == "win32":
path = re.sub(r"\\(?=\w:\\)", "", os.path.normpath(path))
return os.path.abspath(os.path.join(path, self.prefix))
return None

def start(self) -> None:
if self.started:
return

backend_url = CONF.coordination.backend_url

# member_id should be bytes
member_id = (self.prefix + self.agent_id).encode("ascii")
self.coordinator = coordination.get_coordinator(backend_url, member_id)
assert self.coordinator is not None
self.coordinator.start(start_heart=True)
self._file_path = self._get_file_path(backend_url)
self.started = True

def stop(self) -> None:
"""Disconnect from coordination backend and stop heartbeat."""
if self.started:
if self.coordinator is not None:
self.coordinator.stop()
self.coordinator = None
self.started = False

def get_lock(self, name: str):
"""Return a Tooz backend lock.
:param str name: The lock name that is used to identify it
across all nodes.
"""
# lock name should be bytes
lock_name = (self.prefix + name).encode("ascii")
if self.coordinator is not None:
return self.coordinator.get_lock(lock_name)
else:
raise exception.LockCreationFailed("Coordinator uninitialized.")

def remove_lock(self, glob_name):
# Most locks clean up on release, but not the file lock, so we manually
# clean them.

def _err(file_name: str, exc: Exception) -> None:
LOG.warning(f"Failed to cleanup lock {file_name}: {exc}")

if self._file_path:
files = glob.glob(self._file_path + glob_name)
for file_name in files:
try:
os.remove(file_name)
except OSError as exc:
if exc.errno != errno.ENOENT:
_err(file_name, exc)
except Exception as exc:
_err(file_name, exc)


class K8sCoordinator(object):
"""Sherlock kubernetes coordination wrapper.
:param int expire: Set lock expire seconds
:param int timeout: Set lock acquire action timeout seconds
:param str namespace: Set lock namespace.
"""

def __init__(
self, expire: int = 3600, timeout: int = 10, namespace: str = "openstack"
):
self.timeout = timeout
self.expire = expire
self.namespace = namespace
self.started = False
self.prefix = "staffeln-"

def start(self) -> None:
if self.started:
return
sherlock.configure(expire=self.expire, timeout=self.timeout)
self.started = True

def stop(self) -> None:
"""Disconnect from coordination backend and stop heartbeat."""
pass

def get_lock(self, name: str):
"""Return a kubernetes lease lock.
:param str name: The lock name that is used to identify it
across all nodes.
"""
return sherlock.KubernetesLock(self.prefix + name, self.namespace)

def remove_lock(self, glob_name):
pass


COORDINATOR = Coordinator(prefix="staffeln-")
K8SCOORDINATOR = K8sCoordinator()
7 changes: 7 additions & 0 deletions staffeln/conductor/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,13 @@ def get_queues(self, filters=None):
)
return queues

def get_queue_task_by_id(self, task_id):
"""Get single volume queue task from the queue_data table"""
queue = objects.Queue.get_by_id( # pylint: disable=E1120
context=self.ctx, id=task_id
)
return queue

def create_queue(self, old_tasks):
"""
Create the queue of all the volumes for backup
Expand Down
15 changes: 12 additions & 3 deletions staffeln/conductor/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ def _process_wip_tasks(self):
LOG.debug(
f"try to get lock and run task for volume: {queue.volume_id}."
)
with lock.Lock(self.lock_mgt, queue.volume_id) as q_lock:
with lock.Lock(
self.lock_mgt, queue.volume_id, remove_lock=True
) as q_lock:
if q_lock.acquired:
self.controller.check_volume_backup_status(queue)
else: # time out
Expand Down Expand Up @@ -110,9 +112,16 @@ def _process_todo_tasks(self):
)
if len(tasks_to_start) != 0:
for task in tasks_to_start:
with lock.Lock(self.lock_mgt, task.volume_id) as t_lock:
with lock.Lock(
self.lock_mgt, task.volume_id, remove_lock=True
) as t_lock:
if t_lock.acquired:
self.controller.create_volume_backup(task)
# Re-pulling status and make it's up-to-date
task = self.controller.get_queue_task_by_id(task_id=task.id)
if task.backup_status == constants.BACKUP_PLANNED:
task.backup_status = constants.BACKUP_INIT
task.save()
self.controller.create_volume_backup(task)

# Refresh the task queue
def _update_task_queue(self):
Expand Down
Loading

0 comments on commit db784d7

Please sign in to comment.