Shard affinity for k8s workloads
Instances that are part of the same K8S cluster get scheduled
to the same shard (vCenter).
The filter identifies the K8S cluster by looking at the tags or metadata
set by the K8S cluster orchestrators when creating the instances.
Kubernikus and Gardener are supported for now.
Instances are grouped into HANA and non-HANA, so that hana_* flavors
can stick to a shard of their own.
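
For illustration, a minimal standalone sketch of that identification step. The prefix constants match the ones introduced in the diff below; the tag values, metadata keys and cluster names are made-up examples, not real data.

# Hedged sketch: mirrors the query-filter logic of _k8s_instance_query_filter
# below, but on plain lists/dicts. Cluster names and values are hypothetical.
GARDENER_PREFIX = "kubernetes.io-cluster-shoot--garden--"
KKS_PREFIX = "kubernikus:kluster"


def k8s_query_filter(tags, metadata):
    """Build the instance query filter selecting other members of the
    same K8S cluster, or return None for non-K8S instances."""
    # Kubernikus marks its nodes with a tag.
    kks_tag = next((t for t in tags if t.startswith(KKS_PREFIX)), None)
    if kks_tag:
        return {'tags': [kks_tag]}
    # Gardener marks its nodes with a metadata key.
    gardener_meta = {k: v for k, v in metadata.items()
                     if k.startswith(GARDENER_PREFIX)}
    if gardener_meta:
        return {'metadata': gardener_meta}
    return None


# Hypothetical examples:
print(k8s_query_filter(["kubernikus:kluster-example"], {}))
print(k8s_query_filter([], {GARDENER_PREFIX + "proj--shoot": "1"}))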
leust committed Apr 18, 2023
1 parent 1252cb5 commit 9d433ea
Showing 2 changed files with 339 additions and 33 deletions.
111 changes: 107 additions & 4 deletions nova/scheduler/filters/shard_filter.py
@@ -19,6 +19,9 @@
from oslo_log import log as logging

import nova.conf
from nova import context as nova_context
from nova.objects.build_request import BuildRequest
from nova.objects.instance import InstanceList
from nova.scheduler import filters
from nova.scheduler import utils
from nova import utils as nova_utils
@@ -28,6 +31,9 @@
CONF = nova.conf.CONF

_SERVICE_AUTH = None
GARDENER_PREFIX = "kubernetes.io-cluster-shoot--garden--"
KKS_PREFIX = "kubernikus:kluster"
HANA_PREFIX = "hana_"


class ShardFilter(filters.BaseHostFilter):
@@ -114,11 +120,68 @@ def _get_shards(self, project_id):

        return self._PROJECT_SHARD_CACHE.get(project_id)

    def _get_k8s_cluster_instances(self, spec_obj):
        """If the instance will be part of a K8S cluster, return the
        list of all other instances that are already part of it, if any.
        """
        k8s_filter = self._k8s_instance_query_filter(spec_obj)

        if not k8s_filter:
            return []

        k8s_filter['project_id'] = spec_obj.project_id

        instances = InstanceList.get_by_filters(
            nova_context.get_admin_context(), filters=k8s_filter,
            expected_attrs=['flavor', 'metadata', 'tags'])

        if instances and spec_obj.availability_zone:
            return [i for i in instances
                    if i.availability_zone == spec_obj.availability_zone]

        return instances

    def _k8s_instance_query_filter(self, spec_obj):
        elevated = nova_context.get_admin_context()
        build_request = BuildRequest.get_by_instance_uuid(
            elevated, spec_obj.instance_uuid)

        # Kubernikus
        kks_tag = next((t.tag for t in build_request.tags
                        if t.tag.startswith(KKS_PREFIX)), None)
        if kks_tag:
            return {'tags': [kks_tag]}

        # Gardener
        gardener_meta = \
            {k: v for k, v in build_request.instance.metadata.items()
             if k.startswith(GARDENER_PREFIX)}
        if gardener_meta:
            return {'metadata': gardener_meta}

        return None

    def filter_all(self, filter_obj_list, spec_obj):
        """Yield objects that pass the filter.

        Can be overridden in a subclass, if you need to base filtering
        decisions on all objects. Otherwise, one can just override
        _filter_one() to filter a single object.
        """
        # Only VMware
        if utils.is_non_vmware_spec(spec_obj):
            for obj in filter_obj_list:
                yield obj
            return

        k8s_instances = self._get_k8s_cluster_instances(spec_obj)

        for obj in filter_obj_list:
            if self._host_passes(obj, spec_obj, k8s_instances):
                yield obj

    def _host_passes(self, host_state, spec_obj, k8s_instances):
        host_shard_aggrs = [aggr for aggr in host_state.aggregates
                            if aggr.name.startswith(self._SHARD_PREFIX)]

@@ -148,18 +211,58 @@ def host_passes(self, host_state, spec_obj):
        if self._ALL_SHARDS in shards:
            LOG.debug('project enabled for all shards %(project_shards)s.',
                      {'project_shards': shards})
        elif host_shard_names & set(shards):
            LOG.debug('%(host_state)s shard %(host_shard)s found in project '
                      'shards %(project_shards)s.',
                      {'host_state': host_state,
                       'host_shard': host_shard_names,
                       'project_shards': shards})
        else:
            LOG.debug('%(host_state)s shard %(host_shard)s not found in '
                      'project shards %(project_shards)s.',
                      {'host_state': host_state,
                       'host_shard': host_shard_names,
                       'project_shards': shards})
            return False

        if not utils.request_is_resize(spec_obj):
            # K8S orchestrators only create or delete nodes, so we
            # shouldn't interfere with resize/migrate requests.
            return self._host_passes_k8s(host_state, host_shard_names,
                                         spec_obj, k8s_instances)

        return True

    def _host_passes_k8s(self, host_state, host_shard_names, spec_obj,
                         k8s_instances):
        """Instances of a K8S cluster must end up on the same shard.

        The K8S cluster is identified by the metadata or tags set
        by the orchestrator (Gardener or Kubernikus).
        """
        if not k8s_instances:
            # There are no instances of this cluster in this availability
            # zone yet. We allow any shard in this AZ for the first instance.
            return True

        def _is_hana_flavor(flavor):
            return flavor.name.startswith(HANA_PREFIX)

        def _is_same_category(instance, flavor):
            """Check whether the instance is from the flavor's family."""
            if _is_hana_flavor(flavor):
                return _is_hana_flavor(instance.flavor)
            return True

        siblings = [i for i in k8s_instances
                    if _is_same_category(i, spec_obj.flavor)]

        if not siblings:
            # This is the first instance of this particular type (HANA).
            # We allow any shard in this AZ for the first instance.
            return True

        k8s_hosts = set([i.host for i in siblings])

        return any(agg.name in host_shard_names and
                   set(agg.hosts) & k8s_hosts
                   for agg in host_state.aggregates)
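
To make the HANA grouping concrete, here is a small self-contained sketch of the sibling selection performed in _host_passes_k8s; the flavor names and host names are hypothetical. A hana_* flavor only follows the HANA members of its cluster, while any other flavor follows every member.

# Hedged sketch of the sibling selection above (hypothetical data).
from collections import namedtuple

HANA_PREFIX = "hana_"

Flavor = namedtuple('Flavor', ['name'])
Instance = namedtuple('Instance', ['flavor', 'host'])

cluster = [Instance(Flavor('m1.large'), 'node001'),
           Instance(Flavor('hana_xlarge'), 'node042')]


def siblings_for(flavor, instances):
    """HANA flavors only follow HANA siblings; others follow all members."""
    if flavor.name.startswith(HANA_PREFIX):
        return [i for i in instances
                if i.flavor.name.startswith(HANA_PREFIX)]
    return list(instances)


print([i.host for i in siblings_for(Flavor('hana_xlarge'), cluster)])  # ['node042']
print([i.host for i in siblings_for(Flavor('m1.large'), cluster)])     # ['node001', 'node042']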
