Shard affinity for k8s workloads
Instances that are part of the same K8S cluster are scheduled
to the same shard (vCenter).
The filter identifies the K8S cluster by looking at the tags or
metadata set by the K8S cluster orchestrator when the instances are
created. Kubernikus and Gardener are supported for now.

BigVMs are exempt from shard affinity: they are only scheduled on
their allocated hosts.
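
A minimal sketch of how the filter turns these orchestrator-set identifiers into
an instance query; only the two prefixes come from the filter itself, the
cluster-name suffixes below are invented for illustration.

    GARDENER_PREFIX = "kubernetes.io-cluster-shoot--garden--"
    KKS_PREFIX = "kubernikus:kluster"

    def k8s_query_filter(tags, metadata):
        """Build an instance query filter the way the shard filter does."""
        # Kubernikus identifies a kluster via a server tag.
        kks_tag = next((t for t in tags if t.startswith(KKS_PREFIX)), None)
        if kks_tag:
            return {'tags': [kks_tag]}
        # Gardener identifies a shoot via instance metadata keys.
        gardener_meta = {k: v for k, v in metadata.items()
                         if k.startswith(GARDENER_PREFIX)}
        if gardener_meta:
            return {'metadata': gardener_meta}
        return None

    # Hypothetical Kubernikus-created server: one tag carrying the kluster name.
    k8s_query_filter(["kubernikus:kluster-example"], {})
    # -> {'tags': ['kubernikus:kluster-example']}

    # Hypothetical Gardener-created server: one metadata key per shoot cluster.
    k8s_query_filter([], {"kubernetes.io-cluster-shoot--garden--example": "1"})
    # -> {'metadata': {'kubernetes.io-cluster-shoot--garden--example': '1'}}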
leust committed Jul 25, 2023
1 parent 5c0e180 commit 58e20eb
Showing 2 changed files with 390 additions and 47 deletions.
104 changes: 100 additions & 4 deletions nova/scheduler/filters/shard_filter.py
@@ -19,6 +19,10 @@
 from oslo_log import log as logging
 
 import nova.conf
+from nova import context as nova_context
+from nova.objects.aggregate import AggregateList
+from nova.objects.build_request import BuildRequest
+from nova.objects.instance import InstanceList
 from nova.scheduler import filters
 from nova.scheduler import utils
 from nova import utils as nova_utils
@@ -28,6 +32,9 @@
 CONF = nova.conf.CONF
 
 _SERVICE_AUTH = None
+GARDENER_PREFIX = "kubernetes.io-cluster-shoot--garden--"
+KKS_PREFIX = "kubernikus:kluster"
+HANA_PREFIX = "hana_"
 
 
 class ShardFilter(filters.BaseHostFilter):
@@ -37,6 +44,8 @@ class ShardFilter(filters.BaseHostFilter):
     Alternatively the project may have the "sharding_enabled" tag set, which
     enables the project for hosts in all shards.
+
+    Implements `filter_all` directly instead of `host_passes`
     """
 
     _PROJECT_SHARD_CACHE = {}
@@ -114,11 +123,87 @@ def _get_shards(self, project_id):
 
         return self._PROJECT_SHARD_CACHE.get(project_id)
 
-    def host_passes(self, host_state, spec_obj):
+    def _get_k8s_cluster_hosts(self, spec_obj):
+        """If the instance will be part of a K8S cluster, it returns
+        the list of all other hosts that are already part of it,
+        if any. If there are multiple shards part of the existing k8s
+        cluster, the hosts of the dominant shard are returned.
+        """
+        if (spec_obj.flavor.name.startswith(HANA_PREFIX) or
+                utils.request_is_resize(spec_obj)):
+            return None
+
+        k8s_filter = self._k8s_instance_query_filter(spec_obj)
+
+        if not k8s_filter:
+            return None
+
+        k8s_filter['project_id'] = spec_obj.project_id
+
+        instances = InstanceList.get_by_filters(
+            nova_context.get_admin_context(), filters=k8s_filter,
+            expected_attrs=['flavor', 'metadata', 'tags'])
+
+        if not instances:
+            return None
+
+        if spec_obj.availability_zone:
+            instances = [i for i in instances
+                         if i.availability_zone == spec_obj.availability_zone]
+
+        if not instances:
+            return None
+
+        aggrs = [aggr for aggr in
+                 AggregateList.get_all(nova_context.get_admin_context())
+                 if aggr.name.startswith(self._SHARD_PREFIX)]
+
+        if not aggrs:
+            return None
+
+        shards_size = {
+            aggr: sum(1 for i in instances if i.host in aggr.hosts)
+            for aggr in aggrs
+        }
+
+        shard_aggr = next(s[0] for s in
+                          sorted(shards_size.items(),
+                                 key=lambda i: i[1], reverse=True)
+                          )
+
+        return set(i.host for i in instances if i.host in shard_aggr.hosts)
+
+    def _k8s_instance_query_filter(self, spec_obj):
+        elevated = nova_context.get_admin_context()
+        build_request = BuildRequest.get_by_instance_uuid(
+            elevated, spec_obj.instance_uuid)
+
+        # Kubernikus
+        kks_tag = next((t.tag for t in build_request.tags
+                        if t.tag.startswith(KKS_PREFIX)), None)
+        if kks_tag:
+            return {'tags': [kks_tag]}
+
+        # Gardener
+        gardener_meta = \
+            {k: v for k, v in build_request.instance.metadata.items()
+             if k.startswith(GARDENER_PREFIX)}
+        if gardener_meta:
+            return {'metadata': gardener_meta}
+
+        return None
+
+    def filter_all(self, filter_obj_list, spec_obj):
         # Only VMware
         if utils.is_non_vmware_spec(spec_obj):
-            return True
+            return filter_obj_list
+
+        k8s_hosts = self._get_k8s_cluster_hosts(spec_obj)
+
+        return [host_state for host_state in filter_obj_list
+                if self._host_passes(host_state, spec_obj, k8s_hosts)]
+
+    def _host_passes(self, host_state, spec_obj, k8s_hosts):
         host_shard_aggrs = [aggr for aggr in host_state.aggregates
                             if aggr.name.startswith(self._SHARD_PREFIX)]
 
@@ -148,18 +233,29 @@ def host_passes(self, host_state, spec_obj):
         if self._ALL_SHARDS in shards:
             LOG.debug('project enabled for all shards %(project_shards)s.',
                       {'project_shards': shards})
-            return True
         elif host_shard_names & set(shards):
             LOG.debug('%(host_state)s shard %(host_shard)s found in project '
                       'shards %(project_shards)s.',
                       {'host_state': host_state,
                        'host_shard': host_shard_names,
                        'project_shards': shards})
-            return True
         else:
             LOG.debug('%(host_state)s shard %(host_shard)s not found in '
                       'project shards %(project_shards)s.',
                       {'host_state': host_state,
                        'host_shard': host_shard_names,
                        'project_shards': shards})
             return False
+
+        if k8s_hosts:
+            return self._host_passes_k8s(host_shard_aggrs, k8s_hosts)
+
+        return True
+
+    def _host_passes_k8s(self, host_shard_aggrs, k8s_hosts):
+        """Instances of a K8S cluster must end up on the same shard.
+        The K8S cluster is identified by the metadata or tags set
+        by the orchestrator (Gardener or Kubernikus).
+        """
+        return any(set(aggr.hosts) & k8s_hosts
+                   for aggr in host_shard_aggrs)
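
For intuition, a small standalone sketch of the dominant-shard selection and the
final overlap check, using made-up shard and host names (the real code works on
Aggregate and Instance objects rather than plain dicts):

    # Hosts of each shard aggregate (hypothetical names).
    shards = {'vc-a-0': {'node001', 'node002'}, 'vc-b-0': {'node003'}}
    # Hosts already running instances of the same K8S cluster.
    cluster_instance_hosts = ['node001', 'node001', 'node003']

    # Count the cluster's instances per shard and keep the dominant shard's hosts.
    shards_size = {name: sum(1 for h in cluster_instance_hosts if h in hosts)
                   for name, hosts in shards.items()}
    dominant = max(shards_size, key=shards_size.get)          # 'vc-a-0'
    k8s_hosts = {h for h in cluster_instance_hosts if h in shards[dominant]}

    # A candidate host passes only if one of its shard aggregates overlaps k8s_hosts.
    def host_passes_k8s(host_shard_hosts, k8s_hosts):
        return bool(host_shard_hosts & k8s_hosts)

    print(host_passes_k8s(shards['vc-a-0'], k8s_hosts))  # True
    print(host_passes_k8s(shards['vc-b-0'], k8s_hosts))  # False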
