# values.yaml (forked from azimuth-cloud/capi-helm-charts)
---
# The name of an existing secret containing a clouds.yaml and optional cacert
cloudCredentialsSecretName:
# OR
# Content for the clouds.yaml file
# Having this as a top-level item allows a clouds.yaml file from OpenStack to be used as a values file
clouds:
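# e.g. (illustrative values only - the auth properties depend on your cloud, and
# application credentials are just one way to authenticate):
#   openstack:
#     auth:
#       auth_url: https://keystone.example.com:5000
#       application_credential_id: "<application credential ID>"
#       application_credential_secret: "<application credential secret>"
#     region_name: RegionOne
#     auth_type: v3applicationcredential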
# The PEM-encoded CA certificate for the specified cloud
cloudCACert:
# The name of the cloud to use from the specified clouds.yaml
cloudName: openstack
# The Kubernetes version of the cluster
# This should match the version of kubelet and kubeadm in the image
kubernetesVersion:
# The name of the image to use for cluster machines
machineImage:
# OR
# The ID of the image to use for cluster machines
machineImageId:
# The name of the SSH key to inject into cluster machines
machineSSHKeyName:
# The prefix used for project labels and annotations
projectPrefix: capi.stackhpc.com
# Any extra annotations to add to the cluster
clusterAnnotations: {}
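# e.g. (hypothetical annotation):
#   my.company.org/annotation: value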
# Values for the Kubernetes cluster network
kubeNetwork:
  # By default, use the private network range 172.16.0.0/12 for the cluster network
  # We split it into two equally-sized blocks for pods and services
  # This gives ~500,000 addresses in each block
  pods:
    cidrBlocks:
      - 172.16.0.0/13
  services:
    cidrBlocks:
      - 172.24.0.0/13
  serviceDomain: cluster.local
# Settings for the OpenStack networking for the cluster
clusterNetworking:
  # Custom nameservers to use for the hosts
  dnsNameservers:
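  # e.g. (placeholder public resolvers - use resolvers appropriate for your cloud):
  #   - 8.8.8.8
  #   - 1.1.1.1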
  # Indicates if security groups should be managed by the cluster
  manageSecurityGroups: true
  # Indicates if the managed security groups should allow all in-cluster traffic
  # The default CNI installed by the addons is Cilium, so this is true by default
  allowAllInClusterTraffic: true
  # The ID of the external network to use
  # If not given, the external network will be detected
  externalNetworkId:
  # Details of the internal network to use
  internalNetwork:
    # Filter to find an existing network for the cluster internal network
    # See Cluster API documentation for details
    networkFilter:
      # id: e63ca1a0-f69d-4fbf-b306-310857b1afe5
      # name: tenant-internal-net
    # Filter to find an existing subnet for the cluster internal network
    # See Cluster API documentation for details
    subnetFilter:
      # id: e63ca1a0-f69d-4fbf-b306-31089s1j38d8
      # name: tenant-internal-subnet
    # The CIDR to use if creating a cluster network
    # This is only used if neither networkFilter nor subnetFilter is given
    nodeCidr: 192.168.3.0/24
# Settings for registry mirrors
# When a mirror is set, it is tried first for image pulls, falling back to the
# upstream registry if the pull from the mirror fails
# By default, use images mirrored to quay.io when possible
registryMirrors:
  # docker.io:
  #   upstream: https://registry-1.docker.io
  #   mirrors:
  #     - url: https://registry.my.domain/v2/dockerhub-public
  #       capabilities: ["pull", "resolve"]
  docker.io:
    - https://quay.io/v2/azimuth/docker.io
  ghcr.io:
    - https://quay.io/v2/azimuth/ghcr.io
  nvcr.io:
    - https://quay.io/v2/azimuth/nvcr.io
  quay.io:
    - https://quay.io/v2/azimuth/quay.io
  registry.k8s.io:
    - https://quay.io/v2/azimuth/registry.k8s.io
# A map of trusted CAs to add to the system trust on cluster nodes
trustedCAs: {}
# custom-ca: |
#   -----BEGIN CERTIFICATE-----
#   ...certificate data...
#   -----END CERTIFICATE-----
# List of additional packages to install on cluster nodes
additionalPackages: []
# - nfs-common
# Settings for etcd
etcd:
  # The data directory to use for etcd
  # When a block device is specified, it is mounted at the parent directory, e.g. /var/lib/etcd
  # This is to avoid etcd complaining about the lost+found directory
  dataDir: /var/lib/etcd
  # Any extra command line arguments to pass to etcd
  extraArgs:
    # Set timeouts so that etcd tolerates 'slowness' (network + disks) better
    # This is at the expense of taking longer to detect a leader failure
    # https://etcd.io/docs/v3.5/tuning/#time-parameters
    heartbeat-interval: "500"  # defaults to 100ms in etcd 3.5
    election-timeout: "5000"  # defaults to 1000ms in etcd 3.5
    # Set a slightly larger space quota than the default (default is 2GB)
    quota-backend-bytes: "4294967296"
    # Listen for metrics on 0.0.0.0 so Prometheus can collect them
    listen-metrics-urls: http://0.0.0.0:2381
  # The block device configuration for etcd
  # If not specified, the root device is used
  blockDevice:
    # # The size of the block device
    # size: 20
    # # The type of the block device
    # # If set to "Volume", which is the default, then a Cinder volume is used to back the block device
    # # If set to "Local", local ephemeral storage is used to back the block device
    # type: Volume
    # # The volume type to use
    # # If not specified, the default volume type is used
    # volumeType:
    # # The volume availability zone to use
    # # If not specified, the machine availability zone is used
    # availabilityZone:
# Settings for the Kubernetes API server
apiServer:
  # Indicates whether to deploy a load balancer for the API server
  enableLoadBalancer: true
  # Indicates which load balancer provider to use (the default is amphora)
  loadBalancerProvider:
  # Restrict load balancer access to selected IPs
  # allowedCidrs:
  #   - 192.168.0.0/16    # needed for the cluster to init
  #   - 10.10.0.0/16      # IPv4 internal network
  #   - 123.123.123.123   # some other IPs
  # Indicates whether to associate a floating IP with the API server
  associateFloatingIP: true
  # The specific floating IP to associate with the API server
  # If not given, a new IP will be allocated if required
  floatingIP:
  # The specific fixed IP to associate with the API server
  # If enableLoadBalancer is true, this will become the VIP of the load balancer
  # If enableLoadBalancer and associateFloatingIP are both false, this should be
  # the IP of a pre-allocated port to be used as the VIP
  fixedIP:
  # The port to use for the API server
  port: 6443
# The OS distro used for cluster machines, e.g. ubuntu or flatcar
osDistro: ubuntu
# The API server authentication/authorization webhook. Set this to integrate it
# into the KubeadmControlPlane and KubeadmConfigTemplate
# Possible values: k8s-keystone-auth
# authWebhook: k8s-keystone-auth
# Settings for the control plane
controlPlane:
  # The failure domains to use for control plane nodes
  # If given, should be a list of availability zones
  # Only used when omitFailureDomain = false
  failureDomains:
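  # e.g. (hypothetical availability zone names):
  #   - az-1
  #   - az-2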
  # Indicates whether the failure domain should be omitted from control plane nodes
  omitFailureDomain: true
  # The number of control plane machines to deploy
  # For high-availability, this should be greater than 1
  # For etcd quorum, it should be odd - usually 3, or 5 for very large clusters
  machineCount: 3
  # The Kubernetes version for the control plane
  kubernetesVersion:
  # The name of the image to use for control plane machines
  machineImage:
  # The ID of the image to use for control plane machines
  machineImageId:
  # The flavor to use for control plane machines
  machineFlavor:
  # The ports for control plane nodes
  # If no ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    ports:
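    # An illustrative port spec (hypothetical network/subnet names - see the
    # network filters documentation linked above for the full schema):
    #   - network:
    #       name: my-network
    #     fixedIPs:
    #       - subnet:
    #           name: my-subnet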
  # The root volume spec for control plane machines
  machineRootVolume:
    # The size of the disk to use
    # If not given, the ephemeral root disk from the flavor is used
    diskSize:
    # The volume type to use
    # If not specified, the default volume type is used
    # volumeType:
    # The volume availability zone to use
    # If not specified, the machine availability zone is used
    # availabilityZone:
  # The ID of the server group to use for control plane machines
  serverGroupId:
  # Labels to apply to the node objects in Kubernetes that correspond to control plane machines
  nodeLabels:
    # my.company.org/label: value
  # Additional block devices for control plane machines
  additionalBlockDevices: {}
    # # The key is the name for the block device in the context of the machine
    # # It is also used to tag the block device, so that the volume can be identified in instance metadata
    # scratch:
    #   # The size of the block device
    #   size: 20
    #   # The type of the block device
    #   # If set to "Volume", which is the default, then a Cinder volume is used to back the block device
    #   # If set to "Local", local ephemeral storage is used to back the block device
    #   # In both cases, the lifecycle of the device is the same as the machine
    #   type: Volume
    #   # The volume type to use
    #   # If not specified, the default volume type is used
    #   volumeType:
    #   # The volume availability zone to use
    #   # If not specified, the machine availability zone is used
    #   availabilityZone:
  # Indicates whether control plane machines should use config drive or not
  machineConfigDrive: false
  # The time to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m0s
  # The time to wait for a node to detach all volumes before it can be removed
  nodeVolumeDetachTimeout: 5m0s
  # The time to wait for the node resource to be deleted in Kubernetes when a
  # machine is marked for deletion
  nodeDeletionTimeout: 5m0s
  # The remediation strategy for the control plane nodes
  # We set these so that we don't keep remediating an unhealthy control plane forever
  remediationStrategy:
    # The maximum number of times that a remediation will be retried
    maxRetry: 3
    # The amount of time that a node created as a remediation has to become healthy
    # before the remediation is retried
    retryPeriod: 20m
    # The length of time that a node must be healthy before any future problems are
    # considered unrelated to the previous ones (i.e. the retry count is reset)
    minHealthyPeriod: 1h
  # The rollout strategy to use for the control plane nodes
  # By default, the strategy allows the control plane to begin provisioning new nodes
  # without first tearing down old ones
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # For the control plane, this can only be 0 or 1
      maxSurge: 1
  # The kubeadm config specification for the control plane
  # By default, this uses a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    initConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
    # As well as enabling an external cloud provider, we set the bind addresses for the
    # controller-manager, scheduler and kube-proxy to 0.0.0.0 so that Prometheus can reach
    # them to collect metrics
    clusterConfiguration:
      apiServer:
        extraArgs:
          cloud-provider: external
      controllerManager:
        extraArgs:
          cloud-provider: external
          bind-address: 0.0.0.0
      scheduler:
        extraArgs:
          bind-address: 0.0.0.0
    joinConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
    kubeProxyConfiguration:
      metricsBindAddress: 0.0.0.0:10249
  # The machine health check for auto-healing of the control plane
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  healthCheck:
    # Indicates if the machine health check should be enabled
    enabled: true
    # The spec for the health check
    spec:
      # By default, don't remediate control plane nodes when more than one is unhealthy
      maxUnhealthy: 1
      # If a node takes longer than 30 mins to start up, remediate it
      nodeStartupTimeout: 30m0s
      # By default, consider a control plane node that has not been Ready
      # for more than 5 mins unhealthy
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 5m0s
        - type: Ready
          status: "False"
          timeout: 5m0s
# Defaults for node groups
# Each of these can be overridden in the specification for an individual node group
nodeGroupDefaults:
  # Indicates if the node group should be autoscaled
  autoscale: false
  # The failure domain for the node group
  failureDomain:
  # The flavor to use for machines in the node group
  machineFlavor:
  # The name of the image to use for node group machines
  machineImage:
  # The ID of the image to use for node group machines
  machineImageId:
  # The Kubernetes version for node group machines
  kubernetesVersion:
  # The default ports for worker nodes
  # If no ports are given, the cluster internal network is used
  # See https://github.com/kubernetes-sigs/cluster-api-provider-openstack/blob/master/docs/book/src/clusteropenstack/configuration.md#network-filters
  machineNetworking:
    ports:
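    # (the port spec takes the same form as the illustrative example under
    # controlPlane.machineNetworking.ports above)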
  # The root volume spec for machines in the node group
  machineRootVolume:
    # The size of the disk to use
    # If not given, the ephemeral root disk from the flavor is used
    diskSize:
    # The volume type to use
    # If not specified, the default volume type is used
    # volumeType:
    # The volume availability zone to use
    # If not specified, the machine availability zone is used
    # availabilityZone:
  # The ID of the server group to use for machines in the node group
  serverGroupId:
  # Labels to apply to the node objects in Kubernetes that correspond to machines in the node group
  # By default, nodes get the label "capi.stackhpc.com/node-group=<node group name>"
  nodeLabels:
    # my.company.org/label: value
  # Additional block devices for node group machines
  # Options are the same as for controlPlane.additionalBlockDevices above
  additionalBlockDevices: {}
  # Indicates whether node group machines should use config drive or not
  machineConfigDrive: false
  # The time to wait for a node to finish draining before it can be removed
  nodeDrainTimeout: 5m0s
  # The time to wait for a node to detach all volumes before it can be removed
  nodeVolumeDetachTimeout: 5m0s
  # The time to wait for the node resource to be deleted in Kubernetes when a
  # machine is marked for deletion
  nodeDeletionTimeout: 5m0s
  # The rollout strategy to use for the node group
  # By default, this is set to do a rolling update within the existing resource envelope
  # of the node group, even if that means the node group temporarily has zero nodes
  rolloutStrategy:
    type: RollingUpdate
    rollingUpdate:
      # The maximum number of node group machines that can be unavailable during the update
      # Can be an absolute number or a percentage of the desired count
      maxUnavailable: 1
      # The maximum number of machines that can be scheduled above the desired count for
      # the group during an update
      # Can be an absolute number or a percentage of the desired count
      maxSurge: 0
      # One of Random, Newest, Oldest
      deletePolicy: Random
  # The default kubeadm config specification for worker nodes
  # This will be merged with any configuration given for specific node groups
  # By default, this uses a simple configuration that enables the external cloud provider
  kubeadmConfigSpec:
    joinConfiguration:
      nodeRegistration:
        name: '{{ local_hostname }}'
        kubeletExtraArgs:
          cloud-provider: external
  # The default machine health check for worker nodes
  # See https://cluster-api.sigs.k8s.io/tasks/healthcheck.html
  # Note that maxUnhealthy and unhealthyRange are evaluated per node group
  healthCheck:
    # Indicates if the machine health check should be enabled
    enabled: true
    # The spec for the health check
    spec:
      # We have a control plane feature gate enabled which blocks all worker node remediation
      # if the control plane is unhealthy, so it should be safe to set the worker remediation
      # threshold to 100% without the risk of runaway remediations
      maxUnhealthy: 100%
      # If a node takes longer than 30 mins to start up, remediate it
      nodeStartupTimeout: 30m0s
      # By default, consider a worker node that has not been Ready for
      # more than 5 mins unhealthy
      unhealthyConditions:
        - type: Ready
          status: Unknown
          timeout: 5m0s
        - type: Ready
          status: "False"
          timeout: 5m0s
# The worker node groups for the cluster
nodeGroups:
  - # The name of the node group
    name: md-0
    # The number of machines in the node group if autoscale is false
    machineCount: 3
    # The minimum and maximum number of machines in the node group if autoscale is true
    # machineCountMin: 3
    # machineCountMax: 3
# Configuration for the cluster autoscaler
autoscaler:
  # The image to use for the autoscaler component
  image:
    repository: registry.k8s.io/autoscaling/cluster-autoscaler
    pullPolicy: IfNotPresent
    tag: v1.29.0
  imagePullSecrets: []
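  # e.g. (hypothetical secret name, standard Kubernetes format):
  # imagePullSecrets:
  #   - name: my-registry-credentials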
  # Any extra args for the autoscaler
  extraArgs:
    # Make sure logs go to stderr
    logtostderr: true
    stderrthreshold: info
    # Output at a decent log level
    v: 4
    # Cordon nodes before terminating them so new pods are not scheduled there
    cordon-node-before-terminating: "true"
    # When scaling up, choose the node group that will result in the least idle CPU after scale-up
    expander: least-waste,random
    # Allow pods in kube-system to prevent a node from being deleted
    skip-nodes-with-system-pods: "true"
    # Allow pods with emptyDirs to be evicted
    skip-nodes-with-local-storage: "false"
    # Allow pods with custom controllers to be evicted
    skip-nodes-with-custom-controller-pods: "false"
  # Pod-level security context
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 1001
  # Container-level security context
  securityContext:
    allowPrivilegeEscalation: false
    capabilities:
      drop: [ALL]
    readOnlyRootFilesystem: true
  # Resource requests and limits for pods
  resources: {}
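  # e.g. (illustrative values in the standard Kubernetes resources format):
  # resources:
  #   requests:
  #     cpu: 100m
  #     memory: 300Mi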
  # Node selector for pods
  nodeSelector: {}
  # Tolerations for pods
  tolerations: []
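  # e.g. (hypothetical taint key/value, standard Kubernetes toleration format):
  # tolerations:
  #   - key: my.company.org/dedicated
  #     operator: Equal
  #     value: autoscaler
  #     effect: NoSchedule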
  # Topology spread constraints for pods
  topologySpreadConstraints: []
  # Affinity rules for pods
  affinity: {}
# Configuration for cluster addons
addons:
  # Indicates if cluster addons should be deployed
  enabled: true
  # Enable the OpenStack integrations by default
  openstack:
    enabled: true