From baf40f3e3d074fb10a4f602720863021ce0e7305 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Fri, 10 May 2024 13:48:37 -0500 Subject: [PATCH 1/3] fix: Remove MaxScale It was introduced to help thwart DBDeadlocks but it did not help resolve that issue. --- docs/infrastructure-mariadb-ops.md | 28 +-- docs/infrastructure-mariadb.md | 22 --- docs/openstack-skyline.md | 2 +- .../cinder/cinder-helm-overrides.yaml | 2 +- .../glance/glance-helm-overrides.yaml | 2 +- .../gnocchi/gnocchi-helm-overrides.yaml | 2 +- helm-configs/heat/heat-helm-overrides.yaml | 2 +- .../horizon/horizon-helm-overrides.yaml | 2 +- .../keystone/keystone-helm-overrides.yaml | 2 +- .../neutron/neutron-helm-overrides.yaml | 2 +- helm-configs/nova/nova-helm-overrides.yaml | 6 +- .../octavia/octavia-helm-overrides.yaml | 2 +- .../placement/placement-helm-overrides.yaml | 2 +- .../mariadb-cluster/aio/kustomization.yaml | 7 - .../mariadb-cluster/base/kustomization.yaml | 1 - .../mariadb-cluster/base/mariadb-galera.yaml | 4 - .../base/mariadb-maxscale.yaml | 167 ------------------ kustomize/mariadb-operator/kustomization.yaml | 2 - kustomize/octavia/base/octavia-agent.yaml | 2 +- 19 files changed, 17 insertions(+), 242 deletions(-) delete mode 100644 kustomize/mariadb-cluster/base/mariadb-maxscale.yaml diff --git a/docs/infrastructure-mariadb-ops.md b/docs/infrastructure-mariadb-ops.md index afcbd598..def1e049 100644 --- a/docs/infrastructure-mariadb-ops.md +++ b/docs/infrastructure-mariadb-ops.md @@ -7,9 +7,9 @@ Tips and tricks for managing and operating the MariaDB cluster within a Genestac Sometimes an operator may need to connect to the database to troubleshoot things or otherwise make modifications to the databases in place. The following command can be used to connect to the database from a node within the cluster. ``` shell -mysql -h $(kubectl -n openstack get service maxscale-galera -o jsonpath='{.spec.clusterIP}') \ - -p$(kubectl --namespace openstack get secret maxscale -o jsonpath='{.data.password}' | base64 -d) \ - -u maxscale-galera-client +mysql -h $(kubectl -n openstack get service mariadb-galera-primary -o jsonpath='{.spec.clusterIP}') \ + -p$(kubectl --namespace openstack get secret mariadb -o jsonpath='{.data.root-password}' | base64 -d) \ + -u root ``` !!! info @@ -92,25 +92,3 @@ for more information. If you have multiple backups available, the operator is able to infer which backup to restore based on the `spec.targetRecoveryTime` field discussed in the operator documentation [here](https://github.com/mariadb-operator/mariadb-operator/blob/main/docs/BACKUP.md#target-recovery-time). - -## Interacting with the MaxScale REST API - -Refer to the API reference for MaxScale [here](https://mariadb.com/kb/en/mariadb-maxscale-23-08-rest-api/). - -!!! info "Example curl request" - - ``` shell - curl -s -u mariadb-operator:$(kubectl get secret -n openstack maxscale -o jsonpath='{.data.password}' | base64 -d) http://maxscale-galera.openstack.svc.cluster.local:8989/v1/ -D - - ``` - ``` shell - HTTP/1.1 200 OK - Connection: close - ETag: "da39a3ee5e6b4b0d3255bfef95601890afd80709" - Last-Modified: Tue, 30 Apr 2024 20:10:22 GMT - Date: Tue, 30 Apr 24 20:44:17 GMT - X-Frame-Options: Deny - X-XSS-Protection: 1 - Referrer-Policy: same-origin - Cache-Control: no-cache - Content-Length: 0 - ``` diff --git a/docs/infrastructure-mariadb.md b/docs/infrastructure-mariadb.md index 45faa2d4..f9fe2b57 100644 --- a/docs/infrastructure-mariadb.md +++ b/docs/infrastructure-mariadb.md @@ -9,12 +9,6 @@ kubectl --namespace openstack \ --type Opaque \ --from-literal=root-password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)" \ --from-literal=password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)" - -# MaxScale -kubectl --namespace openstack \ - create secret generic maxscale \ - --type Opaque \ - --from-literal=password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)" ``` ## Deploy the mariadb operator @@ -50,19 +44,3 @@ kubectl --namespace openstack apply -k /opt/genestack/kustomize/mariadb-cluster/ ``` shell kubectl --namespace openstack get mariadbs -w ``` - -## MaxScale - -Within the deployment the OpenStack services use MaxScale for loadlancing and greater reliability. While the MaxScale ecosystem is a good one, there are some limitations that you should be aware of. It is recommended that you review the [MaxScale reference documentation](https://mariadb.com/kb/en/mariadb-maxscale-2302-limitations-and-known-issues-within-mariadb-maxscale) for more about all of the known limitations and potential workarounds available. - -``` mermaid -flowchart TD - A[Connection] ---B{MaxScale} - B ---|ro| C[ES-0] - B ---|rw| D[ES-1] ---|sync| E & C - B ---|ro| E[ES-2] -``` - -### MaxScale GUI - -The MaxScale deployment has access to a built in GUI that can be exposed for further debuging and visibility into the performance of the MariDB backend. For more information on accessing the GUI please refer to the MaxScale documentation that can be found [here](https://mariadb.com/resources/blog/getting-started-with-the-mariadb-maxscale-gui). diff --git a/docs/openstack-skyline.md b/docs/openstack-skyline.md index 70f4a095..619696ac 100644 --- a/docs/openstack-skyline.md +++ b/docs/openstack-skyline.md @@ -17,7 +17,7 @@ kubectl --namespace openstack \ --from-literal=service-domain="service" \ --from-literal=service-project="service" \ --from-literal=service-project-domain="service" \ - --from-literal=db-endpoint="maxscale-galera.openstack.svc.cluster.local" \ + --from-literal=db-endpoint="mariadb-galera-primary.openstack.svc.cluster.local" \ --from-literal=db-name="skyline" \ --from-literal=db-username="skyline" \ --from-literal=db-password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)" \ diff --git a/helm-configs/cinder/cinder-helm-overrides.yaml b/helm-configs/cinder/cinder-helm-overrides.yaml index 136fa62c..9d15d034 100644 --- a/helm-configs/cinder/cinder-helm-overrides.yaml +++ b/helm-configs/cinder/cinder-helm-overrides.yaml @@ -1320,7 +1320,7 @@ endpoints: username: cinder password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /cinder diff --git a/helm-configs/glance/glance-helm-overrides.yaml b/helm-configs/glance/glance-helm-overrides.yaml index 6a5ad87c..b3b19a86 100644 --- a/helm-configs/glance/glance-helm-overrides.yaml +++ b/helm-configs/glance/glance-helm-overrides.yaml @@ -589,7 +589,7 @@ endpoints: username: glance password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /glance diff --git a/helm-configs/gnocchi/gnocchi-helm-overrides.yaml b/helm-configs/gnocchi/gnocchi-helm-overrides.yaml index 9110f4e9..db1c37bb 100644 --- a/helm-configs/gnocchi/gnocchi-helm-overrides.yaml +++ b/helm-configs/gnocchi/gnocchi-helm-overrides.yaml @@ -622,7 +622,7 @@ endpoints: username: gnocchi password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /gnocchi diff --git a/helm-configs/heat/heat-helm-overrides.yaml b/helm-configs/heat/heat-helm-overrides.yaml index 138eb76e..b27640c1 100644 --- a/helm-configs/heat/heat-helm-overrides.yaml +++ b/helm-configs/heat/heat-helm-overrides.yaml @@ -859,7 +859,7 @@ endpoints: username: heat password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /heat diff --git a/helm-configs/horizon/horizon-helm-overrides.yaml b/helm-configs/horizon/horizon-helm-overrides.yaml index 4563074f..298f8238 100644 --- a/helm-configs/horizon/horizon-helm-overrides.yaml +++ b/helm-configs/horizon/horizon-helm-overrides.yaml @@ -7242,7 +7242,7 @@ endpoints: username: horizon password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /horizon diff --git a/helm-configs/keystone/keystone-helm-overrides.yaml b/helm-configs/keystone/keystone-helm-overrides.yaml index cb6f0481..09667ed0 100644 --- a/helm-configs/keystone/keystone-helm-overrides.yaml +++ b/helm-configs/keystone/keystone-helm-overrides.yaml @@ -972,7 +972,7 @@ endpoints: username: keystone password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /keystone diff --git a/helm-configs/neutron/neutron-helm-overrides.yaml b/helm-configs/neutron/neutron-helm-overrides.yaml index 2e9dbdd0..ac8f036f 100644 --- a/helm-configs/neutron/neutron-helm-overrides.yaml +++ b/helm-configs/neutron/neutron-helm-overrides.yaml @@ -2199,7 +2199,7 @@ endpoints: username: neutron password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /neutron diff --git a/helm-configs/nova/nova-helm-overrides.yaml b/helm-configs/nova/nova-helm-overrides.yaml index 75e07d39..450a8a9d 100644 --- a/helm-configs/nova/nova-helm-overrides.yaml +++ b/helm-configs/nova/nova-helm-overrides.yaml @@ -1640,7 +1640,7 @@ endpoints: username: nova password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /nova @@ -1657,7 +1657,7 @@ endpoints: username: nova password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /nova_api @@ -1674,7 +1674,7 @@ endpoints: username: nova password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /nova_cell0 diff --git a/helm-configs/octavia/octavia-helm-overrides.yaml b/helm-configs/octavia/octavia-helm-overrides.yaml index 2865d4c9..1a30a9e2 100644 --- a/helm-configs/octavia/octavia-helm-overrides.yaml +++ b/helm-configs/octavia/octavia-helm-overrides.yaml @@ -466,7 +466,7 @@ endpoints: username: octavia password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /octavia diff --git a/helm-configs/placement/placement-helm-overrides.yaml b/helm-configs/placement/placement-helm-overrides.yaml index f6a2cc8c..9d85dd6e 100644 --- a/helm-configs/placement/placement-helm-overrides.yaml +++ b/helm-configs/placement/placement-helm-overrides.yaml @@ -206,7 +206,7 @@ endpoints: username: nova password: password hosts: - default: maxscale-galera + default: mariadb-galera-primary host_fqdn_override: default: null path: /placement diff --git a/kustomize/mariadb-cluster/aio/kustomization.yaml b/kustomize/mariadb-cluster/aio/kustomization.yaml index 071d1a5d..13943e97 100644 --- a/kustomize/mariadb-cluster/aio/kustomization.yaml +++ b/kustomize/mariadb-cluster/aio/kustomization.yaml @@ -12,10 +12,3 @@ patches: - op: replace path: /spec/replicas value: 2 - - target: - kind: MaxScale - name: maxscale-galera - patch: |- - - op: replace - path: /spec/replicas - value: 1 diff --git a/kustomize/mariadb-cluster/base/kustomization.yaml b/kustomize/mariadb-cluster/base/kustomization.yaml index c074ce60..f297b151 100644 --- a/kustomize/mariadb-cluster/base/kustomization.yaml +++ b/kustomize/mariadb-cluster/base/kustomization.yaml @@ -1,5 +1,4 @@ resources: - mariadb-configmap.yaml - - mariadb-maxscale.yaml - mariadb-galera.yaml - mariadb-backup.yaml diff --git a/kustomize/mariadb-cluster/base/mariadb-galera.yaml b/kustomize/mariadb-cluster/base/mariadb-galera.yaml index bd31f2ec..09033274 100644 --- a/kustomize/mariadb-cluster/base/mariadb-galera.yaml +++ b/kustomize/mariadb-cluster/base/mariadb-galera.yaml @@ -28,10 +28,6 @@ spec: podSecurityContext: runAsUser: 0 - # point to an existing MaxScale instance. Doing this will delegate tasks such as primary failover to MaxScale. - maxScaleRef: - name: maxscale-galera - galera: enabled: true primary: diff --git a/kustomize/mariadb-cluster/base/mariadb-maxscale.yaml b/kustomize/mariadb-cluster/base/mariadb-maxscale.yaml deleted file mode 100644 index 0b30755a..00000000 --- a/kustomize/mariadb-cluster/base/mariadb-maxscale.yaml +++ /dev/null @@ -1,167 +0,0 @@ -apiVersion: k8s.mariadb.com/v1alpha1 -kind: MaxScale -metadata: - name: maxscale-galera -spec: - replicas: 3 - - mariaDbRef: - name: mariadb-galera - namespace: openstack - - services: - - name: rw-router - router: readwritesplit - params: - transaction_replay: "true" - transaction_replay_attempts: "10" - transaction_replay_timeout: "5s" - max_slave_connections: "255" - max_replication_lag: "3s" - master_accept_reads: "true" - listener: - name: rw-listener - port: 3306 - protocol: MariaDBProtocol - params: - connection_metadata: "tx_isolation=auto" - suspend: false - suspend: false - - name: rconn-master-router - router: readconnroute - params: - router_options: "master" - max_replication_lag: "3s" - master_accept_reads: "true" - listener: - port: 3307 - - name: rconn-slave-router - router: readconnroute - params: - router_options: "slave" - max_replication_lag: "3s" - listener: - port: 3308 - - monitor: - name: mariadb-monitor - module: galeramon - interval: 2s - cooperativeMonitoring: majority_of_all - params: - disable_master_failback: "false" - available_when_donor: "false" - disable_master_role_setting: "false" - suspend: false - - livenessProbe: - failureThreshold: 3 - tcpSocket: - port: 8989 - initialDelaySeconds: 20 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 5 - - readinessProbe: - failureThreshold: 3 - tcpSocket: - port: 8989 - initialDelaySeconds: 20 - periodSeconds: 10 - successThreshold: 1 - timeoutSeconds: 5 - - admin: - port: 8989 - guiEnabled: false - - config: - params: - log_info: "true" - volumeClaimTemplate: - resources: - requests: - storage: 100Mi - accessModes: - - ReadWriteOnce - storageClassName: general - sync: - database: mysql - interval: 5s - timeout: 10s - - auth: - generate: true - adminUsername: mariadb-operator - adminPasswordSecretKeyRef: - name: maxscale - key: password - deleteDefaultAdmin: true - clientUsername: maxscale-galera-client - clientPasswordSecretKeyRef: - name: maxscale - key: password - clientMaxConnections: 1024 - serverUsername: maxscale-galera-server - serverPasswordSecretKeyRef: - name: maxscale - key: password - serverMaxConnections: 1024 - monitorUsername: maxscale-galera-monitor - monitorPasswordSecretKeyRef: - name: maxscale - key: password - monitorMaxConnections: 128 - syncUsername: maxscale-galera-sync - syncPasswordSecretKeyRef: - name: maxscale - key: password - syncMaxConnections: 128 - - securityContext: - allowPrivilegeEscalation: false - - updateStrategy: - type: RollingUpdate - - kubernetesService: - type: LoadBalancer - annotations: - metallb.universe.tf/address-pool: primary - - connection: - secretName: mxs-galera-conn - port: 3306 - - resources: - requests: - memory: 128Mi - - affinity: - enableAntiAffinity: true - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/worker - operator: In - values: - - worker - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app.kubernetes.io/instance - operator: In - values: - - maxscale-galera - topologyKey: kubernetes.io/hostname - - tolerations: - - key: "k8s.mariadb.com/ha" - operator: "Exists" - effect: "NoSchedule" - - podDisruptionBudget: - maxUnavailable: 33% diff --git a/kustomize/mariadb-operator/kustomization.yaml b/kustomize/mariadb-operator/kustomization.yaml index c23068f1..dc9c9148 100644 --- a/kustomize/mariadb-operator/kustomization.yaml +++ b/kustomize/mariadb-operator/kustomization.yaml @@ -22,8 +22,6 @@ helmCharts: - worker metrics: enabled: true - extrArgs: - - '--log-maxscale' affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: diff --git a/kustomize/octavia/base/octavia-agent.yaml b/kustomize/octavia/base/octavia-agent.yaml index 58fb12ad..94ace373 100644 --- a/kustomize/octavia/base/octavia-agent.yaml +++ b/kustomize/octavia/base/octavia-agent.yaml @@ -81,7 +81,7 @@ spec: - name: PATH value: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/ - name: DEPENDENCY_SERVICE - value: "openstack:maxscale-galera,openstack:keystone-api,openstack:rabbitmq-nodes,openstack:memcached,openstack:neutron-server" + value: "openstack:mariadb-galera-primary,openstack:keystone-api,openstack:rabbitmq-nodes,openstack:memcached,openstack:neutron-server" - name: DEPENDENCY_JOBS value: "octavia-db-sync,octavia-ks-user,octavia-ks-endpoints" - name: DEPENDENCY_DAEMONSET From b5fb6d4b6fce11a0072413050fe843cfe0fa594f Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Fri, 10 May 2024 13:54:48 -0500 Subject: [PATCH 2/3] fix: Make 5 replicas the default Many recovery issues have been observed when losing a node during chaos testing. Moving to a 5 node cluster has improved recovery success rate. --- kustomize/mariadb-cluster/base/mariadb-galera.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kustomize/mariadb-cluster/base/mariadb-galera.yaml b/kustomize/mariadb-cluster/base/mariadb-galera.yaml index 09033274..eff64d0c 100644 --- a/kustomize/mariadb-cluster/base/mariadb-galera.yaml +++ b/kustomize/mariadb-cluster/base/mariadb-galera.yaml @@ -24,7 +24,7 @@ spec: storage: 10Gi storageClassName: general - replicas: 3 + replicas: 5 podSecurityContext: runAsUser: 0 @@ -114,7 +114,7 @@ spec: effect: "NoSchedule" podDisruptionBudget: - maxUnavailable: 33% + maxUnavailable: 40% updateStrategy: type: RollingUpdate From f006c5a28ae40d8baf00f2a5c2ac714c77dfac6a Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Fri, 10 May 2024 13:59:11 -0500 Subject: [PATCH 3/3] fix: Improve galera performance These changes were proposed by one of our DBAs to help cope with DBDeadlocks. They seem to have helped reduce occurrences. --- kustomize/mariadb-cluster/base/mariadb-galera.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kustomize/mariadb-cluster/base/mariadb-galera.yaml b/kustomize/mariadb-cluster/base/mariadb-galera.yaml index eff64d0c..6479a49d 100644 --- a/kustomize/mariadb-cluster/base/mariadb-galera.yaml +++ b/kustomize/mariadb-cluster/base/mariadb-galera.yaml @@ -134,18 +134,18 @@ spec: performance_schema=ON innodb_log_buffer_size=33554432 wsrep_slave_threads=144 - wsrep_sync_wait=14 + wsrep_sync_wait=0 innodb_flush_log_at_trx_commit=0 ignore-db-dir=lost+found skip-name-resolve - innodb_buffer_pool_size=1024M + innodb_buffer_pool_size=4G innodb_doublewrite=0 innodb_file_format=Barracuda innodb_file_per_table=1 innodb_flush_method=O_DIRECT innodb_io_capacity=500 innodb_locks_unsafe_for_binlog=1 - innodb_log_file_size=128M + innodb_log_file_size=1G innodb_old_blocks_time=1000 innodb_read_io_threads=8 innodb_write_io_threads=8