From ec0ff9ebedc062bb8ea1783e0d3dfab14c859661 Mon Sep 17 00:00:00 2001
From: Luke Repko
Date: Mon, 3 Jun 2024 16:59:30 -0500
Subject: [PATCH] fix: use rabbit quorum queues in lieu of ha

We define the use of quorum queues via kustomize as the default queue
type for the named vhosts, but the oslo_messaging_rabbit config opt of
`rabbit_ha_queues: true` was set, taking precedence. We actually do not
want to use HA queues: they are deprecated and will be removed in newer
versions of RMQ (4.x, being released EOY 2024). The use of HA queues in
genestack up to this point was the result of sane but no longer ideal
defaults set by openstack-helm that were carried forth.

This explicitly disables rabbit_ha_queues and enables
rabbit_quorum_queue. Removing the related rabbit vhost is required for
this change prior to re-deploying a given openstack service. Example of
re-deploying nova when making this change; note how we remove the
queue, vhost, and user (a scripted version covering every affected
service is sketched after the diff):

```
kubectl -n openstack delete queues.rabbitmq.com nova-queue
kubectl -n openstack delete vhosts.rabbitmq.com nova-vhost
kubectl -n openstack delete users.rabbitmq.com nova
helm upgrade --install nova ./nova
```

**NOTE**: Several helm upgrades may be required due to a race condition
with the operator removing the vhost. Uninstalling first may be easier,
but do so carefully.

Other changes:

- add: `rabbit_transient_quorum_queue`, which is newly available in
  2024.1. We will want to begin using this to make transient queues
  reliable
- add: `use_queue_manager`, which is newly available in 2024.1. We will
  want to begin using this when available to de-obfuscate named queues
  in rabbit
- add: `rabbit_interval_max` to reconnect faster after a node outage
- fix: send heartbeats more frequently; clients should mark a given
  node as down about 30s more quickly (default was 60s)
- fix: set `kombu_reconnect_delay` lower to help ensure all reconnect
  code paths are traversed when a RMQ node goes down
---
 .../cinder/cinder-helm-overrides.yaml         |  34 +-
 .../glance/glance-helm-overrides.yaml         |  34 +-
 helm-configs/heat/heat-helm-overrides.yaml    |  34 +-
 .../keystone/keystone-helm-overrides.yaml     |  34 +-
 helm-configs/lab-overrides.yaml               | 519 ++++++++++++++++++
 .../neutron/neutron-helm-overrides.yaml       |  34 +-
 helm-configs/nova/nova-helm-overrides.yaml    |  34 +-
 7 files changed, 639 insertions(+), 84 deletions(-)
 create mode 100644 helm-configs/lab-overrides.yaml

diff --git a/helm-configs/cinder/cinder-helm-overrides.yaml b/helm-configs/cinder/cinder-helm-overrides.yaml
index 9d15d034..b3a59d20 100644
--- a/helm-configs/cinder/cinder-helm-overrides.yaml
+++ b/helm-configs/cinder/cinder-helm-overrides.yaml
@@ -813,7 +813,25 @@ conf:
     oslo_middleware:
       enable_proxy_headers_parsing: true
     oslo_messaging_rabbit:
-      rabbit_ha_queues: true
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve issues with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     coordination:
       backend_url: file:///var/lib/cinder/coordination
     service_user:
@@ -880,19 +898,7 @@ conf:
       format: "%(message)s"
       datefmt: "%Y-%m-%d %H:%M:%S"
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "cinder"
-        name: "ha_ttl_cinder"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-          priority: 0
-          apply-to: all
-          pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
   backends:
     # Those options will be written to backends.conf as-is.
     lvmdriver-1:
diff --git a/helm-configs/glance/glance-helm-overrides.yaml b/helm-configs/glance/glance-helm-overrides.yaml
index 8bc6b2d3..20cf7163 100644
--- a/helm-configs/glance/glance-helm-overrides.yaml
+++ b/helm-configs/glance/glance-helm-overrides.yaml
@@ -264,7 +264,25 @@ conf:
     oslo_messaging_notifications:
       driver: messagingv2
     oslo_messaging_rabbit:
-      rabbit_ha_queues: true
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve issues with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     oslo_policy:
       policy_file: /etc/glance/policy.yaml
     cors: {}
@@ -358,19 +376,7 @@ conf:
       user_domain_id =
       {{- end -}}
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "glance"
-        name: "ha_ttl_glance"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-          priority: 0
-          apply-to: all
-          pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
 
 network:
   api:
diff --git a/helm-configs/heat/heat-helm-overrides.yaml b/helm-configs/heat/heat-helm-overrides.yaml
index b27640c1..d3e48903 100644
--- a/helm-configs/heat/heat-helm-overrides.yaml
+++ b/helm-configs/heat/heat-helm-overrides.yaml
@@ -368,7 +368,25 @@ conf:
     oslo_middleware:
       enable_proxy_headers_parsing: true
     oslo_messaging_rabbit:
-      rabbit_ha_queues: True
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve issues with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     oslo_policy:
       policy_file: /etc/heat/policy.yaml
   api_audit_map:
@@ -460,19 +478,7 @@ conf:
       datefmt: "%Y-%m-%d %H:%M:%S"
 
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "heat"
-        name: "ha_ttl_heat"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-          priority: 0
-          apply-to: all
-          pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
 
 network:
   api:
diff --git a/helm-configs/keystone/keystone-helm-overrides.yaml b/helm-configs/keystone/keystone-helm-overrides.yaml
index 09667ed0..b909f93f 100644
--- a/helm-configs/keystone/keystone-helm-overrides.yaml
+++ b/helm-configs/keystone/keystone-helm-overrides.yaml
@@ -520,7 +520,25 @@ conf:
     oslo_messaging_notifications:
       driver: messagingv2
     oslo_messaging_rabbit:
-      rabbit_ha_queues: true
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve issues with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     oslo_middleware:
       enable_proxy_headers_parsing: true
     oslo_policy:
@@ -543,19 +561,7 @@ conf:
   policy: {}
   access_rules: {}
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "keystone"
-        name: "ha_ttl_keystone"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-          priority: 0
-          apply-to: all
-          pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
 rally_tests:
   run_tempest: false
   tests:
diff --git a/helm-configs/lab-overrides.yaml b/helm-configs/lab-overrides.yaml
new file mode 100644
index 00000000..1cefa27e
--- /dev/null
+++ b/helm-configs/lab-overrides.yaml
@@ -0,0 +1,519 @@
+_region: &region LAB1
+_certificate: &crt |
+  -----BEGIN CERTIFICATE-----
+  MIIE/zCCA+egAwIBAgISA9N+TziwbmSw14DqU92PwfQkMA0GCSqGSIb3DQEBCwUA
+  MDIxCzAJBgNVBAYTAlVTMRYwFAYDVQQKEw1MZXQncyBFbmNyeXB0MQswCQYDVQQD
+  EwJSMzAeFw0yNDAyMjQxNTEwNDRaFw0yNDA1MjQxNTEwNDNaMCUxIzAhBgNVBAMM
+  GiouYXBpLmxhYjEubGFyLnRyb24ucmF4LmlvMIIBIjANBgkqhkiG9w0BAQEFAAOC
+  AQ8AMIIBCgKCAQEA1KmEt/DW03F2I4P9hcghHUX0SPV/424ggvs1XUrzoIH9TvR4
+  gvkPrtDq8QUn+6/7bRw/iH/3iNgcdQSGhqoyaIiArRnwIieqr/aKPGEcw8TedOXH
+  jBg3MXIwC6hLSen2sbdJQMuewNh6DfpVzQ5APtAo0TaScXqFhGah9lBNkgx3IwCF
+  S5DUB4MSoKVFliqJkEDnjAJUIrA10nbHTTXgCaql3c/oiC/FQuoKBM5jjr4/dEcg
+  +uRJ2Lqo6MlQR47R/bNNiZkpnWUFBfHKJp2rKlKSz2z3QHKi/VWRq25Y1nkc22Uy
+  Q54P6Vv8fGcpfvrlBrl539xdlEvrjsUv7RenNQIDAQABo4ICGjCCAhYwDgYDVR0P
+  AQH/BAQDAgWgMB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAMBgNVHRMB
+  Af8EAjAAMB0GA1UdDgQWBBR6lF65uU/OhgNc1DFaCMUNrQKUNjAfBgNVHSMEGDAW
+  gBQULrMXt1hWy65QCUDmH6+dixTCxjBVBggrBgEFBQcBAQRJMEcwIQYIKwYBBQUH
+  MAGGFWh0dHA6Ly9yMy5vLmxlbmNyLm9yZzAiBggrBgEFBQcwAoYWaHR0cDovL3Iz
+  LmkubGVuY3Iub3JnLzAlBgNVHREEHjAcghoqLmFwaS5sYWIxLmxhci50cm9uLnJh
+  eC5pbzATBgNVHSAEDDAKMAgGBmeBDAECATCCAQIGCisGAQQB1nkCBAIEgfMEgfAA
+  7gB1AEiw42vapkc0D+VqAvqdMOscUgHLVt0sgdm7v6s52IRzAAABjdviGuQAAAQD
+  AEYwRAIgf8N3Yo0bgSnj7tV07iMoLyXWrKtPWUYJ9AjHBO3oITwCIBykkwNsY0zW
+  laPPEDtPp9LBaMnZQxP+XJZCwj7KFxM/AHUAdv+IPwq2+5VRwmHM9Ye6NLSkzbsp
+  3GhCCp/mZ0xaOnQAAAGN2+IcXgAABAMARjBEAiBlKxu0kaYUJODFgI+MyljPhweu
+  uEfO3Wq1qM2yxCZQkwIgbikP+yLiz+2/S6FA2FU+5FVnQo6FeEMuJwOpxHPFl+Ew
+  DQYJKoZIhvcNAQELBQADggEBAB7smiU80EBYXH8tKUbI0dRulAsKZ1avCehF9M6B
+  IcbszEAhiAB0EsmmsCXQqiAUJMTfGRrfpOkjLEUIx8zzbUJIAKKRIGdPqfsEWxIB
+  etYECHMc6WCb9ZfX8m6qpO1NNacEuJQn4uogMoJDEuezdIjnbjM8E/BUcrwQVPVD
+  9hC9fAbEeVgdK3ZOin6wJtLmmD2OAbmlr+COonC+bpWvQtF6kss/0c7UHEiJn7o8
+  HyvYB+2Pmdt7eNyQmjjJPv+3y7+GRWpg5VnfLezdB3yMYnYX6zyYVvBpjizvQZHe
+  Tng7olxtJmpurG0yoAzPhABgaUBRbDRCxydbEVHEEHYTbvw=
+  -----END CERTIFICATE-----
+  -----BEGIN CERTIFICATE-----
+  MIIFFjCCAv6gAwIBAgIRAJErCErPDBinU/bWLiWnX1owDQYJKoZIhvcNAQELBQAw
+  TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+  cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMjAwOTA0MDAwMDAw
+  WhcNMjUwOTE1MTYwMDAwWjAyMQswCQYDVQQGEwJVUzEWMBQGA1UEChMNTGV0J3Mg
+  RW5jcnlwdDELMAkGA1UEAxMCUjMwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEK
+  AoIBAQC7AhUozPaglNMPEuyNVZLD+ILxmaZ6QoinXSaqtSu5xUyxr45r+XXIo9cP
+  R5QUVTVXjJ6oojkZ9YI8QqlObvU7wy7bjcCwXPNZOOftz2nwWgsbvsCUJCWH+jdx
+  sxPnHKzhm+/b5DtFUkWWqcFTzjTIUu61ru2P3mBw4qVUq7ZtDpelQDRrK9O8Zutm
+  NHz6a4uPVymZ+DAXXbpyb/uBxa3Shlg9F8fnCbvxK/eG3MHacV3URuPMrSXBiLxg
+  Z3Vms/EY96Jc5lP/Ooi2R6X/ExjqmAl3P51T+c8B5fWmcBcUr2Ok/5mzk53cU6cG
+  /kiFHaFpriV1uxPMUgP17VGhi9sVAgMBAAGjggEIMIIBBDAOBgNVHQ8BAf8EBAMC
+  AYYwHQYDVR0lBBYwFAYIKwYBBQUHAwIGCCsGAQUFBwMBMBIGA1UdEwEB/wQIMAYB
+  Af8CAQAwHQYDVR0OBBYEFBQusxe3WFbLrlAJQOYfr52LFMLGMB8GA1UdIwQYMBaA
+  FHm0WeZ7tuXkAXOACIjIGlj26ZtuMDIGCCsGAQUFBwEBBCYwJDAiBggrBgEFBQcw
+  AoYWaHR0cDovL3gxLmkubGVuY3Iub3JnLzAnBgNVHR8EIDAeMBygGqAYhhZodHRw
+  Oi8veDEuYy5sZW5jci5vcmcvMCIGA1UdIAQbMBkwCAYGZ4EMAQIBMA0GCysGAQQB
+  gt8TAQEBMA0GCSqGSIb3DQEBCwUAA4ICAQCFyk5HPqP3hUSFvNVneLKYY611TR6W
+  PTNlclQtgaDqw+34IL9fzLdwALduO/ZelN7kIJ+m74uyA+eitRY8kc607TkC53wl
+  ikfmZW4/RvTZ8M6UK+5UzhK8jCdLuMGYL6KvzXGRSgi3yLgjewQtCPkIVz6D2QQz
+  CkcheAmCJ8MqyJu5zlzyZMjAvnnAT45tRAxekrsu94sQ4egdRCnbWSDtY7kh+BIm
+  lJNXoB1lBMEKIq4QDUOXoRgffuDghje1WrG9ML+Hbisq/yFOGwXD9RiX8F6sw6W4
+  avAuvDszue5L3sz85K+EC4Y/wFVDNvZo4TYXao6Z0f+lQKc0t8DQYzk1OXVu8rp2
+  yJMC6alLbBfODALZvYH7n7do1AZls4I9d1P4jnkDrQoxB3UqQ9hVl3LEKQ73xF1O
+  yK5GhDDX8oVfGKF5u+decIsH4YaTw7mP3GFxJSqv3+0lUFJoi5Lc5da149p90Ids
+  hCExroL1+7mryIkXPeFM5TgO9r0rvZaBFOvV2z0gp35Z0+L4WPlbuEjN/lxPFin+
+  HlUjr8gRsI3qfJOQFy/9rKIJR0Y/8Omwt/8oTWgy1mdeHmmjk7j1nYsvC9JSQ6Zv
+  MldlTTKB3zhThV1+XWYp6rjd5JW1zbVWEkLNxE7GJThEUG3szgBVGP7pSWTUTsqX
+  nLRbwHOoq7hHwg==
+  -----END CERTIFICATE-----
+
+_certificate_key: &key |
+  -----BEGIN PRIVATE KEY-----
+  MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDUqYS38NbTcXYj
+  g/2FyCEdRfRI9X/jbiCC+zVdSvOggf1O9HiC+Q+u0OrxBSf7r/ttHD+If/eI2Bx1
+  BIaGqjJoiICtGfAiJ6qv9oo8YRzDxN505ceMGDcxcjALqEtJ6faxt0lAy57A2HoN
+  +lXNDkA+0CjRNpJxeoWEZqH2UE2SDHcjAIVLkNQHgxKgpUWWKomQQOeMAlQisDXS
+  dsdNNeAJqqXdz+iIL8VC6goEzmOOvj90RyD65EnYuqjoyVBHjtH9s02JmSmdZQUF
+  8comnasqUpLPbPdAcqL9VZGrbljWeRzbZTJDng/pW/x8Zyl++uUGuXnf3F2US+uO
+  xS/tF6c1AgMBAAECggEADc6fxk2FtR61u+KS8D+pHrxu+tmtnveEdtyWf2MhZDds
+  Wdb3iBFhmkkIJ/Pcv7OSUGg4G8WrLaBSYKqVjWK5eR37tMFtXNpUc64/FWJ8up0q
+  kE5m4UqlMrmmMuuPT9I36RvsTp+pw/2KFyQ8s96L/0gGjmbLpUB+MLV1KIfYMXYt
+  UR0CRPMDVTMf3MVU6wKTSpwSrYjfKyCMTt10b0X+hVPMRENdr0VWIK7eKP7lUh/M
+  VpJcb9guIkVzz2hqp3iqnglKJ8b1P0nOn6GXDIi37m0IZK1+PtLB2auNN17tqP9/
+  N2v2dTv/0VnE0h76tWvDcRhmwNUfdu+ue+6/OEDg9QKBgQD+PUY5CT8tTwTRlQ/4
+  pA6YpWGfAlzjr5xbw5acSHZX189Yb8Whe2F9sPJMz7H8071m6s1gK7wOY7/uSvgI
+  jsoqvSsGG/qw7UHGhSPF6FTV2LQVRzNCVyGWTAWmpDHqfJYWrTCV7DOw3SMHk3Vb
+  NDbnTJh1u+ORKnlBzBnBbGEDEwKBgQDWIojR06jQCnhLR444SetuKznvGL6bGzG6
+  NMsnEGTvx1imSgYRtLlCGBMflavvTX8p23AQ75vep/axa3I3bIU8DoqNwal65jjo
+  nzwycTKaxSMalDR6HJOcdckJPEreYcJIIC0oVRoMa7p7BfS0N3wK6V4XnqeeYksl
+  uPgh2mmtlwKBgQCiOMTK9ex645WmiR90vnGV0sjUZfSRT2dMYiOCuvXQHSLvQfpv
+  jENrqPEiM9BVC2Ip/h5ng+BtqhY4XVugUkQLO0LtlgLCy9CRdDicOh+Ph8N4ZEL/
+  zIrg1McySkiTX0WBWKbMsrdU3fVRCzTxqgJyWbxaOzX8VYajhjDU62uHQwKBgQCy
+  nKG1K7COv3zxheEaEIZhIYs0ZVZvuDI1WF3v2pvm6UeHhQ0k7bLtb3lcChMdC247
+  cr86UTKZeU6rVK0zj8BehN6LOXNtgOUNG+0xQfxs3mzWebJ//KAfjJvAqVrlvXM5
+  iCVClCGjJKSqgyBGxlUgIOIMH5k+4kw+TdgjHLSzRwKBgBFF/u4M+7U7+Mu4Dj6Y
+  cQvRcS7M261kC/0uwRvtgF5Z79Lu89ZzOn6u/KNLz6tgsHXSzpk7uQLhHIpCc4pH
+  6nsu1YoXNhPFQv7vnDraXKEHWh+JBUvtZ8ISdeW6QGmms/yUP8qOQCNR3hD0ts4r
+  BYyhqTWdXlXOi3RUNqZW0MWk
+  -----END PRIVATE KEY-----
+
+images:
+  tags:
+    bootstrap: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    db_init: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    db_drop: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    ks_user: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    ks_service: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    ks_endpoints: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    glance_db_sync: "docker.io/openstackhelm/glance:2023.1-ubuntu_jammy"
+    glance_api: "docker.io/openstackhelm/glance:2023.1-ubuntu_jammy"
+    glance_metadefs_load: "docker.io/openstackhelm/glance:2023.1-ubuntu_jammy"
+    glance_storage_init: "docker.io/openstackhelm/ceph-config-helper:ubuntu_focal_18.2.0-1-20231013"
+    entrypoint: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
+    ingress: registry.k8s.io/ingress-nginx/controller:v1.8.2
+    ingress_module_init: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    ingress_routed_vip: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    error_pages: registry.k8s.io/defaultbackend:1.4
+    keepalived: docker.io/osixia/keepalived:1.4.5
+    dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
+    image_repo_sync: docker.io/library/docker:17.07.0
+    heat_db_sync: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    heat_api: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    heat_cfn: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    heat_cloudwatch: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    heat_engine: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    heat_engine_cleaner: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    heat_purge_deleted: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    test: docker.io/xrally/xrally-openstack:2.0.0
+    rabbit_init: docker.io/rabbitmq:3.7-management
+    cinder_db_sync: "docker.io/openstackhelm/cinder:2023.1-ubuntu_jammy"
+    cinder_api: "docker.io/openstackhelm/cinder:2023.1-ubuntu_jammy"
+    cinder_scheduler: "docker.io/openstackhelm/cinder:2023.1-ubuntu_jammy"
+    cinder_volume: "docker.io/openstackhelm/cinder:2023.1-ubuntu_jammy"
+    cinder_volume_usage_audit: "docker.io/openstackhelm/cinder:2023.1-ubuntu_jammy"
+    cinder_storage_init: "docker.io/openstackhelm/ceph-config-helper:ubuntu_focal_18.2.0-1-20231013"
+    cinder_backup: "docker.io/openstackhelm/cinder:2023.1-ubuntu_jammy"
+    cinder_backup_storage_init: "docker.io/openstackhelm/ceph-config-helper:ubuntu_focal_18.2.0-1-20231013"
+    keystone_api: "ghcr.io/rackerlabs/genestack/keystone-rxt:2023.1-ubuntu_jammy"
+    keystone_bootstrap: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    keystone_credential_rotate: "ghcr.io/rackerlabs/genestack/keystone-rxt:2023.1-ubuntu_jammy"
+    keystone_credential_setup: "ghcr.io/rackerlabs/genestack/keystone-rxt:2023.1-ubuntu_jammy"
+    keystone_db_sync: "ghcr.io/rackerlabs/genestack/keystone-rxt:2023.1-ubuntu_jammy"
+    keystone_domain_manage: "ghcr.io/rackerlabs/genestack/keystone-rxt:2023.1-ubuntu_jammy"
+    keystone_fernet_rotate: "ghcr.io/rackerlabs/genestack/keystone-rxt:2023.1-ubuntu_jammy"
+    keystone_fernet_setup: "ghcr.io/rackerlabs/genestack/keystone-rxt:2023.1-ubuntu_jammy"
+    keystone_credential_cleanup: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    libvirt: docker.io/openstackhelm/libvirt:2023.2-ubuntu_jammy # We want to use jammy. 2023.2 is the latest version that supports jammy.
+    libvirt_exporter: vexxhost/libvirtd-exporter:latest
+    ceph_config_helper: "docker.io/openstackhelm/ceph-config-helper:ubuntu_focal_18.2.0-1-20231013"
+    kubectl: docker.io/bitnami/kubectl:latest
+    neutron_db_sync: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_dhcp: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_l3: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_l2gw: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_linuxbridge_agent: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_metadata: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_ovn_metadata: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_openvswitch_agent: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_server: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_rpc_server: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_bagpipe_bgp: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_netns_cleanup_cron: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    purge_test: docker.io/openstackhelm/ospurge:latest
+    netoffload: ghcr.io/vexxhost/netoffload:v1.0.1
+    neutron_sriov_agent: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_sriov_agent_init: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_bgp_dragent: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    neutron_ironic_agent: "docker.io/openstackhelm/neutron:2023.1-ubuntu_jammy"
+    nova_api: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_cell_setup: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_cell_setup_init: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    nova_compute: "ghcr.io/rackerlabs/genestack/nova-efi:2023.1-ubuntu_jammy"
+    nova_compute_ssh: "ghcr.io/rackerlabs/genestack/nova-efi:2023.1-ubuntu_jammy"
+    nova_conductor: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_db_sync: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_novncproxy: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_novncproxy_assets: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_scheduler: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_spiceproxy: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_spiceproxy_assets: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_service_cleaner: "docker.io/openstackhelm/ceph-config-helper:latest-ubuntu_focal"
+    nova_archive_deleted_rows: "docker.io/openstackhelm/nova:2023.1-ubuntu_jammy"
+    nova_compute_ironic: "docker.io/kolla/nova-compute-ironic:2023.2-ubuntu-jammy"
+    nova_wait_for_computes_init: gcr.io/google_containers/hyperkube-amd64:v1.11.6
+    placement: "docker.io/openstackhelm/placement:2023.1-ubuntu_jammy"
+    placement_db_sync: "docker.io/openstackhelm/placement:2023.1-ubuntu_jammy"
+    horizon_db_sync: docker.io/openstackhelm/horizon:2023.1-ubuntu_jammy
+    horizon: docker.io/openstackhelm/horizon:2023.1-ubuntu_jammy
+    octavia_db_sync: ghcr.io/rackerlabs/genestack/octavia-ovn:master-ubuntu_jammy
+    octavia_api: ghcr.io/rackerlabs/genestack/octavia-ovn:master-ubuntu_jammy
+    octavia_worker: ghcr.io/rackerlabs/genestack/octavia-ovn:master-ubuntu_jammy
+    octavia_housekeeping: ghcr.io/rackerlabs/genestack/octavia-ovn:master-ubuntu_jammy
+    octavia_health_manager: ghcr.io/rackerlabs/genestack/octavia-ovn:master-ubuntu_jammy
+    octavia_health_manager_init: "docker.io/openstackhelm/heat:2023.1-ubuntu_jammy"
+    openvswitch_vswitchd: docker.io/kolla/centos-source-openvswitch-vswitchd:rocky
+
+pod:
+  resources:
+    enabled: false
+
+endpoints:
+  compute:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: nova-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: nova.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  compute_metadata:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: metadata-tls-metadata
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: metadata.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  compute_novnc_proxy:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: nova-novncproxy-tls-proxy
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: novnc.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  cloudformation:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: heat-tls-cfn
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: cloudformation.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  cloudwatch:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: heat-tls-cloudwatch
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: cloudwatch.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  dashboard:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: horizon-tls-web
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: horizon.api.lab1.lar.tron.rax.io
+    port:
+      web:
+        public: 443
+    scheme:
+      public: https
+  metric:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: gnocchi-tls-web
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: gnocchi.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  identity:
+    auth:
+      admin:
+        region_name: *region
+      ceilometer:
+        region_name: *region
+      cinder:
+        region_name: *region
+      designate:
+        region_name: *region
+      glance:
+        region_name: *region
+      gnocchi:
+        region_name: *region
+      heat:
+        region_name: *region
+      heat_trustee:
+        region_name: *region
+      heat_stack_user:
+        region_name: *region
+      ironic:
+        region_name: *region
+      neutron:
+        region_name: *region
+      nova:
+        region_name: *region
+      placement:
+        region_name: *region
+      test:
+        region_name: *region
+    hosts:
+      default: keystone
+      internal: keystone-api
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: keystone-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: keystone.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        admin: 80
+        default: 80
+        internal: 5000
+        public: 443
+    scheme:
+      public: https
+  ingress:
+    port:
+      ingress:
+        public: 443
+  image:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: glance-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: glance.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  load_balancer:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: octavia-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: octavia.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  network:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: neutron-tls-server
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: neutron.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  orchestration:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: heat-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: heat.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  placement:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: placement-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: placement.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  volume:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: cinder-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: cinder.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  volumev2:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: cinder-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: cinder.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+  volumev3:
+    host_fqdn_override:
+      public:
+        tls:
+          crt: *crt
+          key: *key
+          secretName: cinder-tls-api
+          issuerRef:
+            name: ca-issuer
+            kind: ClusterIssuer
+        host: cinder.api.lab1.lar.tron.rax.io
+    port:
+      api:
+        public: 443
+    scheme:
+      public: https
+
+manifests:
+  secret_ingress_tls: true
+
+bootstrap:
+  structured:
+    images: {}
+
+network:
+  api:
+    ingress:
+      annotations:
+        nginx.ingress.kubernetes.io/ssl-redirect: "true"
+
+volume:
+  class_name: general-multi-attach
+  size: 100Gi
+  # Used for PostgreSQL backup
+  backup:
+    size: 10Gi
+
+# Used for PostgreSQL storage
+storage:
+  pvc:
+    size: 10Gi
+  archive_pvc:
+    size: 10Gi
+
+conf:
+  postgresql:
+    shared_buffers: '4GB'
diff --git a/helm-configs/neutron/neutron-helm-overrides.yaml b/helm-configs/neutron/neutron-helm-overrides.yaml
index ac8f036f..4c3ef005 100644
--- a/helm-configs/neutron/neutron-helm-overrides.yaml
+++ b/helm-configs/neutron/neutron-helm-overrides.yaml
@@ -1787,7 +1787,25 @@ conf:
     oslo_messaging_notifications:
       driver: messagingv2
     oslo_messaging_rabbit:
-      rabbit_ha_queues: true
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve issues with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     oslo_middleware:
       enable_proxy_headers_parsing: true
     oslo_policy:
@@ -2013,19 +2031,7 @@ conf:
   bgp_dragent: {}
 
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "neutron"
-        name: "ha_ttl_neutron"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-          priority: 0
-          apply-to: all
-          pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
   ## NOTE: "besteffort" is meant for dev env with mixed compute type only.
   ## This helps prevent sriov init script from failing due to mis-matched NIC
   ## For prod env, target NIC should match and init script should fail otherwise.
diff --git a/helm-configs/nova/nova-helm-overrides.yaml b/helm-configs/nova/nova-helm-overrides.yaml
index 1b157e28..f0679e0d 100644
--- a/helm-configs/nova/nova-helm-overrides.yaml
+++ b/helm-configs/nova/nova-helm-overrides.yaml
@@ -1458,7 +1458,25 @@ conf:
     oslo_messaging_notifications:
       driver: messagingv2
     oslo_messaging_rabbit:
-      rabbit_ha_queues: true
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve issues with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     os_vif_ovs:
       ovsdb_connection: tcp:127.0.0.1:6640
     placement:
@@ -1537,19 +1555,7 @@ conf:
       format: "%(message)s"
      datefmt: "%Y-%m-%d %H:%M:%S"
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "nova"
-        name: "ha_ttl_nova"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-          priority: 0
-          apply-to: all
-          pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
 enable_iscsi: false
 archive_deleted_rows:
   purge_deleted_rows: false
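
For operators rolling this out across the fleet, the per-service cleanup from
the commit message can be scripted. A minimal sketch, assuming each of the six
affected services follows the same nova-style naming for its RabbitMQ CRs
(`<svc>-queue`, `<svc>-vhost`, and user `<svc>`) and that each chart lives in a
`./<svc>` directory as in the nova example:

```shell
# Delete the per-service RabbitMQ resources so they are re-created with the
# quorum queue type, then re-deploy the chart. Verify CR names first with:
#   kubectl -n openstack get queues.rabbitmq.com
for svc in cinder glance heat keystone neutron nova; do
  kubectl -n openstack delete queues.rabbitmq.com "${svc}-queue"
  kubectl -n openstack delete vhosts.rabbitmq.com "${svc}-vhost"
  kubectl -n openstack delete users.rabbitmq.com "${svc}"
  # May need to be re-run if the operator is still tearing down the vhost
  # (see the race-condition NOTE in the commit message)
  helm upgrade --install "${svc}" "./${svc}"
done
```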
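Once a service is back up, it is worth confirming its queues were actually
declared as quorum queues rather than classic mirrored ones. A spot-check
sketch, assuming the RabbitMQ cluster operator's default pod naming
(`rabbitmq-server-0`) and the nova vhost from the example above:

```shell
# Quorum queues report type "quorum"; old mirrored queues report "classic".
kubectl -n openstack exec rabbitmq-server-0 -- \
  rabbitmqctl list_queues -p nova-vhost name type
```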
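The charts render the `oslo_messaging_rabbit` overrides above into each
service's INI config, so the rendered file can also be checked directly. A
sketch using cinder as the example (the deployment and config path here are
illustrative and may differ per chart):

```shell
# Expect rabbit_ha_queues = false and rabbit_quorum_queue = true
kubectl -n openstack exec deploy/cinder-api -- \
  grep -E 'rabbit_(ha_queues|quorum_queue)' /etc/cinder/cinder.conf
```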