rackerlabs · cloudnull · Jun 4, 2024 · Jun 3, 2024
diff --git a/helm-configs/cinder/cinder-helm-overrides.yaml b/helm-configs/cinder/cinder-helm-overrides.yaml
@@ -813,7 +813,25 @@ conf:
     oslo_middleware:
       enable_proxy_headers_parsing: true
     oslo_messaging_rabbit:
-      rabbit_ha_queues: true
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve the issue with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     coordination:
       backend_url: file:///var/lib/cinder/coordination
     service_user:
@@ -880,19 +898,7 @@ conf:
       format: "%(message)s"
       datefmt: "%Y-%m-%d %H:%M:%S"
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "cinder"
-        name: "ha_ttl_cinder"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-        priority: 0
-        apply-to: all
-        pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
   backends:
     # Those options will be written to backends.conf as-is.
     lvmdriver-1:

diff --git a/helm-configs/glance/glance-helm-overrides.yaml b/helm-configs/glance/glance-helm-overrides.yaml
@@ -264,7 +264,25 @@ conf:
     oslo_messaging_notifications:
       driver: messagingv2
     oslo_messaging_rabbit:
-      rabbit_ha_queues: true
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve the issue with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     oslo_policy:
       policy_file: /etc/glance/policy.yaml
     cors: {}
@@ -358,19 +376,7 @@ conf:
     user_domain_id =
     {{- end -}}
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "glance"
-        name: "ha_ttl_glance"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-        priority: 0
-        apply-to: all
-        pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
 
 network:
   api:

diff --git a/helm-configs/heat/heat-helm-overrides.yaml b/helm-configs/heat/heat-helm-overrides.yaml
@@ -368,7 +368,25 @@ conf:
     oslo_middleware:
       enable_proxy_headers_parsing: true
     oslo_messaging_rabbit:
-      rabbit_ha_queues: True
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve the issue with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     oslo_policy:
       policy_file: /etc/heat/policy.yaml
   api_audit_map:
@@ -460,19 +478,7 @@ conf:
       datefmt: "%Y-%m-%d %H:%M:%S"
 
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "heat"
-        name: "ha_ttl_heat"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-        priority: 0
-        apply-to: all
-        pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
 
 network:
   api:

diff --git a/helm-configs/keystone/keystone-helm-overrides.yaml b/helm-configs/keystone/keystone-helm-overrides.yaml
@@ -520,7 +520,25 @@ conf:
     oslo_messaging_notifications:
       driver: messagingv2
     oslo_messaging_rabbit:
-      rabbit_ha_queues: true
+      # We define use of quorum queues via kustomize but this was enabling HA queues instead
+      # ha_queues are deprecated, explicitly set to false and set quorum_queue true
+      rabbit_ha_queues: false
+      rabbit_quorum_queue: true
+      # TODO: Not available until 2024.1, but once it is, we want to enable these!
+      # new feature ref; https://docs.openstack.org/releasenotes/oslo.messaging/2024.1.html
+      # rabbit_transient_quorum_queue: true
+      # use_queue_manager: true
+      # Reconnect after a node outage more quickly
+      rabbit_interval_max: 10
+      # Send more frequent heartbeats and fail unhealthy nodes faster
+      # heartbeat_timeout / heartbeat_rate / 2.0 = 30 / 3 / 2.0 = 5
+      # https://opendev.org/openstack/oslo.messaging/commit/36fb5bceabe08a982ebd52e4a8f005cd26fdf6b8
+      heartbeat_rate: 3
+      heartbeat_timeout_threshold: 30
+      # Setting lower kombu_reconnect_delay should resolve the issue with HA failing when one node is down
+      # https://lists.openstack.org/pipermail/openstack-discuss/2023-April/033314.html
+      # https://review.opendev.org/c/openstack/oslo.messaging/+/866617
+      kombu_reconnect_delay: 0.5
     oslo_middleware:
       enable_proxy_headers_parsing: true
     oslo_policy:
@@ -543,19 +561,7 @@ conf:
   policy: {}
   access_rules: {}
   rabbitmq:
-    # NOTE(rk760n): adding rmq policy to mirror messages from notification queues and set expiration time for the ones
-    policies:
-      - vhost: "keystone"
-        name: "ha_ttl_keystone"
-        definition:
-          # mirror messges to other nodes in rmq cluster
-          ha-mode: "all"
-          ha-sync-mode: "automatic"
-          # 70s
-          message-ttl: 70000
-        priority: 0
-        apply-to: all
-        pattern: '^(?!(amq\.|reply_)).*'
+    policies: []
   rally_tests:
     run_tempest: false
     tests: