From ba0a457f515882a5bad179a53dd88accce9c2a4a Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Thu, 7 Mar 2024 18:00:46 -0600 Subject: [PATCH 01/20] feat: introduce ceilometer helm chart overrides This begins to add the overrides for the Ceilometer helm chart. Ceilometer provides metering, monitoring, and alarming capabilities in Openstack for billing, performance, optimization, and capacity planning purposes. --- .../ceilometer/ceilometer-helm-overrides.yaml | 2019 +++++++++++++++++ 1 file changed, 2019 insertions(+) create mode 100644 helm-configs/ceilometer/ceilometer-helm-overrides.yaml diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml new file mode 100644 index 00000000..638c5484 --- /dev/null +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -0,0 +1,2019 @@ +--- +release_group: null + +labels: + api: + node_selector_key: openstack-control-plane + node_selector_value: enabled + compute: + node_selector_key: openstack-compute-node + node_selector_value: enabled + central: + node_selector_key: openstack-control-plane + node_selector_value: enabled + ipmi: + node_selector_key: openstack-node + node_selector_value: enabled + collector: + node_selector_key: openstack-control-plane + node_selector_value: enabled + notification: + node_selector_key: openstack-control-plane + node_selector_value: enabled + job: + node_selector_key: openstack-control-plane + node_selector_value: enabled + test: + node_selector_key: openstack-control-plane + node_selector_value: enabled + +images: + tags: + test: docker.io/xrally/xrally-openstack:2.0.0 + rabbit_init: docker.io/rabbitmq:3.7-management + ks_user: docker.io/openstackhelm/heat:wallaby-ubuntu_focal + ks_service: docker.io/openstackhelm/heat:wallaby-ubuntu_focal + ks_endpoints: docker.io/openstackhelm/heat:wallaby-ubuntu_focal + ceilometer_api: docker.io/kolla/ubuntu-source-ceilometer-api:wallaby + ceilometer_central: docker.io/kolla/ubuntu-source-ceilometer-central:wallaby + ceilometer_collector: docker.io/kolla/ubuntu-source-ceilometer-collector:wallaby + ceilometer_compute: docker.io/kolla/ubuntu-source-ceilometer-compute:wallaby + ceilometer_ipmi: docker.io/kolla/ubuntu-source-ceilometer-base:wallaby + ceilometer_notification: docker.io/kolla/ubuntu-source-ceilometer-notification:wallaby + dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0 + image_repo_sync: docker.io/docker:17.07.0 + pull_policy: "IfNotPresent" + local_registry: + active: false + exclude: + - dep_check + - image_repo_sync + +network: + api: + ingress: + public: true + classes: + namespace: "nginx" + cluster: "nginx-openstack" + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / + port: 8777 + node_port: + enabled: false + port: 38777 + +ipmi_device: /dev/ipmi0 + +conf: + security: | + # + # Disable access to the entire file system except for the directories that + # are explicitly allowed later. + # + # This currently breaks the configurations that come with some web application + # Debian packages. + # + # + # AllowOverride None + # Require all denied + # + + # Changing the following options will not really affect the security of the + # server, but might make attacks slightly more difficult in some cases. + + # + # ServerTokens + # This directive configures what you return as the Server HTTP response + # Header. The default is 'Full' which sends information about the OS-Type + # and compiled in modules. 
+ # Set to one of: Full | OS | Minimal | Minor | Major | Prod + # where Full conveys the most information, and Prod the least. + ServerTokens Prod + + # + # Optionally add a line containing the server version and virtual host + # name to server-generated pages (internal error documents, FTP directory + # listings, mod_status and mod_info output etc., but not CGI generated + # documents or custom error documents). + # Set to "EMail" to also include a mailto: link to the ServerAdmin. + # Set to one of: On | Off | EMail + ServerSignature Off + + # + # Allow TRACE method + # + # Set to "extended" to also reflect the request body (only for testing and + # diagnostic purposes). + # + # Set to one of: On | Off | extended + TraceEnable Off + + # + # Forbid access to version control directories + # + # If you use version control systems in your document root, you should + # probably deny access to their directories. For example, for subversion: + # + # + # Require all denied + # + + # + # Setting this header will prevent MSIE from interpreting files as something + # else than declared by the content type in the HTTP headers. + # Requires mod_headers to be enabled. + # + #Header set X-Content-Type-Options: "nosniff" + + # + # Setting this header will prevent other sites from embedding pages from this + # site as frames. This defends against clickjacking attacks. + # Requires mod_headers to be enabled. + # + #Header set X-Frame-Options: "sameorigin" + software: + apache2: + binary: apache2 + start_parameters: -DFOREGROUND + site_dir: /etc/apache2/sites-enable + conf_dir: /etc/apache2/conf-enabled + mods_dir: /etc/apache2/mods-available + a2enmod: null + a2dismod: null + ceilometer: + DEFAULT: + event_dispatchers: + type: multistring + values: + - gnocchi + meter_dispatchers: + type: multistring + values: + - gnocchi + api: + aodh_is_enabled: "False" + # NOTE(portdirect): the following option will turn off the ability to retrieve + # metrics via the ceilometer API: + # gnocchi_is_enabled: "True" + dispatcher_gnocchi: + filter_service_activity: False + archive_policy: low + resources_definition_file: /etc/ceilometer/gnocchi_resources.yaml + database: + max_retries: -1 + dispatcher: + archive_policy: low + filter_project: service + keystone_authtoken: + auth_type: password + auth_version: v3 + service_credentials: + auth_type: password + interface: internal + notification: + messaging_urls: + type: multistring + values: + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/ceilometer + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/cinder + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/glance + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/nova + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/keystone + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/neutron + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/heat + oslo_messaging_notifications: + driver: messagingv2 + topics: + - notifications + - profiler + oslo_policy: + policy_file: /etc/ceilometer/policy.yaml + cache: + enabled: true + backend: dogpile.cache.memcached + expiration_time: 86400 + event_definitions: + - event_type: 'compute.instance.*' + traits: &instance_traits + tenant_id: + fields: payload.tenant_id + user_id: + fields: payload.user_id + instance_id: + fields: payload.instance_id + resource_id: + fields: payload.instance_id + host: + fields: publisher_id.`split(., 1, 1)` + 
service: + fields: publisher_id.`split(., 0, -1)` + memory_mb: + type: int + fields: payload.memory_mb + disk_gb: + type: int + fields: payload.disk_gb + root_gb: + type: int + fields: payload.root_gb + ephemeral_gb: + type: int + fields: payload.ephemeral_gb + vcpus: + type: int + fields: payload.vcpus + instance_type_id: + type: int + fields: payload.instance_type_id + instance_type: + fields: payload.instance_type + state: + fields: payload.state + os_architecture: + fields: payload.image_meta.'org.openstack__1__architecture' + os_version: + fields: payload.image_meta.'org.openstack__1__os_version' + os_distro: + fields: payload.image_meta.'org.openstack__1__os_distro' + launched_at: + type: datetime + fields: payload.launched_at + deleted_at: + type: datetime + fields: payload.deleted_at + - event_type: compute.instance.update + traits: + <<: *instance_traits + old_state: + fields: payload.old_state + - event_type: compute.instance.exists + traits: + <<: *instance_traits + audit_period_beginning: + type: datetime + fields: payload.audit_period_beginning + audit_period_ending: + type: datetime + fields: payload.audit_period_ending + - event_type: ['volume.exists', 'volume.create.*', 'volume.delete.*', 'volume.resize.*', 'volume.attach.*', 'volume.detach.*', 'volume.update.*', 'snapshot.exists', 'snapshot.create.*', 'snapshot.delete.*', 'snapshot.update.*'] + traits: &cinder_traits + user_id: + fields: payload.user_id + project_id: + fields: payload.tenant_id + availability_zone: + fields: payload.availability_zone + display_name: + fields: payload.display_name + replication_status: + fields: payload.replication_status + status: + fields: payload.status + created_at: + fields: payload.created_at + - event_type: ['volume.exists', 'volume.create.*', 'volume.delete.*', 'volume.resize.*', 'volume.attach.*', 'volume.detach.*', 'volume.update.*'] + traits: + <<: *cinder_traits + resource_id: + fields: payload.volume_id + host: + fields: payload.host + size: + fields: payload.size + type: + fields: payload.volume_type + replication_status: + fields: payload.replication_status + - event_type: ['snapshot.exists', 'snapshot.create.*', 'snapshot.delete.*', 'snapshot.update.*'] + traits: + <<: *cinder_traits + resource_id: + fields: payload.snapshot_id + volume_id: + fields: payload.volume_id + - event_type: ['image_volume_cache.*'] + traits: + image_id: + fields: payload.image_id + host: + fields: payload.host + - event_type: ['image.create', 'image.update', 'image.upload', 'image.delete'] + traits: &glance_crud + project_id: + fields: payload.owner + resource_id: + fields: payload.id + name: + fields: payload.name + status: + fields: payload.status + created_at: + fields: payload.created_at + user_id: + fields: payload.owner + deleted_at: + fields: payload.deleted_at + size: + fields: payload.size + - event_type: image.send + traits: &glance_send + receiver_project: + fields: payload.receiver_tenant_id + receiver_user: + fields: payload.receiver_user_id + user_id: + fields: payload.owner_id + image_id: + fields: payload.image_id + destination_ip: + fields: payload.destination_ip + bytes_sent: + type: int + fields: payload.bytes_sent + - event_type: orchestration.stack.* + traits: &orchestration_crud + project_id: + fields: payload.tenant_id + user_id: + fields: ['_context_trustor_user_id', '_context_user_id'] + resource_id: + fields: payload.stack_identity + - event_type: sahara.cluster.* + traits: &sahara_crud + project_id: + fields: payload.project_id + user_id: + fields: _context_user_id + 
resource_id: + fields: payload.cluster_id + - event_type: sahara.cluster.health + traits: &sahara_health + <<: *sahara_crud + verification_id: + fields: payload.verification_id + health_check_status: + fields: payload.health_check_status + health_check_name: + fields: payload.health_check_name + health_check_description: + fields: payload.health_check_description + created_at: + type: datetime + fields: payload.created_at + updated_at: + type: datetime + fields: payload.updated_at + - event_type: ['identity.user.*', 'identity.project.*', 'identity.group.*', 'identity.role.*', 'identity.OS-TRUST:trust.*', + 'identity.region.*', 'identity.service.*', 'identity.endpoint.*', 'identity.policy.*'] + traits: &identity_crud + resource_id: + fields: payload.resource_info + initiator_id: + fields: payload.initiator.id + project_id: + fields: payload.initiator.project_id + domain_id: + fields: payload.initiator.domain_id + - event_type: identity.role_assignment.* + traits: &identity_role_assignment + role: + fields: payload.role + group: + fields: payload.group + domain: + fields: payload.domain + user: + fields: payload.user + project: + fields: payload.project + - event_type: identity.authenticate + traits: &identity_authenticate + typeURI: + fields: payload.typeURI + id: + fields: payload.id + action: + fields: payload.action + eventType: + fields: payload.eventType + eventTime: + fields: payload.eventTime + outcome: + fields: payload.outcome + initiator_typeURI: + fields: payload.initiator.typeURI + initiator_id: + fields: payload.initiator.id + initiator_name: + fields: payload.initiator.name + initiator_host_agent: + fields: payload.initiator.host.agent + initiator_host_addr: + fields: payload.initiator.host.address + target_typeURI: + fields: payload.target.typeURI + target_id: + fields: payload.target.id + observer_typeURI: + fields: payload.observer.typeURI + observer_id: + fields: payload.observer.id + - event_type: objectstore.http.request + traits: &objectstore_request + typeURI: + fields: payload.typeURI + id: + fields: payload.id + action: + fields: payload.action + eventType: + fields: payload.eventType + eventTime: + fields: payload.eventTime + outcome: + fields: payload.outcome + initiator_typeURI: + fields: payload.initiator.typeURI + initiator_id: + fields: payload.initiator.id + initiator_project_id: + fields: payload.initiator.project_id + target_typeURI: + fields: payload.target.typeURI + target_id: + fields: payload.target.id + target_action: + fields: payload.target.action + target_metadata_path: + fields: payload.target.metadata.path + target_metadata_version: + fields: payload.target.metadata.version + target_metadata_container: + fields: payload.target.metadata.container + target_metadata_object: + fields: payload.target.metadata.object + observer_id: + fields: payload.observer.id + - event_type: ['network.*', 'subnet.*', 'port.*', 'router.*', 'floatingip.*', 'pool.*', 'vip.*', 'member.*', 'health_monitor.*', 'healthmonitor.*', 'listener.*', 'loadbalancer.*', 'firewall.*', 'firewall_policy.*', 'firewall_rule.*', 'vpnservice.*', 'ipsecpolicy.*', 'ikepolicy.*', 'ipsec_site_connection.*'] + traits: &network_traits + user_id: + fields: _context_user_id + project_id: + fields: _context_tenant_id + - event_type: network.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.network.id', 'payload.id'] + - event_type: subnet.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.subnet.id', 'payload.id'] + - event_type: port.* + traits: + <<: 
*network_traits + resource_id: + fields: ['payload.port.id', 'payload.id'] + - event_type: router.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.router.id', 'payload.id'] + - event_type: floatingip.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.floatingip.id', 'payload.id'] + - event_type: pool.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.pool.id', 'payload.id'] + - event_type: vip.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.vip.id', 'payload.id'] + - event_type: member.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.member.id', 'payload.id'] + - event_type: health_monitor.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.health_monitor.id', 'payload.id'] + - event_type: healthmonitor.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.healthmonitor.id', 'payload.id'] + - event_type: listener.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.listener.id', 'payload.id'] + - event_type: loadbalancer.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.loadbalancer.id', 'payload.id'] + - event_type: firewall.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.firewall.id', 'payload.id'] + - event_type: firewall_policy.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.firewall_policy.id', 'payload.id'] + - event_type: firewall_rule.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.firewall_rule.id', 'payload.id'] + - event_type: vpnservice.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.vpnservice.id', 'payload.id'] + - event_type: ipsecpolicy.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.ipsecpolicy.id', 'payload.id'] + - event_type: ikepolicy.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.ikepolicy.id', 'payload.id'] + - event_type: ipsec_site_connection.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.ipsec_site_connection.id', 'payload.id'] + - event_type: '*http.*' + traits: &http_audit + project_id: + fields: payload.initiator.project_id + user_id: + fields: payload.initiator.id + typeURI: + fields: payload.typeURI + eventType: + fields: payload.eventType + action: + fields: payload.action + outcome: + fields: payload.outcome + id: + fields: payload.id + eventTime: + fields: payload.eventTime + requestPath: + fields: payload.requestPath + observer_id: + fields: payload.observer.id + target_id: + fields: payload.target.id + target_typeURI: + fields: payload.target.typeURI + target_name: + fields: payload.target.name + initiator_typeURI: + fields: payload.initiator.typeURI + initiator_id: + fields: payload.initiator.id + initiator_name: + fields: payload.initiator.name + initiator_host_address: + fields: payload.initiator.host.address + - event_type: '*http.response' + traits: + <<: *http_audit + reason_code: + fields: payload.reason.reasonCode + - event_type: ['dns.domain.create', 'dns.domain.update', 'dns.domain.delete'] + traits: &dns_domain_traits + status: + fields: payload.status + retry: + fields: payload.retry + description: + fields: payload.description + expire: + fields: payload.expire + email: + fields: payload.email + ttl: + fields: payload.ttl + action: + fields: payload.action + name: + fields: payload.name + resource_id: + fields: payload.id + created_at: + fields: payload.created_at + updated_at: + fields: payload.updated_at + version: + fields: 
payload.version + parent_domain_id: + fields: parent_domain_id + serial: + fields: payload.serial + - event_type: dns.domain.exists + traits: + <<: *dns_domain_traits + audit_period_beginning: + type: datetime + fields: payload.audit_period_beginning + audit_period_ending: + type: datetime + fields: payload.audit_period_ending + - event_type: trove.* + traits: &trove_base_traits + instance_type: + fields: payload.instance_type + user_id: + fields: payload.user_id + resource_id: + fields: payload.instance_id + instance_type_id: + fields: payload.instance_type_id + launched_at: + type: datetime + fields: payload.launched_at + instance_name: + fields: payload.instance_name + state: + fields: payload.state + nova_instance_id: + fields: payload.nova_instance_id + service_id: + fields: payload.service_id + created_at: + type: datetime + fields: payload.created_at + region: + fields: payload.region + - event_type: ['trove.instance.create', 'trove.instance.modify_volume', 'trove.instance.modify_flavor', 'trove.instance.delete'] + traits: &trove_common_traits + name: + fields: payload.name + availability_zone: + fields: payload.availability_zone + instance_size: + type: int + fields: payload.instance_size + volume_size: + type: int + fields: payload.volume_size + nova_volume_id: + fields: payload.nova_volume_id + - event_type: trove.instance.create + traits: + <<: [*trove_base_traits, *trove_common_traits] + - event_type: trove.instance.modify_volume + traits: + <<: [*trove_base_traits, *trove_common_traits] + old_volume_size: + type: int + fields: payload.old_volume_size + modify_at: + type: datetime + fields: payload.modify_at + - event_type: trove.instance.modify_flavor + traits: + <<: [*trove_base_traits, *trove_common_traits] + old_instance_size: + type: int + fields: payload.old_instance_size + modify_at: + type: datetime + fields: payload.modify_at + - event_type: trove.instance.delete + traits: + <<: [*trove_base_traits, *trove_common_traits] + deleted_at: + type: datetime + fields: payload.deleted_at + - event_type: trove.instance.exists + traits: + <<: *trove_base_traits + display_name: + fields: payload.display_name + audit_period_beginning: + type: datetime + fields: payload.audit_period_beginning + audit_period_ending: + type: datetime + fields: payload.audit_period_ending + - event_type: profiler.* + traits: + project: + fields: payload.project + service: + fields: payload.service + name: + fields: payload.name + base_id: + fields: payload.base_id + trace_id: + fields: payload.trace_id + parent_id: + fields: payload.parent_id + timestamp: + fields: payload.timestamp + host: + fields: payload.info.host + path: + fields: payload.info.request.path + query: + fields: payload.info.request.query + method: + fields: payload.info.request.method + scheme: + fields: payload.info.request.scheme + db.statement: + fields: payload.info.db.statement + db.params: + fields: payload.info.db.params + - event_type: 'magnum.bay.*' + traits: &magnum_bay_crud + id: + fields: payload.id + typeURI: + fields: payload.typeURI + eventType: + fields: payload.eventType + eventTime: + fields: payload.eventTime + action: + fields: payload.action + outcome: + fields: payload.outcome + initiator_id: + fields: payload.initiator.id + initiator_typeURI: + fields: payload.initiator.typeURI + initiator_name: + fields: payload.initiator.name + initiator_host_agent: + fields: payload.initiator.host.agent + initiator_host_address: + fields: payload.initiator.host.address + target_id: + fields: payload.target.id + 
target_typeURI: + fields: payload.target.typeURI + observer_id: + fields: payload.observer.id + observer_typeURI: + fields: payload.observer.typeURI + event_pipeline: + sinks: + - name: event_sink + publishers: + - notifier:// + transformers: null + sources: + - events: + - '*' + name: event_source + sinks: + - event_sink + gnocchi_resources: + resources: + - archive_policy: low + metrics: + - identity.authenticate.success + - identity.authenticate.pending + - identity.authenticate.failure + - identity.user.created + - identity.user.deleted + - identity.user.updated + - identity.group.created + - identity.group.deleted + - identity.group.updated + - identity.role.created + - identity.role.deleted + - identity.role.updated + - identity.project.created + - identity.project.deleted + - identity.project.updated + - identity.trust.created + - identity.trust.deleted + - identity.role_assignment.created + - identity.role_assignment.deleted + resource_type: identity + - metrics: + - radosgw.objects + - radosgw.objects.size + - radosgw.objects.containers + - radosgw.api.request + - radosgw.containers.objects + - radosgw.containers.objects.size + resource_type: ceph_account + - attributes: + display_name: resource_metadata.display_name + flavor_id: resource_metadata.(instance_flavor_id|(flavor.id)) + host: resource_metadata.(instance_host|host) + image_ref: resource_metadata.image_ref + server_group: resource_metadata.user_metadata.server_group + event_associated_resources: + instance_disk: '{"=": {"instance_id": "%s"}}' + instance_network_interface: '{"=": {"instance_id": "%s"}}' + event_attributes: + id: instance_id + event_delete: compute.instance.delete.start + metrics: + - memory + - memory.usage + - memory.resident + - memory.bandwidth.total + - memory.bandwidth.local + - vcpus + - cpu + - cpu.delta + - cpu_util + - cpu_l3_cache + - disk.root.size + - disk.ephemeral.size + - disk.read.requests + - disk.read.requests.rate + - disk.write.requests + - disk.write.requests.rate + - disk.read.bytes + - disk.read.bytes.rate + - disk.write.bytes + - disk.write.bytes.rate + - disk.latency + - disk.iops + - disk.capacity + - disk.allocation + - disk.usage + - compute.instance.booting.time + - perf.cpu.cycles + - perf.instructions + - perf.cache.references + - perf.cache.misses + resource_type: instance + - attributes: + instance_id: resource_metadata.instance_id + name: resource_metadata.vnic_name + metrics: + - network.outgoing.packets.rate + - network.incoming.packets.rate + - network.outgoing.packets + - network.outgoing.packets.drop + - network.incoming.packets.drop + - network.outgoing.packets.error + - network.incoming.packets.error + - network.outgoing.bytes.rate + - network.incoming.bytes.rate + - network.outgoing.bytes + - network.incoming.bytes + resource_type: instance_network_interface + - attributes: + instance_id: resource_metadata.instance_id + name: resource_metadata.disk_name + metrics: + - disk.device.read.requests + - disk.device.read.requests.rate + - disk.device.write.requests + - disk.device.write.requests.rate + - disk.device.read.bytes + - disk.device.read.bytes.rate + - disk.device.write.bytes + - disk.device.write.bytes.rate + - disk.device.latency + - disk.device.iops + - disk.device.capacity + - disk.device.allocation + - disk.device.usage + resource_type: instance_disk + - attributes: + container_format: resource_metadata.container_format + disk_format: resource_metadata.disk_format + name: resource_metadata.name + event_attributes: + id: resource_id + event_delete: 
image.delete + metrics: + - image.size + - image.download + - image.serve + resource_type: image + - metrics: + - hardware.ipmi.node.power + - hardware.ipmi.node.temperature + - hardware.ipmi.node.inlet_temperature + - hardware.ipmi.node.outlet_temperature + - hardware.ipmi.node.fan + - hardware.ipmi.node.current + - hardware.ipmi.node.voltage + - hardware.ipmi.node.airflow + - hardware.ipmi.node.cups + - hardware.ipmi.node.cpu_util + - hardware.ipmi.node.mem_util + - hardware.ipmi.node.io_util + resource_type: ipmi + - event_delete: floatingip.delete.end + event_attributes: + id: resource_id + metrics: + - bandwidth + - network + - network.create + - network.update + - subnet + - subnet.create + - subnet.update + - port + - port.create + - port.update + - router + - router.create + - router.update + - ip.floating + - ip.floating.create + - ip.floating.update + resource_type: network + - metrics: + - stack.create + - stack.update + - stack.delete + - stack.resume + - stack.suspend + resource_type: stack + - metrics: + - storage.objects.incoming.bytes + - storage.objects.outgoing.bytes + - storage.api.request + - storage.objects.size + - storage.objects + - storage.objects.containers + - storage.containers.objects + - storage.containers.objects.size + resource_type: swift_account + - attributes: + display_name: resource_metadata.display_name + volume_type: resource_metadata.volume_type + event_delete: volume.delete.start + event_attributes: + id: resource_id + metrics: + - volume + - volume.size + - snapshot.size + - volume.snapshot.size + - volume.backup.size + resource_type: volume + - attributes: + host_name: resource_metadata.resource_url + metrics: + - hardware.cpu.load.1min + - hardware.cpu.load.5min + - hardware.cpu.load.15min + - hardware.cpu.util + - hardware.memory.total + - hardware.memory.used + - hardware.memory.swap.total + - hardware.memory.swap.avail + - hardware.memory.buffer + - hardware.memory.cached + - hardware.network.ip.outgoing.datagrams + - hardware.network.ip.incoming.datagrams + - hardware.system_stats.cpu.idle + - hardware.system_stats.io.outgoing.blocks + - hardware.system_stats.io.incoming.blocks + resource_type: host + - attributes: + device_name: resource_metadata.device + host_name: resource_metadata.resource_url + metrics: + - hardware.disk.size.total + - hardware.disk.size.used + resource_type: host_disk + - attributes: + device_name: resource_metadata.name + host_name: resource_metadata.resource_url + metrics: + - hardware.network.incoming.bytes + - hardware.network.outgoing.bytes + - hardware.network.outgoing.errors + resource_type: host_network_interface + meters: + metric: + - name: "image.size" + event_type: + - "image.upload" + - "image.delete" + - "image.update" + type: "gauge" + unit: B + volume: $.payload.size + resource_id: $.payload.id + project_id: $.payload.owner + - name: "image.download" + event_type: "image.send" + type: "delta" + unit: "B" + volume: $.payload.bytes_sent + resource_id: $.payload.image_id + user_id: $.payload.receiver_user_id + project_id: $.payload.receiver_tenant_id + - name: "image.serve" + event_type: "image.send" + type: "delta" + unit: "B" + volume: $.payload.bytes_sent + resource_id: $.payload.image_id + project_id: $.payload.owner_id + - name: 'volume.size' + event_type: + - 'volume.exists' + - 'volume.create.*' + - 'volume.delete.*' + - 'volume.resize.*' + - 'volume.attach.*' + - 'volume.detach.*' + - 'volume.update.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.size + user_id: $.payload.user_id + project_id: 
$.payload.tenant_id + resource_id: $.payload.volume_id + metadata: + display_name: $.payload.display_name + volume_type: $.payload.volume_type + - name: 'snapshot.size' + event_type: + - 'snapshot.exists' + - 'snapshot.create.*' + - 'snapshot.delete.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.volume_size + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.snapshot_id + metadata: + display_name: $.payload.display_name + - name: 'backup.size' + event_type: + - 'backup.exists' + - 'backup.create.*' + - 'backup.delete.*' + - 'backup.restore.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.size + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.backup_id + metadata: + display_name: $.payload.display_name + - name: $.payload.metrics.[*].name + event_type: 'magnum.bay.metrics.*' + type: 'gauge' + unit: $.payload.metrics.[*].unit + volume: $.payload.metrics.[*].value + user_id: $.payload.user_id + project_id: $.payload.project_id + resource_id: $.payload.resource_id + lookup: ['name', 'unit', 'volume'] + - name: $.payload.measurements.[*].metric.[*].name + event_type: 'objectstore.http.request' + type: 'delta' + unit: $.payload.measurements.[*].metric.[*].unit + volume: $.payload.measurements.[*].result + resource_id: $.payload.target.id + user_id: $.payload.initiator.id + project_id: $.payload.initiator.project_id + lookup: ['name', 'unit', 'volume'] + - name: 'memory' + event_type: 'compute.instance.*' + type: 'gauge' + unit: 'MB' + volume: $.payload.memory_mb + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: &instance_meta + host: $.payload.host + flavor_id: $.payload.instance_flavor_id + flavor_name: $.payload.instance_type + display_name: $.payload.display_name + image_ref: $.payload.image_meta.base_image_ref + - name: 'vcpus' + event_type: 'compute.instance.*' + type: 'gauge' + unit: 'vcpu' + volume: $.payload.vcpus + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: + <<: *instance_meta + - name: 'compute.instance.booting.time' + event_type: 'compute.instance.create.end' + type: 'gauge' + unit: 'sec' + volume: + fields: [$.payload.created_at, $.payload.launched_at] + plugin: 'timedelta' + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: + <<: *instance_meta + - name: 'disk.root.size' + event_type: 'compute.instance.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.root_gb + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: + <<: *instance_meta + - name: 'disk.ephemeral.size' + event_type: 'compute.instance.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.ephemeral_gb + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: + <<: *instance_meta + - name: 'bandwidth' + event_type: 'l3.meter' + type: 'delta' + unit: 'B' + volume: $.payload.bytes + project_id: $.payload.tenant_id + resource_id: $.payload.label_id + - name: 'compute.node.cpu.frequency' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'MHz' + volume: $.payload.metrics[?(@.name='cpu.frequency')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: 
$.payload.metrics[?(@.name='cpu.frequency')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.frequency')].source + - name: 'compute.node.cpu.user.time' + event_type: 'compute.metrics.update' + type: 'cumulative' + unit: 'ns' + volume: $.payload.metrics[?(@.name='cpu.user.time')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.user.time')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.user.time')].source + - name: 'compute.node.cpu.kernel.time' + event_type: 'compute.metrics.update' + type: 'cumulative' + unit: 'ns' + volume: $.payload.metrics[?(@.name='cpu.kernel.time')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.kernel.time')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.kernel.time')].source + - name: 'compute.node.cpu.idle.time' + event_type: 'compute.metrics.update' + type: 'cumulative' + unit: 'ns' + volume: $.payload.metrics[?(@.name='cpu.idle.time')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.idle.time')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.idle.time')].source + - name: 'compute.node.cpu.iowait.time' + event_type: 'compute.metrics.update' + type: 'cumulative' + unit: 'ns' + volume: $.payload.metrics[?(@.name='cpu.iowait.time')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.iowait.time')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.iowait.time')].source + - name: 'compute.node.cpu.kernel.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.kernel.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.kernel.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.kernel.percent')].source + - name: 'compute.node.cpu.idle.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.idle.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.idle.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.idle.percent')].source + - name: 'compute.node.cpu.user.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.user.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.user.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.user.percent')].source + - name: 'compute.node.cpu.iowait.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.iowait.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.iowait.percent')].timestamp + metadata: + 
event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.iowait.percent')].source + - name: 'compute.node.cpu.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.percent')].source + - name: $.payload.outcome - $.payload.outcome + 'identity.authenticate.' + $.payload.outcome + type: 'delta' + unit: 'user' + volume: 1 + event_type: + - 'identity.authenticate' + resource_id: $.payload.initiator.id + user_id: $.payload.initiator.id + - name: 'dns.domain.exists' + event_type: 'dns.domain.exists' + type: 'cumulative' + unit: 's' + volume: + fields: [$.payload.audit_period_beginning, $.payload.audit_period_ending] + plugin: 'timedelta' + project_id: $.payload.tenant_id + resource_id: $.payload.id + user_id: $._context_user + metadata: + status: $.payload.status + pool_id: $.payload.pool_id + host: $.publisher_id + - name: 'trove.instance.exists' + event_type: 'trove.instance.exists' + type: 'cumulative' + unit: 's' + volume: + fields: [$.payload.audit_period_beginning, $.payload.audit_period_ending] + plugin: 'timedelta' + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_id: $.payload.user_id + metadata: + nova_instance_id: $.payload.nova_instance_id + state: $.payload.state + service_id: $.payload.service_id + instance_type: $.payload.instance_type + instance_type_id: $.payload.instance_type_id + paste: + 'app:api-server': + paste.app_factory: 'ceilometer.api.app:app_factory' + 'filter:authtoken': + paste.filter_factory: 'keystonemiddleware.auth_token:filter_factory' + oslo_config_project: 'ceilometer' + 'filter:audit': + paste.filter_factory: 'keystonemiddleware.audit:filter_factory' + audit_map_file: '/etc/ceilometer/api_audit_map.conf' + 'filter:cors': + oslo_config_project: 'ceilometer' + paste.filter_factory: 'oslo_middleware.cors:filter_factory' + 'filter:http_proxy_to_wsgi': + oslo_config_project: 'ceilometer' + paste.filter_factory: 'oslo_middleware.http_proxy_to_wsgi:HTTPProxyToWSGI.factory' + 'filter:request_id': + oslo_config_project: 'ceilometer' + paste.filter_factory: 'oslo_middleware:RequestId.factory' + 'pipeline:main': + pipeline: cors http_proxy_to_wsgi request_id authtoken audit api-server + polling: + sources: + - name: all_pollsters + interval: 600 + meters: + - "*" + pipeline: + sources: + - name: meter_source + meters: + - "*" + sinks: + - meter_sink + - name: cpu_source + meters: + - "cpu" + sinks: + - cpu_sink + - cpu_delta_sink + - name: disk_source + meters: + - "disk.read.bytes" + - "disk.read.requests" + - "disk.write.bytes" + - "disk.write.requests" + - "disk.device.read.bytes" + - "disk.device.read.requests" + - "disk.device.write.bytes" + - "disk.device.write.requests" + sinks: + - disk_sink + - name: network_source + meters: + - "network.incoming.bytes" + - "network.incoming.packets" + - "network.outgoing.bytes" + - "network.outgoing.packets" + sinks: + - network_sink + sinks: + - name: meter_sink + transformers: + publishers: + - notifier:// + - name: cpu_sink + transformers: + - name: "rate_of_change" + parameters: + target: + name: "cpu_util" + unit: "%" + type: "gauge" + max: 100 + scale: "100.0 / (10**9 * (resource_metadata.cpu_number or 1))" + publishers: + - notifier:// + - name: 
cpu_delta_sink + transformers: + - name: "delta" + parameters: + target: + name: "cpu.delta" + growth_only: True + publishers: + - notifier:// + - name: disk_sink + transformers: + - name: "rate_of_change" + parameters: + source: + map_from: + name: "(disk\\.device|disk)\\.(read|write)\\.(bytes|requests)" + unit: "(B|request)" + target: + map_to: + name: "\\1.\\2.\\3.rate" + unit: "\\1/s" + type: "gauge" + publishers: + - notifier:// + - name: network_sink + transformers: + - name: "rate_of_change" + parameters: + source: + map_from: + name: "network\\.(incoming|outgoing)\\.(bytes|packets)" + unit: "(B|packet)" + target: + map_to: + name: "network.\\1.\\2.rate" + unit: "\\1/s" + type: "gauge" + publishers: + - notifier:// + policy: {} + audit_api_map: + DEFAULT: + target_endpoint_type: None + path_keywords: + meters: meter_name + resources: resource_id + statistics: None + samples: sample_id + service_endpoints: + metering: service/metering + wsgi_ceilometer: | + Listen 0.0.0.0:{{ tuple "metering" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} + + LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined + + + LogLevel info + WSGIDaemonProcess ceilometer-api processes=2 threads=1 user=ceilometer group=ceilometer display-name=%{GROUP} python-path=/var/lib/kolla/venv/lib/python2.7/site-packages + WSGIProcessGroup ceilometer-api + + WSGIScriptReloading On + WSGIScriptAlias / /var/lib/kolla/venv/lib/python2.7/site-packages/ceilometer/api/app.wsgi + + WSGIApplicationGroup %{GLOBAL} + + + = 2.4> + Require all granted + + + Order allow,deny + Allow from all + + + ErrorLog /dev/stdout + CustomLog /dev/stdout combined + + rally_tests: + CeilometerStats.create_meter_and_get_stats: + - args: + user_id: user-id + resource_id: resource-id + counter_volume: 1 + counter_unit: '' + counter_type: cumulative + runner: + type: constant + times: 1 + concurrency: 1 + sla: + failure_rate: + max: 0 + CeilometerMeters.list_meters: + - runner: + type: constant + times: 1 + concurrency: 1 + sla: + failure_rate: + max: 0 + context: + ceilometer: + counter_name: benchmark_meter + counter_type: gauge + counter_unit: "%" + counter_volume: 1 + resources_per_tenant: 1 + samples_per_resource: 1 + timestamp_interval: 10 + metadata_list: + - status: active + name: rally benchmark on + deleted: 'false' + - status: terminated + name: rally benchmark off + deleted: 'true' + args: + limit: 5 + metadata_query: + status: terminated + CeilometerQueries.create_and_query_samples: + - args: + filter: + "=": + counter_unit: instance + orderby: + limit: 10 + counter_name: cpu_util + counter_type: gauge + counter_unit: instance + counter_volume: 1 + resource_id: resource_id + runner: + type: constant + times: 1 + concurrency: 1 + sla: + failure_rate: + max: 0 + +dependencies: + dynamic: + common: + local_image_registry: + jobs: + - ceilometer-image-repo-sync + services: + - endpoint: node + service: local_image_registry + static: + api: + jobs: + - ceilometer-rabbit-init + - ceilometer-ks-user + - ceilometer-ks-endpoints + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + central: + jobs: + - ceilometer-rabbit-init + - ceilometer-ks-user + - ceilometer-ks-endpoints + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + ipmi: + jobs: + - ceilometer-rabbit-init + - ceilometer-ks-user + - ceilometer-ks-endpoints + services: + - endpoint: internal + service: identity + - endpoint: internal + 
service: metric + collector: + jobs: + - ceilometer-rabbit-init + - ceilometer-ks-user + - ceilometer-ks-endpoints + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + compute: + jobs: + - ceilometer-rabbit-init + - ceilometer-ks-user + - ceilometer-ks-endpoints + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + ks_endpoints: + jobs: + - ceilometer-ks-service + services: + - endpoint: internal + service: identity + ks_service: + services: + - endpoint: internal + service: identity + ks_user: + services: + - endpoint: internal + service: identity + rabbit_init: + services: + - service: oslo_messaging + endpoint: internal + notification: + jobs: + - ceilometer-rabbit-init + - ceilometer-ks-user + - ceilometer-ks-endpoints + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + tests: + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metering + - endpoint: internal + service: metric + image_repo_sync: + services: + - endpoint: internal + service: local_image_registry + +# Names of secrets used by bootstrap and environmental checks +secrets: + identity: + admin: ceilometer-keystone-admin + ceilometer: ceilometer-keystone-user + test: ceilometer-keystone-test + oslo_messaging: + admin: ceilometer-rabbitmq-admin + ceilometer: ceilometer-rabbitmq-user + oci_image_registry: + ceilometer: ceilometer-oci-image-registry + +bootstrap: + enabled: false + ks_user: ceilometer + script: | + openstack token issue + +# typically overridden by environmental +# values, but should include all endpoints +# required by this chart +endpoints: + cluster_domain_suffix: cluster.local + local_image_registry: + name: docker-registry + namespace: docker-registry + hosts: + default: localhost + internal: docker-registry + node: localhost + host_fqdn_override: + default: null + port: + registry: + node: 5000 + oci_image_registry: + name: oci-image-registry + namespace: oci-image-registry + auth: + enabled: false + ceilometer: + username: ceilometer + password: password + hosts: + default: localhost + host_fqdn_override: + default: null + port: + registry: + default: null + identity: + name: keystone + auth: + admin: + region_name: RegionOne + username: admin + password: password + project_name: admin + user_domain_name: default + project_domain_name: default + ceilometer: + role: admin + region_name: RegionOne + username: ceilometer + password: password + project_name: service + user_domain_name: service + project_domain_name: service + test: + role: admin + region_name: RegionOne + username: ceilometer-test + password: password + project_name: test + user_domain_name: service + project_domain_name: service + hosts: + default: keystone + internal: keystone-api + host_fqdn_override: + default: null + path: + default: /v3 + scheme: + default: 'http' + port: + api: + default: 5000 + public: 80 + internal: 5000 + service: 5000 + metering: + name: ceilometer + hosts: + default: ceilometer-api + public: ceilometer + host_fqdn_override: + default: null + path: + default: null + scheme: + default: 'http' + port: + api: + default: 8777 + public: 80 + internal: 8777 + service: 8777 + metric: + name: gnocchi + hosts: + default: gnocchi-api + public: gnocchi + host_fqdn_override: + default: null + path: + default: null + scheme: + default: 'http' + port: + api: + default: 8041 + public: 80 + internal: 8041 + service: 8041 + alarming: + name: aodh + hosts: + 
default: aodh-api + public: aodh + host_fqdn_override: + default: null + path: + default: null + scheme: + default: 'http' + port: + api: + default: 8042 + public: 80 + oslo_cache: + auth: + # NOTE(portdirect): this is used to define the value for keystone + # authtoken cache encryption key, if not set it will be populated + # automatically with a random value, but to take advantage of + # this feature all services should be set to use the same key, + # and memcache service. + memcache_secret_key: null + hosts: + default: memcached + host_fqdn_override: + default: null + port: + memcache: + default: 11211 + oslo_messaging: + auth: + admin: + username: rabbitmq + password: password + ceilometer: + username: ceilometer + password: password + statefulset: + replicas: 2 + name: rabbitmq-rabbitmq + hosts: + default: rabbitmq + host_fqdn_override: + default: null + path: /ceilometer + scheme: rabbit + port: + amqp: + default: 5672 + http: + default: 15672 + +pod: + affinity: + anti: + type: + default: preferredDuringSchedulingIgnoredDuringExecution + topologyKey: + default: kubernetes.io/hostname + weight: + default: 10 + tolerations: + ceilometer: + enabled: false + tolerations: + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + mounts: + ceilometer_tests: + init_container: null + ceilometer_tests: + volumeMounts: + volumes: + ceilometer_api: + init_container: null + ceilometer_api: + volumeMounts: + volumes: + ceilometer_compute: + init_container: null + ceilometer_compute: + volumeMounts: + volumes: + ceilometer_central: + init_container: null + ceilometer_central: + volumeMounts: + volumes: + ceilometer_ipmi: + init_container: null + ceilometer_ipmi: + volumeMounts: + volumes: + ceilometer_collector: + init_container: null + ceilometer_collector: + volumeMounts: + volumes: + ceilometer_notification: + init_container: null + ceilometer_notification: + volumeMounts: + volumes: + replicas: + api: 1 + central: 1 + collector: 1 + notification: 1 + lifecycle: + upgrades: + deployments: + revision_history: 3 + pod_replacement_strategy: RollingUpdate + rolling_update: + max_unavailable: 1 + max_surge: 3 + daemonsets: + pod_replacement_strategy: RollingUpdate + compute: + enabled: true + min_ready_seconds: 0 + max_unavailable: 1 + disruption_budget: + api: + min_available: 0 + termination_grace_period: + api: + timeout: 600 + resources: + enabled: true + api: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + compute: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + collector: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + notification: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + central: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + ipmi: + requests: + memory: "124Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + jobs: + rabbit_init: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + ks_endpoints: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + ks_service: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + ks_user: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + tests: + requests: + memory: "128Mi" + 
cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + image_repo_sync: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + +network_policy: + ceilometer: + ingress: + - {} + egress: + - {} + +manifests: + configmap_bin: true + configmap_etc: true + deployment_api: true + deployment_central: true + deployment_collector: true + daemonset_compute: true + daemonset_ipmi: false + deployment_notification: true + ingress_api: true + job_bootstrap: true + job_db_drop: false + # using gnocchi so no db init + job_db_init: false + job_db_init_mongodb: false + # using gnocchi so no db sync + job_db_sync: false + job_image_repo_sync: true + job_ks_endpoints: true + job_ks_service: true + job_ks_user: true + job_rabbit_init: true + pdb_api: true + pod_rally_test: true + network_policy: false + secret_db: true + secret_keystone: true + secret_mongodb: false + secret_rabbitmq: true + secret_registry: true + service_api: true + service_ingress_api: true +... From e2da175f9c98152b0a2943ca09ff3033d951f5dd Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Fri, 8 Mar 2024 12:45:08 -0600 Subject: [PATCH 02/20] fix: ceilometer-api image is deprecated Trying to pull the wallaby image for ceilometer-api results in a 404 not found now. ceilometer-api is in the base image as of commit cd67930 per the upstream kolla repo, so pull that instead. --- helm-configs/ceilometer/ceilometer-helm-overrides.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 638c5484..2dd69b36 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -34,7 +34,8 @@ images: ks_user: docker.io/openstackhelm/heat:wallaby-ubuntu_focal ks_service: docker.io/openstackhelm/heat:wallaby-ubuntu_focal ks_endpoints: docker.io/openstackhelm/heat:wallaby-ubuntu_focal - ceilometer_api: docker.io/kolla/ubuntu-source-ceilometer-api:wallaby + # ubuntu-source-ceilometer-api is deprecated + ceilometer_api: docker.io/kolla/ubuntu-source-ceilometer-base:wallaby ceilometer_central: docker.io/kolla/ubuntu-source-ceilometer-central:wallaby ceilometer_collector: docker.io/kolla/ubuntu-source-ceilometer-collector:wallaby ceilometer_compute: docker.io/kolla/ubuntu-source-ceilometer-compute:wallaby From 82703cd7235a4817436c5de6e5854c934151152f Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Fri, 8 Mar 2024 12:50:17 -0600 Subject: [PATCH 03/20] fix: remove ceilometer-collector from config ceilometer collector was removed from ceilometer code base[1] [1] https://review.openstack.org/504244 --- .../ceilometer/ceilometer-helm-overrides.yaml | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 2dd69b36..9316bcc1 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -14,9 +14,6 @@ labels: ipmi: node_selector_key: openstack-node node_selector_value: enabled - collector: - node_selector_key: openstack-control-plane - node_selector_value: enabled notification: node_selector_key: openstack-control-plane node_selector_value: enabled @@ -37,7 +34,6 @@ images: # ubuntu-source-ceilometer-api is deprecated ceilometer_api: docker.io/kolla/ubuntu-source-ceilometer-base:wallaby ceilometer_central: 
docker.io/kolla/ubuntu-source-ceilometer-central:wallaby - ceilometer_collector: docker.io/kolla/ubuntu-source-ceilometer-collector:wallaby ceilometer_compute: docker.io/kolla/ubuntu-source-ceilometer-compute:wallaby ceilometer_ipmi: docker.io/kolla/ubuntu-source-ceilometer-base:wallaby ceilometer_notification: docker.io/kolla/ubuntu-source-ceilometer-notification:wallaby @@ -1571,11 +1567,6 @@ dependencies: service: identity - endpoint: internal service: metric - collector: - jobs: - - ceilometer-rabbit-init - - ceilometer-ks-user - - ceilometer-ks-endpoints services: - endpoint: internal service: identity @@ -1855,11 +1846,6 @@ pod: ceilometer_ipmi: volumeMounts: volumes: - ceilometer_collector: - init_container: null - ceilometer_collector: - volumeMounts: - volumes: ceilometer_notification: init_container: null ceilometer_notification: @@ -1868,7 +1854,6 @@ pod: replicas: api: 1 central: 1 - collector: 1 notification: 1 lifecycle: upgrades: @@ -1906,13 +1891,6 @@ pod: limits: memory: "1024Mi" cpu: "2000m" - collector: - requests: - memory: "128Mi" - cpu: "100m" - limits: - memory: "1024Mi" - cpu: "2000m" notification: requests: memory: "128Mi" @@ -1990,7 +1968,7 @@ manifests: configmap_etc: true deployment_api: true deployment_central: true - deployment_collector: true + deployment_collector: false daemonset_compute: true daemonset_ipmi: false deployment_notification: true From 7724500c4e1a69331f739949f65f472173319314 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 11 Mar 2024 13:31:18 -0500 Subject: [PATCH 04/20] fix: ceilometer-api is fully deprecated so rm it This disables deployment of the api pod and removes related api configuration as ceilometer no longer has a rest API. It is simply a worker service at this point. Gnocchi API is preferred over ceilometer. 
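With the API pod gone, reads go straight to Gnocchi. A minimal sketch of what that looks like from a client's point of view, assuming python-gnocchiclient and keystoneauth1 are installed; the endpoint and credentials mirror this chart's illustrative defaults rather than anything real:

    # Illustrative only: query telemetry from gnocchi now that ceilometer
    # no longer serves its own REST API. Values are chart-default placeholders.
    from keystoneauth1 import session
    from keystoneauth1.identity import v3
    from gnocchiclient.v1 import client

    auth = v3.Password(
        auth_url="http://keystone-api.openstack.svc.cluster.local:5000/v3",
        username="ceilometer", password="password", project_name="service",
        user_domain_name="service", project_domain_name="service")
    gnocchi = client.Client(session=session.Session(auth=auth))

    # List instance resources and the metrics gnocchi tracks for each.
    for resource in gnocchi.resource.list("instance"):
        print(resource["id"], sorted(resource["metrics"]))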
--- .../ceilometer/ceilometer-helm-overrides.yaml | 116 +----------------- 1 file changed, 2 insertions(+), 114 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 9316bcc1..5f6a774b 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -2,9 +2,6 @@ release_group: null labels: - api: - node_selector_key: openstack-control-plane - node_selector_value: enabled compute: node_selector_key: openstack-compute-node node_selector_value: enabled @@ -31,8 +28,6 @@ images: ks_user: docker.io/openstackhelm/heat:wallaby-ubuntu_focal ks_service: docker.io/openstackhelm/heat:wallaby-ubuntu_focal ks_endpoints: docker.io/openstackhelm/heat:wallaby-ubuntu_focal - # ubuntu-source-ceilometer-api is deprecated - ceilometer_api: docker.io/kolla/ubuntu-source-ceilometer-base:wallaby ceilometer_central: docker.io/kolla/ubuntu-source-ceilometer-central:wallaby ceilometer_compute: docker.io/kolla/ubuntu-source-ceilometer-compute:wallaby ceilometer_ipmi: docker.io/kolla/ubuntu-source-ceilometer-base:wallaby @@ -46,20 +41,6 @@ images: - dep_check - image_repo_sync -network: - api: - ingress: - public: true - classes: - namespace: "nginx" - cluster: "nginx-openstack" - annotations: - nginx.ingress.kubernetes.io/rewrite-target: / - port: 8777 - node_port: - enabled: false - port: 38777 - ipmi_device: /dev/ipmi0 conf: @@ -1311,26 +1292,6 @@ conf: service_id: $.payload.service_id instance_type: $.payload.instance_type instance_type_id: $.payload.instance_type_id - paste: - 'app:api-server': - paste.app_factory: 'ceilometer.api.app:app_factory' - 'filter:authtoken': - paste.filter_factory: 'keystonemiddleware.auth_token:filter_factory' - oslo_config_project: 'ceilometer' - 'filter:audit': - paste.filter_factory: 'keystonemiddleware.audit:filter_factory' - audit_map_file: '/etc/ceilometer/api_audit_map.conf' - 'filter:cors': - oslo_config_project: 'ceilometer' - paste.filter_factory: 'oslo_middleware.cors:filter_factory' - 'filter:http_proxy_to_wsgi': - oslo_config_project: 'ceilometer' - paste.filter_factory: 'oslo_middleware.http_proxy_to_wsgi:HTTPProxyToWSGI.factory' - 'filter:request_id': - oslo_config_project: 'ceilometer' - paste.filter_factory: 'oslo_middleware:RequestId.factory' - 'pipeline:main': - pipeline: cors http_proxy_to_wsgi request_id authtoken audit api-server polling: sources: - name: all_pollsters @@ -1437,33 +1398,6 @@ conf: samples: sample_id service_endpoints: metering: service/metering - wsgi_ceilometer: | - Listen 0.0.0.0:{{ tuple "metering" "internal" "api" . 
| include "helm-toolkit.endpoints.endpoint_port_lookup" }} - - LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined - - - LogLevel info - WSGIDaemonProcess ceilometer-api processes=2 threads=1 user=ceilometer group=ceilometer display-name=%{GROUP} python-path=/var/lib/kolla/venv/lib/python2.7/site-packages - WSGIProcessGroup ceilometer-api - - WSGIScriptReloading On - WSGIScriptAlias / /var/lib/kolla/venv/lib/python2.7/site-packages/ceilometer/api/app.wsgi - - WSGIApplicationGroup %{GLOBAL} - - - = 2.4> - Require all granted - - - Order allow,deny - Allow from all - - - ErrorLog /dev/stdout - CustomLog /dev/stdout combined - rally_tests: CeilometerStats.create_meter_and_get_stats: - args: @@ -1537,16 +1471,6 @@ dependencies: - endpoint: node service: local_image_registry static: - api: - jobs: - - ceilometer-rabbit-init - - ceilometer-ks-user - - ceilometer-ks-endpoints - services: - - endpoint: internal - service: identity - - endpoint: internal - service: metric central: jobs: - ceilometer-rabbit-init @@ -1714,23 +1638,6 @@ endpoints: public: 80 internal: 5000 service: 5000 - metering: - name: ceilometer - hosts: - default: ceilometer-api - public: ceilometer - host_fqdn_override: - default: null - path: - default: null - scheme: - default: 'http' - port: - api: - default: 8777 - public: 80 - internal: 8777 - service: 8777 metric: name: gnocchi hosts: @@ -1826,11 +1733,6 @@ pod: ceilometer_tests: volumeMounts: volumes: - ceilometer_api: - init_container: null - ceilometer_api: - volumeMounts: - volumes: ceilometer_compute: init_container: null ceilometer_compute: @@ -1852,7 +1754,6 @@ pod: volumeMounts: volumes: replicas: - api: 1 central: 1 notification: 1 lifecycle: @@ -1869,21 +1770,8 @@ pod: enabled: true min_ready_seconds: 0 max_unavailable: 1 - disruption_budget: - api: - min_available: 0 - termination_grace_period: - api: - timeout: 600 resources: enabled: true - api: - requests: - memory: "128Mi" - cpu: "100m" - limits: - memory: "1024Mi" - cpu: "2000m" compute: requests: memory: "128Mi" @@ -1966,13 +1854,13 @@ network_policy: manifests: configmap_bin: true configmap_etc: true - deployment_api: true + deployment_api: false deployment_central: true deployment_collector: false daemonset_compute: true daemonset_ipmi: false deployment_notification: true - ingress_api: true + ingress_api: false job_bootstrap: true job_db_drop: false # using gnocchi so no db init From b87fd0dbcb172ef4f270bcdeceee29bb1f7eaf35 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 11 Mar 2024 14:21:49 -0500 Subject: [PATCH 05/20] fix: set database keys to fake values The database section is not used, but the base chart still tries to set some sane default values, so to avoid confusion, just override those to a string value that makes it obvious this section is not used. The recommended storage location for meters and events is Gnocchi, which is automatically discovered and used by means of keystone. 
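Because storage is reached through the Keystone catalog rather than a database connection, a quick way to confirm the dispatcher can find Gnocchi is to check the `metric` service registration. A sketch, again assuming the `openstack-admin-client` pod:

```shell
# The [database] values above are deliberate placeholders; ceilometer discovers
# Gnocchi by looking up the "metric" service in Keystone.
kubectl --namespace openstack exec -it openstack-admin-client -- \
  openstack endpoint list --service metric --interface internal
```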
--- helm-configs/ceilometer/ceilometer-helm-overrides.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 5f6a774b..a1d0ac20 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -131,14 +131,15 @@ conf: - gnocchi api: aodh_is_enabled: "False" - # NOTE(portdirect): the following option will turn off the ability to retrieve - # metrics via the ceilometer API: - # gnocchi_is_enabled: "True" + aodh_url: "NotUsed" dispatcher_gnocchi: filter_service_activity: False archive_policy: low resources_definition_file: /etc/ceilometer/gnocchi_resources.yaml database: + connection: "NotUsed" + event_connection: "NotUsed" + metering_connection: "NotUsed" max_retries: -1 dispatcher: archive_policy: low From 5b7d299f6160b4ecdc950491ba2594d0eec48c51 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 11 Mar 2024 14:24:21 -0500 Subject: [PATCH 06/20] fix: set gnocchi as the publisher This was explicitly set to notify:// without any context as to what that is or does. The configuration does not list that as a valid value, so let's replace the publisher with the default, `gnocchi`. --- .../ceilometer/ceilometer-helm-overrides.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index a1d0ac20..39b7043f 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -759,7 +759,7 @@ conf: sinks: - name: event_sink publishers: - - notifier:// + - gnocchi transformers: null sources: - events: @@ -1336,7 +1336,7 @@ conf: - name: meter_sink transformers: publishers: - - notifier:// + - gnocchi - name: cpu_sink transformers: - name: "rate_of_change" @@ -1348,7 +1348,7 @@ conf: max: 100 scale: "100.0 / (10**9 * (resource_metadata.cpu_number or 1))" publishers: - - notifier:// + - gnocchi - name: cpu_delta_sink transformers: - name: "delta" @@ -1357,7 +1357,7 @@ conf: name: "cpu.delta" growth_only: True publishers: - - notifier:// + - gnocchi - name: disk_sink transformers: - name: "rate_of_change" @@ -1372,7 +1372,7 @@ conf: unit: "\\1/s" type: "gauge" publishers: - - notifier:// + - gnocchi - name: network_sink transformers: - name: "rate_of_change" @@ -1387,7 +1387,7 @@ conf: unit: "\\1/s" type: "gauge" publishers: - - notifier:// + - gnocchi policy: {} audit_api_map: DEFAULT: From 0bf3e8128a0477a57a3e00ffc8a63eddbaa42c57 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 11 Mar 2024 14:46:28 -0500 Subject: [PATCH 07/20] fix: disable the ks-endpoint job There is no endpoint for ceilometer anymore so remove the related job that makes a service in keystone for one. 
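Once the job is gone, Keystone should carry no `metering` endpoints at all. A hedged spot check, assuming the old service type was `metering` as in the removed overrides:

```shell
# Expect an empty list or a "service not found" error; only the Gnocchi
# "metric" service should remain registered.
kubectl --namespace openstack exec -it openstack-admin-client -- \
  openstack endpoint list --service metering
```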
--- .../ceilometer/ceilometer-helm-overrides.yaml | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 39b7043f..73669f6c 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -27,7 +27,6 @@ images: rabbit_init: docker.io/rabbitmq:3.7-management ks_user: docker.io/openstackhelm/heat:wallaby-ubuntu_focal ks_service: docker.io/openstackhelm/heat:wallaby-ubuntu_focal - ks_endpoints: docker.io/openstackhelm/heat:wallaby-ubuntu_focal ceilometer_central: docker.io/kolla/ubuntu-source-ceilometer-central:wallaby ceilometer_compute: docker.io/kolla/ubuntu-source-ceilometer-compute:wallaby ceilometer_ipmi: docker.io/kolla/ubuntu-source-ceilometer-base:wallaby @@ -1476,7 +1475,6 @@ dependencies: jobs: - ceilometer-rabbit-init - ceilometer-ks-user - - ceilometer-ks-endpoints services: - endpoint: internal service: identity @@ -1486,7 +1484,6 @@ dependencies: jobs: - ceilometer-rabbit-init - ceilometer-ks-user - - ceilometer-ks-endpoints services: - endpoint: internal service: identity @@ -1501,15 +1498,11 @@ dependencies: jobs: - ceilometer-rabbit-init - ceilometer-ks-user - - ceilometer-ks-endpoints services: - endpoint: internal service: identity - endpoint: internal service: metric - ks_endpoints: - jobs: - - ceilometer-ks-service services: - endpoint: internal service: identity @@ -1529,7 +1522,6 @@ dependencies: jobs: - ceilometer-rabbit-init - ceilometer-ks-user - - ceilometer-ks-endpoints services: - endpoint: internal service: identity @@ -1809,13 +1801,6 @@ pod: limits: memory: "1024Mi" cpu: "2000m" - ks_endpoints: - requests: - memory: "128Mi" - cpu: "100m" - limits: - memory: "1024Mi" - cpu: "2000m" ks_service: requests: memory: "128Mi" @@ -1870,7 +1855,7 @@ manifests: # using gnocchi so no db sync job_db_sync: false job_image_repo_sync: true - job_ks_endpoints: true + job_ks_endpoints: false job_ks_service: true job_ks_user: true job_rabbit_init: true From 90798c59cd73e1be7fbce8af39e641303fb4e03c Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 11 Mar 2024 15:13:38 -0500 Subject: [PATCH 08/20] fix: bump ceilometer images to yoga This was the newest tagged image that I could find for Ceilometer. We will need to investigate building our own Ceilometer images for a later release of Genestack.
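One way to confirm which tags actually landed after a bump like this is to read the images straight off the running pods. A sketch, assuming the conventional openstack-helm `application=ceilometer` pod label:

```shell
# Print each ceilometer pod alongside the container images it is running.
kubectl --namespace openstack get pods -l application=ceilometer \
  -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[*].image}{"\n"}{end}'
```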
--- helm-configs/ceilometer/ceilometer-helm-overrides.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 73669f6c..9d060aa8 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -27,10 +27,10 @@ images: rabbit_init: docker.io/rabbitmq:3.7-management ks_user: docker.io/openstackhelm/heat:wallaby-ubuntu_focal ks_service: docker.io/openstackhelm/heat:wallaby-ubuntu_focal - ceilometer_central: docker.io/kolla/ubuntu-source-ceilometer-central:wallaby - ceilometer_compute: docker.io/kolla/ubuntu-source-ceilometer-compute:wallaby - ceilometer_ipmi: docker.io/kolla/ubuntu-source-ceilometer-base:wallaby - ceilometer_notification: docker.io/kolla/ubuntu-source-ceilometer-notification:wallaby + ceilometer_central: docker.io/kolla/ubuntu-source-ceilometer-central:yoga + ceilometer_compute: docker.io/kolla/ubuntu-source-ceilometer-compute:yoga + ceilometer_ipmi: docker.io/kolla/ubuntu-source-ceilometer-base:yoga + ceilometer_notification: docker.io/kolla/ubuntu-source-ceilometer-notification:yoga dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0 image_repo_sync: docker.io/docker:17.07.0 pull_policy: "IfNotPresent" From 80e536941ee6c685a7b82b3685f83cddaa865adb Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Tue, 12 Mar 2024 16:53:19 -0500 Subject: [PATCH 09/20] fix: enable db-sync to init gnocchi resource types The helm chart has a db_sync job which executes ceilometer-upgrade which executes the storage upgrade function that initiates the resource types in gnocchi with their attributes. --- .../ceilometer/ceilometer-helm-overrides.yaml | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 9d060aa8..37d0ab86 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -24,6 +24,7 @@ labels: images: tags: test: docker.io/xrally/xrally-openstack:2.0.0 + ceilometer_db_sync: docker.io/kolla/ubuntu-source-ceilometer-base:yoga rabbit_init: docker.io/rabbitmq:3.7-management ks_user: docker.io/openstackhelm/heat:wallaby-ubuntu_focal ks_service: docker.io/openstackhelm/heat:wallaby-ubuntu_focal @@ -1473,6 +1474,7 @@ dependencies: static: central: jobs: + - ceilometer-db-sync - ceilometer-rabbit-init - ceilometer-ks-user services: @@ -1482,6 +1484,7 @@ dependencies: service: metric ipmi: jobs: + - ceilometer-db-sync - ceilometer-rabbit-init - ceilometer-ks-user services: @@ -1496,6 +1499,7 @@ dependencies: service: metric compute: jobs: + - ceilometer-db-sync - ceilometer-rabbit-init - ceilometer-ks-user services: @@ -1503,9 +1507,9 @@ dependencies: service: identity - endpoint: internal service: metric - services: - - endpoint: internal - service: identity + db_sync: + jobs: [] + services: [] ks_service: services: - endpoint: internal @@ -1520,6 +1524,7 @@ dependencies: endpoint: internal notification: jobs: + - ceilometer-db-sync - ceilometer-rabbit-init - ceilometer-ks-user services: @@ -1746,6 +1751,10 @@ pod: ceilometer_notification: volumeMounts: volumes: + ceilometer_db_sync: + ceilometer_db_sync: + volumeMounts: + volumes: replicas: central: 1 notification: 1 @@ -1794,6 +1803,13 @@ pod: memory: "1024Mi" cpu: "2000m" jobs: + db_sync: + requests: + memory: 
"128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" rabbit_init: requests: memory: "128Mi" @@ -1852,8 +1868,8 @@ manifests: # using gnocchi so no db init job_db_init: false job_db_init_mongodb: false - # using gnocchi so no db sync - job_db_sync: false + # runs ceilometer-upgrade which inits resource types in gnocchi! + job_db_sync: true job_image_repo_sync: true job_ks_endpoints: false job_ks_service: true From c129e4849c6b23996203060e64b24b28223614c8 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Wed, 13 Mar 2024 10:33:10 -0500 Subject: [PATCH 10/20] fix: add updated event definitions from yoga The event definitions defined in the helm chart were very dated, update them to match those found in the yoga release. --- .../ceilometer/ceilometer-helm-overrides.yaml | 143 +++++++++++++++--- 1 file changed, 122 insertions(+), 21 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 37d0ab86..fca6e54f 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -181,8 +181,12 @@ conf: fields: payload.user_id instance_id: fields: payload.instance_id + display_name: + fields: payload.display_name resource_id: fields: payload.instance_id + cell_name: + fields: payload.cell_name host: fields: publisher_id.`split(., 1, 1)` service: @@ -203,7 +207,6 @@ conf: type: int fields: payload.vcpus instance_type_id: - type: int fields: payload.instance_type_id instance_type: fields: payload.instance_type @@ -221,6 +224,11 @@ conf: deleted_at: type: datetime fields: payload.deleted_at + - event_type: compute.instance.create.end + traits: + <<: *instance_traits + availability_zone: + fields: payload.availability_zone - event_type: compute.instance.update traits: <<: *instance_traits @@ -235,7 +243,7 @@ conf: audit_period_ending: type: datetime fields: payload.audit_period_ending - - event_type: ['volume.exists', 'volume.create.*', 'volume.delete.*', 'volume.resize.*', 'volume.attach.*', 'volume.detach.*', 'volume.update.*', 'snapshot.exists', 'snapshot.create.*', 'snapshot.delete.*', 'snapshot.update.*'] + - event_type: ['volume.exists', 'volume.retype', 'volume.create.*', 'volume.delete.*', 'volume.resize.*', 'volume.attach.*', 'volume.detach.*', 'volume.update.*', 'snapshot.exists', 'snapshot.create.*', 'snapshot.delete.*', 'snapshot.update.*', 'volume.transfer.accept.end', 'snapshot.transfer.accept.end'] traits: &cinder_traits user_id: fields: payload.user_id @@ -250,8 +258,13 @@ conf: status: fields: payload.status created_at: + type: datetime fields: payload.created_at - - event_type: ['volume.exists', 'volume.create.*', 'volume.delete.*', 'volume.resize.*', 'volume.attach.*', 'volume.detach.*', 'volume.update.*'] + image_id: + fields: payload.glance_metadata[?key=image_id].value + instance_id: + fields: payload.volume_attachment[0].server_id + - event_type: ['volume.transfer.*', 'volume.exists', 'volume.retype', 'volume.create.*', 'volume.delete.*', 'volume.resize.*', 'volume.attach.*', 'volume.detach.*', 'volume.update.*', 'snapshot.transfer.accept.end'] traits: <<: *cinder_traits resource_id: @@ -259,11 +272,51 @@ conf: host: fields: payload.host size: + type: int fields: payload.size type: fields: payload.volume_type replication_status: fields: payload.replication_status + - event_type: ['snapshot.transfer.accept.end'] + traits: + <<: *cinder_traits + resource_id: + fields: payload.snapshot_id + project_id: + fields: 
payload.tenant_id + - event_type: ['share.create.*', 'share.delete.*', 'share.extend.*', 'share.shrink.*'] + traits: &share_traits + share_id: + fields: payload.share_id + user_id: + fields: payload.user_id + project_id: + fields: payload.tenant_id + snapshot_id: + fields: payload.snapshot_id + availability_zone: + fields: payload.availability_zone + status: + fields: payload.status + created_at: + type: datetime + fields: payload.created_at + share_group_id: + fields: payload.share_group_id + size: + type: int + fields: payload.size + name: + fields: payload.name + proto: + fields: payload.proto + is_public: + fields: payload.is_public + description: + fields: payload.description + host: + fields: payload.host - event_type: ['snapshot.exists', 'snapshot.create.*', 'snapshot.delete.*', 'snapshot.update.*'] traits: <<: *cinder_traits @@ -288,12 +341,15 @@ conf: status: fields: payload.status created_at: + type: datetime fields: payload.created_at user_id: fields: payload.owner deleted_at: + type: datetime fields: payload.deleted_at size: + type: int fields: payload.size - event_type: image.send traits: &glance_send @@ -315,17 +371,21 @@ conf: project_id: fields: payload.tenant_id user_id: - fields: ['_context_trustor_user_id', '_context_user_id'] + fields: ['ctxt.trustor_user_id', 'ctxt.user_id'] resource_id: fields: payload.stack_identity + name: + fields: payload.name - event_type: sahara.cluster.* traits: &sahara_crud project_id: fields: payload.project_id user_id: - fields: _context_user_id + fields: ctxt.user_id resource_id: fields: payload.cluster_id + name: + fields: payload.name - event_type: sahara.cluster.health traits: &sahara_health <<: *sahara_crud @@ -377,6 +437,7 @@ conf: eventType: fields: payload.eventType eventTime: + type: datetime fields: payload.eventTime outcome: fields: payload.outcome @@ -409,6 +470,7 @@ conf: eventType: fields: payload.eventType eventTime: + type: datetime fields: payload.eventTime outcome: fields: payload.outcome @@ -437,27 +499,35 @@ conf: - event_type: ['network.*', 'subnet.*', 'port.*', 'router.*', 'floatingip.*', 'pool.*', 'vip.*', 'member.*', 'health_monitor.*', 'healthmonitor.*', 'listener.*', 'loadbalancer.*', 'firewall.*', 'firewall_policy.*', 'firewall_rule.*', 'vpnservice.*', 'ipsecpolicy.*', 'ikepolicy.*', 'ipsec_site_connection.*'] traits: &network_traits user_id: - fields: _context_user_id + fields: ctxt.user_id project_id: - fields: _context_tenant_id + fields: ctxt.tenant_id - event_type: network.* traits: <<: *network_traits + name: + fields: payload.network.name resource_id: fields: ['payload.network.id', 'payload.id'] - event_type: subnet.* traits: <<: *network_traits + name: + fields: payload.subnet.name resource_id: fields: ['payload.subnet.id', 'payload.id'] - event_type: port.* traits: <<: *network_traits + name: + fields: payload.port.name resource_id: fields: ['payload.port.id', 'payload.id'] - event_type: router.* traits: <<: *network_traits + name: + fields: payload.router.name resource_id: fields: ['payload.router.id', 'payload.id'] - event_type: floatingip.* @@ -468,6 +538,8 @@ conf: - event_type: pool.* traits: <<: *network_traits + name: + fields: payload.pool.name resource_id: fields: ['payload.pool.id', 'payload.id'] - event_type: vip.* @@ -483,51 +555,71 @@ conf: - event_type: health_monitor.* traits: <<: *network_traits + name: + fields: payload.health_monitor.name resource_id: fields: ['payload.health_monitor.id', 'payload.id'] - event_type: healthmonitor.* traits: <<: *network_traits + name: + fields: 
payload.healthmonitor.name resource_id: fields: ['payload.healthmonitor.id', 'payload.id'] - event_type: listener.* traits: <<: *network_traits + name: + fields: payload.listener.name resource_id: fields: ['payload.listener.id', 'payload.id'] - event_type: loadbalancer.* traits: <<: *network_traits + name: + fields: payload.loadbalancer.name resource_id: fields: ['payload.loadbalancer.id', 'payload.id'] - event_type: firewall.* traits: <<: *network_traits + name: + fields: payload.firewall.name resource_id: fields: ['payload.firewall.id', 'payload.id'] - event_type: firewall_policy.* traits: <<: *network_traits + name: + fields: payload.firewall_policy.name resource_id: fields: ['payload.firewall_policy.id', 'payload.id'] - event_type: firewall_rule.* traits: <<: *network_traits + name: + fields: payload.firewall_rule.name resource_id: fields: ['payload.firewall_rule.id', 'payload.id'] - event_type: vpnservice.* traits: <<: *network_traits + name: + fields: payload.vpnservice.name resource_id: fields: ['payload.vpnservice.id', 'payload.id'] - event_type: ipsecpolicy.* traits: <<: *network_traits + name: + fields: payload.ipsecpolicy.name resource_id: fields: ['payload.ipsecpolicy.id', 'payload.id'] - event_type: ikepolicy.* traits: <<: *network_traits + name: + fields: payload.ikepolicy.name resource_id: fields: ['payload.ikepolicy.id', 'payload.id'] - event_type: ipsec_site_connection.* @@ -552,6 +644,7 @@ conf: id: fields: payload.id eventTime: + type: datetime fields: payload.eventTime requestPath: fields: payload.requestPath @@ -597,8 +690,10 @@ conf: resource_id: fields: payload.id created_at: + type: datetime fields: payload.created_at updated_at: + type: datetime fields: payload.updated_at version: fields: payload.version @@ -708,6 +803,7 @@ conf: parent_id: fields: payload.parent_id timestamp: + type: datetime fields: payload.timestamp host: fields: payload.info.host @@ -723,8 +819,8 @@ conf: fields: payload.info.db.statement db.params: fields: payload.info.db.params - - event_type: 'magnum.bay.*' - traits: &magnum_bay_crud + - event_type: 'magnum.cluster.*' + traits: &magnum_cluster_crud id: fields: payload.id typeURI: @@ -732,6 +828,7 @@ conf: eventType: fields: payload.eventType eventTime: + type: datetime fields: payload.eventTime action: fields: payload.action @@ -755,18 +852,22 @@ conf: fields: payload.observer.id observer_typeURI: fields: payload.observer.typeURI - event_pipeline: - sinks: - - name: event_sink - publishers: - - gnocchi - transformers: null - sources: - - events: - - '*' - name: event_source - sinks: - - event_sink + - event_type: 'alarm.*' + traits: + id: + fields: payload.alarm_id + user_id: + fields: payload.user_id + project_id: + fields: payload.project_id + on_behalf_of: + fields: payload.on_behalf_of + severity: + fields: payload.severity + detail: + fields: payload.detail + type: + fields: payload.type gnocchi_resources: resources: - archive_policy: low From 99630d844724e51312626af79d9bf115396e634d Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Wed, 13 Mar 2024 10:44:09 -0500 Subject: [PATCH 11/20] fix: update gnocchi resources to yoga The gnocchi resources were outdated. This updates them to match what was released with Yoga. 
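After ceilometer-upgrade re-runs with these definitions, the updated types should be queryable from Gnocchi. A sketch, assuming the `openstack-admin-client` pod:

```shell
# The listing should now show yoga-era resource types and attributes such as
# launched_at, created_at, deleted_at, and flavor_name on instance.
kubectl --namespace openstack exec -it openstack-admin-client -- \
  openstack metric resource-type list | grep -E 'instance|volume|manila_share'
```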
--- .../ceilometer/ceilometer-helm-overrides.yaml | 577 ++++++++++++------ 1 file changed, 387 insertions(+), 190 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index fca6e54f..50ea01ba 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -868,227 +868,424 @@ conf: fields: payload.detail type: fields: payload.type + gnocchi_resources: + archive_policy_default: ceilometer-low + archive_policies: + # NOTE(sileht): We keep "mean" for now to not break all gating that + # use the current tempest scenario. + - name: ceilometer-low + aggregation_methods: + - mean + back_window: 0 + definition: + - granularity: 5 minutes + timespan: 30 days + - name: ceilometer-low-rate + aggregation_methods: + - mean + - rate:mean + back_window: 0 + definition: + - granularity: 5 minutes + timespan: 30 days + - name: ceilometer-high + aggregation_methods: + - mean + back_window: 0 + definition: + - granularity: 1 second + timespan: 1 hour + - granularity: 1 minute + timespan: 1 day + - granularity: 1 hour + timespan: 365 days + - name: ceilometer-high-rate + aggregation_methods: + - mean + - rate:mean + back_window: 0 + definition: + - granularity: 1 second + timespan: 1 hour + - granularity: 1 minute + timespan: 1 day + - granularity: 1 hour + timespan: 365 days + resources: - - archive_policy: low + - resource_type: identity metrics: - - identity.authenticate.success - - identity.authenticate.pending - - identity.authenticate.failure - - identity.user.created - - identity.user.deleted - - identity.user.updated - - identity.group.created - - identity.group.deleted - - identity.group.updated - - identity.role.created - - identity.role.deleted - - identity.role.updated - - identity.project.created - - identity.project.deleted - - identity.project.updated - - identity.trust.created - - identity.trust.deleted - - identity.role_assignment.created - - identity.role_assignment.deleted - resource_type: identity - - metrics: - - radosgw.objects - - radosgw.objects.size - - radosgw.objects.containers - - radosgw.api.request - - radosgw.containers.objects - - radosgw.containers.objects.size - resource_type: ceph_account - - attributes: - display_name: resource_metadata.display_name - flavor_id: resource_metadata.(instance_flavor_id|(flavor.id)) + identity.authenticate.success: + identity.authenticate.pending: + identity.authenticate.failure: + identity.user.created: + identity.user.deleted: + identity.user.updated: + identity.group.created: + identity.group.deleted: + identity.group.updated: + identity.role.created: + identity.role.deleted: + identity.role.updated: + identity.project.created: + identity.project.deleted: + identity.project.updated: + identity.trust.created: + identity.trust.deleted: + identity.role_assignment.created: + identity.role_assignment.deleted: + + - resource_type: ceph_account + metrics: + radosgw.objects: + radosgw.objects.size: + radosgw.objects.containers: + radosgw.api.request: + radosgw.containers.objects: + radosgw.containers.objects.size: + + - resource_type: instance + metrics: + memory: + memory.usage: + memory.resident: + memory.swap.in: + memory.swap.out: + memory.bandwidth.total: + memory.bandwidth.local: + vcpus: + cpu: + archive_policy_name: ceilometer-low-rate + cpu_l3_cache: + disk.root.size: + disk.ephemeral.size: + disk.latency: + disk.iops: + disk.capacity: + disk.allocation: + disk.usage: + 
compute.instance.booting.time: + perf.cpu.cycles: + perf.instructions: + perf.cache.references: + perf.cache.misses: + attributes: host: resource_metadata.(instance_host|host) image_ref: resource_metadata.image_ref + launched_at: resource_metadata.launched_at + created_at: resource_metadata.created_at + deleted_at: resource_metadata.deleted_at + display_name: resource_metadata.display_name + flavor_id: resource_metadata.(instance_flavor_id|(flavor.id)|flavor_id) + flavor_name: resource_metadata.(instance_type|(flavor.name)|flavor_name) server_group: resource_metadata.user_metadata.server_group - event_associated_resources: - instance_disk: '{"=": {"instance_id": "%s"}}' - instance_network_interface: '{"=": {"instance_id": "%s"}}' + event_delete: compute.instance.delete.start + event_create: compute.instance.create.end event_attributes: id: instance_id - event_delete: compute.instance.delete.start + display_name: display_name + host: host + availability_zone: availability_zone + flavor_id: instance_type_id + flavor_name: instance_type + user_id: user_id + project_id: project_id + event_associated_resources: + instance_network_interface: '{"=": {"instance_id": "%s"}}' + instance_disk: '{"=": {"instance_id": "%s"}}' + + - resource_type: instance_network_interface metrics: - - memory - - memory.usage - - memory.resident - - memory.bandwidth.total - - memory.bandwidth.local - - vcpus - - cpu - - cpu.delta - - cpu_util - - cpu_l3_cache - - disk.root.size - - disk.ephemeral.size - - disk.read.requests - - disk.read.requests.rate - - disk.write.requests - - disk.write.requests.rate - - disk.read.bytes - - disk.read.bytes.rate - - disk.write.bytes - - disk.write.bytes.rate - - disk.latency - - disk.iops - - disk.capacity - - disk.allocation - - disk.usage - - compute.instance.booting.time - - perf.cpu.cycles - - perf.instructions - - perf.cache.references - - perf.cache.misses - resource_type: instance - - attributes: - instance_id: resource_metadata.instance_id + network.outgoing.packets: + archive_policy_name: ceilometer-low-rate + network.incoming.packets: + archive_policy_name: ceilometer-low-rate + network.outgoing.packets.drop: + archive_policy_name: ceilometer-low-rate + network.incoming.packets.drop: + archive_policy_name: ceilometer-low-rate + network.outgoing.packets.error: + archive_policy_name: ceilometer-low-rate + network.incoming.packets.error: + archive_policy_name: ceilometer-low-rate + network.outgoing.bytes: + archive_policy_name: ceilometer-low-rate + network.incoming.bytes: + archive_policy_name: ceilometer-low-rate + attributes: name: resource_metadata.vnic_name - metrics: - - network.outgoing.packets.rate - - network.incoming.packets.rate - - network.outgoing.packets - - network.outgoing.packets.drop - - network.incoming.packets.drop - - network.outgoing.packets.error - - network.incoming.packets.error - - network.outgoing.bytes.rate - - network.incoming.bytes.rate - - network.outgoing.bytes - - network.incoming.bytes - resource_type: instance_network_interface - - attributes: instance_id: resource_metadata.instance_id + + - resource_type: instance_disk + metrics: + disk.device.read.requests: + archive_policy_name: ceilometer-low-rate + disk.device.write.requests: + archive_policy_name: ceilometer-low-rate + disk.device.read.bytes: + archive_policy_name: ceilometer-low-rate + disk.device.write.bytes: + archive_policy_name: ceilometer-low-rate + disk.device.latency: + disk.device.read.latency: + disk.device.write.latency: + disk.device.iops: + disk.device.capacity: + 
disk.device.allocation: + disk.device.usage: + attributes: name: resource_metadata.disk_name + instance_id: resource_metadata.instance_id + + - resource_type: image metrics: - - disk.device.read.requests - - disk.device.read.requests.rate - - disk.device.write.requests - - disk.device.write.requests.rate - - disk.device.read.bytes - - disk.device.read.bytes.rate - - disk.device.write.bytes - - disk.device.write.bytes.rate - - disk.device.latency - - disk.device.iops - - disk.device.capacity - - disk.device.allocation - - disk.device.usage - resource_type: instance_disk - - attributes: + image.size: + image.download: + image.serve: + attributes: + name: resource_metadata.name container_format: resource_metadata.container_format disk_format: resource_metadata.disk_format - name: resource_metadata.name + event_delete: image.delete event_attributes: id: resource_id - event_delete: image.delete + + - resource_type: ipmi + metrics: + hardware.ipmi.node.power: + hardware.ipmi.node.temperature: + hardware.ipmi.node.inlet_temperature: + hardware.ipmi.node.outlet_temperature: + hardware.ipmi.node.fan: + hardware.ipmi.node.current: + hardware.ipmi.node.voltage: + hardware.ipmi.node.airflow: + hardware.ipmi.node.cups: + hardware.ipmi.node.cpu_util: + hardware.ipmi.node.mem_util: + hardware.ipmi.node.io_util: + + - resource_type: ipmi_sensor + metrics: + - 'hardware.ipmi.power' + - 'hardware.ipmi.temperature' + - 'hardware.ipmi.current' + - 'hardware.ipmi.voltage' + attributes: + node: resource_metadata.node + + - resource_type: network metrics: - - image.size - - image.download - - image.serve - resource_type: image - - metrics: - - hardware.ipmi.node.power - - hardware.ipmi.node.temperature - - hardware.ipmi.node.inlet_temperature - - hardware.ipmi.node.outlet_temperature - - hardware.ipmi.node.fan - - hardware.ipmi.node.current - - hardware.ipmi.node.voltage - - hardware.ipmi.node.airflow - - hardware.ipmi.node.cups - - hardware.ipmi.node.cpu_util - - hardware.ipmi.node.mem_util - - hardware.ipmi.node.io_util - resource_type: ipmi - - event_delete: floatingip.delete.end + bandwidth: + ip.floating: + event_delete: floatingip.delete.end event_attributes: id: resource_id + + - resource_type: stack metrics: - - bandwidth - - network - - network.create - - network.update - - subnet - - subnet.create - - subnet.update - - port - - port.create - - port.update - - router - - router.create - - router.update - - ip.floating - - ip.floating.create - - ip.floating.update - resource_type: network - - metrics: - - stack.create - - stack.update - - stack.delete - - stack.resume - - stack.suspend - resource_type: stack - - metrics: - - storage.objects.incoming.bytes - - storage.objects.outgoing.bytes - - storage.api.request - - storage.objects.size - - storage.objects - - storage.objects.containers - - storage.containers.objects - - storage.containers.objects.size - resource_type: swift_account - - attributes: - display_name: resource_metadata.display_name + stack.create: + stack.update: + stack.delete: + stack.resume: + stack.suspend: + + - resource_type: swift_account + metrics: + storage.objects.incoming.bytes: + storage.objects.outgoing.bytes: + storage.objects.size: + storage.objects: + storage.objects.containers: + storage.containers.objects: + storage.containers.objects.size: + + - resource_type: volume + metrics: + volume: + volume.size: + snapshot.size: + volume.snapshot.size: + volume.backup.size: + backup.size: + volume.manage_existing.start: + volume.manage_existing.end: + 
volume.manage_existing_snapshot.start: + volume.manage_existing_snapshot.end: + attributes: + display_name: resource_metadata.(display_name|name) volume_type: resource_metadata.volume_type - event_delete: volume.delete.start + image_id: resource_metadata.image_id + instance_id: resource_metadata.instance_id + event_delete: + - volume.delete.end + - snapshot.delete.end + event_update: + - volume.transfer.accept.end + - snapshot.transfer.accept.end event_attributes: id: resource_id + project_id: project_id + + - resource_type: volume_provider metrics: - - volume - - volume.size - - snapshot.size - - volume.snapshot.size - - volume.backup.size - resource_type: volume - - attributes: + volume.provider.capacity.total: + volume.provider.capacity.free: + volume.provider.capacity.allocated: + volume.provider.capacity.provisioned: + volume.provider.capacity.virtual_free: + + - resource_type: volume_provider_pool + metrics: + volume.provider.pool.capacity.total: + volume.provider.pool.capacity.free: + volume.provider.pool.capacity.allocated: + volume.provider.pool.capacity.provisioned: + volume.provider.pool.capacity.virtual_free: + attributes: + provider: resource_metadata.provider + + - resource_type: host + metrics: + hardware.cpu.load.1min: + hardware.cpu.load.5min: + hardware.cpu.load.15min: + hardware.cpu.util: + hardware.cpu.user: + archive_policy_name: ceilometer-low-rate + hardware.cpu.nice: + archive_policy_name: ceilometer-low-rate + hardware.cpu.system: + archive_policy_name: ceilometer-low-rate + hardware.cpu.idle: + archive_policy_name: ceilometer-low-rate + hardware.cpu.wait: + archive_policy_name: ceilometer-low-rate + hardware.cpu.kernel: + archive_policy_name: ceilometer-low-rate + hardware.cpu.interrupt: + archive_policy_name: ceilometer-low-rate + hardware.memory.total: + hardware.memory.used: + hardware.memory.swap.total: + hardware.memory.swap.avail: + hardware.memory.buffer: + hardware.memory.cached: + hardware.network.ip.outgoing.datagrams: + hardware.network.ip.incoming.datagrams: + hardware.system_stats.cpu.idle: + hardware.system_stats.io.outgoing.blocks: + hardware.system_stats.io.incoming.blocks: + attributes: host_name: resource_metadata.resource_url + + - resource_type: host_disk metrics: - - hardware.cpu.load.1min - - hardware.cpu.load.5min - - hardware.cpu.load.15min - - hardware.cpu.util - - hardware.memory.total - - hardware.memory.used - - hardware.memory.swap.total - - hardware.memory.swap.avail - - hardware.memory.buffer - - hardware.memory.cached - - hardware.network.ip.outgoing.datagrams - - hardware.network.ip.incoming.datagrams - - hardware.system_stats.cpu.idle - - hardware.system_stats.io.outgoing.blocks - - hardware.system_stats.io.incoming.blocks - resource_type: host - - attributes: - device_name: resource_metadata.device + hardware.disk.size.total: + hardware.disk.size.used: + hardware.disk.read.bytes: + hardware.disk.write.bytes: + hardware.disk.read.requests: + hardware.disk.write.requests: + attributes: host_name: resource_metadata.resource_url + device_name: resource_metadata.device + + - resource_type: host_network_interface metrics: - - hardware.disk.size.total - - hardware.disk.size.used - resource_type: host_disk - - attributes: - device_name: resource_metadata.name + hardware.network.incoming.bytes: + hardware.network.outgoing.bytes: + hardware.network.outgoing.errors: + attributes: host_name: resource_metadata.resource_url + device_name: resource_metadata.name + + - resource_type: nova_compute + metrics: + compute.node.cpu.frequency: + 
compute.node.cpu.idle.percent: + compute.node.cpu.idle.time: + compute.node.cpu.iowait.percent: + compute.node.cpu.iowait.time: + compute.node.cpu.kernel.percent: + compute.node.cpu.kernel.time: + compute.node.cpu.percent: + compute.node.cpu.user.percent: + compute.node.cpu.user.time: + attributes: + host_name: resource_metadata.host + + - resource_type: manila_share + metrics: + manila.share.size: + attributes: + name: resource_metadata.name + host: resource_metadata.host + status: resource_metadata.status + availability_zone: resource_metadata.availability_zone + protocol: resource_metadata.protocol + + - resource_type: switch + metrics: + switch: + switch.ports: + attributes: + controller: resource_metadata.controller + + - resource_type: switch_port + metrics: + switch.port: + switch.port.uptime: + switch.port.receive.packets: + switch.port.transmit.packets: + switch.port.receive.bytes: + switch.port.transmit.bytes: + switch.port.receive.drops: + switch.port.transmit.drops: + switch.port.receive.errors: + switch.port.transmit.errors: + switch.port.receive.frame_error: + switch.port.receive.overrun_error: + switch.port.receive.crc_error: + switch.port.collision.count: + attributes: + switch: resource_metadata.switch + port_number_on_switch: resource_metadata.port_number_on_switch + neutron_port_id: resource_metadata.neutron_port_id + controller: resource_metadata.controller + + - resource_type: port + metrics: + port: + port.uptime: + port.receive.packets: + port.transmit.packets: + port.receive.bytes: + port.transmit.bytes: + port.receive.drops: + port.receive.errors: + attributes: + controller: resource_metadata.controller + + - resource_type: switch_table + metrics: + switch.table.active.entries: + attributes: + controller: resource_metadata.controller + switch: resource_metadata.switch + + - resource_type: loadbalancer metrics: - - hardware.network.incoming.bytes - - hardware.network.outgoing.bytes - - hardware.network.outgoing.errors - resource_type: host_network_interface + network.services.lb.outgoing.bytes: + network.services.lb.incoming.bytes: + network.services.lb.pool: + network.services.lb.listener: + network.services.lb.member: + network.services.lb.health_monitor: + network.services.lb.loadbalancer: + network.services.lb.total.connections: + network.services.lb.active.connections: meters: metric: - name: "image.size" From 45830f6d5dbc1bd274308f6daa6346dbe5a116c5 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Wed, 13 Mar 2024 10:49:33 -0500 Subject: [PATCH 12/20] fix: update ceilometer meters to yoga The existing meters were outdated. This brings them up to date with the yoga release. 
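These meter definitions map notification payloads onto samples, so a reasonable smoke test is to watch one of them arrive. A sketch, with a hypothetical volume UUID standing in for a real one:

```shell
# volume.size samples are produced from the volume.* notifications defined
# above and should accumulate as measures on the volume's Gnocchi resource.
VOLUME_ID="00000000-0000-0000-0000-000000000000"  # hypothetical; substitute a real volume
kubectl --namespace openstack exec -it openstack-admin-client -- \
  openstack metric measures show volume.size --resource-id "$VOLUME_ID"
```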
--- .../ceilometer/ceilometer-helm-overrides.yaml | 158 +++++++++++++++++- 1 file changed, 150 insertions(+), 8 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 50ea01ba..c5eac143 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -1288,6 +1288,7 @@ conf: network.services.lb.active.connections: meters: metric: + # Image - name: "image.size" event_type: - "image.upload" @@ -1298,6 +1299,7 @@ conf: volume: $.payload.size resource_id: $.payload.id project_id: $.payload.owner + - name: "image.download" event_type: "image.send" type: "delta" @@ -1306,6 +1308,7 @@ conf: resource_id: $.payload.image_id user_id: $.payload.receiver_user_id project_id: $.payload.receiver_tenant_id + - name: "image.serve" event_type: "image.send" type: "delta" @@ -1313,15 +1316,98 @@ conf: volume: $.payload.bytes_sent resource_id: $.payload.image_id project_id: $.payload.owner_id + + - name: 'volume.provider.capacity.total' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.total + resource_id: $.payload.name_to_id + + - name: 'volume.provider.capacity.free' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.free + resource_id: $.payload.name_to_id + + - name: 'volume.provider.capacity.allocated' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.allocated + resource_id: $.payload.name_to_id + + - name: 'volume.provider.capacity.provisioned' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.provisioned + resource_id: $.payload.name_to_id + + - name: 'volume.provider.capacity.virtual_free' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.virtual_free + resource_id: $.payload.name_to_id + + - name: 'volume.provider.pool.capacity.total' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.total + resource_id: $.payload.name_to_id + metadata: &provider_pool_meta + provider: $.payload.name_to_id.`split(#, 0, 1)` + + - name: 'volume.provider.pool.capacity.free' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.free + resource_id: $.payload.name_to_id + metadata: + <<: *provider_pool_meta + + - name: 'volume.provider.pool.capacity.allocated' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.allocated + resource_id: $.payload.name_to_id + metadata: + <<: *provider_pool_meta + + - name: 'volume.provider.pool.capacity.provisioned' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.provisioned + resource_id: $.payload.name_to_id + metadata: + <<: *provider_pool_meta + + - name: 'volume.provider.pool.capacity.virtual_free' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.virtual_free + resource_id: $.payload.name_to_id + metadata: + <<: *provider_pool_meta + - name: 'volume.size' event_type: - 'volume.exists' + - 'volume.retype' - 'volume.create.*' - 'volume.delete.*' - 'volume.resize.*' - 'volume.attach.*' - 'volume.detach.*' - 'volume.update.*' + - 'volume.manage.*' type: 'gauge' unit: 'GB' volume: $.payload.size @@ -1331,11 +1417,15 @@ conf: metadata: display_name: $.payload.display_name volume_type: $.payload.volume_type + image_id: $.payload.glance_metadata[?key=image_id].value + instance_id: $.payload.volume_attachment[0].server_id + - 
name: 'snapshot.size' event_type: - 'snapshot.exists' - 'snapshot.create.*' - 'snapshot.delete.*' + - 'snapshot.manage.*' type: 'gauge' unit: 'GB' volume: $.payload.volume_size @@ -1344,6 +1434,7 @@ conf: resource_id: $.payload.snapshot_id metadata: display_name: $.payload.display_name + - name: 'backup.size' event_type: - 'backup.exists' @@ -1358,6 +1449,8 @@ conf: resource_id: $.payload.backup_id metadata: display_name: $.payload.display_name + + # Magnum - name: $.payload.metrics.[*].name event_type: 'magnum.bay.metrics.*' type: 'gauge' @@ -1367,6 +1460,8 @@ conf: project_id: $.payload.project_id resource_id: $.payload.resource_id lookup: ['name', 'unit', 'volume'] + + # Swift - name: $.payload.measurements.[*].metric.[*].name event_type: 'objectstore.http.request' type: 'delta' @@ -1376,8 +1471,9 @@ conf: user_id: $.payload.initiator.id project_id: $.payload.initiator.project_id lookup: ['name', 'unit', 'volume'] + - name: 'memory' - event_type: 'compute.instance.*' + event_type: &instance_events compute.instance.(?!create.start|update).* type: 'gauge' unit: 'MB' volume: $.payload.memory_mb @@ -1391,8 +1487,12 @@ conf: flavor_name: $.payload.instance_type display_name: $.payload.display_name image_ref: $.payload.image_meta.base_image_ref + launched_at: $.payload.launched_at + created_at: $.payload.created_at + deleted_at: $.payload.deleted_at + - name: 'vcpus' - event_type: 'compute.instance.*' + event_type: *instance_events type: 'gauge' unit: 'vcpu' volume: $.payload.vcpus @@ -1401,7 +1501,8 @@ conf: resource_id: $.payload.instance_id user_metadata: $.payload.metadata metadata: - <<: *instance_meta + <<: *instance_meta + - name: 'compute.instance.booting.time' event_type: 'compute.instance.create.end' type: 'gauge' @@ -1414,8 +1515,9 @@ conf: user_metadata: $.payload.metadata metadata: <<: *instance_meta + - name: 'disk.root.size' - event_type: 'compute.instance.*' + event_type: *instance_events type: 'gauge' unit: 'GB' volume: $.payload.root_gb @@ -1424,9 +1526,10 @@ conf: resource_id: $.payload.instance_id user_metadata: $.payload.metadata metadata: - <<: *instance_meta + <<: *instance_meta + - name: 'disk.ephemeral.size' - event_type: 'compute.instance.*' + event_type: *instance_events type: 'gauge' unit: 'GB' volume: $.payload.ephemeral_gb @@ -1435,7 +1538,8 @@ conf: resource_id: $.payload.instance_id user_metadata: $.payload.metadata metadata: - <<: *instance_meta + <<: *instance_meta + - name: 'bandwidth' event_type: 'l3.meter' type: 'delta' @@ -1443,6 +1547,7 @@ conf: volume: $.payload.bytes project_id: $.payload.tenant_id resource_id: $.payload.label_id + - name: 'compute.node.cpu.frequency' event_type: 'compute.metrics.update' type: 'gauge' @@ -1454,6 +1559,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.frequency')].source + - name: 'compute.node.cpu.user.time' event_type: 'compute.metrics.update' type: 'cumulative' @@ -1465,6 +1571,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.user.time')].source + - name: 'compute.node.cpu.kernel.time' event_type: 'compute.metrics.update' type: 'cumulative' @@ -1476,6 +1583,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.kernel.time')].source + - name: 'compute.node.cpu.idle.time' event_type: 'compute.metrics.update' type: 'cumulative' @@ -1487,6 +1595,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.idle.time')].source + - name: 
'compute.node.cpu.iowait.time' event_type: 'compute.metrics.update' type: 'cumulative' @@ -1498,6 +1607,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.iowait.time')].source + - name: 'compute.node.cpu.kernel.percent' event_type: 'compute.metrics.update' type: 'gauge' @@ -1509,6 +1619,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.kernel.percent')].source + - name: 'compute.node.cpu.idle.percent' event_type: 'compute.metrics.update' type: 'gauge' @@ -1520,6 +1631,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.idle.percent')].source + - name: 'compute.node.cpu.user.percent' event_type: 'compute.metrics.update' type: 'gauge' @@ -1531,6 +1643,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.user.percent')].source + - name: 'compute.node.cpu.iowait.percent' event_type: 'compute.metrics.update' type: 'gauge' @@ -1542,6 +1655,7 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.iowait.percent')].source + - name: 'compute.node.cpu.percent' event_type: 'compute.metrics.update' type: 'gauge' @@ -1553,6 +1667,9 @@ conf: event_type: $.event_type host: $.publisher_id source: $.payload.metrics[?(@.name='cpu.percent')].source + + # Identity + # NOTE(gordc): hack because jsonpath-rw-ext can't concat starting with string. - name: $.payload.outcome - $.payload.outcome + 'identity.authenticate.' + $.payload.outcome type: 'delta' unit: 'user' @@ -1561,6 +1678,8 @@ conf: - 'identity.authenticate' resource_id: $.payload.initiator.id user_id: $.payload.initiator.id + + # DNS - name: 'dns.domain.exists' event_type: 'dns.domain.exists' type: 'cumulative' @@ -1570,11 +1689,13 @@ conf: plugin: 'timedelta' project_id: $.payload.tenant_id resource_id: $.payload.id - user_id: $._context_user + user_id: $.ctxt.user metadata: status: $.payload.status pool_id: $.payload.pool_id host: $.publisher_id + + # Trove - name: 'trove.instance.exists' event_type: 'trove.instance.exists' type: 'cumulative' @@ -1591,6 +1712,27 @@ conf: service_id: $.payload.service_id instance_type: $.payload.instance_type instance_type_id: $.payload.instance_type_id + + # Manila + - name: 'manila.share.size' + event_type: + - 'share.create.*' + - 'share.delete.*' + - 'share.extend.*' + - 'share.shrink.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.size + user_id: $.payload.user_id + project_id: $.payload.project_id + resource_id: $.payload.share_id + metadata: + name: $.payload.name + host: $.payload.host + status: $.payload.status + availability_zone: $.payload.availability_zone + protocol: $.payload.proto + polling: sources: - name: all_pollsters From 27e1f0c8296b96752b2fbc64b6960ace450926fe Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Wed, 13 Mar 2024 11:19:02 -0500 Subject: [PATCH 13/20] fix: simplify pipeline sinks for now This removes some complexity that the original helm chart introduced which defines custom meter sinks relating to instance cpu, disk, and net metrics. We may find ourselves disabling pollsters for individual instances, so let's not inundate the pipeline with un-necessary complexity yet. If we find they are useful or needed, we can re-enable them after verifying their proper operation. The polled metrics will still be stored in Gnocchi, just not transformed according to the defined sinks. 
If re-introduced, these pipeline sinks may need to be further tweaked to work with the updated event definitions. --- .../ceilometer/ceilometer-helm-overrides.yaml | 80 +------------------ 1 file changed, 1 insertion(+), 79 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index c5eac143..55d424dc 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -1736,7 +1736,7 @@ conf: polling: sources: - name: all_pollsters - interval: 600 + interval: 300 meters: - "*" pipeline: @@ -1746,86 +1746,8 @@ conf: - "*" sinks: - meter_sink - - name: cpu_source - meters: - - "cpu" - sinks: - - cpu_sink - - cpu_delta_sink - - name: disk_source - meters: - - "disk.read.bytes" - - "disk.read.requests" - - "disk.write.bytes" - - "disk.write.requests" - - "disk.device.read.bytes" - - "disk.device.read.requests" - - "disk.device.write.bytes" - - "disk.device.write.requests" - sinks: - - disk_sink - - name: network_source - meters: - - "network.incoming.bytes" - - "network.incoming.packets" - - "network.outgoing.bytes" - - "network.outgoing.packets" - sinks: - - network_sink sinks: - name: meter_sink - transformers: - publishers: - - gnocchi - - name: cpu_sink - transformers: - - name: "rate_of_change" - parameters: - target: - name: "cpu_util" - unit: "%" - type: "gauge" - max: 100 - scale: "100.0 / (10**9 * (resource_metadata.cpu_number or 1))" - publishers: - - gnocchi - - name: cpu_delta_sink - transformers: - - name: "delta" - parameters: - target: - name: "cpu.delta" - growth_only: True - publishers: - - gnocchi - - name: disk_sink - transformers: - - name: "rate_of_change" - parameters: - source: - map_from: - name: "(disk\\.device|disk)\\.(read|write)\\.(bytes|requests)" - unit: "(B|request)" - target: - map_to: - name: "\\1.\\2.\\3.rate" - unit: "\\1/s" - type: "gauge" - publishers: - - gnocchi - - name: network_sink - transformers: - - name: "rate_of_change" - parameters: - source: - map_from: - name: "network\\.(incoming|outgoing)\\.(bytes|packets)" - unit: "(B|packet)" - target: - map_to: - name: "network.\\1.\\2.rate" - unit: "\\1/s" - type: "gauge" publishers: - gnocchi policy: {} From 27802d6232ed89c1151b5d4ce0f01fe144a93c0d Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 18 Mar 2024 09:47:52 -0500 Subject: [PATCH 14/20] fix: enable postgresql backup jobs --- helm-configs/postgresql/postgresql-helm-overrides.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/helm-configs/postgresql/postgresql-helm-overrides.yaml b/helm-configs/postgresql/postgresql-helm-overrides.yaml index 679228c1..a95c6db3 100644 --- a/helm-configs/postgresql/postgresql-helm-overrides.yaml +++ b/helm-configs/postgresql/postgresql-helm-overrides.yaml @@ -300,12 +300,12 @@ conf: hba_file: '/tmp/pg_hba.conf' ident_file: '/tmp/pg_ident.conf' backup: - enabled: false + enabled: true base_path: /var/backup days_to_keep: 3 pg_dumpall_options: '--inserts --clean' remote_backup: - enabled: false + enabled: true container_name: postgresql days_to_keep: 14 storage_policy: default-placement @@ -466,7 +466,7 @@ manifests: configmap_etc: true job_image_repo_sync: true network_policy: false - job_ks_user: false + job_ks_user: true secret_admin: true secret_etc: true secret_audit: true @@ -474,8 +474,8 @@ manifests: secret_registry: true service: true statefulset: true - cron_job_postgresql_backup:
true + pvc_backup: true monitoring: prometheus: configmap_bin: false From b462b3d0a9252d79baabe3eca2ff102212559bd6 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 18 Mar 2024 10:42:50 -0500 Subject: [PATCH 15/20] fix: add gnocchi API replicas & enable daemonsets This should make Gnocchi more reliable and have better overall perf. --- helm-configs/gnocchi/gnocchi-helm-overrides.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/helm-configs/gnocchi/gnocchi-helm-overrides.yaml b/helm-configs/gnocchi/gnocchi-helm-overrides.yaml index 7ade5b93..db1c37bb 100644 --- a/helm-configs/gnocchi/gnocchi-helm-overrides.yaml +++ b/helm-configs/gnocchi/gnocchi-helm-overrides.yaml @@ -234,7 +234,7 @@ pod: init_container: null gnocchi_tests: replicas: - api: 1 + api: 3 lifecycle: upgrades: deployments: @@ -246,11 +246,11 @@ pod: daemonsets: pod_replacement_strategy: RollingUpdate metricd: - enabled: false + enabled: true min_ready_seconds: 0 max_unavailable: 1 statsd: - enabled: false + enabled: true min_ready_seconds: 0 max_unavailable: 1 disruption_budget: From 18f523828a61f3d17bb9590d560f8f334161cff6 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 18 Mar 2024 10:59:43 -0500 Subject: [PATCH 16/20] fix: disable resource limits for ceilometer We don't enforce pod resource limits in other helm charts so set this to false as the default. --- helm-configs/ceilometer/ceilometer-helm-overrides.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 55d424dc..05fbb52e 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -2135,7 +2135,7 @@ pod: min_ready_seconds: 0 max_unavailable: 1 resources: - enabled: true + enabled: false compute: requests: memory: "128Mi" From a8f9edf5bc568ab403533e0c5f67e05164efaa37 Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 18 Mar 2024 13:07:23 -0500 Subject: [PATCH 17/20] fix: remove apache2 config for ceilometer Ceilometer no longer has a rest API so let's remove this section from the overrides. --- .../ceilometer/ceilometer-helm-overrides.yaml | 75 ------------------- 1 file changed, 75 deletions(-) diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml index 05fbb52e..0cb51c74 100644 --- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml +++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml @@ -44,81 +44,6 @@ images: ipmi_device: /dev/ipmi0 conf: - security: | - # - # Disable access to the entire file system except for the directories that - # are explicitly allowed later. - # - # This currently breaks the configurations that come with some web application - # Debian packages. - # - # - # AllowOverride None - # Require all denied - # - - # Changing the following options will not really affect the security of the - # server, but might make attacks slightly more difficult in some cases. - - # - # ServerTokens - # This directive configures what you return as the Server HTTP response - # Header. The default is 'Full' which sends information about the OS-Type - # and compiled in modules. - # Set to one of: Full | OS | Minimal | Minor | Major | Prod - # where Full conveys the most information, and Prod the least. 
- ServerTokens Prod - - # - # Optionally add a line containing the server version and virtual host - # name to server-generated pages (internal error documents, FTP directory - # listings, mod_status and mod_info output etc., but not CGI generated - # documents or custom error documents). - # Set to "EMail" to also include a mailto: link to the ServerAdmin. - # Set to one of: On | Off | EMail - ServerSignature Off - - # - # Allow TRACE method - # - # Set to "extended" to also reflect the request body (only for testing and - # diagnostic purposes). - # - # Set to one of: On | Off | extended - TraceEnable Off - - # - # Forbid access to version control directories - # - # If you use version control systems in your document root, you should - # probably deny access to their directories. For example, for subversion: - # - # - # Require all denied - # - - # - # Setting this header will prevent MSIE from interpreting files as something - # else than declared by the content type in the HTTP headers. - # Requires mod_headers to be enabled. - # - #Header set X-Content-Type-Options: "nosniff" - - # - # Setting this header will prevent other sites from embedding pages from this - # site as frames. This defends against clickjacking attacks. - # Requires mod_headers to be enabled. - # - #Header set X-Frame-Options: "sameorigin" - software: - apache2: - binary: apache2 - start_parameters: -DFOREGROUND - site_dir: /etc/apache2/sites-enable - conf_dir: /etc/apache2/conf-enabled - mods_dir: /etc/apache2/mods-available - a2enmod: null - a2dismod: null ceilometer: DEFAULT: event_dispatchers: From c885c75352ace0691989a798e561b9cece055baa Mon Sep 17 00:00:00 2001 From: Luke Repko Date: Mon, 18 Mar 2024 15:15:42 -0500 Subject: [PATCH 18/20] fix: Add default loglevels to aid troubleshooting When troubleshooting, it helps to raise or lower default log levels of specific modules, setting requests related loggers to DEBUG for example can help one diagnose ceilometer CRUD operations. 
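Since `debug` and the commented `default_log_levels` are ordinary oslo.config options, they can also be flipped at deploy time rather than by editing this file. A sketch following the helm invocation documented later in this series:

```shell
# Enable debug logging for a troubleshooting session; set it back to false
# (or drop the flag) when finished.
helm upgrade --install ceilometer ./ceilometer \
  --namespace=openstack \
  -f /opt/genestack/helm-configs/ceilometer/ceilometer-helm-overrides.yaml \
  --set conf.ceilometer.DEFAULT.debug=true
```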
---
 helm-configs/ceilometer/ceilometer-helm-overrides.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml
index 0cb51c74..952324d7 100644
--- a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml
+++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml
@@ -46,6 +46,13 @@ ipmi_device: /dev/ipmi0
 conf:
   ceilometer:
     DEFAULT:
+      debug: "false"
+#      default_log_levels: >-
+#        amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,
+#        oslo_messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=DEBUG,
+#        urllib3.connectionpool=DEBUG,websocket=WARN,requests.packages.urllib3.util.retry=DEBUG,
+#        urllib3.util.retry=DEBUG,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,
+#        taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,oslo_policy=INFO,dogpile.core.dogpile=INFO
       event_dispatchers:
         type: multistring
         values:

From 6334f4025c38ed42cd5b1d06060003d2033f1761 Mon Sep 17 00:00:00 2001
From: Luke Repko
Date: Mon, 18 Mar 2024 18:06:21 -0500
Subject: [PATCH 19/20] doc: add openstack ceilometer installation

---
 docs/openstack-ceilometer.md | 80 ++++++++++++++++++++++++++++++++++++
 mkdocs.yml                   |  1 +
 2 files changed, 81 insertions(+)
 create mode 100644 docs/openstack-ceilometer.md

diff --git a/docs/openstack-ceilometer.md b/docs/openstack-ceilometer.md
new file mode 100644
index 00000000..d43f1879
--- /dev/null
+++ b/docs/openstack-ceilometer.md
@@ -0,0 +1,80 @@
+# Deploy Ceilometer
+
+## Create Secrets
+
+```shell
+kubectl --namespace openstack create secret generic ceilometer-keystone-admin-password \
+  --type Opaque \
+  --from-literal=password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)"
+kubectl --namespace openstack create secret generic ceilometer-keystone-test-password \
+  --type Opaque \
+  --from-literal=password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)"
+kubectl --namespace openstack create secret generic ceilometer-rabbitmq-password \
+  --type Opaque \
+  --from-literal=password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)"
+```
+
+## Run the package deployment
+
+```shell
+cd /opt/genestack/submodules/openstack-helm
+helm upgrade --install ceilometer ./ceilometer \
+    --namespace=openstack \
+    --wait \
+    --timeout 10m \
+    -f /opt/genestack/helm-configs/ceilometer/ceilometer-helm-overrides.yaml \
+    --set endpoints.identity.auth.admin.password="$(kubectl --namespace openstack get secret keystone-admin -o jsonpath='{.data.password}' | base64 -d)" \
+    --set endpoints.identity.auth.ceilometer.password="$(kubectl --namespace openstack get secret ceilometer-keystone-admin-password -o jsonpath='{.data.password}' | base64 -d)" \
+    --set endpoints.identity.auth.test.password="$(kubectl --namespace openstack get secret ceilometer-keystone-test-password -o jsonpath='{.data.password}' | base64 -d)" \
+    --set endpoints.oslo_messaging.auth.admin.username="$(kubectl --namespace openstack get secret rabbitmq-default-user -o jsonpath='{.data.username}' | base64 -d)" \
+    --set endpoints.oslo_messaging.auth.admin.password="$(kubectl --namespace openstack get secret rabbitmq-default-user -o jsonpath='{.data.password}' | base64 -d)" \
+    --set endpoints.oslo_messaging.auth.ceilometer.password="$(kubectl --namespace openstack get secret ceilometer-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)" \
+    --set conf.ceilometer.oslo_messaging_notifications.transport_url="\
+rabbit://ceilometer:$(kubectl --namespace openstack get secret ceilometer-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/ceilometer"\
+    --set conf.ceilometer.notification.messaging_urls.values="{\
+rabbit://ceilometer:$(kubectl --namespace openstack get secret ceilometer-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/ceilometer,\
+rabbit://cinder:$(kubectl --namespace openstack get secret cinder-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/cinder,\
+rabbit://glance:$(kubectl --namespace openstack get secret glance-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/glance,\
+rabbit://heat:$(kubectl --namespace openstack get secret heat-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/heat,\
+rabbit://keystone:$(kubectl --namespace openstack get secret keystone-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/keystone,\
+rabbit://neutron:$(kubectl --namespace openstack get secret neutron-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/neutron,\
+rabbit://nova:$(kubectl --namespace openstack get secret nova-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/nova}"
+```
+
+!!! tip
+
+    In a production-like environment you may need to include production-specific files, such as the example variable file found in `helm-configs/prod-example-openstack-overrides.yaml`.
+
+## Verify Ceilometer Workers
+
+As there is no Ceilometer API, we will do a quick validation against the
+Gnocchi API via a series of `openstack metric` commands to confirm that
+Ceilometer workers are ingesting metric and event data, then persisting them
+to storage.
+
+### Verify metric resource types exist
+
+The Ceilometer db-sync job will create the various resource types in Gnocchi.
+Without them, metrics can't be stored, so let's verify they exist. The
+output should include named resource types and some attributes for resources
+like `instance`, `instance_disk`, `network`, `volume`, etc.
+
+```shell
+kubectl exec -it openstack-admin-client -n openstack -- openstack metric resource-type list
+```
+
+### Verify metric resources
+
+Confirm that resources are populating in Gnocchi:
+
+```shell
+kubectl exec -it openstack-admin-client -n openstack -- openstack metric resource list
+```
+
+### Verify metrics
+
+Confirm that metrics can be retrieved from Gnocchi:
+
+```shell
+kubectl exec -it openstack-admin-client -n openstack -- openstack metric list
+```

diff --git a/mkdocs.yml b/mkdocs.yml
index d553449c..ce4435f5 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -182,6 +182,7 @@ nav:
       - skyline: openstack-skyline.md
       - Octavia: openstack-octavia.md
       - Gnocchi: openstack-gnocchi.md
+      - Ceilometer: openstack-ceilometer.md
   - Monitoring:
       - Monitoring Overview: prometheus-monitoring-overview.md
       - Prometheus: prometheus.md

From 9edb31d45bf019a49ad162cd3e8051daba76a568 Mon Sep 17 00:00:00 2001
From: Luke Repko
Date: Mon, 18 Mar 2024 19:08:17 -0500
Subject: [PATCH 20/20] fix: set postgresql cron backup to 0015 once a day

The default was midnight, but a lot of jobs run then; kick this off a
little later to help avoid the thundering herd effect.
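
Once the chart is redeployed, the rendered schedule can be spot checked
with kubectl. A minimal sketch; the CronJob name below is an assumption,
so adjust it to whatever the postgresql chart actually creates:

```shell
# Print the backup CronJob's schedule; expect "15 0 * * *".
# "postgresql-backup" is a hypothetical resource name.
kubectl --namespace openstack get cronjob postgresql-backup \
    -o jsonpath='{.spec.schedule}'
```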
---
 helm-configs/postgresql/postgresql-helm-overrides.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/helm-configs/postgresql/postgresql-helm-overrides.yaml b/helm-configs/postgresql/postgresql-helm-overrides.yaml
index a95c6db3..ad41ea06 100644
--- a/helm-configs/postgresql/postgresql-helm-overrides.yaml
+++ b/helm-configs/postgresql/postgresql-helm-overrides.yaml
@@ -239,7 +239,7 @@ jobs:
     # activeDeadlineSeconds == 0 means no deadline
     activeDeadlineSeconds: 0
     backoffLimit: 6
-    cron: "0 0 * * *"
+    cron: "15 0 * * *"
     history:
       success: 3
       failed: 1