From 1fc182cdae268ac64a6025ba024d46085717faf3 Mon Sep 17 00:00:00 2001
From: Luke Repko
Date: Tue, 19 Mar 2024 08:50:17 -0500
Subject: [PATCH] feat: introduce ceilometer helm chart overrides (#128)

* feat: introduce ceilometer helm chart overrides

This begins to add the overrides for the Ceilometer helm chart.
Ceilometer provides metering, monitoring, and alarming capabilities in
OpenStack for billing, performance, optimization, and capacity planning
purposes.

* fix: ceilometer-api image is deprecated

Trying to pull the wallaby image for ceilometer-api now results in a
404 Not Found. ceilometer-api is in the base image as of commit cd67930
per the upstream kolla repo, so pull that instead.

* fix: remove ceilometer-collector from config

The ceilometer collector was removed from the ceilometer code base[1].

[1] https://review.openstack.org/504244

* fix: ceilometer-api is fully deprecated so rm it

This disables deployment of the api pod and removes the related api
configuration, as ceilometer no longer has a REST API. It is simply a
worker service at this point. The Gnocchi API is preferred over
ceilometer's.

* fix: set database keys to fake values

The database section is not used, but the base chart still tries to set
some sane default values, so to avoid confusion, override those with a
string value that makes it obvious the section is unused. The
recommended storage location for meters and events is Gnocchi, which is
automatically discovered and used by means of keystone.

* fix: set gnocchi as the publisher

This was explicitly set to notify:// without any context as to what
that is or does. The configuration does not list that as a valid value,
so let's replace the publisher with the default, `gnocchi`.

* fix: disable the ks-endpoint job

Ceilometer no longer has an endpoint, so remove the job that would
create a service for one in keystone.

* fix: bump ceilometer images to yoga

Yoga was the newest tagged image that could be found for Ceilometer. We
will need to investigate building our own Ceilometer images for a later
release of Genestack.

* fix: enable db-sync to init gnocchi resource types

The helm chart has a db_sync job that runs ceilometer-upgrade, which in
turn runs the storage upgrade function that initializes the resource
types in gnocchi along with their attributes.

* fix: add updated event definitions from yoga

The event definitions defined in the helm chart were very dated; update
them to match those found in the yoga release.

* fix: update gnocchi resources to yoga

The gnocchi resources were outdated. This updates them to match what
was released with yoga.

* fix: update ceilometer meters to yoga

The existing meters were outdated. This brings them up to date with the
yoga release.

* fix: simplify pipeline sinks for now

This removes some complexity the original helm chart introduced, which
defined custom meter sinks for instance cpu, disk, and net metrics. We
may find ourselves disabling pollsters for individual instances, so
let's not inundate the pipeline with unnecessary complexity yet. If we
find the sinks are useful or needed, we can re-enable them after
verifying their proper operation. The polled metrics will still be
stored in Gnocchi, just not transformed according to the defined sinks.
If re-introduced, these pipeline sinks may need to be further tweaked
to work with the updated event definitions.

* fix: enable postgresql backup jobs

* fix: add gnocchi API replicas & enable daemonsets

This should make Gnocchi more reliable and improve overall performance.
* fix: disable resource limits for ceilometer

We don't enforce pod resource limits in other helm charts, so set this
to false as the default.

* fix: remove apache2 config for ceilometer

Ceilometer no longer has a REST API, so let's remove this section from
the overrides.

* fix: add default log levels to aid troubleshooting

When troubleshooting, it helps to raise or lower the default log levels
of specific modules; setting the requests-related loggers to DEBUG, for
example, can help one diagnose ceilometer CRUD operations.

* doc: add openstack ceilometer installation

* fix: set postgresql cron backup to 0015 once a day

The default was midnight, but a lot of jobs run then; kick this off a
little later to help avoid the thundering herd effect.
---
 docs/openstack-ceilometer.md                  |   80 +
 .../ceilometer/ceilometer-helm-overrides.yaml | 2182 +++++++++++++++++
 .../gnocchi/gnocchi-helm-overrides.yaml       |    6 +-
 .../postgresql/postgresql-helm-overrides.yaml |   12 +-
 mkdocs.yml                                    |    1 +
 5 files changed, 2272 insertions(+), 9 deletions(-)
 create mode 100644 docs/openstack-ceilometer.md
 create mode 100644 helm-configs/ceilometer/ceilometer-helm-overrides.yaml

diff --git a/docs/openstack-ceilometer.md b/docs/openstack-ceilometer.md
new file mode 100644
index 00000000..d43f1879
--- /dev/null
+++ b/docs/openstack-ceilometer.md
@@ -0,0 +1,80 @@
+# Deploy Ceilometer
+
+## Create Secrets
+
+```shell
+kubectl --namespace openstack create secret generic ceilometer-keystone-admin-password \
+  --type Opaque \
+  --from-literal=password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)"
+kubectl --namespace openstack create secret generic ceilometer-keystone-test-password \
+  --type Opaque \
+  --from-literal=password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)"
+kubectl --namespace openstack create secret generic ceilometer-rabbitmq-password \
+  --type Opaque \
+  --from-literal=password="$(< /dev/urandom tr -dc _A-Za-z0-9 | head -c${1:-32};echo;)"
+```
+
+## Run the package deployment
+
+```shell
+cd /opt/genestack/submodules/openstack-helm
+helm upgrade --install ceilometer ./ceilometer \
+  --namespace=openstack \
+  --wait \
+  --timeout 10m \
+  -f /opt/genestack/helm-configs/ceilometer/ceilometer-helm-overrides.yaml \
+  --set endpoints.identity.auth.admin.password="$(kubectl --namespace openstack get secret keystone-admin -o jsonpath='{.data.password}' | base64 -d)" \
+  --set endpoints.identity.auth.ceilometer.password="$(kubectl --namespace openstack get secret ceilometer-keystone-admin-password -o jsonpath='{.data.password}' | base64 -d)" \
+  --set endpoints.identity.auth.test.password="$(kubectl --namespace openstack get secret ceilometer-keystone-test-password -o jsonpath='{.data.password}' | base64 -d)" \
+  --set endpoints.oslo_messaging.auth.admin.username="$(kubectl --namespace openstack get secret rabbitmq-default-user -o jsonpath='{.data.username}' | base64 -d)" \
+  --set endpoints.oslo_messaging.auth.admin.password="$(kubectl --namespace openstack get secret rabbitmq-default-user -o jsonpath='{.data.password}' | base64 -d)" \
+  --set endpoints.oslo_messaging.auth.ceilometer.password="$(kubectl --namespace openstack get secret ceilometer-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)" \
+  --set conf.ceilometer.oslo_messaging_notifications.transport_url="\
+rabbit://ceilometer:$(kubectl --namespace openstack get secret ceilometer-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/ceilometer"\
+  --set conf.ceilometer.notification.messaging_urls.values="{\
+rabbit://ceilometer:$(kubectl --namespace openstack get secret ceilometer-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/ceilometer,\
+rabbit://cinder:$(kubectl --namespace openstack get secret cinder-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/cinder,\
+rabbit://glance:$(kubectl --namespace openstack get secret glance-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/glance,\
+rabbit://heat:$(kubectl --namespace openstack get secret heat-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/heat,\
+rabbit://keystone:$(kubectl --namespace openstack get secret keystone-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/keystone,\
+rabbit://neutron:$(kubectl --namespace openstack get secret neutron-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/neutron,\
+rabbit://nova:$(kubectl --namespace openstack get secret nova-rabbitmq-password -o jsonpath='{.data.password}' | base64 -d)@rabbitmq.openstack.svc.cluster.local:5672/nova}"
+```
+
+!!! tip
+
+    In a production-like environment you may need to include production-specific
+    files like the example variable file found in
+    `helm-configs/prod-example-openstack-overrides.yaml`.
+
+## Verify Ceilometer Workers
+
+As there is no Ceilometer API, we will do a quick validation against the
+Gnocchi API via a series of `openstack metric` commands to confirm that
+Ceilometer workers are ingesting metric and event data and then persisting
+them to storage.
+
+### Verify metric resource types exist
+
+The Ceilometer db-sync job will create the various resource types in Gnocchi.
+Without them, metrics can't be stored, so let's verify they exist. The
+output should include named resource types and some attributes for resources
+like `instance`, `instance_disk`, `network`, `volume`, etc.
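+
+If you want a scripted check rather than eyeballing the full table, you can
+filter the list down to a few of the resource types this chart defines. The
+snippet below is an illustrative sketch (it assumes the standard
+python-openstackclient `-c`/`-f` output flags); otherwise, simply list every
+resource type as shown next.
+
+```shell
+# Spot-check a few of the resource types defined in gnocchi_resources.yaml;
+# an empty result suggests the db-sync job has not created them yet.
+kubectl exec openstack-admin-client -n openstack -- \
+  openstack metric resource-type list -c name -f value | grep -E '^(instance|volume|network)$'
+```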
+
+```shell
+kubectl exec -it openstack-admin-client -n openstack -- openstack metric resource-type list
+```
+
+### Verify metric resources
+
+Confirm that resources are populating in Gnocchi.
+
+```shell
+kubectl exec -it openstack-admin-client -n openstack -- openstack metric resource list
+```
+
+### Verify metrics
+
+Confirm that metrics can be retrieved from Gnocchi.
+
+```shell
+kubectl exec -it openstack-admin-client -n openstack -- openstack metric list
+```
diff --git a/helm-configs/ceilometer/ceilometer-helm-overrides.yaml b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml
new file mode 100644
index 00000000..952324d7
--- /dev/null
+++ b/helm-configs/ceilometer/ceilometer-helm-overrides.yaml
@@ -0,0 +1,2182 @@
+---
+release_group: null
+
+labels:
+  compute:
+    node_selector_key: openstack-compute-node
+    node_selector_value: enabled
+  central:
+    node_selector_key: openstack-control-plane
+    node_selector_value: enabled
+  ipmi:
+    node_selector_key: openstack-node
+    node_selector_value: enabled
+  notification:
+    node_selector_key: openstack-control-plane
+    node_selector_value: enabled
+  job:
+    node_selector_key: openstack-control-plane
+    node_selector_value: enabled
+  test:
+    node_selector_key: openstack-control-plane
+    node_selector_value: enabled
+
+images:
+  tags:
+    test: docker.io/xrally/xrally-openstack:2.0.0
+    ceilometer_db_sync: docker.io/kolla/ubuntu-source-ceilometer-base:yoga
+    rabbit_init: docker.io/rabbitmq:3.7-management
+    ks_user: docker.io/openstackhelm/heat:wallaby-ubuntu_focal
+    ks_service: docker.io/openstackhelm/heat:wallaby-ubuntu_focal
+    ceilometer_central: docker.io/kolla/ubuntu-source-ceilometer-central:yoga
+    ceilometer_compute: docker.io/kolla/ubuntu-source-ceilometer-compute:yoga
+    ceilometer_ipmi: docker.io/kolla/ubuntu-source-ceilometer-base:yoga
+    ceilometer_notification: docker.io/kolla/ubuntu-source-ceilometer-notification:yoga
+    dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
+    image_repo_sync: docker.io/docker:17.07.0
+  pull_policy: "IfNotPresent"
+  local_registry:
+    active: false
+    exclude:
+      - dep_check
+      - image_repo_sync
+
+ipmi_device: /dev/ipmi0
+
+conf:
+  ceilometer:
+    DEFAULT:
+      debug: "false"
+#      default_log_levels: >-
+#        amqp=WARN,amqplib=WARN,boto=WARN,qpid=WARN,sqlalchemy=WARN,suds=INFO,oslo.messaging=INFO,
+#        oslo_messaging=INFO,iso8601=WARN,requests.packages.urllib3.connectionpool=DEBUG,
+#        urllib3.connectionpool=DEBUG,websocket=WARN,requests.packages.urllib3.util.retry=DEBUG,
+#        urllib3.util.retry=DEBUG,keystonemiddleware=WARN,routes.middleware=WARN,stevedore=WARN,
+#        taskflow=WARN,keystoneauth=WARN,oslo.cache=INFO,oslo_policy=INFO,dogpile.core.dogpile=INFO
+      event_dispatchers:
+        type: multistring
+        values:
+          - gnocchi
+      meter_dispatchers:
+        type: multistring
+        values:
+          - gnocchi
+    api:
+      aodh_is_enabled: "False"
+      aodh_url: "NotUsed"
+    dispatcher_gnocchi:
+      filter_service_activity: False
+      archive_policy: low
+      resources_definition_file: /etc/ceilometer/gnocchi_resources.yaml
+    database:
+      connection: "NotUsed"
+      event_connection: "NotUsed"
+      metering_connection: "NotUsed"
+      max_retries: -1
+    dispatcher:
+      archive_policy: low
+      filter_project: service
+    keystone_authtoken:
+      auth_type: password
+      auth_version: v3
+    service_credentials:
+      auth_type: password
+      interface: internal
+    notification:
+      messaging_urls:
+        type: multistring
+        values:
+          - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/ceilometer
+          - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/cinder
+          - 
rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/glance + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/nova + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/keystone + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/neutron + - rabbit://rabbitmq:password@rabbitmq.openstack.svc.cluster.local:5672/heat + oslo_messaging_notifications: + driver: messagingv2 + topics: + - notifications + - profiler + oslo_policy: + policy_file: /etc/ceilometer/policy.yaml + cache: + enabled: true + backend: dogpile.cache.memcached + expiration_time: 86400 + event_definitions: + - event_type: 'compute.instance.*' + traits: &instance_traits + tenant_id: + fields: payload.tenant_id + user_id: + fields: payload.user_id + instance_id: + fields: payload.instance_id + display_name: + fields: payload.display_name + resource_id: + fields: payload.instance_id + cell_name: + fields: payload.cell_name + host: + fields: publisher_id.`split(., 1, 1)` + service: + fields: publisher_id.`split(., 0, -1)` + memory_mb: + type: int + fields: payload.memory_mb + disk_gb: + type: int + fields: payload.disk_gb + root_gb: + type: int + fields: payload.root_gb + ephemeral_gb: + type: int + fields: payload.ephemeral_gb + vcpus: + type: int + fields: payload.vcpus + instance_type_id: + fields: payload.instance_type_id + instance_type: + fields: payload.instance_type + state: + fields: payload.state + os_architecture: + fields: payload.image_meta.'org.openstack__1__architecture' + os_version: + fields: payload.image_meta.'org.openstack__1__os_version' + os_distro: + fields: payload.image_meta.'org.openstack__1__os_distro' + launched_at: + type: datetime + fields: payload.launched_at + deleted_at: + type: datetime + fields: payload.deleted_at + - event_type: compute.instance.create.end + traits: + <<: *instance_traits + availability_zone: + fields: payload.availability_zone + - event_type: compute.instance.update + traits: + <<: *instance_traits + old_state: + fields: payload.old_state + - event_type: compute.instance.exists + traits: + <<: *instance_traits + audit_period_beginning: + type: datetime + fields: payload.audit_period_beginning + audit_period_ending: + type: datetime + fields: payload.audit_period_ending + - event_type: ['volume.exists', 'volume.retype', 'volume.create.*', 'volume.delete.*', 'volume.resize.*', 'volume.attach.*', 'volume.detach.*', 'volume.update.*', 'snapshot.exists', 'snapshot.create.*', 'snapshot.delete.*', 'snapshot.update.*', 'volume.transfer.accept.end', 'snapshot.transfer.accept.end'] + traits: &cinder_traits + user_id: + fields: payload.user_id + project_id: + fields: payload.tenant_id + availability_zone: + fields: payload.availability_zone + display_name: + fields: payload.display_name + replication_status: + fields: payload.replication_status + status: + fields: payload.status + created_at: + type: datetime + fields: payload.created_at + image_id: + fields: payload.glance_metadata[?key=image_id].value + instance_id: + fields: payload.volume_attachment[0].server_id + - event_type: ['volume.transfer.*', 'volume.exists', 'volume.retype', 'volume.create.*', 'volume.delete.*', 'volume.resize.*', 'volume.attach.*', 'volume.detach.*', 'volume.update.*', 'snapshot.transfer.accept.end'] + traits: + <<: *cinder_traits + resource_id: + fields: payload.volume_id + host: + fields: payload.host + size: + type: int + fields: payload.size + type: + fields: payload.volume_type + replication_status: + fields: 
payload.replication_status + - event_type: ['snapshot.transfer.accept.end'] + traits: + <<: *cinder_traits + resource_id: + fields: payload.snapshot_id + project_id: + fields: payload.tenant_id + - event_type: ['share.create.*', 'share.delete.*', 'share.extend.*', 'share.shrink.*'] + traits: &share_traits + share_id: + fields: payload.share_id + user_id: + fields: payload.user_id + project_id: + fields: payload.tenant_id + snapshot_id: + fields: payload.snapshot_id + availability_zone: + fields: payload.availability_zone + status: + fields: payload.status + created_at: + type: datetime + fields: payload.created_at + share_group_id: + fields: payload.share_group_id + size: + type: int + fields: payload.size + name: + fields: payload.name + proto: + fields: payload.proto + is_public: + fields: payload.is_public + description: + fields: payload.description + host: + fields: payload.host + - event_type: ['snapshot.exists', 'snapshot.create.*', 'snapshot.delete.*', 'snapshot.update.*'] + traits: + <<: *cinder_traits + resource_id: + fields: payload.snapshot_id + volume_id: + fields: payload.volume_id + - event_type: ['image_volume_cache.*'] + traits: + image_id: + fields: payload.image_id + host: + fields: payload.host + - event_type: ['image.create', 'image.update', 'image.upload', 'image.delete'] + traits: &glance_crud + project_id: + fields: payload.owner + resource_id: + fields: payload.id + name: + fields: payload.name + status: + fields: payload.status + created_at: + type: datetime + fields: payload.created_at + user_id: + fields: payload.owner + deleted_at: + type: datetime + fields: payload.deleted_at + size: + type: int + fields: payload.size + - event_type: image.send + traits: &glance_send + receiver_project: + fields: payload.receiver_tenant_id + receiver_user: + fields: payload.receiver_user_id + user_id: + fields: payload.owner_id + image_id: + fields: payload.image_id + destination_ip: + fields: payload.destination_ip + bytes_sent: + type: int + fields: payload.bytes_sent + - event_type: orchestration.stack.* + traits: &orchestration_crud + project_id: + fields: payload.tenant_id + user_id: + fields: ['ctxt.trustor_user_id', 'ctxt.user_id'] + resource_id: + fields: payload.stack_identity + name: + fields: payload.name + - event_type: sahara.cluster.* + traits: &sahara_crud + project_id: + fields: payload.project_id + user_id: + fields: ctxt.user_id + resource_id: + fields: payload.cluster_id + name: + fields: payload.name + - event_type: sahara.cluster.health + traits: &sahara_health + <<: *sahara_crud + verification_id: + fields: payload.verification_id + health_check_status: + fields: payload.health_check_status + health_check_name: + fields: payload.health_check_name + health_check_description: + fields: payload.health_check_description + created_at: + type: datetime + fields: payload.created_at + updated_at: + type: datetime + fields: payload.updated_at + - event_type: ['identity.user.*', 'identity.project.*', 'identity.group.*', 'identity.role.*', 'identity.OS-TRUST:trust.*', + 'identity.region.*', 'identity.service.*', 'identity.endpoint.*', 'identity.policy.*'] + traits: &identity_crud + resource_id: + fields: payload.resource_info + initiator_id: + fields: payload.initiator.id + project_id: + fields: payload.initiator.project_id + domain_id: + fields: payload.initiator.domain_id + - event_type: identity.role_assignment.* + traits: &identity_role_assignment + role: + fields: payload.role + group: + fields: payload.group + domain: + fields: payload.domain + user: + fields: 
payload.user + project: + fields: payload.project + - event_type: identity.authenticate + traits: &identity_authenticate + typeURI: + fields: payload.typeURI + id: + fields: payload.id + action: + fields: payload.action + eventType: + fields: payload.eventType + eventTime: + type: datetime + fields: payload.eventTime + outcome: + fields: payload.outcome + initiator_typeURI: + fields: payload.initiator.typeURI + initiator_id: + fields: payload.initiator.id + initiator_name: + fields: payload.initiator.name + initiator_host_agent: + fields: payload.initiator.host.agent + initiator_host_addr: + fields: payload.initiator.host.address + target_typeURI: + fields: payload.target.typeURI + target_id: + fields: payload.target.id + observer_typeURI: + fields: payload.observer.typeURI + observer_id: + fields: payload.observer.id + - event_type: objectstore.http.request + traits: &objectstore_request + typeURI: + fields: payload.typeURI + id: + fields: payload.id + action: + fields: payload.action + eventType: + fields: payload.eventType + eventTime: + type: datetime + fields: payload.eventTime + outcome: + fields: payload.outcome + initiator_typeURI: + fields: payload.initiator.typeURI + initiator_id: + fields: payload.initiator.id + initiator_project_id: + fields: payload.initiator.project_id + target_typeURI: + fields: payload.target.typeURI + target_id: + fields: payload.target.id + target_action: + fields: payload.target.action + target_metadata_path: + fields: payload.target.metadata.path + target_metadata_version: + fields: payload.target.metadata.version + target_metadata_container: + fields: payload.target.metadata.container + target_metadata_object: + fields: payload.target.metadata.object + observer_id: + fields: payload.observer.id + - event_type: ['network.*', 'subnet.*', 'port.*', 'router.*', 'floatingip.*', 'pool.*', 'vip.*', 'member.*', 'health_monitor.*', 'healthmonitor.*', 'listener.*', 'loadbalancer.*', 'firewall.*', 'firewall_policy.*', 'firewall_rule.*', 'vpnservice.*', 'ipsecpolicy.*', 'ikepolicy.*', 'ipsec_site_connection.*'] + traits: &network_traits + user_id: + fields: ctxt.user_id + project_id: + fields: ctxt.tenant_id + - event_type: network.* + traits: + <<: *network_traits + name: + fields: payload.network.name + resource_id: + fields: ['payload.network.id', 'payload.id'] + - event_type: subnet.* + traits: + <<: *network_traits + name: + fields: payload.subnet.name + resource_id: + fields: ['payload.subnet.id', 'payload.id'] + - event_type: port.* + traits: + <<: *network_traits + name: + fields: payload.port.name + resource_id: + fields: ['payload.port.id', 'payload.id'] + - event_type: router.* + traits: + <<: *network_traits + name: + fields: payload.router.name + resource_id: + fields: ['payload.router.id', 'payload.id'] + - event_type: floatingip.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.floatingip.id', 'payload.id'] + - event_type: pool.* + traits: + <<: *network_traits + name: + fields: payload.pool.name + resource_id: + fields: ['payload.pool.id', 'payload.id'] + - event_type: vip.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.vip.id', 'payload.id'] + - event_type: member.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.member.id', 'payload.id'] + - event_type: health_monitor.* + traits: + <<: *network_traits + name: + fields: payload.health_monitor.name + resource_id: + fields: ['payload.health_monitor.id', 'payload.id'] + - event_type: healthmonitor.* + traits: + <<: *network_traits + name: + 
fields: payload.healthmonitor.name + resource_id: + fields: ['payload.healthmonitor.id', 'payload.id'] + - event_type: listener.* + traits: + <<: *network_traits + name: + fields: payload.listener.name + resource_id: + fields: ['payload.listener.id', 'payload.id'] + - event_type: loadbalancer.* + traits: + <<: *network_traits + name: + fields: payload.loadbalancer.name + resource_id: + fields: ['payload.loadbalancer.id', 'payload.id'] + - event_type: firewall.* + traits: + <<: *network_traits + name: + fields: payload.firewall.name + resource_id: + fields: ['payload.firewall.id', 'payload.id'] + - event_type: firewall_policy.* + traits: + <<: *network_traits + name: + fields: payload.firewall_policy.name + resource_id: + fields: ['payload.firewall_policy.id', 'payload.id'] + - event_type: firewall_rule.* + traits: + <<: *network_traits + name: + fields: payload.firewall_rule.name + resource_id: + fields: ['payload.firewall_rule.id', 'payload.id'] + - event_type: vpnservice.* + traits: + <<: *network_traits + name: + fields: payload.vpnservice.name + resource_id: + fields: ['payload.vpnservice.id', 'payload.id'] + - event_type: ipsecpolicy.* + traits: + <<: *network_traits + name: + fields: payload.ipsecpolicy.name + resource_id: + fields: ['payload.ipsecpolicy.id', 'payload.id'] + - event_type: ikepolicy.* + traits: + <<: *network_traits + name: + fields: payload.ikepolicy.name + resource_id: + fields: ['payload.ikepolicy.id', 'payload.id'] + - event_type: ipsec_site_connection.* + traits: + <<: *network_traits + resource_id: + fields: ['payload.ipsec_site_connection.id', 'payload.id'] + - event_type: '*http.*' + traits: &http_audit + project_id: + fields: payload.initiator.project_id + user_id: + fields: payload.initiator.id + typeURI: + fields: payload.typeURI + eventType: + fields: payload.eventType + action: + fields: payload.action + outcome: + fields: payload.outcome + id: + fields: payload.id + eventTime: + type: datetime + fields: payload.eventTime + requestPath: + fields: payload.requestPath + observer_id: + fields: payload.observer.id + target_id: + fields: payload.target.id + target_typeURI: + fields: payload.target.typeURI + target_name: + fields: payload.target.name + initiator_typeURI: + fields: payload.initiator.typeURI + initiator_id: + fields: payload.initiator.id + initiator_name: + fields: payload.initiator.name + initiator_host_address: + fields: payload.initiator.host.address + - event_type: '*http.response' + traits: + <<: *http_audit + reason_code: + fields: payload.reason.reasonCode + - event_type: ['dns.domain.create', 'dns.domain.update', 'dns.domain.delete'] + traits: &dns_domain_traits + status: + fields: payload.status + retry: + fields: payload.retry + description: + fields: payload.description + expire: + fields: payload.expire + email: + fields: payload.email + ttl: + fields: payload.ttl + action: + fields: payload.action + name: + fields: payload.name + resource_id: + fields: payload.id + created_at: + type: datetime + fields: payload.created_at + updated_at: + type: datetime + fields: payload.updated_at + version: + fields: payload.version + parent_domain_id: + fields: parent_domain_id + serial: + fields: payload.serial + - event_type: dns.domain.exists + traits: + <<: *dns_domain_traits + audit_period_beginning: + type: datetime + fields: payload.audit_period_beginning + audit_period_ending: + type: datetime + fields: payload.audit_period_ending + - event_type: trove.* + traits: &trove_base_traits + instance_type: + fields: payload.instance_type + 
user_id: + fields: payload.user_id + resource_id: + fields: payload.instance_id + instance_type_id: + fields: payload.instance_type_id + launched_at: + type: datetime + fields: payload.launched_at + instance_name: + fields: payload.instance_name + state: + fields: payload.state + nova_instance_id: + fields: payload.nova_instance_id + service_id: + fields: payload.service_id + created_at: + type: datetime + fields: payload.created_at + region: + fields: payload.region + - event_type: ['trove.instance.create', 'trove.instance.modify_volume', 'trove.instance.modify_flavor', 'trove.instance.delete'] + traits: &trove_common_traits + name: + fields: payload.name + availability_zone: + fields: payload.availability_zone + instance_size: + type: int + fields: payload.instance_size + volume_size: + type: int + fields: payload.volume_size + nova_volume_id: + fields: payload.nova_volume_id + - event_type: trove.instance.create + traits: + <<: [*trove_base_traits, *trove_common_traits] + - event_type: trove.instance.modify_volume + traits: + <<: [*trove_base_traits, *trove_common_traits] + old_volume_size: + type: int + fields: payload.old_volume_size + modify_at: + type: datetime + fields: payload.modify_at + - event_type: trove.instance.modify_flavor + traits: + <<: [*trove_base_traits, *trove_common_traits] + old_instance_size: + type: int + fields: payload.old_instance_size + modify_at: + type: datetime + fields: payload.modify_at + - event_type: trove.instance.delete + traits: + <<: [*trove_base_traits, *trove_common_traits] + deleted_at: + type: datetime + fields: payload.deleted_at + - event_type: trove.instance.exists + traits: + <<: *trove_base_traits + display_name: + fields: payload.display_name + audit_period_beginning: + type: datetime + fields: payload.audit_period_beginning + audit_period_ending: + type: datetime + fields: payload.audit_period_ending + - event_type: profiler.* + traits: + project: + fields: payload.project + service: + fields: payload.service + name: + fields: payload.name + base_id: + fields: payload.base_id + trace_id: + fields: payload.trace_id + parent_id: + fields: payload.parent_id + timestamp: + type: datetime + fields: payload.timestamp + host: + fields: payload.info.host + path: + fields: payload.info.request.path + query: + fields: payload.info.request.query + method: + fields: payload.info.request.method + scheme: + fields: payload.info.request.scheme + db.statement: + fields: payload.info.db.statement + db.params: + fields: payload.info.db.params + - event_type: 'magnum.cluster.*' + traits: &magnum_cluster_crud + id: + fields: payload.id + typeURI: + fields: payload.typeURI + eventType: + fields: payload.eventType + eventTime: + type: datetime + fields: payload.eventTime + action: + fields: payload.action + outcome: + fields: payload.outcome + initiator_id: + fields: payload.initiator.id + initiator_typeURI: + fields: payload.initiator.typeURI + initiator_name: + fields: payload.initiator.name + initiator_host_agent: + fields: payload.initiator.host.agent + initiator_host_address: + fields: payload.initiator.host.address + target_id: + fields: payload.target.id + target_typeURI: + fields: payload.target.typeURI + observer_id: + fields: payload.observer.id + observer_typeURI: + fields: payload.observer.typeURI + - event_type: 'alarm.*' + traits: + id: + fields: payload.alarm_id + user_id: + fields: payload.user_id + project_id: + fields: payload.project_id + on_behalf_of: + fields: payload.on_behalf_of + severity: + fields: payload.severity + detail: + fields: 
payload.detail + type: + fields: payload.type + + gnocchi_resources: + archive_policy_default: ceilometer-low + archive_policies: + # NOTE(sileht): We keep "mean" for now to not break all gating that + # use the current tempest scenario. + - name: ceilometer-low + aggregation_methods: + - mean + back_window: 0 + definition: + - granularity: 5 minutes + timespan: 30 days + - name: ceilometer-low-rate + aggregation_methods: + - mean + - rate:mean + back_window: 0 + definition: + - granularity: 5 minutes + timespan: 30 days + - name: ceilometer-high + aggregation_methods: + - mean + back_window: 0 + definition: + - granularity: 1 second + timespan: 1 hour + - granularity: 1 minute + timespan: 1 day + - granularity: 1 hour + timespan: 365 days + - name: ceilometer-high-rate + aggregation_methods: + - mean + - rate:mean + back_window: 0 + definition: + - granularity: 1 second + timespan: 1 hour + - granularity: 1 minute + timespan: 1 day + - granularity: 1 hour + timespan: 365 days + + resources: + - resource_type: identity + metrics: + identity.authenticate.success: + identity.authenticate.pending: + identity.authenticate.failure: + identity.user.created: + identity.user.deleted: + identity.user.updated: + identity.group.created: + identity.group.deleted: + identity.group.updated: + identity.role.created: + identity.role.deleted: + identity.role.updated: + identity.project.created: + identity.project.deleted: + identity.project.updated: + identity.trust.created: + identity.trust.deleted: + identity.role_assignment.created: + identity.role_assignment.deleted: + + - resource_type: ceph_account + metrics: + radosgw.objects: + radosgw.objects.size: + radosgw.objects.containers: + radosgw.api.request: + radosgw.containers.objects: + radosgw.containers.objects.size: + + - resource_type: instance + metrics: + memory: + memory.usage: + memory.resident: + memory.swap.in: + memory.swap.out: + memory.bandwidth.total: + memory.bandwidth.local: + vcpus: + cpu: + archive_policy_name: ceilometer-low-rate + cpu_l3_cache: + disk.root.size: + disk.ephemeral.size: + disk.latency: + disk.iops: + disk.capacity: + disk.allocation: + disk.usage: + compute.instance.booting.time: + perf.cpu.cycles: + perf.instructions: + perf.cache.references: + perf.cache.misses: + attributes: + host: resource_metadata.(instance_host|host) + image_ref: resource_metadata.image_ref + launched_at: resource_metadata.launched_at + created_at: resource_metadata.created_at + deleted_at: resource_metadata.deleted_at + display_name: resource_metadata.display_name + flavor_id: resource_metadata.(instance_flavor_id|(flavor.id)|flavor_id) + flavor_name: resource_metadata.(instance_type|(flavor.name)|flavor_name) + server_group: resource_metadata.user_metadata.server_group + event_delete: compute.instance.delete.start + event_create: compute.instance.create.end + event_attributes: + id: instance_id + display_name: display_name + host: host + availability_zone: availability_zone + flavor_id: instance_type_id + flavor_name: instance_type + user_id: user_id + project_id: project_id + event_associated_resources: + instance_network_interface: '{"=": {"instance_id": "%s"}}' + instance_disk: '{"=": {"instance_id": "%s"}}' + + - resource_type: instance_network_interface + metrics: + network.outgoing.packets: + archive_policy_name: ceilometer-low-rate + network.incoming.packets: + archive_policy_name: ceilometer-low-rate + network.outgoing.packets.drop: + archive_policy_name: ceilometer-low-rate + network.incoming.packets.drop: + archive_policy_name: 
ceilometer-low-rate + network.outgoing.packets.error: + archive_policy_name: ceilometer-low-rate + network.incoming.packets.error: + archive_policy_name: ceilometer-low-rate + network.outgoing.bytes: + archive_policy_name: ceilometer-low-rate + network.incoming.bytes: + archive_policy_name: ceilometer-low-rate + attributes: + name: resource_metadata.vnic_name + instance_id: resource_metadata.instance_id + + - resource_type: instance_disk + metrics: + disk.device.read.requests: + archive_policy_name: ceilometer-low-rate + disk.device.write.requests: + archive_policy_name: ceilometer-low-rate + disk.device.read.bytes: + archive_policy_name: ceilometer-low-rate + disk.device.write.bytes: + archive_policy_name: ceilometer-low-rate + disk.device.latency: + disk.device.read.latency: + disk.device.write.latency: + disk.device.iops: + disk.device.capacity: + disk.device.allocation: + disk.device.usage: + attributes: + name: resource_metadata.disk_name + instance_id: resource_metadata.instance_id + + - resource_type: image + metrics: + image.size: + image.download: + image.serve: + attributes: + name: resource_metadata.name + container_format: resource_metadata.container_format + disk_format: resource_metadata.disk_format + event_delete: image.delete + event_attributes: + id: resource_id + + - resource_type: ipmi + metrics: + hardware.ipmi.node.power: + hardware.ipmi.node.temperature: + hardware.ipmi.node.inlet_temperature: + hardware.ipmi.node.outlet_temperature: + hardware.ipmi.node.fan: + hardware.ipmi.node.current: + hardware.ipmi.node.voltage: + hardware.ipmi.node.airflow: + hardware.ipmi.node.cups: + hardware.ipmi.node.cpu_util: + hardware.ipmi.node.mem_util: + hardware.ipmi.node.io_util: + + - resource_type: ipmi_sensor + metrics: + - 'hardware.ipmi.power' + - 'hardware.ipmi.temperature' + - 'hardware.ipmi.current' + - 'hardware.ipmi.voltage' + attributes: + node: resource_metadata.node + + - resource_type: network + metrics: + bandwidth: + ip.floating: + event_delete: floatingip.delete.end + event_attributes: + id: resource_id + + - resource_type: stack + metrics: + stack.create: + stack.update: + stack.delete: + stack.resume: + stack.suspend: + + - resource_type: swift_account + metrics: + storage.objects.incoming.bytes: + storage.objects.outgoing.bytes: + storage.objects.size: + storage.objects: + storage.objects.containers: + storage.containers.objects: + storage.containers.objects.size: + + - resource_type: volume + metrics: + volume: + volume.size: + snapshot.size: + volume.snapshot.size: + volume.backup.size: + backup.size: + volume.manage_existing.start: + volume.manage_existing.end: + volume.manage_existing_snapshot.start: + volume.manage_existing_snapshot.end: + attributes: + display_name: resource_metadata.(display_name|name) + volume_type: resource_metadata.volume_type + image_id: resource_metadata.image_id + instance_id: resource_metadata.instance_id + event_delete: + - volume.delete.end + - snapshot.delete.end + event_update: + - volume.transfer.accept.end + - snapshot.transfer.accept.end + event_attributes: + id: resource_id + project_id: project_id + + - resource_type: volume_provider + metrics: + volume.provider.capacity.total: + volume.provider.capacity.free: + volume.provider.capacity.allocated: + volume.provider.capacity.provisioned: + volume.provider.capacity.virtual_free: + + - resource_type: volume_provider_pool + metrics: + volume.provider.pool.capacity.total: + volume.provider.pool.capacity.free: + volume.provider.pool.capacity.allocated: + 
volume.provider.pool.capacity.provisioned: + volume.provider.pool.capacity.virtual_free: + attributes: + provider: resource_metadata.provider + + - resource_type: host + metrics: + hardware.cpu.load.1min: + hardware.cpu.load.5min: + hardware.cpu.load.15min: + hardware.cpu.util: + hardware.cpu.user: + archive_policy_name: ceilometer-low-rate + hardware.cpu.nice: + archive_policy_name: ceilometer-low-rate + hardware.cpu.system: + archive_policy_name: ceilometer-low-rate + hardware.cpu.idle: + archive_policy_name: ceilometer-low-rate + hardware.cpu.wait: + archive_policy_name: ceilometer-low-rate + hardware.cpu.kernel: + archive_policy_name: ceilometer-low-rate + hardware.cpu.interrupt: + archive_policy_name: ceilometer-low-rate + hardware.memory.total: + hardware.memory.used: + hardware.memory.swap.total: + hardware.memory.swap.avail: + hardware.memory.buffer: + hardware.memory.cached: + hardware.network.ip.outgoing.datagrams: + hardware.network.ip.incoming.datagrams: + hardware.system_stats.cpu.idle: + hardware.system_stats.io.outgoing.blocks: + hardware.system_stats.io.incoming.blocks: + attributes: + host_name: resource_metadata.resource_url + + - resource_type: host_disk + metrics: + hardware.disk.size.total: + hardware.disk.size.used: + hardware.disk.read.bytes: + hardware.disk.write.bytes: + hardware.disk.read.requests: + hardware.disk.write.requests: + attributes: + host_name: resource_metadata.resource_url + device_name: resource_metadata.device + + - resource_type: host_network_interface + metrics: + hardware.network.incoming.bytes: + hardware.network.outgoing.bytes: + hardware.network.outgoing.errors: + attributes: + host_name: resource_metadata.resource_url + device_name: resource_metadata.name + + - resource_type: nova_compute + metrics: + compute.node.cpu.frequency: + compute.node.cpu.idle.percent: + compute.node.cpu.idle.time: + compute.node.cpu.iowait.percent: + compute.node.cpu.iowait.time: + compute.node.cpu.kernel.percent: + compute.node.cpu.kernel.time: + compute.node.cpu.percent: + compute.node.cpu.user.percent: + compute.node.cpu.user.time: + attributes: + host_name: resource_metadata.host + + - resource_type: manila_share + metrics: + manila.share.size: + attributes: + name: resource_metadata.name + host: resource_metadata.host + status: resource_metadata.status + availability_zone: resource_metadata.availability_zone + protocol: resource_metadata.protocol + + - resource_type: switch + metrics: + switch: + switch.ports: + attributes: + controller: resource_metadata.controller + + - resource_type: switch_port + metrics: + switch.port: + switch.port.uptime: + switch.port.receive.packets: + switch.port.transmit.packets: + switch.port.receive.bytes: + switch.port.transmit.bytes: + switch.port.receive.drops: + switch.port.transmit.drops: + switch.port.receive.errors: + switch.port.transmit.errors: + switch.port.receive.frame_error: + switch.port.receive.overrun_error: + switch.port.receive.crc_error: + switch.port.collision.count: + attributes: + switch: resource_metadata.switch + port_number_on_switch: resource_metadata.port_number_on_switch + neutron_port_id: resource_metadata.neutron_port_id + controller: resource_metadata.controller + + - resource_type: port + metrics: + port: + port.uptime: + port.receive.packets: + port.transmit.packets: + port.receive.bytes: + port.transmit.bytes: + port.receive.drops: + port.receive.errors: + attributes: + controller: resource_metadata.controller + + - resource_type: switch_table + metrics: + switch.table.active.entries: + 
attributes: + controller: resource_metadata.controller + switch: resource_metadata.switch + + - resource_type: loadbalancer + metrics: + network.services.lb.outgoing.bytes: + network.services.lb.incoming.bytes: + network.services.lb.pool: + network.services.lb.listener: + network.services.lb.member: + network.services.lb.health_monitor: + network.services.lb.loadbalancer: + network.services.lb.total.connections: + network.services.lb.active.connections: + meters: + metric: + # Image + - name: "image.size" + event_type: + - "image.upload" + - "image.delete" + - "image.update" + type: "gauge" + unit: B + volume: $.payload.size + resource_id: $.payload.id + project_id: $.payload.owner + + - name: "image.download" + event_type: "image.send" + type: "delta" + unit: "B" + volume: $.payload.bytes_sent + resource_id: $.payload.image_id + user_id: $.payload.receiver_user_id + project_id: $.payload.receiver_tenant_id + + - name: "image.serve" + event_type: "image.send" + type: "delta" + unit: "B" + volume: $.payload.bytes_sent + resource_id: $.payload.image_id + project_id: $.payload.owner_id + + - name: 'volume.provider.capacity.total' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.total + resource_id: $.payload.name_to_id + + - name: 'volume.provider.capacity.free' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.free + resource_id: $.payload.name_to_id + + - name: 'volume.provider.capacity.allocated' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.allocated + resource_id: $.payload.name_to_id + + - name: 'volume.provider.capacity.provisioned' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.provisioned + resource_id: $.payload.name_to_id + + - name: 'volume.provider.capacity.virtual_free' + event_type: 'capacity.backend.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.virtual_free + resource_id: $.payload.name_to_id + + - name: 'volume.provider.pool.capacity.total' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.total + resource_id: $.payload.name_to_id + metadata: &provider_pool_meta + provider: $.payload.name_to_id.`split(#, 0, 1)` + + - name: 'volume.provider.pool.capacity.free' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.free + resource_id: $.payload.name_to_id + metadata: + <<: *provider_pool_meta + + - name: 'volume.provider.pool.capacity.allocated' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.allocated + resource_id: $.payload.name_to_id + metadata: + <<: *provider_pool_meta + + - name: 'volume.provider.pool.capacity.provisioned' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.provisioned + resource_id: $.payload.name_to_id + metadata: + <<: *provider_pool_meta + + - name: 'volume.provider.pool.capacity.virtual_free' + event_type: 'capacity.pool.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.virtual_free + resource_id: $.payload.name_to_id + metadata: + <<: *provider_pool_meta + + - name: 'volume.size' + event_type: + - 'volume.exists' + - 'volume.retype' + - 'volume.create.*' + - 'volume.delete.*' + - 'volume.resize.*' + - 'volume.attach.*' + - 'volume.detach.*' + - 'volume.update.*' + - 'volume.manage.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.size + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.volume_id + metadata: + display_name: $.payload.display_name 
+ volume_type: $.payload.volume_type + image_id: $.payload.glance_metadata[?key=image_id].value + instance_id: $.payload.volume_attachment[0].server_id + + - name: 'snapshot.size' + event_type: + - 'snapshot.exists' + - 'snapshot.create.*' + - 'snapshot.delete.*' + - 'snapshot.manage.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.volume_size + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.snapshot_id + metadata: + display_name: $.payload.display_name + + - name: 'backup.size' + event_type: + - 'backup.exists' + - 'backup.create.*' + - 'backup.delete.*' + - 'backup.restore.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.size + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.backup_id + metadata: + display_name: $.payload.display_name + + # Magnum + - name: $.payload.metrics.[*].name + event_type: 'magnum.bay.metrics.*' + type: 'gauge' + unit: $.payload.metrics.[*].unit + volume: $.payload.metrics.[*].value + user_id: $.payload.user_id + project_id: $.payload.project_id + resource_id: $.payload.resource_id + lookup: ['name', 'unit', 'volume'] + + # Swift + - name: $.payload.measurements.[*].metric.[*].name + event_type: 'objectstore.http.request' + type: 'delta' + unit: $.payload.measurements.[*].metric.[*].unit + volume: $.payload.measurements.[*].result + resource_id: $.payload.target.id + user_id: $.payload.initiator.id + project_id: $.payload.initiator.project_id + lookup: ['name', 'unit', 'volume'] + + - name: 'memory' + event_type: &instance_events compute.instance.(?!create.start|update).* + type: 'gauge' + unit: 'MB' + volume: $.payload.memory_mb + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: &instance_meta + host: $.payload.host + flavor_id: $.payload.instance_flavor_id + flavor_name: $.payload.instance_type + display_name: $.payload.display_name + image_ref: $.payload.image_meta.base_image_ref + launched_at: $.payload.launched_at + created_at: $.payload.created_at + deleted_at: $.payload.deleted_at + + - name: 'vcpus' + event_type: *instance_events + type: 'gauge' + unit: 'vcpu' + volume: $.payload.vcpus + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: + <<: *instance_meta + + - name: 'compute.instance.booting.time' + event_type: 'compute.instance.create.end' + type: 'gauge' + unit: 'sec' + volume: + fields: [$.payload.created_at, $.payload.launched_at] + plugin: 'timedelta' + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: + <<: *instance_meta + + - name: 'disk.root.size' + event_type: *instance_events + type: 'gauge' + unit: 'GB' + volume: $.payload.root_gb + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: + <<: *instance_meta + + - name: 'disk.ephemeral.size' + event_type: *instance_events + type: 'gauge' + unit: 'GB' + volume: $.payload.ephemeral_gb + user_id: $.payload.user_id + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_metadata: $.payload.metadata + metadata: + <<: *instance_meta + + - name: 'bandwidth' + event_type: 'l3.meter' + type: 'delta' + unit: 'B' + volume: $.payload.bytes + project_id: $.payload.tenant_id + resource_id: $.payload.label_id + + - name: 'compute.node.cpu.frequency' + 
event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'MHz' + volume: $.payload.metrics[?(@.name='cpu.frequency')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.frequency')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.frequency')].source + + - name: 'compute.node.cpu.user.time' + event_type: 'compute.metrics.update' + type: 'cumulative' + unit: 'ns' + volume: $.payload.metrics[?(@.name='cpu.user.time')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.user.time')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.user.time')].source + + - name: 'compute.node.cpu.kernel.time' + event_type: 'compute.metrics.update' + type: 'cumulative' + unit: 'ns' + volume: $.payload.metrics[?(@.name='cpu.kernel.time')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.kernel.time')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.kernel.time')].source + + - name: 'compute.node.cpu.idle.time' + event_type: 'compute.metrics.update' + type: 'cumulative' + unit: 'ns' + volume: $.payload.metrics[?(@.name='cpu.idle.time')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.idle.time')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.idle.time')].source + + - name: 'compute.node.cpu.iowait.time' + event_type: 'compute.metrics.update' + type: 'cumulative' + unit: 'ns' + volume: $.payload.metrics[?(@.name='cpu.iowait.time')].value + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.iowait.time')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.iowait.time')].source + + - name: 'compute.node.cpu.kernel.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.kernel.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.kernel.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.kernel.percent')].source + + - name: 'compute.node.cpu.idle.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.idle.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.idle.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.idle.percent')].source + + - name: 'compute.node.cpu.user.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.user.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.user.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.user.percent')].source + + - name: 'compute.node.cpu.iowait.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: 
$.payload.metrics[?(@.name='cpu.iowait.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.iowait.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.iowait.percent')].source + + - name: 'compute.node.cpu.percent' + event_type: 'compute.metrics.update' + type: 'gauge' + unit: 'percent' + volume: $.payload.metrics[?(@.name='cpu.percent')].value * 100 + resource_id: $.payload.host + "_" + $.payload.nodename + timestamp: $.payload.metrics[?(@.name='cpu.percent')].timestamp + metadata: + event_type: $.event_type + host: $.publisher_id + source: $.payload.metrics[?(@.name='cpu.percent')].source + + # Identity + # NOTE(gordc): hack because jsonpath-rw-ext can't concat starting with string. + - name: $.payload.outcome - $.payload.outcome + 'identity.authenticate.' + $.payload.outcome + type: 'delta' + unit: 'user' + volume: 1 + event_type: + - 'identity.authenticate' + resource_id: $.payload.initiator.id + user_id: $.payload.initiator.id + + # DNS + - name: 'dns.domain.exists' + event_type: 'dns.domain.exists' + type: 'cumulative' + unit: 's' + volume: + fields: [$.payload.audit_period_beginning, $.payload.audit_period_ending] + plugin: 'timedelta' + project_id: $.payload.tenant_id + resource_id: $.payload.id + user_id: $.ctxt.user + metadata: + status: $.payload.status + pool_id: $.payload.pool_id + host: $.publisher_id + + # Trove + - name: 'trove.instance.exists' + event_type: 'trove.instance.exists' + type: 'cumulative' + unit: 's' + volume: + fields: [$.payload.audit_period_beginning, $.payload.audit_period_ending] + plugin: 'timedelta' + project_id: $.payload.tenant_id + resource_id: $.payload.instance_id + user_id: $.payload.user_id + metadata: + nova_instance_id: $.payload.nova_instance_id + state: $.payload.state + service_id: $.payload.service_id + instance_type: $.payload.instance_type + instance_type_id: $.payload.instance_type_id + + # Manila + - name: 'manila.share.size' + event_type: + - 'share.create.*' + - 'share.delete.*' + - 'share.extend.*' + - 'share.shrink.*' + type: 'gauge' + unit: 'GB' + volume: $.payload.size + user_id: $.payload.user_id + project_id: $.payload.project_id + resource_id: $.payload.share_id + metadata: + name: $.payload.name + host: $.payload.host + status: $.payload.status + availability_zone: $.payload.availability_zone + protocol: $.payload.proto + + polling: + sources: + - name: all_pollsters + interval: 300 + meters: + - "*" + pipeline: + sources: + - name: meter_source + meters: + - "*" + sinks: + - meter_sink + sinks: + - name: meter_sink + publishers: + - gnocchi + policy: {} + audit_api_map: + DEFAULT: + target_endpoint_type: None + path_keywords: + meters: meter_name + resources: resource_id + statistics: None + samples: sample_id + service_endpoints: + metering: service/metering + rally_tests: + CeilometerStats.create_meter_and_get_stats: + - args: + user_id: user-id + resource_id: resource-id + counter_volume: 1 + counter_unit: '' + counter_type: cumulative + runner: + type: constant + times: 1 + concurrency: 1 + sla: + failure_rate: + max: 0 + CeilometerMeters.list_meters: + - runner: + type: constant + times: 1 + concurrency: 1 + sla: + failure_rate: + max: 0 + context: + ceilometer: + counter_name: benchmark_meter + counter_type: gauge + counter_unit: "%" + counter_volume: 1 + resources_per_tenant: 1 + samples_per_resource: 1 + timestamp_interval: 10 + metadata_list: + - status: active + name: rally 
benchmark on + deleted: 'false' + - status: terminated + name: rally benchmark off + deleted: 'true' + args: + limit: 5 + metadata_query: + status: terminated + CeilometerQueries.create_and_query_samples: + - args: + filter: + "=": + counter_unit: instance + orderby: + limit: 10 + counter_name: cpu_util + counter_type: gauge + counter_unit: instance + counter_volume: 1 + resource_id: resource_id + runner: + type: constant + times: 1 + concurrency: 1 + sla: + failure_rate: + max: 0 + +dependencies: + dynamic: + common: + local_image_registry: + jobs: + - ceilometer-image-repo-sync + services: + - endpoint: node + service: local_image_registry + static: + central: + jobs: + - ceilometer-db-sync + - ceilometer-rabbit-init + - ceilometer-ks-user + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + ipmi: + jobs: + - ceilometer-db-sync + - ceilometer-rabbit-init + - ceilometer-ks-user + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + compute: + jobs: + - ceilometer-db-sync + - ceilometer-rabbit-init + - ceilometer-ks-user + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + db_sync: + jobs: [] + services: [] + ks_service: + services: + - endpoint: internal + service: identity + ks_user: + services: + - endpoint: internal + service: identity + rabbit_init: + services: + - service: oslo_messaging + endpoint: internal + notification: + jobs: + - ceilometer-db-sync + - ceilometer-rabbit-init + - ceilometer-ks-user + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metric + tests: + services: + - endpoint: internal + service: identity + - endpoint: internal + service: metering + - endpoint: internal + service: metric + image_repo_sync: + services: + - endpoint: internal + service: local_image_registry + +# Names of secrets used by bootstrap and environmental checks +secrets: + identity: + admin: ceilometer-keystone-admin + ceilometer: ceilometer-keystone-user + test: ceilometer-keystone-test + oslo_messaging: + admin: ceilometer-rabbitmq-admin + ceilometer: ceilometer-rabbitmq-user + oci_image_registry: + ceilometer: ceilometer-oci-image-registry + +bootstrap: + enabled: false + ks_user: ceilometer + script: | + openstack token issue + +# typically overridden by environmental +# values, but should include all endpoints +# required by this chart +endpoints: + cluster_domain_suffix: cluster.local + local_image_registry: + name: docker-registry + namespace: docker-registry + hosts: + default: localhost + internal: docker-registry + node: localhost + host_fqdn_override: + default: null + port: + registry: + node: 5000 + oci_image_registry: + name: oci-image-registry + namespace: oci-image-registry + auth: + enabled: false + ceilometer: + username: ceilometer + password: password + hosts: + default: localhost + host_fqdn_override: + default: null + port: + registry: + default: null + identity: + name: keystone + auth: + admin: + region_name: RegionOne + username: admin + password: password + project_name: admin + user_domain_name: default + project_domain_name: default + ceilometer: + role: admin + region_name: RegionOne + username: ceilometer + password: password + project_name: service + user_domain_name: service + project_domain_name: service + test: + role: admin + region_name: RegionOne + username: ceilometer-test + password: 
+# typically overridden by environmental
+# values, but should include all endpoints
+# required by this chart
+endpoints:
+  cluster_domain_suffix: cluster.local
+  local_image_registry:
+    name: docker-registry
+    namespace: docker-registry
+    hosts:
+      default: localhost
+      internal: docker-registry
+      node: localhost
+    host_fqdn_override:
+      default: null
+    port:
+      registry:
+        node: 5000
+  oci_image_registry:
+    name: oci-image-registry
+    namespace: oci-image-registry
+    auth:
+      enabled: false
+      ceilometer:
+        username: ceilometer
+        password: password
+    hosts:
+      default: localhost
+    host_fqdn_override:
+      default: null
+    port:
+      registry:
+        default: null
+  identity:
+    name: keystone
+    auth:
+      admin:
+        region_name: RegionOne
+        username: admin
+        password: password
+        project_name: admin
+        user_domain_name: default
+        project_domain_name: default
+      ceilometer:
+        role: admin
+        region_name: RegionOne
+        username: ceilometer
+        password: password
+        project_name: service
+        user_domain_name: service
+        project_domain_name: service
+      test:
+        role: admin
+        region_name: RegionOne
+        username: ceilometer-test
+        password: password
+        project_name: test
+        user_domain_name: service
+        project_domain_name: service
+    hosts:
+      default: keystone
+      internal: keystone-api
+    host_fqdn_override:
+      default: null
+    path:
+      default: /v3
+    scheme:
+      default: 'http'
+    port:
+      api:
+        default: 5000
+        public: 80
+        internal: 5000
+        service: 5000
+  metric:
+    name: gnocchi
+    hosts:
+      default: gnocchi-api
+      public: gnocchi
+    host_fqdn_override:
+      default: null
+    path:
+      default: null
+    scheme:
+      default: 'http'
+    port:
+      api:
+        default: 8041
+        public: 80
+        internal: 8041
+        service: 8041
+  alarming:
+    name: aodh
+    hosts:
+      default: aodh-api
+      public: aodh
+    host_fqdn_override:
+      default: null
+    path:
+      default: null
+    scheme:
+      default: 'http'
+    port:
+      api:
+        default: 8042
+        public: 80
+  oslo_cache:
+    auth:
+      # NOTE(portdirect): this is used to define the value for keystone
+      # authtoken cache encryption key, if not set it will be populated
+      # automatically with a random value, but to take advantage of
+      # this feature all services should be set to use the same key,
+      # and memcache service.
+      memcache_secret_key: null
+    hosts:
+      default: memcached
+    host_fqdn_override:
+      default: null
+    port:
+      memcache:
+        default: 11211
+  oslo_messaging:
+    auth:
+      admin:
+        username: rabbitmq
+        password: password
+      ceilometer:
+        username: ceilometer
+        password: password
+    statefulset:
+      replicas: 2
+      name: rabbitmq-rabbitmq
+    hosts:
+      default: rabbitmq
+    host_fqdn_override:
+      default: null
+    path: /ceilometer
+    scheme: rabbit
+    port:
+      amqp:
+        default: 5672
+      http:
+        default: 15672
+
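+# NOTE: ceilometer locates gnocchi through the keystone catalog via the
+# internal "metric" endpoint defined above. An illustrative check after
+# deployment:
+#   openstack endpoint list --service metric --interface internal
+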
cpu: "2000m" + ks_user: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + tests: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + image_repo_sync: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "1024Mi" + cpu: "2000m" + +network_policy: + ceilometer: + ingress: + - {} + egress: + - {} + +manifests: + configmap_bin: true + configmap_etc: true + deployment_api: false + deployment_central: true + deployment_collector: false + daemonset_compute: true + daemonset_ipmi: false + deployment_notification: true + ingress_api: false + job_bootstrap: true + job_db_drop: false + # using gnocchi so no db init + job_db_init: false + job_db_init_mongodb: false + # runs ceilometer-upgrade which inits resource types in gnocchi! + job_db_sync: true + job_image_repo_sync: true + job_ks_endpoints: false + job_ks_service: true + job_ks_user: true + job_rabbit_init: true + pdb_api: true + pod_rally_test: true + network_policy: false + secret_db: true + secret_keystone: true + secret_mongodb: false + secret_rabbitmq: true + secret_registry: true + service_api: true + service_ingress_api: true +... diff --git a/helm-configs/gnocchi/gnocchi-helm-overrides.yaml b/helm-configs/gnocchi/gnocchi-helm-overrides.yaml index 7ade5b93..db1c37bb 100644 --- a/helm-configs/gnocchi/gnocchi-helm-overrides.yaml +++ b/helm-configs/gnocchi/gnocchi-helm-overrides.yaml @@ -234,7 +234,7 @@ pod: init_container: null gnocchi_tests: replicas: - api: 1 + api: 3 lifecycle: upgrades: deployments: @@ -246,11 +246,11 @@ pod: daemonsets: pod_replacement_strategy: RollingUpdate metricd: - enabled: false + enabled: true min_ready_seconds: 0 max_unavailable: 1 statsd: - enabled: false + enabled: true min_ready_seconds: 0 max_unavailable: 1 disruption_budget: diff --git a/helm-configs/postgresql/postgresql-helm-overrides.yaml b/helm-configs/postgresql/postgresql-helm-overrides.yaml index 679228c1..ad41ea06 100644 --- a/helm-configs/postgresql/postgresql-helm-overrides.yaml +++ b/helm-configs/postgresql/postgresql-helm-overrides.yaml @@ -239,7 +239,7 @@ jobs: # activeDeadlineSeconds == 0 means no deadline activeDeadlineSeconds: 0 backoffLimit: 6 - cron: "0 0 * * *" + cron: "15 0 * * *" history: success: 3 failed: 1 @@ -300,12 +300,12 @@ conf: hba_file: '/tmp/pg_hba.conf' ident_file: '/tmp/pg_ident.conf' backup: - enabled: false + enabled: true base_path: /var/backup days_to_keep: 3 pg_dumpall_options: '--inserts --clean' remote_backup: - enabled: false + enabled: true container_name: postgresql days_to_keep: 14 storage_policy: default-placement @@ -466,7 +466,7 @@ manifests: configmap_etc: true job_image_repo_sync: true network_policy: false - job_ks_user: false + job_ks_user: true secret_admin: true secret_etc: true secret_audit: true @@ -474,8 +474,8 @@ manifests: secret_registry: true service: true statefulset: true - cron_job_postgresql_backup: false - pvc_backup: false + cron_job_postgresql_backup: true + pvc_backup: true monitoring: prometheus: configmap_bin: false diff --git a/mkdocs.yml b/mkdocs.yml index d553449c..ce4435f5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -182,6 +182,7 @@ nav: - skyline: openstack-skyline.md - Octavia: openstack-octavia.md - Gnocchi: openstack-gnocchi.md + - Ceilometer: openstack-ceilometer.md - Monitoring: - Monitoring Overview: prometheus-monitoring-overview.md - Prometheus: prometheus.md