Skip to content

Commit

Permalink
[ATMOSPHERE-523] Improve NeutronNetworkOutOfIPs alarm (#2063)
Browse files Browse the repository at this point in the history
Co-authored-by: Mohammed Naser <[email protected]>
  • Loading branch information
larainema and mnaser authored Oct 30, 2024
1 parent 170ae76 commit fa5d244
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 3 deletions.
61 changes: 61 additions & 0 deletions hack/promtool-test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Copyright (c) 2024 VEXXHOST, Inc.
# SPDX-License-Identifier: Apache-2.0

import json
import os
import shutil
import subprocess
import tempfile

import rjsonnet
import yaml


def import_callback(base, rel):
    """
    Resolve and read a file imported from jsonnet code.

    :param base: The directory containing the code that did the import.
    :param rel: The path imported by the code.
    :returns: A ``(path, content)`` tuple holding the joined path and the
        text of the file found there.
    :raises OSError: If the joined path cannot be opened.
    """
    path = os.path.join(base, rel)
    # Jsonnet sources are UTF-8; decode explicitly so the result does not
    # depend on the locale of the machine running the tests.
    with open(path, "r", encoding="utf-8") as f:
        return path, f.read()


def main():
    """
    Compile the Prometheus rules from jsonnet and run ``promtool test rules``.

    Each top-level key of the compiled jsonnet output is written to its own
    ``<key>.yml`` file inside a temporary directory.  The test definitions
    are then written next to them with ``rule_files`` pointing at those
    files, and promtool is run against the result.  The temporary directory
    is removed when the function exits, whether it returns or raises.

    Both input paths are relative, so this must be run from the repository
    root.

    :returns: The exit status of ``promtool`` (``0`` when it succeeded).
    :raises FileExistsError: If two compiled entries map to the same file.
    """
    compiled_string = rjsonnet.evaluate_file(
        "roles/kube_prometheus_stack/files/jsonnet/rules.jsonnet",
        import_callback=import_callback,
    )
    compiled = json.loads(compiled_string)

    with tempfile.TemporaryDirectory() as tempdir:
        rule_files = []

        for rule_file, data in compiled.items():
            path = os.path.join(tempdir, rule_file + ".yml")

            # Refuse to silently overwrite a rule file written earlier in
            # this loop.
            if os.path.exists(path):
                raise FileExistsError(f"File {path} already exists")
            with open(path, "w") as f:
                yaml.dump(data, f)

            rule_files.append(path)

        with open("roles/kube_prometheus_stack/files/jsonnet/tests.yml") as f:
            tests = yaml.safe_load(f)

        # The checked-in tests do not list rule files; point promtool at the
        # ones that were just generated.
        tests["rule_files"] = rule_files

        tests_file = os.path.join(tempdir, "tests.yml")
        with open(tests_file, "w") as f:
            yaml.dump(tests, f)

        # TODO(mnaser): Enable JUnit output
        # Pass an argument list (no shell) and hand the exit status back to
        # the caller so that failing rule tests fail the job.
        return subprocess.run(
            ["promtool", "test", "rules", tests_file],
            check=False,
        ).returncode


if __name__ == "__main__":
    raise SystemExit(main())
6 changes: 3 additions & 3 deletions roles/kube_prometheus_stack/files/jsonnet/openstack.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@
{
alert: 'NeutronNetworkOutOfIPs',
annotations: {
description: 'The subnet {{ $labels.subnet_name }} within {{ $labels.network_name }} is currently at {{ $value }}% utilization. If the IP addresses run out, it will impact the provisioning of new ports.',
summary: '[{{ $labels.network_name }}] {{ $labels.subnet_name }} running out of IPs',
description: 'The network {{ $labels.network_id }} is currently at {{ $value }}% utilization. If the IP addresses run out, it will impact the provisioning of new ports.',
summary: '[{{ $labels.network_id }}] Network running out of IPs',
},
expr: 'sum by (network_id) (openstack_neutron_network_ip_availabilities_used{project_id!=""}) / sum by (network_id) (openstack_neutron_network_ip_availabilities_total{project_id!=""}) * 100 > 80',
expr: '(sum by (network_id) (openstack_neutron_network_ip_availabilities_used{project_id!=""}) and on (network_id) label_replace(openstack_neutron_network{is_external="true", is_shared="true"}, "network_id", "$1", "id", "(.*)")) / (sum by (network_id) (openstack_neutron_network_ip_availabilities_total{project_id!=""}) and on (network_id) label_replace(openstack_neutron_network{is_external="true", is_shared="true"}, "network_id", "$1", "id", "(.*)")) * 100 > 80',
'for': '6h',
labels: {
severity: 'warning',
Expand Down
35 changes: 35 additions & 0 deletions roles/kube_prometheus_stack/files/jsonnet/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright (c) 2024 VEXXHOST, Inc.
# SPDX-License-Identifier: Apache-2.0

# Rule unit tests for the NeutronNetworkOutOfIPs alert.
# `rule_files` is intentionally absent here; hack/promtool-test.py adds it
# before handing this file to promtool.
tests:
# Case 1: the network series has is_external="false" and is_shared="false".
# 250 of 253 addresses are used, yet no alert is expected at 6h.
- interval: 1m
input_series:
- series: 'openstack_neutron_network{id="4cf895c9-c3d1-489e-b02e-59b5c8976809",is_external="false",is_shared="false",name="public",provider_network_type="vlan",provider_physical_network="external",provider_segmentation_id="3",status="ACTIVE",subnets="54d6f61d-db07-451c-9ab3-b9609b6b6f0b",tags="tag1,tag2",tenant_id="4fd44f30292945e481c7b8a0c8908869"} 0'
values: '0x360'
- series: 'openstack_neutron_network_ip_availabilities_total{cidr="172.24.4.0/24",ip_version="4",network_id="4cf895c9-c3d1-489e-b02e-59b5c8976809",network_name="public",project_id="1a02cc95f1734fcc9d3c753818f03002",subnet_name="public-subnet"}'
values: '253x360'
- series: 'openstack_neutron_network_ip_availabilities_used{cidr="172.24.4.0/24",ip_version="4",network_id="4cf895c9-c3d1-489e-b02e-59b5c8976809",network_name="public",project_id="1a02cc95f1734fcc9d3c753818f03002",subnet_name="public-subnet"}'
values: '250x360'
alert_rule_test:
- eval_time: 6h
alertname: NeutronNetworkOutOfIPs
exp_alerts: []

# Case 2: same usage figures, but the network series has is_external="true"
# and is_shared="true". One alert is expected at 6h, reporting
# 250 / 253 * 100 = 98.81422924901186% utilization.
- interval: 1m
input_series:
- series: 'openstack_neutron_network{id="4cf895c9-c3d1-489e-b02e-59b5c8976809",is_external="true",is_shared="true",name="public",provider_network_type="vlan",provider_physical_network="external",provider_segmentation_id="3",status="ACTIVE",subnets="54d6f61d-db07-451c-9ab3-b9609b6b6f0b",tags="tag1,tag2",tenant_id="4fd44f30292945e481c7b8a0c8908869"} 0'
values: '0x360'
- series: 'openstack_neutron_network_ip_availabilities_total{cidr="172.24.4.0/24",ip_version="4",network_id="4cf895c9-c3d1-489e-b02e-59b5c8976809",network_name="public",project_id="1a02cc95f1734fcc9d3c753818f03002",subnet_name="public-subnet"}'
values: '253x360'
- series: 'openstack_neutron_network_ip_availabilities_used{cidr="172.24.4.0/24",ip_version="4",network_id="4cf895c9-c3d1-489e-b02e-59b5c8976809",network_name="public",project_id="1a02cc95f1734fcc9d3c753818f03002",subnet_name="public-subnet"}'
values: '250x360'
alert_rule_test:
- eval_time: 6h
alertname: NeutronNetworkOutOfIPs
exp_alerts:
- exp_labels:
network_id: 4cf895c9-c3d1-489e-b02e-59b5c8976809
severity: P3
exp_annotations:
summary: "[4cf895c9-c3d1-489e-b02e-59b5c8976809] Network running out of IPs"
description: "The network 4cf895c9-c3d1-489e-b02e-59b5c8976809 is currently at 98.81422924901186% utilization. If the IP addresses run out, it will impact the provisioning of new ports."
8 changes: 8 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,11 @@ allowlist_externals =
bash
commands =
bash {toxinidir}/build/build-manila-image.sh

# Runs hack/promtool-test.py, which compiles the jsonnet rules and invokes
# promtool on them. The promtool binary itself is not installed by tox and
# must already be on PATH.
[testenv:promtool-test]
skip_install = true
deps =
PyYAML
rjsonnet
commands =
python3 {toxinidir}/hack/promtool-test.py
7 changes: 7 additions & 0 deletions zuul.d/jobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@
parent: tox-linters
pre-run: zuul.d/playbooks/linters/pre.yml

# Runs the `promtool-test` tox environment; the pre-run playbook is
# zuul.d/playbooks/promtool/pre.yml.
- job:
name: atmosphere-tox-promtool-test
parent: tox
pre-run: zuul.d/playbooks/promtool/pre.yml
vars:
tox_envlist: promtool-test

- job:
name: atmosphere-tox-py3
parent: tox
Expand Down
34 changes: 34 additions & 0 deletions zuul.d/playbooks/promtool/pre.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright (c) 2024 VEXXHOST, Inc.
# SPDX-License-Identifier: Apache-2.0

# Installs the promtool binary from the Prometheus v2.55.0 release tarball
# into /usr/local/bin on every host.
- hosts: all
tasks:
- name: Install promtool
block:
# The tarball is downloaded to a temporary file that the `always` section
# below removes.
- name: Create temporary file to download
ansible.builtin.tempfile:
state: file
suffix: .tar.gz
register: promtool_file

# The release is pinned to v2.55.0 and verified against a fixed SHA-256.
- name: Download Prometheus
ansible.builtin.get_url:
url: https://github.com/prometheus/prometheus/releases/download/v2.55.0/prometheus-2.55.0.linux-amd64.tar.gz
dest: "{{ promtool_file.path }}"
checksum: sha256:7a6b6d5ea003e8d59def294392c64e28338da627bf760cf268e788d6a8832a23

# Only the promtool member is extracted; --strip-components=1 drops the
# leading prometheus-2.55.0.linux-amd64/ directory so the binary lands
# directly in /usr/local/bin.
- name: Extract Prometheus into /usr/local/bin
become: true
ansible.builtin.unarchive:
src: "{{ promtool_file.path }}"
dest: /usr/local/bin
remote_src: true
extra_opts:
- --strip-components=1
include:
- prometheus-2.55.0.linux-amd64/promtool
# Clean up the downloaded archive whether or not the block succeeded.
always:
- name: Remove temporary file
ansible.builtin.file:
path: "{{ promtool_file.path }}"
state: absent
1 change: 1 addition & 0 deletions zuul.d/project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
jobs:
- atmosphere-chart-vendor
- atmosphere-linters
- atmosphere-tox-promtool-test
- atmosphere-tox-py3
- atmosphere-build-collection:
dependencies: &molecule_check_dependencies
Expand Down

0 comments on commit fa5d244

Please sign in to comment.