generated from canonical/template-operator
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch '2/edge' into opensearch-throttling
- Loading branch information
Showing
3 changed files
with
214 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
rule_files: | ||
- ../../../src/alert_rules/prometheus/prometheus_alerts.yaml | ||
|
||
evaluation_interval: 1m | ||
|
||
tests: | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_cluster_status{cluster="opensearch-x7zb"}' | ||
values: '2x3' | ||
alert_rule_test: | ||
- eval_time: 2m | ||
alertname: OpenSearchClusterRed | ||
exp_alerts: | ||
- exp_labels: | ||
severity: critical | ||
cluster: opensearch-x7zb | ||
exp_annotations: | ||
message: "Cluster opensearch-x7zb health status has been RED for at least 2m. Cluster does not accept writes, shards may be missing or master node hasn't been elected yet." | ||
summary: "Cluster health status is RED" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'up{juju_unit="opensearch/1"}' | ||
values: '0x20' | ||
alert_rule_test: | ||
- eval_time: 5m | ||
alertname: OpenSearchScrapeFailed | ||
exp_alerts: | ||
- exp_labels: | ||
severity: critical | ||
juju_unit: opensearch/1 | ||
exp_annotations: | ||
message: "Scrape on opensearch/1 failed. Ensure that the OpenSearch systemd service is healthy and that the unit is part of the cluster." | ||
summary: "OpenSearch exporter scrape failed" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_cluster_status{cluster="opensearch-x7zb"}' | ||
values: '1x21' | ||
- series: 'opensearch_cluster_shards_number{cluster="opensearch-x7zb", type="relocating"}' | ||
values: '35x21' | ||
alert_rule_test: | ||
- eval_time: 20m | ||
alertname: OpenSearchClusterYellowTemp | ||
exp_alerts: | ||
- exp_labels: | ||
severity: warning | ||
cluster: opensearch-x7zb | ||
exp_annotations: | ||
message: "Cluster opensearch-x7zb health status has been YELLOW for at least 20m. Shards are still relocating or initializing. The cluster might be under heavy load." | ||
summary: "Cluster health status is temporarily YELLOW" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_cluster_status{cluster="opensearch-x7zb"}' | ||
values: '1x21' | ||
- series: 'opensearch_cluster_shards_number{cluster="opensearch-x7zb", type="unassigned"}' | ||
values: '35x21' | ||
alert_rule_test: | ||
- eval_time: 20m | ||
alertname: OpenSearchClusterYellow | ||
exp_alerts: | ||
- exp_labels: | ||
severity: warning | ||
cluster: opensearch-x7zb | ||
exp_annotations: | ||
message: "Cluster opensearch-x7zb health status has been YELLOW. Some replica shards are unassigned." | ||
summary: "Number of nodes in the cluster might be too low. Consider scaling the application to ensure that it has enough nodes to host all shards." | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_threadpool_threads_count{name="write", type="rejected", cluster="opensearch-x7zb", node="opensearch-0.fa9"}' | ||
values: '0 2 4 6 8 10 12 14 16 18 20' # Simulates increasing rejection rates | ||
|
||
- series: 'opensearch_threadpool_threads_count{name="write", type="completed", cluster="opensearch-x7zb", node="opensearch-0.fa9"}' | ||
values: '100 110 120 130 140 150 160 170 180 190 200' # Total requests, increasing over time | ||
|
||
alert_rule_test: | ||
- eval_time: 11m | ||
alertname: OpenSearchWriteRequestsRejectionJumps | ||
exp_alerts: | ||
- exp_labels: | ||
severity: warning | ||
cluster: opensearch-x7zb | ||
node: opensearch-0.fa9 | ||
exp_annotations: | ||
message: "High Write Rejection Ratio at opensearch-0.fa9 node in opensearch-x7zb cluster. This node may not be keeping up with the indexing speed." | ||
summary: "High Write Rejection Ratio - 16.7%" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_fs_path_available_bytes{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}' | ||
values: '69802552852x10' # just 70 GB available | ||
- series: 'opensearch_fs_path_total_bytes{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}' | ||
values: '498589663232x10' # HD with 500 GB | ||
alert_rule_test: | ||
- eval_time: 5m | ||
alertname: OpenSearchNodeDiskLowWatermarkReached | ||
exp_alerts: | ||
- exp_labels: | ||
severity: alert | ||
cluster: opensearch-x7zb | ||
instance: 10.1.156.70:9200 | ||
node: opensearch-0.fa9 | ||
exp_annotations: | ||
message: "Disk Low Watermark Reached at opensearch-0.fa9 node in opensearch-x7zb cluster. Shards can not be allocated to this node anymore. You should consider adding more disk to the node." | ||
summary: "Disk Low Watermark Reached - disk saturation is 86%" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_fs_path_available_bytes{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}' | ||
values: '44873069690x10' # just 45 GB available | ||
- series: 'opensearch_fs_path_total_bytes{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}' | ||
values: '498589663232x10' # HD with 500 GB | ||
alert_rule_test: | ||
- eval_time: 5m | ||
alertname: OpenSearchNodeDiskHighWatermarkReached | ||
# both low and high water mark alerts are triggered | ||
exp_alerts: | ||
- exp_labels: | ||
severity: high | ||
cluster: opensearch-x7zb | ||
instance: 10.1.156.70:9200 | ||
node: opensearch-0.fa9 | ||
exp_annotations: | ||
message: "Disk High Watermark Reached at opensearch-0.fa9 node in opensearch-x7zb cluster. Some shards will be re-allocated to different nodes if possible. Make sure more disk space is added to the node or drop old indices allocated to this node." | ||
summary: "Disk High Watermark Reached - disk saturation is 91%" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_jvm_mem_heap_used_percent{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}' | ||
values: '76x60' | ||
alert_rule_test: | ||
- eval_time: 10m | ||
alertname: OpenSearchJVMHeapUseHigh | ||
exp_alerts: | ||
- exp_labels: | ||
severity: alert | ||
cluster: opensearch-x7zb | ||
instance: 10.1.156.70:9200 | ||
node: opensearch-0.fa9 | ||
exp_annotations: | ||
message: "JVM Heap usage on the node opensearch-0.fa9 in opensearch-x7zb cluster is 76%." | ||
summary: "JVM Heap usage on the node is high" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_os_cpu_percent{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}' | ||
values: '91x6' | ||
alert_rule_test: | ||
- eval_time: 1m | ||
alertname: OpenSearchHostSystemCPUHigh | ||
exp_alerts: | ||
- exp_labels: | ||
severity: alert | ||
cluster: opensearch-x7zb | ||
instance: 10.1.156.70:9200 | ||
node: opensearch-0.fa9 | ||
exp_annotations: | ||
message: "System CPU usage on the node opensearch-0.fa9 in opensearch-x7zb cluster is 91%" | ||
summary: "System CPU usage is high" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_process_cpu_percent{cluster="opensearch-x7zb", instance="10.1.156.70:9200", node="opensearch-0.fa9"}' | ||
values: '91x6' | ||
alert_rule_test: | ||
- eval_time: 1m | ||
alertname: OpenSearchProcessCPUHigh | ||
exp_alerts: | ||
- exp_labels: | ||
severity: alert | ||
cluster: opensearch-x7zb | ||
instance: 10.1.156.70:9200 | ||
node: opensearch-0.fa9 | ||
exp_annotations: | ||
message: "OSE process CPU usage on the node opensearch-0.fa9 in opensearch-x7zb cluster is 91%" | ||
summary: "OSE process CPU usage is high" | ||
|
||
- interval: 1m | ||
input_series: | ||
- series: 'opensearch_indices_indexing_is_throttled_bool{cluster="opensearch-x7zb"}' | ||
values: '1x60' | ||
alert_rule_test: | ||
- eval_time: 5m | ||
alertname: OpenSearchThrottling | ||
exp_alerts: | ||
- exp_labels: | ||
severity: warning | ||
cluster: opensearch-x7zb | ||
exp_annotations: | ||
message: "Cluster opensearch-x7zb is throttling. Please optimize queries and indexing patterns or consider scale the application." | ||
summary: "OpenSearch Indexing Throttle" |