diff --git a/CHANGES.md b/CHANGES.md index 3696ffe..c9c7448 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,15 @@ # CHANGES +## 2024.1.0 + +- [FIX] Sora 2023.2.0 で `ListClusterNodes` API の `include_all_known_nodes` のデフォルト値が変更されたことにより panic が発生する問題に対応する + - Sora 2023.2.0 以降で Sora Exporter 2023.5.0 以前のバージョンを使用し、クラスターメトリクスが有効になっている場合に発生する + - @tnamao +- [CHANGE] Sora の `ListClusterNodes` API を呼び出す際に、API リクエストの `include_all_known_nodes` を `true` にし切断中のノードも含め、接続状態を gauge で返すようにする + - **破壊的変更** になるため、バージョンアップの際に注意してください + - gauge の値は 1 が接続、0 が切断を表し `ListClusterNodes` API のレスポンスに含まれる `connected` の値により返す値を切り替えている + - @tnamao + ## 2023.5.0 - [UPDATE] CI の staticcheck のバージョンを 2023.1.6 に上げる diff --git a/collector/cluster_node.go b/collector/cluster_node.go index 116eb3b..b1f9a3f 100644 --- a/collector/cluster_node.go +++ b/collector/cluster_node.go @@ -1,6 +1,8 @@ package collector -import "github.com/prometheus/client_golang/prometheus" +import ( + "github.com/prometheus/client_golang/prometheus" +) var ( soraClusterMetrics = SoraClusterMetrics{ @@ -27,10 +29,14 @@ func (m *SoraClusterMetrics) Describe(ch chan<- *prometheus.Desc) { func (m *SoraClusterMetrics) Collect(ch chan<- prometheus.Metric, nodeList []soraClusterNode, report soraClusterReport) { for _, node := range nodeList { - if node.ClusterNodeName != nil { - ch <- newGauge(m.clusterNode, 1, *node.ClusterNodeName, *node.Mode) + value := 0.0 + if node.Connected { + value = 1.0 + } + if node.ClusterNodeName != "" { + ch <- newGauge(m.clusterNode, value, node.ClusterNodeName, node.Mode) } else { - ch <- newGauge(m.clusterNode, 1, *node.NodeName, *node.Mode) + ch <- newGauge(m.clusterNode, value, node.NodeName, node.Mode) } } ch <- newGauge(m.raftState, 1.0, report.RaftState) diff --git a/collector/collector.go b/collector/collector.go index 2b65e09..1a99b56 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -1,6 +1,7 @@ package collector import ( + "bytes" "context" "crypto/tls" "encoding/json" @@ -50,6 
+51,10 @@ type HTTPClient interface { Do(*http.Request) (*http.Response, error) } +type SoraListClusterNodesRequest struct { + IncludeAllKnownNodes bool `json:"include_all_known_nodes"` +} + func NewCollector(options *CollectorOptions) *Collector { return &Collector{ URI: options.URI, @@ -112,7 +117,17 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { var nodeList []soraClusterNode if c.EnableSoraClusterMetrics { - req, err = http.NewRequestWithContext(ctx, http.MethodPost, c.URI, nil) + requestParams := SoraListClusterNodesRequest{ + IncludeAllKnownNodes: true, + } + encodedParams, err := json.Marshal(requestParams) + if err != nil { + level.Error(c.logger).Log("msg", "failed to encode Sora ListClusterNodes API request parameters", "err", err) + ch <- newGauge(c.soraUp, 0) + return + } + + req, err = http.NewRequestWithContext(ctx, http.MethodPost, c.URI, bytes.NewBuffer(encodedParams)) if err != nil { level.Error(c.logger).Log("msg", "failed to create request to sora", "err", err) ch <- newGauge(c.soraUp, 0) diff --git a/collector/sora_api.go b/collector/sora_api.go index 8011fe4..74121dd 100644 --- a/collector/sora_api.go +++ b/collector/sora_api.go @@ -136,9 +136,10 @@ type soraClusterReport struct { } type soraClusterNode struct { - ClusterNodeName *string `json:"cluster_node_name"` - NodeName *string `json:"node_name"` - Mode *string `json:"mode"` + ClusterNodeName string `json:"cluster_node_name"` + NodeName string `json:"node_name"` + Mode string `json:"mode"` + Connected bool `json:"connected"` } type soraLicenseInfo struct { diff --git a/main_test.go b/main_test.go index 276625a..1516b87 100644 --- a/main_test.go +++ b/main_test.go @@ -150,17 +150,7 @@ var ( listClusterNodesJSONData = `[ { "node_name": "node-01_canary_sora@10.211.55.42", - "epoch": 1, - "mode": "normal", - "cluster_signaling_url": "ws://127.0.0.1:5001/signaling", - "cluster_api_url": "http://127.0.0.1:3101/", - "member_since": "2022-05-09T07:44:52.973761Z", - "sora_version": 
"2022.1.0-canary.44", - "license_max_nodes": 10, - "license_max_connections": 100, - "license_serial_code": "SAMPLE-SRA-E001-202212-N10-100", - "license_type": "Experimental", - "connected": true + "connected": false }, { "node_name": "node-02_canary_sora@10.211.55.40", @@ -175,20 +165,25 @@ var ( "license_serial_code": "SAMPLE-SRA-E001-202212-N10-100", "license_type": "Experimental", "connected": true - } - ]` + }, + { + "node_name": "node-03_canary_sora@10.211.55.41", + "epoch": 1, + "mode": "normal", + "cluster_signaling_url": "ws://127.0.0.1:5001/signaling", + "cluster_api_url": "http://127.0.0.1:3101/", + "member_since": "2022-05-09T07:44:54.160763Z", + "sora_version": "2022.1.0-canary.44", + "license_max_nodes": 10, + "license_max_connections": 100, + "license_serial_code": "SAMPLE-SRA-E001-202212-N10-100", + "license_type": "Experimental", + "connected": true + } + ]` listClusterNodesCurrentJSONData = `[ { "cluster_node_name": "node-01_canary_sora@10.211.55.42", - "epoch": 1, - "mode": "normal", - "member_since": "2022-05-02T15:26:44.302363Z", - "sora_version": "2021.2.9", - "license_max_connections": 100, - "license_serial_code": "SAMPLE-SRA-E001-202212-N10-100", - "license_type": "Experimental", - "cluster_signaling_url": "ws://127.0.0.1:5001/signaling", - "cluster_api_url": "http://10.1.1.4:3000/", "connected": false }, { @@ -203,7 +198,21 @@ var ( "cluster_signaling_url": "ws://127.0.0.1:5002/signaling", "cluster_api_url": "http://10.1.1.3:3000/", "connected": true - } + }, + { + "node_name": "node-03_canary_sora@10.211.55.41", + "epoch": 1, + "mode": "normal", + "cluster_signaling_url": "ws://127.0.0.1:5001/signaling", + "cluster_api_url": "http://127.0.0.1:3101/", + "member_since": "2022-05-09T07:44:54.160763Z", + "sora_version": "2022.1.0-canary.44", + "license_max_nodes": 10, + "license_max_connections": 100, + "license_serial_code": "SAMPLE-SRA-E001-202212-N10-100", + "license_type": "Experimental", + "connected": true + } ]` getLicenseJSONDATA = `{ 
"expired_at": "2025-09", diff --git a/test/maximum.metrics b/test/maximum.metrics index 755e312..fc7d4fa 100644 --- a/test/maximum.metrics +++ b/test/maximum.metrics @@ -10,8 +10,9 @@ sora_average_duration_seconds 706 sora_average_setup_time_seconds 0 # HELP sora_cluster_node The sora server known cluster node. # TYPE sora_cluster_node gauge +sora_cluster_node{mode="",node_name="node-01_canary_sora@10.211.55.42"} 0 sora_cluster_node{mode="block_new_connection",node_name="node-02_canary_sora@10.211.55.40"} 1 -sora_cluster_node{mode="normal",node_name="node-01_canary_sora@10.211.55.42"} 1 +sora_cluster_node{mode="normal",node_name="node-03_canary_sora@10.211.55.41"} 1 # HELP sora_cluster_raft_commit_index The latest committed Raft log index. # TYPE sora_cluster_raft_commit_index counter sora_cluster_raft_commit_index 10 diff --git a/test/sora_cluster_metrics_enabled.metrics b/test/sora_cluster_metrics_enabled.metrics index 150c02e..757e071 100644 --- a/test/sora_cluster_metrics_enabled.metrics +++ b/test/sora_cluster_metrics_enabled.metrics @@ -10,8 +10,9 @@ sora_average_duration_seconds 706 sora_average_setup_time_seconds 0 # HELP sora_cluster_node The sora server known cluster node. # TYPE sora_cluster_node gauge +sora_cluster_node{mode="normal",node_name="node-03_canary_sora@10.211.55.41"} 1 sora_cluster_node{mode="block_new_connection",node_name="node-02_canary_sora@10.211.55.40"} 1 -sora_cluster_node{mode="normal",node_name="node-01_canary_sora@10.211.55.42"} 1 +sora_cluster_node{mode="",node_name="node-01_canary_sora@10.211.55.42"} 0 # HELP sora_cluster_raft_commit_index The latest committed Raft log index. # TYPE sora_cluster_raft_commit_index counter sora_cluster_raft_commit_index 10