diff --git a/pkg/redis/events/watcher.go b/pkg/redis/events/watcher.go index d25b95a6..91eb5f59 100644 --- a/pkg/redis/events/watcher.go +++ b/pkg/redis/events/watcher.go @@ -38,7 +38,7 @@ var ( Namespace: "saas_redis_sentinel", Help: "+sdown (https://redis.io/topics/sentinel#sentinel-api)", }, - []string{"sentinel", "shard", "redis_server"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias"}, ) sdownSentinelCount = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -46,7 +46,7 @@ var ( Namespace: "saas_redis_sentinel", Help: "+sdown (https://redis.io/topics/sentinel#sentinel-api)", }, - []string{"sentinel", "shard", "redis_server"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias"}, ) sdownClearedCount = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -54,7 +54,7 @@ var ( Namespace: "saas_redis_sentinel", Help: "-sdown (https://redis.io/topics/sentinel#sentinel-api)", }, - []string{"sentinel", "shard", "redis_server"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias"}, ) sdownClearedSentinelCount = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -62,7 +62,7 @@ var ( Namespace: "saas_redis_sentinel", Help: "-sdown (https://redis.io/topics/sentinel#sentinel-api)", }, - []string{"sentinel", "shard", "redis_server"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias"}, ) ) @@ -208,14 +208,16 @@ func (sew *SentinelEventWatcher) metricsFromEvent(rem RedisEventMessage) { sdownSentinelCount.With( prometheus.Labels{ "sentinel": sew.sentinelURI, "shard": rem.master.name, - "redis_server": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port), + "redis_server_host": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port), + "redis_server_alias": sew.topology.GetPool().GetServerAlias(fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port)), }, ).Add(1) default: sdownCount.With( prometheus.Labels{ "sentinel": sew.sentinelURI, "shard": rem.master.name, - "redis_server": 
fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port), + "redis_server_host": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port), + "redis_server_alias": sew.topology.GetPool().GetServerAlias(fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port)), }, ).Add(1) } @@ -225,14 +227,16 @@ func (sew *SentinelEventWatcher) metricsFromEvent(rem RedisEventMessage) { sdownClearedSentinelCount.With( prometheus.Labels{ "sentinel": sew.sentinelURI, "shard": rem.master.name, - "redis_server": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port), + "redis_server_host": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port), + "redis_server_alias": sew.topology.GetPool().GetServerAlias(fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port)), }, ).Add(1) default: sdownClearedCount.With( prometheus.Labels{ "sentinel": sew.sentinelURI, "shard": rem.master.name, - "redis_server": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port), + "redis_server_host": fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port), + "redis_server_alias": sew.topology.GetPool().GetServerAlias(fmt.Sprintf("%s:%s", rem.target.ip, rem.target.port)), }, ).Add(1) } @@ -258,25 +262,29 @@ func (sew *SentinelEventWatcher) initCounters() { sdownSentinelCount.With( prometheus.Labels{ "sentinel": sew.sentinelURI, "shard": shard.Name, - "redis_server": server.ID(), + "redis_server_host": server.ID(), + "redis_server_alias": sew.topology.GetPool().GetServerAlias(server.ID()), }, ).Add(0) sdownCount.With( prometheus.Labels{ "sentinel": sew.sentinelURI, "shard": shard.Name, - "redis_server": server.ID(), + "redis_server_host": server.ID(), + "redis_server_alias": sew.topology.GetPool().GetServerAlias(server.ID()), }, ).Add(0) sdownClearedSentinelCount.With( prometheus.Labels{ "sentinel": sew.sentinelURI, "shard": shard.Name, - "redis_server": server.ID(), + "redis_server_host": server.ID(), + "redis_server_alias": sew.topology.GetPool().GetServerAlias(server.ID()), }, ).Add(0) sdownClearedCount.With( prometheus.Labels{ 
"sentinel": sew.sentinelURI, "shard": shard.Name, - "redis_server": server.ID(), + "redis_server_host": server.ID(), + "redis_server_alias": sew.topology.GetPool().GetServerAlias(server.ID()), }, ).Add(0) } diff --git a/pkg/redis/metrics/sentinel_metrics.go b/pkg/redis/metrics/sentinel_metrics.go index 1011b412..dd698904 100644 --- a/pkg/redis/metrics/sentinel_metrics.go +++ b/pkg/redis/metrics/sentinel_metrics.go @@ -22,7 +22,7 @@ var ( Namespace: "saas_redis_sentinel", Help: `"sentinel master link-pending-commands"`, }, - []string{"sentinel", "shard", "redis_server", "role"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"}, ) lastOkPingReply = prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -30,7 +30,7 @@ var ( Namespace: "saas_redis_sentinel", Help: `"sentinel master last-ok-ping-reply"`, }, - []string{"sentinel", "shard", "redis_server", "role"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"}, ) roleReportedTime = prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -38,7 +38,7 @@ var ( Namespace: "saas_redis_sentinel", Help: `"sentinel master role-reported-time"`, }, - []string{"sentinel", "shard", "redis_server", "role"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"}, ) numOtherSentinels = prometheus.NewGaugeVec( prometheus.GaugeOpts{ @@ -46,7 +46,7 @@ var ( Namespace: "saas_redis_sentinel", Help: `"sentinel master num-other-sentinels"`, }, - []string{"sentinel", "shard", "redis_server", "role"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"}, ) masterLinkDownTime = prometheus.NewGaugeVec( @@ -55,7 +55,7 @@ var ( Namespace: "saas_redis_sentinel", Help: `"sentinel slaves master-link-down-time"`, }, - []string{"sentinel", "shard", "redis_server", "role"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"}, ) slaveReplOffset = prometheus.NewGaugeVec( @@ -64,7 +64,7 @@ var ( Namespace: 
"saas_redis_sentinel", Help: `"sentinel slaves slave-repl-offset"`, }, - []string{"sentinel", "shard", "redis_server", "role"}, + []string{"sentinel", "shard", "redis_server_host", "redis_server_alias", "role"}, ) ) @@ -85,6 +85,7 @@ type SentinelMetricsGatherer struct { refreshInterval time.Duration sentinelURI string sentinel *sharded.SentinelServer + serverPool *redis.ServerPool started bool cancel context.CancelFunc } @@ -99,6 +100,7 @@ func NewSentinelMetricsGatherer(sentinelURI string, refreshInterval time.Duratio refreshInterval: refreshInterval, sentinelURI: sentinelURI, sentinel: sentinel, + serverPool: pool, }, nil } @@ -177,21 +179,39 @@ func (smg *SentinelMetricsGatherer) gatherMetrics(ctx context.Context) error { } for _, master := range mresult { - - linkPendingCommands.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported, + masterServerHost := fmt.Sprintf("%s:%d", master.IP, master.Port) + masterServerAlias := smg.serverPool.GetServerAlias(masterServerHost) + + linkPendingCommands.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": masterServerHost, + "redis_server_alias": masterServerAlias, + "role": master.RoleReported, }).Set(float64(master.LinkPendingCommands)) - lastOkPingReply.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported, + lastOkPingReply.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": masterServerHost, + "redis_server_alias": masterServerAlias, + "role": master.RoleReported, }).Set(float64(master.LastOkPingReply)) - roleReportedTime.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported, + 
roleReportedTime.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": masterServerHost, + "redis_server_alias": masterServerAlias, + "role": master.RoleReported, }).Set(float64(master.RoleReportedTime)) - numOtherSentinels.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), "role": master.RoleReported, + numOtherSentinels.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": masterServerHost, + "redis_server_alias": masterServerAlias, + "role": master.RoleReported, }).Set(float64(master.NumOtherSentinels)) sresult, err := smg.sentinel.SentinelSlaves(ctx, master.Name) @@ -202,39 +222,63 @@ func (smg *SentinelMetricsGatherer) gatherMetrics(ctx context.Context) error { // Cleanup any vector that corresponds to the same server but with a // different role to avoid stale metrics after a role switch cleanupMetrics(prometheus.Labels{ - "sentinel": smg.sentinelURI, - "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", master.IP, master.Port), - "role": string(client.Slave), + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": masterServerHost, + "redis_server_alias": masterServerAlias, + "role": string(client.Slave), }) for _, slave := range sresult { - - linkPendingCommands.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported, + slaveServerHost := fmt.Sprintf("%s:%d", slave.IP, slave.Port) + slaveServerAlias := smg.serverPool.GetServerAlias(slaveServerHost) + + linkPendingCommands.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": slaveServerHost, + "redis_server_alias": slaveServerAlias, + "role": slave.RoleReported, }).Set(float64(slave.LinkPendingCommands)) - 
lastOkPingReply.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported, + lastOkPingReply.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": slaveServerHost, + "redis_server_alias": slaveServerAlias, + "role": slave.RoleReported, }).Set(float64(slave.LastOkPingReply)) - roleReportedTime.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported, + roleReportedTime.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": slaveServerHost, + "redis_server_alias": slaveServerAlias, + "role": slave.RoleReported, }).Set(float64(slave.RoleReportedTime)) - masterLinkDownTime.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported, + masterLinkDownTime.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": slaveServerHost, + "redis_server_alias": slaveServerAlias, + "role": slave.RoleReported, }).Set(float64(slave.MasterLinkDownTime)) - slaveReplOffset.With(prometheus.Labels{"sentinel": smg.sentinelURI, "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), "role": slave.RoleReported, + slaveReplOffset.With(prometheus.Labels{ + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": slaveServerHost, + "redis_server_alias": slaveServerAlias, + "role": slave.RoleReported, }).Set(float64(slave.SlaveReplOffset)) cleanupMetrics(prometheus.Labels{ - "sentinel": smg.sentinelURI, - "shard": master.Name, - "redis_server": fmt.Sprintf("%s:%d", slave.IP, slave.Port), - "role": string(client.Master), + "sentinel": smg.sentinelURI, + "shard": master.Name, + "redis_server_host": 
slaveServerHost, + "redis_server_alias": slaveServerAlias, + "role": string(client.Master), }) } }
diff --git a/pkg/redis/server/pool.go b/pkg/redis/server/pool.go
index d4524a3b..5c3af5f4 100644
--- a/pkg/redis/server/pool.go
+++ b/pkg/redis/server/pool.go
@@ -70,3 +70,13 @@ func (pool *ServerPool) indexByHostPort() map[string]*Server {
 
 	return index
 }
+
+// GetServerAlias returns the alias of the server that pool.GetServer yields
+// for connectionString, or the empty string when GetServer returns an error.
+func (pool *ServerPool) GetServerAlias(connectionString string) string {
+	srv, err := pool.GetServer(connectionString, nil)
+	if err != nil {
+		return ""
+	}
+	return srv.alias
+}
diff --git a/pkg/redis/sharded/redis_sharded_cluster.go b/pkg/redis/sharded/redis_sharded_cluster.go
index fb068e2e..4f577257 100644
--- a/pkg/redis/sharded/redis_sharded_cluster.go
+++ b/pkg/redis/sharded/redis_sharded_cluster.go
@@ -103,6 +103,11 @@ func (cluster Cluster) LookupServerByID(hostport string) *RedisServer {
 	return nil
 }
 
+// GetPool returns the server pool held by the cluster.
+func (cluster Cluster) GetPool() *redis.ServerPool {
+	return cluster.pool
+}
+
 func (cluster *Cluster) Discover(ctx context.Context, options ...DiscoveryOption) error {
 	var merr operatorutils.MultiError
 