From 37a3aac46204324432e487c31c0ecd54cd1cf368 Mon Sep 17 00:00:00 2001 From: corver Date: Fri, 4 Oct 2024 13:55:25 +0200 Subject: [PATCH] chore(monitor): instrument last applied upgrade (#2076) Monitor last applied upgrade in `monitor` service. Add a method to `CProvider` to query this. Also increase ephemeral chain plan-upgrade delay from 5 to 100 since 5 wasn't sufficient. issue: none --- e2e/app/admin/planupgrade.go | 2 +- lib/cchain/provider.go | 3 +++ lib/cchain/provider/abci.go | 23 +++++++++++++++++++-- lib/cchain/provider/provider.go | 12 ++++++++--- lib/cchain/provider/provider_test.go | 16 +++++++++++++++ monitor/app/metrics.go | 8 +++++++- monitor/app/upgrade.go | 30 +++++++++++++++++++++++++--- 7 files changed, 84 insertions(+), 10 deletions(-) diff --git a/e2e/app/admin/planupgrade.go b/e2e/app/admin/planupgrade.go index 064316cc1..a6f6f09f5 100644 --- a/e2e/app/admin/planupgrade.go +++ b/e2e/app/admin/planupgrade.go @@ -43,7 +43,7 @@ func PlanUpgrade(ctx context.Context, def app.Definition, cfg Config) error { return err } - const delay = 5 // Upgrades must be planned in the future, add a small buffer + const delay = 100 // Upgrades must be planned in the future, add a buffer of a few minutes plan.Height = latest + delay } diff --git a/lib/cchain/provider.go b/lib/cchain/provider.go index 3e467f4ea..e5b9aa81c 100644 --- a/lib/cchain/provider.go +++ b/lib/cchain/provider.go @@ -85,4 +85,7 @@ type Provider interface { // CurrentPlannedPlan returns the current (non-activated) upgrade plan. CurrentPlannedPlan(ctx context.Context) (utypes.Plan, bool, error) + + // AppliedPlan returns the applied (activated) upgrade plan by name.
+ AppliedPlan(ctx context.Context, name string) (utypes.Plan, bool, error) } diff --git a/lib/cchain/provider/abci.go b/lib/cchain/provider/abci.go index 8a3816668..8f51d9415 100644 --- a/lib/cchain/provider/abci.go +++ b/lib/cchain/provider/abci.go @@ -79,7 +79,8 @@ func NewABCIProvider(cmtCl rpcclient.Client, network netconf.ID, chainNamer func portalBlock: newABCIPortalBlockFunc(pcl), networkFunc: newABCINetworkFunc(rcl), genesisFunc: newABCIGenesisFunc(gcl), - upgradeFunc: newABCIUpgradeFunc(ucl), + plannedFunc: newABCIPlannedUpgradeFunc(ucl), + appliedFunc: newABCIAppliedUpgradeFunc(ucl), chainID: newChainIDFunc(cmtCl), header: cmtCl.Header, backoffFunc: backoffFunc, @@ -127,7 +128,25 @@ func newABCISigningFunc(cl sltypes.QueryClient) signingFunc { } } -func newABCIUpgradeFunc(ucl utypes.QueryClient) upgradeFunc { +func newABCIAppliedUpgradeFunc(ucl utypes.QueryClient) appliedUpgradeFunc { + return func(ctx context.Context, name string) (utypes.Plan, bool, error) { + resp, err := ucl.AppliedPlan(ctx, &utypes.QueryAppliedPlanRequest{ + Name: name, + }) + if err != nil { + return utypes.Plan{}, false, errors.Wrap(err, "abci query applied plan") + } else if resp.Height == 0 { + return utypes.Plan{}, false, nil + } + + return utypes.Plan{ + Name: name, + Height: resp.Height, + }, true, nil + } +} + +func newABCIPlannedUpgradeFunc(ucl utypes.QueryClient) planedUpgradeFunc { return func(ctx context.Context) (utypes.Plan, bool, error) { resp, err := ucl.CurrentPlan(ctx, &utypes.QueryCurrentPlanRequest{}) if err != nil { diff --git a/lib/cchain/provider/provider.go b/lib/cchain/provider/provider.go index 28855dfd5..44a808653 100644 --- a/lib/cchain/provider/provider.go +++ b/lib/cchain/provider/provider.go @@ -45,7 +45,8 @@ type valsetFunc func(ctx context.Context, valSetID uint64, latest bool) (valSetR type headerFunc func(ctx context.Context, height *int64) (*ctypes.ResultHeader, error) type chainIDFunc func(ctx context.Context) (uint64, error) type genesisFunc 
func(ctx context.Context) (execution []byte, consensus []byte, err error) -type upgradeFunc func(ctx context.Context) (upgradetypes.Plan, bool, error) +type planedUpgradeFunc func(ctx context.Context) (upgradetypes.Plan, bool, error) +type appliedUpgradeFunc func(ctx context.Context, name string) (upgradetypes.Plan, bool, error) type signingFunc func(ctx context.Context) ([]cchain.SDKSigningInfo, error) type valSetResponse struct { @@ -72,7 +73,8 @@ type Provider struct { portalBlock portalBlockFunc networkFunc networkFunc genesisFunc genesisFunc - upgradeFunc upgradeFunc + plannedFunc planedUpgradeFunc + appliedFunc appliedUpgradeFunc backoffFunc func(context.Context) func() chainNamer func(xchain.ChainVersion) string network netconf.ID @@ -96,7 +98,11 @@ func (p Provider) CometClient() rpcclient.Client { } func (p Provider) CurrentPlannedPlan(ctx context.Context) (upgradetypes.Plan, bool, error) { - return p.upgradeFunc(ctx) + return p.plannedFunc(ctx) +} + +func (p Provider) AppliedPlan(ctx context.Context, name string) (upgradetypes.Plan, bool, error) { + return p.appliedFunc(ctx, name) } func (p Provider) AttestationsFrom(ctx context.Context, chainVer xchain.ChainVersion, attestOffset uint64, diff --git a/lib/cchain/provider/provider_test.go b/lib/cchain/provider/provider_test.go index 3f51b633f..8fd548f28 100644 --- a/lib/cchain/provider/provider_test.go +++ b/lib/cchain/provider/provider_test.go @@ -17,6 +17,22 @@ import ( var integration = flag.Bool("integration", false, "run integration tests") +func TestUpgradeQueries(t *testing.T) { + t.Parallel() + if !*integration { + t.Skip("skipping integration test") + } + + ctx := context.Background() + + cprov, err := provider.Dial(netconf.Staging) + require.NoError(t, err) + + _, ok, err := cprov.AppliedPlan(ctx, "not an upgrade") + require.NoError(t, err) + require.False(t, ok) +} + func TestSigningInfos(t *testing.T) { t.Parallel() if !*integration { diff --git a/monitor/app/metrics.go b/monitor/app/metrics.go 
index a25a0fe81..30dcf6bd5 100644 --- a/monitor/app/metrics.go +++ b/monitor/app/metrics.go @@ -28,6 +28,12 @@ var ( plannedUpgradeGauge = promutil.NewResetGaugeVec(prometheus.GaugeOpts{ Namespace: "monitor", Name: "planned_upgrade", - Help: "Height of current planned (non-progressed) upgrade by name", + Help: "Height of current planned (non-processed) upgrade by name", + }, []string{"upgrade"}) + + appliedUpgradeGauge = promutil.NewResetGaugeVec(prometheus.GaugeOpts{ + Namespace: "monitor", + Name: "applied_upgrade", + Help: "Height of last applied (processed) upgrade by name", }, []string{"upgrade"}) ) diff --git a/monitor/app/upgrade.go b/monitor/app/upgrade.go index 6f4c5aa60..eade0c82f 100644 --- a/monitor/app/upgrade.go +++ b/monitor/app/upgrade.go @@ -4,12 +4,17 @@ import ( "context" "time" + uluwatu1 "github.com/omni-network/omni/halo/app/upgrades/uluwatu" "github.com/omni-network/omni/lib/cchain" "github.com/omni-network/omni/lib/log" utypes "cosmossdk.io/x/upgrade/types" ) +// upgrades defines the list of upgrades to monitor. +// Add new upgrades here. +var upgrades = []string{uluwatu1.UpgradeName} + // monitorUpgradesForever blocks until the context is closed and // periodically updates the planned upgrade gauge.
func monitorUpgradesForever(ctx context.Context, cprov cchain.Provider) { @@ -21,19 +26,38 @@ func monitorUpgradesForever(ctx context.Context, cprov cchain.Provider) { case <-ctx.Done(): return case <-ticker.C: - plan, ok, err := cprov.CurrentPlannedPlan(ctx) + planned, ok, err := cprov.CurrentPlannedPlan(ctx) if err != nil { log.Warn(ctx, "Failed fetching planned upgrade (will retry)", err) continue } else if !ok { - plan = utypes.Plan{ + planned = utypes.Plan{ Name: "none", Height: 0, } } plannedUpgradeGauge.Reset() - plannedUpgradeGauge.WithLabelValues(plan.Name).Set(float64(plan.Height)) + plannedUpgradeGauge.WithLabelValues(planned.Name).Set(float64(planned.Height)) + + applied := utypes.Plan{ + Name: "none", + Height: 0, + } + for _, upgrade := range upgrades { + p, ok, err := cprov.AppliedPlan(ctx, upgrade) + if err != nil { + log.Warn(ctx, "Failed fetching applied upgrade (will retry)", err, "name", upgrade) + continue + } else if !ok || p.Height < applied.Height { + continue + } + + applied = p // Update last applied + } + + appliedUpgradeGauge.Reset() + appliedUpgradeGauge.WithLabelValues(applied.Name).Set(float64(applied.Height)) } } }