From eafb3ec44b0b8ced679f5ba1692a2aff13533e8f Mon Sep 17 00:00:00 2001 From: Redm4x <2829180+Redm4x@users.noreply.github.com> Date: Tue, 23 Apr 2024 08:33:32 -0400 Subject: [PATCH] feat: Add grace period to uptime check --- api/src/routes/internal/gpu.ts | 10 +- api/src/routes/internal/gpuPrices.ts | 13 ++- api/src/routes/internal/providerVersions.ts | 27 ++++-- api/src/routes/v1/providers/byAddress.ts | 1 + api/src/routes/v1/providers/list.ts | 1 + api/src/services/db/providerStatusService.ts | 53 +++++++---- api/src/services/db/statsService.ts | 8 +- api/src/types/provider.ts | 4 +- api/src/utils/env.ts | 4 + api/src/utils/map/provider.ts | 7 +- .../src/components/providers/ProviderMap.tsx | 2 +- .../src/pages/providers/[owner]/index.tsx | 72 ++++++++++----- deploy-web/src/types/provider.ts | 2 +- indexer/UPGRADE.md | 48 ++++++++++ indexer/src/index.ts | 2 +- .../src/providers/providerStatusProvider.ts | 53 +++++++++-- indexer/src/tasks/providerUptimeTracker.ts | 92 ++++++++----------- shared/dbSchemas/akash/provider.ts | 4 + shared/dbSchemas/akash/providerSnapshot.ts | 4 +- 19 files changed, 275 insertions(+), 132 deletions(-) diff --git a/api/src/routes/internal/gpu.ts b/api/src/routes/internal/gpu.ts index 362a28a14..2c848017b 100644 --- a/api/src/routes/internal/gpu.ts +++ b/api/src/routes/internal/gpu.ts @@ -1,6 +1,9 @@ import { OpenAPIHono, createRoute, z } from "@hono/zod-openapi"; import { chainDb } from "@src/db/dbConnection"; +import { toUTC } from "@src/utils"; import { isValidBech32Address } from "@src/utils/addresses"; +import { env } from "@src/utils/env"; +import { sub } from "date-fns"; import { QueryTypes } from "sequelize"; const route = createRoute({ @@ -86,8 +89,8 @@ export default new OpenAPIHono().openapi(route, async (c) => { "hostUri", p."owner" FROM provider p - INNER JOIN "providerSnapshot" ps ON ps.id=p."lastSnapshotId" - WHERE p."isOnline" IS TRUE + INNER JOIN "providerSnapshot" ps ON ps.id=p."lastSuccessfulSnapshotId" + WHERE 
p."isOnline" IS TRUE OR ps."checkDate" >= :grace_date ) SELECT s."hostUri", n."name", n."gpuAllocatable" AS allocatable, n."gpuAllocated" AS allocated, gpu."modelId", gpu.vendor, gpu.name AS "modelName", gpu.interface, gpu."memorySize" FROM snapshots s @@ -110,7 +113,8 @@ export default new OpenAPIHono().openapi(route, async (c) => { model: model ?? null, memory_size: memory_size ?? null, provider_address: provider_address ?? null, - provider_hosturi: provider_hosturi ?? null + provider_hosturi: provider_hosturi ?? null, + grace_date: toUTC(sub(new Date(), { minutes: env.ProviderUptimeGracePeriodMinutes })) } } ); diff --git a/api/src/routes/internal/gpuPrices.ts b/api/src/routes/internal/gpuPrices.ts index a2437cc0d..0d8098fd6 100644 --- a/api/src/routes/internal/gpuPrices.ts +++ b/api/src/routes/internal/gpuPrices.ts @@ -5,10 +5,12 @@ import { Day, Transaction } from "@shared/dbSchemas/base"; import { cacheResponse } from "@src/caching/helpers"; import { chainDb } from "@src/db/dbConnection"; import { MsgCreateBid } from "@src/proto/akash/v1beta4"; +import { toUTC } from "@src/utils"; import { averageBlockCountInAMonth, averageBlockCountInAnHour } from "@src/utils/constants"; +import { env } from "@src/utils/env"; import { average, median, round, weightedAverage } from "@src/utils/math"; import { decodeMsg, uint8arrayToString } from "@src/utils/protobuf"; -import { addDays } from "date-fns"; +import { addDays, sub } from "date-fns"; import { Op, QueryTypes } from "sequelize"; const route = createRoute({ @@ -345,8 +347,8 @@ async function getGpus() { "hostUri", p."owner" FROM provider p - INNER JOIN "providerSnapshot" ps ON ps.id=p."lastSnapshotId" - WHERE p."isOnline" IS TRUE + INNER JOIN "providerSnapshot" ps ON ps.id=p."lastSuccessfulSnapshotId" + WHERE p."isOnline" IS TRUE OR ps."checkDate" >= :grace_date ORDER BY p."hostUri", p."createdHeight" DESC ) SELECT s."hostUri", s."owner", n."name", n."gpuAllocatable" AS allocatable, LEAST(n."gpuAllocated", 
n."gpuAllocatable") AS allocated, gpu."modelId", gpu.vendor, gpu.name AS "modelName", gpu.interface, gpu."memorySize" @@ -360,7 +362,10 @@ async function getGpus() { gpu.vendor IS NOT NULL `, { - type: QueryTypes.SELECT + type: QueryTypes.SELECT, + replacements: { + grace_date: toUTC(sub(new Date(), { minutes: env.ProviderUptimeGracePeriodMinutes })) + } } ); diff --git a/api/src/routes/internal/providerVersions.ts b/api/src/routes/internal/providerVersions.ts index 040a242c5..282b3ea5c 100644 --- a/api/src/routes/internal/providerVersions.ts +++ b/api/src/routes/internal/providerVersions.ts @@ -1,7 +1,11 @@ import { OpenAPIHono, createRoute, z } from "@hono/zod-openapi"; -import { Provider } from "@shared/dbSchemas/akash"; +import { chainDb } from "@src/db/dbConnection"; +import { toUTC } from "@src/utils"; +import { env } from "@src/utils/env"; import { round } from "@src/utils/math"; +import { sub } from "date-fns"; import * as semver from "semver"; +import { QueryTypes } from "sequelize"; const route = createRoute({ method: "get", @@ -28,13 +32,20 @@ const route = createRoute({ }); export default new OpenAPIHono().openapi(route, async (c) => { - const providers = await Provider.findAll({ - attributes: ["hostUri", "akashVersion"], - where: { - isOnline: true - }, - group: ["hostUri", "akashVersion"] - }); + const providers = await chainDb.query<{ hostUri: string; akashVersion: string }>( + ` + SELECT DISTINCT ON ("hostUri") "hostUri","akashVersion" + FROM provider p + INNER JOIN "providerSnapshot" ps ON ps.id=p."lastSuccessfulSnapshotId" + WHERE p."isOnline" IS TRUE OR ps."checkDate" >= :grace_date + `, + { + type: QueryTypes.SELECT, + replacements: { + grace_date: toUTC(sub(new Date(), { minutes: env.ProviderUptimeGracePeriodMinutes })) + } + } + ); const grouped: { version: string; providers: string[] }[] = []; diff --git a/api/src/routes/v1/providers/byAddress.ts b/api/src/routes/v1/providers/byAddress.ts index 1359e527a..406fc0667 100644 --- 
a/api/src/routes/v1/providers/byAddress.ts +++ b/api/src/routes/v1/providers/byAddress.ts @@ -43,6 +43,7 @@ const route = createRoute({ uptime30d: z.number(), isValidVersion: z.boolean(), isOnline: z.boolean(), + lastOnlineDate: z.string().nullable(), isAudited: z.boolean(), activeStats: z.object({ cpu: z.number(), diff --git a/api/src/routes/v1/providers/list.ts b/api/src/routes/v1/providers/list.ts index 4f15cd086..3f641bd6e 100644 --- a/api/src/routes/v1/providers/list.ts +++ b/api/src/routes/v1/providers/list.ts @@ -36,6 +36,7 @@ const route = createRoute({ uptime30d: z.number(), isValidVersion: z.boolean(), isOnline: z.boolean(), + lastOnlineDate: z.string().nullable(), isAudited: z.boolean(), activeStats: z.object({ cpu: z.number(), diff --git a/api/src/services/db/providerStatusService.ts b/api/src/services/db/providerStatusService.ts index 2f5c1435a..6eb6cc5b4 100644 --- a/api/src/services/db/providerStatusService.ts +++ b/api/src/services/db/providerStatusService.ts @@ -1,20 +1,31 @@ import { Provider, ProviderAttribute, ProviderAttributeSignature, ProviderSnapshotNode, ProviderSnapshotNodeGPU } from "@shared/dbSchemas/akash"; import { ProviderSnapshot } from "@shared/dbSchemas/akash/providerSnapshot"; import { toUTC } from "@src/utils"; -import { add } from "date-fns"; +import { add, sub } from "date-fns"; import { Op } from "sequelize"; import { mapProviderToList } from "@src/utils/map/provider"; import { getAuditors, getProviderAttributesSchema } from "../external/githubService"; import { ProviderDetail } from "@src/types/provider"; +import { env } from "@src/utils/env"; export async function getNetworkCapacity() { const providers = await Provider.findAll({ where: { - isOnline: true, deletedHeight: null - } + }, + include: [ + { + required: false, + model: ProviderSnapshot, + as: "lastSuccessfulSnapshot", + where: { checkDate: { [Op.gte]: toUTC(sub(new Date(), { minutes: env.ProviderUptimeGracePeriodMinutes })) } } + } + ] }); - const filteredProviders 
= providers.filter((value, index, self) => self.map((x) => x.hostUri).indexOf(value.hostUri) === index); + + const filteredProviders = providers + .filter((x) => x.isOnline || x.lastSuccessfulSnapshot) + .filter((value, index, self) => self.map((x) => x.hostUri).indexOf(value.hostUri) === index); const stats = { activeProviderCount: filteredProviders.length, @@ -67,7 +78,7 @@ export const getProviderList = async () => { model: ProviderSnapshot, attributes: ["id"], required: true, - as: "lastSnapshot", + as: "lastSuccessfulSnapshot", include: [ { model: ProviderSnapshotNode, @@ -87,8 +98,8 @@ export const getProviderList = async () => { const [auditors, providerAttributeSchema] = await Promise.all([auditorsQuery, providerAttributeSchemaQuery]); return distinctProviders.map((x) => { - const nodes = providerWithNodes.find((p) => p.owner === x.owner)?.lastSnapshot?.nodes; - return mapProviderToList(x, providerAttributeSchema, auditors, nodes); + const lastSuccessfulSnapshot = providerWithNodes.find((p) => p.owner === x.owner)?.lastSnapshot; + return mapProviderToList(x, providerAttributeSchema, auditors, lastSuccessfulSnapshot); }); }; @@ -121,18 +132,20 @@ export const getProviderDetail = async (address: string): Promise ({ id: ps.id, isOnline: ps.isOnline, diff --git a/api/src/services/db/statsService.ts b/api/src/services/db/statsService.ts index 71d09c5f1..ff5f05018 100644 --- a/api/src/services/db/statsService.ts +++ b/api/src/services/db/statsService.ts @@ -5,6 +5,7 @@ import { Op, QueryTypes } from "sequelize"; import { chainDb } from "@src/db/dbConnection"; import { ProviderActiveLeasesStats, ProviderStats, ProviderStatsKey } from "@src/types/graph"; import { cacheKeys, cacheResponse } from "@src/caching/helpers"; +import { env } from "@src/utils/env"; type GraphData = { currentValue: number; @@ -203,14 +204,17 @@ export const getProviderGraphData = async (dataName: ProviderStatsKey) => { SELECT DISTINCT ON("hostUri",DATE("checkDate")) DATE("checkDate") AS 
date, ps."activeCPU", ps."pendingCPU", ps."availableCPU", ps."activeGPU", ps."pendingGPU", ps."availableGPU", ps."activeMemory", ps."pendingMemory", ps."availableMemory", ps."activeStorage", ps."pendingStorage", ps."availableStorage", ps."isOnline" FROM "providerSnapshot" ps INNER JOIN "provider" ON "provider"."owner"=ps."owner" - WHERE ps."isLastOfDay" = TRUE AND ps."isOnline" = TRUE + WHERE ps."isLastSuccessOfDay" = TRUE AND ps."checkDate" >= DATE(ps."checkDate")::timestamp + INTERVAL '1 day' - INTERVAL :grace_duration || ' minutes' ORDER BY "hostUri",DATE("checkDate"),"checkDate" DESC ) "dailyProviderStats" ON DATE(d."date")="dailyProviderStats"."date" GROUP BY d."date" ORDER BY d."date" ASC`, { - type: QueryTypes.SELECT + type: QueryTypes.SELECT, + replacements: { + grace_duration: env.ProviderUptimeGracePeriodMinutes + } } ); }, diff --git a/api/src/types/provider.ts b/api/src/types/provider.ts index c7a942ea6..b6e95e412 100644 --- a/api/src/types/provider.ts +++ b/api/src/types/provider.ts @@ -76,11 +76,11 @@ export interface ProviderList { } export interface ProviderDetail extends ProviderList { - uptime: Array<{ + uptime: { id: string; isOnline: boolean; checkDate: Date; - }>; + }[]; } export type ProviderAttributesSchema = { diff --git a/api/src/utils/env.ts b/api/src/utils/env.ts index c0194ff69..cf76024a9 100644 --- a/api/src/utils/env.ts +++ b/api/src/utils/env.ts @@ -23,6 +23,10 @@ export const env = z Auth0Issuer: z.string().optional(), WebsiteUrl: z.string().optional(), SecretToken: z.string().optional(), + ProviderUptimeGracePeriodMinutes: z + .number() + .optional() + .default(3 * 60), NODE_API_BASE_PATH: z.string().optional().default("https://raw.githubusercontent.com/akash-network") }) .parse(process.env); diff --git a/api/src/utils/map/provider.ts b/api/src/utils/map/provider.ts index f8b91f24b..831b0110a 100644 --- a/api/src/utils/map/provider.ts +++ b/api/src/utils/map/provider.ts @@ -1,4 +1,4 @@ -import { Provider, ProviderSnapshotNode } from 
"@shared/dbSchemas/akash"; +import { Provider, ProviderSnapshot, ProviderSnapshotNode } from "@shared/dbSchemas/akash"; import { Auditor, ProviderAttributesSchema, ProviderList } from "@src/types/provider"; import { createFilterUnique } from "../array/array"; import semver from "semver"; @@ -7,11 +7,11 @@ export const mapProviderToList = ( provider: Provider, providerAttributeSchema: ProviderAttributesSchema, auditors: Array, - nodes?: ProviderSnapshotNode[] + lastSuccessfulSnapshot?: ProviderSnapshot ): ProviderList => { const isValidVersion = provider.cosmosSdkVersion ? semver.gte(provider.cosmosSdkVersion, "v0.45.9") : false; const name = provider.isOnline ? new URL(provider.hostUri).hostname : null; - const gpuModels = getDistinctGpuModelsFromNodes(nodes || []); + const gpuModels = getDistinctGpuModelsFromNodes(lastSuccessfulSnapshot?.nodes || []); return { owner: provider.owner, @@ -55,6 +55,7 @@ export const mapProviderToList = ( uptime30d: provider.uptime30d, isValidVersion, isOnline: provider.isOnline, + lastOnlineDate: lastSuccessfulSnapshot?.checkDate, isAudited: provider.providerAttributeSignatures.some((a) => auditors.some((y) => y.address === a.auditor)), attributes: provider.providerAttributes.map((attr) => ({ key: attr.key, diff --git a/deploy-web/src/components/providers/ProviderMap.tsx b/deploy-web/src/components/providers/ProviderMap.tsx index e9ee0c415..57924b2cc 100644 --- a/deploy-web/src/components/providers/ProviderMap.tsx +++ b/deploy-web/src/components/providers/ProviderMap.tsx @@ -29,7 +29,7 @@ export const ProviderMap: React.FunctionComponent = ({ providers, initial const { classes } = useStyles(); const [dotSize, setDotSize] = useState({ r: 5, w: 1 }); const theme = useTheme(); - const activeProviders = providers.filter(x => x.isOnline || x.isOnline); + const activeProviders = providers.filter(x => x.isOnline); const bgColor = theme.palette.mode === "dark" ? 
theme.palette.grey[800] : theme.palette.grey[400]; const [position, setPosition] = useState({ coordinates: initialCoordinates, zoom: initialZoom }); const isInitialPosition = diff --git a/deploy-web/src/pages/providers/[owner]/index.tsx b/deploy-web/src/pages/providers/[owner]/index.tsx index 49d5943d0..b2edd2e56 100644 --- a/deploy-web/src/pages/providers/[owner]/index.tsx +++ b/deploy-web/src/pages/providers/[owner]/index.tsx @@ -1,4 +1,4 @@ -import { useState, useEffect } from "react"; +import { useState, useEffect, useMemo } from "react"; import { Typography, Box, Paper, useTheme, CircularProgress, Alert } from "@mui/material"; import { useAllLeases } from "@src/queries/useLeaseQuery"; import Layout from "@src/components/layout/Layout"; @@ -18,6 +18,7 @@ import { CustomNextSeo } from "@src/components/shared/CustomNextSeo"; import { UrlService } from "@src/utils/urlUtils"; import { getNetworkBaseApiUrl } from "@src/utils/constants"; import axios from "axios"; +import { differenceInMinutes, sub } from "date-fns"; const NetworkCapacity = dynamic(() => import("../../../components/providers/NetworkCapacity"), { ssr: false @@ -87,6 +88,33 @@ const ProviderDetailPage: React.FunctionComponent = ({ owner, _provider } getProviderStatus(); }; + function groupUptimeChecksByPeriod(uptimeChecks: { isOnline: boolean; checkDate: string }[] = []) { + const groupedSnapshots: { checkDate: Date; checks: boolean[] }[] = []; + + const sortedUptimeChecks = uptimeChecks.toSorted((a, b) => new Date(a.checkDate).getTime() - new Date(b.checkDate).getTime()); + + for (const snapshot of sortedUptimeChecks) { + const recentGroup = groupedSnapshots.find(x => differenceInMinutes(new Date(snapshot.checkDate), x.checkDate) < 15); + + if (recentGroup) { + recentGroup.checks.push(snapshot.isOnline); + } else { + groupedSnapshots.push({ + checkDate: new Date(snapshot.checkDate), + checks: [snapshot.isOnline] + }); + } + } + + return groupedSnapshots.map(x => ({ + date: x.checkDate, + status: 
x.checks.every(x => x) ? "online" : x.checks.every(x => !x) ? "offline" : "partial" + })); + } + + const uptimePeriods = useMemo(() => groupUptimeChecksByPeriod(provider?.uptime || []), [provider?.uptime]); + const wasRecentlyOnline = provider && (provider.isOnline || (provider.lastCheckDate && new Date(provider.lastCheckDate) >= sub(new Date(), { hours: 24 }))); + return ( @@ -98,7 +126,7 @@ const ProviderDetailPage: React.FunctionComponent = ({ owner, _provider } )} - {provider && !provider.isOnline && !isLoading && ( + {provider && !wasRecentlyOnline && !isLoading && ( = ({ owner, _provider } )} - {provider && provider.isOnline && ( + {provider && wasRecentlyOnline && ( <> = ({ owner, _provider } Up time (24h) - {provider?.uptime - // sort by date - .sort((a, b) => new Date(a.checkDate).getTime() - new Date(b.checkDate).getTime()) - .map((x, i) => ( - } - leaveDelay={0} - > - 0 ? ".25rem" : 0, - backgroundColor: x.isOnline ? theme.palette.success.main : theme.palette.error.main, - borderRadius: "2px" - }} - > - - ))} + {uptimePeriods.map((x, i) => ( + } + leaveDelay={0} + > + 0 ? ".25rem" : 0, + backgroundColor: + x.status === "online" ? theme.palette.success.main : x.status === "partial" ? 
theme.palette.warning.main : theme.palette.error.main, + borderRadius: "2px" + }} + > + + ))} diff --git a/deploy-web/src/types/provider.ts b/deploy-web/src/types/provider.ts index e73b81744..dd72312e9 100644 --- a/deploy-web/src/types/provider.ts +++ b/deploy-web/src/types/provider.ts @@ -268,7 +268,7 @@ export interface ApiProviderDetail extends ApiProviderList { uptime: Array<{ id: string; isOnline: boolean; - checkDate: Date; + checkDate: string; }>; } diff --git a/indexer/UPGRADE.md b/indexer/UPGRADE.md index a155b3380..78a8f6e95 100644 --- a/indexer/UPGRADE.md +++ b/indexer/UPGRADE.md @@ -124,3 +124,51 @@ FROM "created_msg" cm LEFT JOIN "update_msg" um ON um."address"=cm."address" WHERE cm."address"=p."owner"; ``` + +## v1.8.0 + +Version 1.8.0 adds the necessary fields for improving the Akash provider uptime checks. + +``` + +ALTER TABLE IF EXISTS public.provider + ADD COLUMN "nextCheckDate" timestamp with time zone NOT NULL DEFAULT NOW(), + ADD COLUMN "failedCheckCount" integer DEFAULT 0, + ADD COLUMN "lastSuccessfulSnapshotId" uuid; + +-- Set lastSuccessfulSnapshotId on providers +WITH last_successful_snapshots AS ( + SELECT DISTINCT ON(p.owner) p.owner, ps.id AS "snapshotId" + FROM provider p + INNER JOIN "providerSnapshot" ps ON p.owner=ps.owner AND ps."isOnline" IS TRUE + ORDER BY p.owner, ps."checkDate" DESC +) +UPDATE provider p +SET "lastSuccessfulSnapshotId"=last_successful_snapshots."snapshotId" +FROM last_successful_snapshots +WHERE p.owner=last_successful_snapshots.owner; + +-- Spread providers "nextCheckDate" evenly across a 15-minute window +UPDATE provider SET "nextCheckDate"=NOW() + interval '1 second' * (random() * 15 * 60); + +ALTER TABLE IF EXISTS public."providerSnapshot" + ADD COLUMN "isLastSuccessOfDay" boolean NOT NULL DEFAULT false; + +-- Set isLastSuccessOfDay to true for successful snapshots that are the last of each day for every provider +WITH last_successful_snapshots AS ( + SELECT DISTINCT ON(ps."owner",DATE("checkDate")) 
DATE("checkDate") AS date, ps."id" AS "psId" + FROM "providerSnapshot" ps + WHERE "isOnline" = TRUE + ORDER BY ps."owner",DATE("checkDate"),"checkDate" DESC +) +UPDATE "providerSnapshot" AS ps +SET "isLastSuccessOfDay" = TRUE +FROM last_successful_snapshots AS ls +WHERE ls."psId"=ps.id; + +CREATE INDEX IF NOT EXISTS provider_snapshot_id_where_islastsuccessofday + ON public."providerSnapshot" USING btree + (id ASC NULLS LAST) + TABLESPACE pg_default + WHERE "isLastSuccessOfDay" = true; +``` diff --git a/indexer/src/index.ts b/indexer/src/index.ts index 907abda7b..238e47301 100644 --- a/indexer/src/index.ts +++ b/indexer/src/index.ts @@ -92,7 +92,7 @@ function startScheduler() { scheduler.registerTask("Address Balance Monitor", () => addressBalanceMonitor.run(), "10 minutes"); if (env.ActiveChain === "akash" || env.ActiveChain === "akashTestnet" || env.ActiveChain === "akashSandbox") { - scheduler.registerTask("Sync Providers Info", syncProvidersInfo, "15 minutes", true, { + scheduler.registerTask("Sync Providers Info", syncProvidersInfo, "10 seconds", true, { id: env.HealthChecks_SyncProviderInfo, measureDuration: true }); diff --git a/indexer/src/providers/providerStatusProvider.ts b/indexer/src/providers/providerStatusProvider.ts index d499bb946..bbbe11ffe 100644 --- a/indexer/src/providers/providerStatusProvider.ts +++ b/indexer/src/providers/providerStatusProvider.ts @@ -7,23 +7,23 @@ import { ProviderSnapshot } from "@src/../../shared/dbSchemas/akash/providerSnap import { sequelize } from "@src/db/dbConnection"; import { toUTC } from "@src/shared/utils/date"; import { ProviderStatusInfo, ProviderVersionEndpointResponseType } from "./statusEndpointHandlers/types"; -import { isSameDay } from "date-fns"; +import { add, differenceInDays, differenceInHours, differenceInMinutes, isSameDay } from "date-fns"; import { fetchProviderStatusFromGRPC } from "./statusEndpointHandlers/grpc"; import { fetchProviderStatusFromREST } from "./statusEndpointHandlers/rest"; +import 
{ Op } from "sequelize"; const ConcurrentStatusCall = 10; const StatusCallTimeout = 10_000; // 10 seconds +const UptimeCheckIntervalSeconds = 15 * 60; // 15 minutes export async function syncProvidersInfo() { let providers = await Provider.findAll({ where: { - deletedHeight: null + deletedHeight: null, + nextCheckDate: { [Op.lte]: toUTC(new Date()) } }, include: [{ model: ProviderSnapshot, as: "lastSnapshot" }], - order: [ - ["isOnline", "DESC"], - ["uptime30d", "DESC"] - ] + order: [["nextCheckDate", "ASC"]] }); const httpsAgent = new https.Agent({ @@ -66,8 +66,6 @@ export async function syncProvidersInfo() { console.log("Fetched provider info: " + doneCount + " / " + providers.length); }) ); - - console.log("Finished refreshing provider infos"); } async function saveProviderStatus( @@ -85,6 +83,7 @@ async function saveProviderStatus( owner: provider.owner, isOnline: !!providerStatus, isLastOfDay: true, + isLastSuccessOfDay: !!providerStatus, error: error, checkDate: checkDate, deploymentCount: providerStatus?.resources.deploymentCount, @@ -108,21 +107,37 @@ async function saveProviderStatus( if (provider.lastSnapshot && isSameDay(provider.lastSnapshot.checkDate, checkDate)) { await ProviderSnapshot.update( { - isLastOfDay: false + isLastOfDay: false, + isLastSuccessOfDay: false }, { where: { id: provider.lastSnapshot.id }, transaction: t } ); + + if (providerStatus && provider.lastSuccessfulSnapshotId && provider.lastSuccessfulSnapshotId !== provider.lastSnapshotId) { + await ProviderSnapshot.update( + { + isLastSuccessOfDay: false + }, + { + where: { id: provider.lastSuccessfulSnapshotId }, + transaction: t + } + ); + } } await Provider.update( { lastSnapshotId: createdSnapshot.id, + lastSuccessfulSnapshotId: createdSnapshot.isOnline ? createdSnapshot.id : provider.lastSuccessfulSnapshotId, isOnline: !!providerStatus, error: error, lastCheckDate: checkDate, + failedCheckCount: providerStatus ? 
0 : provider.failedCheckCount + 1, + nextCheckDate: getNextCheckDate(!!providerStatus, checkDate, provider.lastCheckDate), cosmosSdkVersion: cosmosVersion, akashVersion: akashVersion, deploymentCount: providerStatus?.resources.deploymentCount, @@ -192,3 +207,23 @@ async function saveProviderStatus( } }); } + +function getNextCheckDate(successful: boolean, checkDate: Date, lastCheckDate: Date) { + if (successful) { + return add(checkDate, { seconds: UptimeCheckIntervalSeconds }); + } + + if (differenceInMinutes(checkDate, lastCheckDate) < 15) { + return add(checkDate, { minutes: 1 }); + } else if (differenceInHours(checkDate, lastCheckDate) < 1) { + return add(checkDate, { minutes: 5 }); + } else if (differenceInHours(checkDate, lastCheckDate) < 6) { + return add(checkDate, { minutes: 15 }); + } else if (differenceInHours(checkDate, lastCheckDate) < 24) { + return add(checkDate, { minutes: 30 }); + } else if (differenceInDays(checkDate, lastCheckDate) < 7) { + return add(checkDate, { hours: 1 }); + } else { + return add(checkDate, { hours: 24 }); + } +} diff --git a/indexer/src/tasks/providerUptimeTracker.ts b/indexer/src/tasks/providerUptimeTracker.ts index 4f3178dd9..5ffcda451 100644 --- a/indexer/src/tasks/providerUptimeTracker.ts +++ b/indexer/src/tasks/providerUptimeTracker.ts @@ -1,66 +1,50 @@ import { Provider } from "@shared/dbSchemas/akash"; -import { ProviderSnapshot } from "@shared/dbSchemas/akash/providerSnapshot"; import { sequelize } from "@src/db/dbConnection"; -import { toUTC } from "@src/shared/utils/date"; -import { add } from "date-fns"; -import { Op, QueryTypes } from "sequelize"; +import { secondsInDay } from "date-fns"; +import { QueryTypes } from "sequelize"; export async function updateProviderUptime() { console.log("Updating provider uptimes."); - console.time("getAllProviders"); + console.time("updateProviderUptimes"); - const nowUtc = toUTC(new Date()); - const oneDayAgo = add(nowUtc, { days: -1 }); - const sevenDaysAgo = add(nowUtc, { 
days: -7 }); - const thirtyDaysAgo = add(nowUtc, { days: -30 }); + const providers = await Provider.findAll(); - const providers = await sequelize.query<{ - owner: string; - oldUptime1d: number; - oldUptime7d: number; - oldUptime30d: number; - online1d: number; - total1d: number; - online7d: number; - total7d: number; - online30d: number; - total30d: number; - }>( - ` -SELECT - p."owner", - p."uptime30d" AS "oldUptime30d", - p."uptime7d" AS "oldUptime7d", - p."uptime1d" AS "oldUptime1d", - COUNT(ps.id) FILTER(WHERE ps."isOnline") AS "online30d", - COUNT(ps.id) AS "total30d", - COUNT(ps.id) FILTER(WHERE ps."isOnline" AND ps."checkDate" > $sevenDaysAgo) AS "online7d", - COUNT(ps.id) FILTER(WHERE ps."checkDate" > $sevenDaysAgo) AS "total7d", - COUNT(ps.id) FILTER(WHERE ps."isOnline" AND ps."checkDate" > $oneDayAgo) AS "online1d", - COUNT(ps.id) FILTER(WHERE ps."checkDate" > $oneDayAgo) AS "total1d" -FROM "provider" p -INNER JOIN "providerSnapshot" ps ON p."owner"=ps."owner" AND ps."checkDate" > $thirtyDaysAgo -GROUP BY p."owner"`, - { - type: QueryTypes.SELECT, - bind: { - oneDayAgo: oneDayAgo, - sevenDaysAgo: sevenDaysAgo, - thirtyDaysAgo: thirtyDaysAgo + for (const provider of providers) { + const [{ offline_seconds_30d, offline_seconds_7d, offline_seconds_1d }] = await sequelize.query<{ + offline_seconds_30d: number; + offline_seconds_7d: number; + offline_seconds_1d: number; + }>( + ` + WITH offline_periods AS ( + SELECT + "checkDate", + LEAD("checkDate") OVER (ORDER BY "checkDate") AS "next_checkDate", + "isOnline" + FROM + "providerSnapshot" + WHERE "owner"=:owner AND "checkDate" >= NOW() - INTERVAL '30 days' + ) + SELECT + SUM(CASE WHEN NOT "isOnline" THEN EXTRACT(EPOCH FROM ("next_checkDate" - "checkDate")) ELSE 0 END) AS offline_seconds_30d, + SUM(CASE WHEN NOT "isOnline" AND "checkDate" >= NOW() - INTERVAL '7 days' THEN EXTRACT(EPOCH FROM ("next_checkDate" - "checkDate")) ELSE 0 END) AS offline_seconds_7d, + SUM(CASE WHEN NOT "isOnline" AND "checkDate" >= 
NOW() - INTERVAL '1 day' THEN EXTRACT(EPOCH FROM ("next_checkDate" - "checkDate")) ELSE 0 END) AS offline_seconds_1d + FROM + offline_periods; + `, + { + type: QueryTypes.SELECT, + replacements: { + owner: provider.owner + } } - } - ); - - console.timeEnd("getAllProviders"); + ); - console.time("updateProviderUptime"); - - for (const provider of providers) { - const uptime1d = provider.total1d > 0 ? provider.online1d / provider.total1d : 0; - const uptime7d = provider.total7d > 0 ? provider.online7d / provider.total7d : 0; - const uptime30d = provider.total30d > 0 ? provider.online30d / provider.total30d : 0; + const uptime1d = Math.max(0, 1 - offline_seconds_1d / secondsInDay); + const uptime7d = Math.max(0, 1 - offline_seconds_7d / (7 * secondsInDay)); + const uptime30d = Math.max(0, 1 - offline_seconds_30d / (30 * secondsInDay)); - if (uptime1d !== provider.oldUptime1d || uptime7d !== provider.oldUptime7d || uptime30d !== provider.oldUptime30d) { + if (uptime1d !== provider.uptime1d || uptime7d !== provider.uptime7d || uptime30d !== provider.uptime30d) { await Provider.update( { uptime1d: uptime1d, @@ -72,5 +56,5 @@ GROUP BY p."owner"`, } } - console.timeEnd("updateProviderUptime"); + console.timeEnd("updateProviderUptimes"); } diff --git a/shared/dbSchemas/akash/provider.ts b/shared/dbSchemas/akash/provider.ts index eefa4fb49..233f05969 100644 --- a/shared/dbSchemas/akash/provider.ts +++ b/shared/dbSchemas/akash/provider.ts @@ -22,8 +22,11 @@ export class Provider extends Model { // Stats @Column(DataTypes.UUID) lastSnapshotId?: string; + @Column(DataTypes.UUID) lastSuccessfulSnapshotId?: string; @Column isOnline?: boolean; @Column lastCheckDate?: Date; + @Required @Default(DataTypes.NOW) @Column nextCheckDate: Date; + @Required @Default(0) @Column failedCheckCount: number; @Column(DataTypes.TEXT) error?: string; @Column deploymentCount?: number; @Column leaseCount?: number; @@ -55,4 +58,5 @@ export class Provider extends Model { @HasMany(() => 
ProviderAttributeSignature, "provider") providerAttributeSignatures: ProviderAttributeSignature[]; @HasMany(() => ProviderSnapshot, "owner") providerSnapshots: ProviderSnapshot[]; @BelongsTo(() => ProviderSnapshot, "lastSnapshotId") lastSnapshot: ProviderSnapshot; + @BelongsTo(() => ProviderSnapshot, "lastSuccessfulSnapshotId") lastSuccessfulSnapshot: ProviderSnapshot; } diff --git a/shared/dbSchemas/akash/providerSnapshot.ts b/shared/dbSchemas/akash/providerSnapshot.ts index 1ed675d1d..626340e44 100644 --- a/shared/dbSchemas/akash/providerSnapshot.ts +++ b/shared/dbSchemas/akash/providerSnapshot.ts @@ -8,13 +8,15 @@ import { ProviderSnapshotNode } from "./providerSnapshotNode"; indexes: [ { unique: false, fields: ["owner"] }, { unique: false, fields: ["owner", "checkDate"] }, - { name: "provider_snapshot_id_where_isonline_and_islastofday", unique: false, fields: ["id"], where: { isOnline: true, isLastOfDay: true } } + { name: "provider_snapshot_id_where_isonline_and_islastofday", unique: false, fields: ["id"], where: { isOnline: true, isLastOfDay: true } }, + { name: "provider_snapshot_id_where_islastsuccessofday", unique: false, fields: ["id"], where: { isLastSuccessOfDay: true } } ] }) export class ProviderSnapshot extends Model { @Required @PrimaryKey @Default(DataTypes.UUIDV4) @Column(DataTypes.UUID) id: string; @Required @Column owner: string; @Required @Default(false) @Column isLastOfDay: boolean; + @Required @Default(false) @Column isLastSuccessOfDay: boolean; // Stats @Required @Column isOnline: boolean;