diff --git a/indexer/src/providers/providerStatusProvider.ts b/indexer/src/providers/providerStatusProvider.ts index 55aee95b2..d499bb946 100644 --- a/indexer/src/providers/providerStatusProvider.ts +++ b/indexer/src/providers/providerStatusProvider.ts @@ -1,11 +1,15 @@ import https from "https"; import axios from "axios"; import semver from "semver"; -import { Provider } from "@shared/dbSchemas/akash"; +import { Provider, ProviderSnapshotNode, ProviderSnapshotNodeCPU, ProviderSnapshotNodeGPU } from "@shared/dbSchemas/akash"; import { asyncify, eachLimit } from "async"; import { ProviderSnapshot } from "@src/../../shared/dbSchemas/akash/providerSnapshot"; -import { fetchAndSaveProviderStats as grpcFetchAndSaveProviderStats } from "./statusEndpointHandlers/grpc"; -import { fetchAndSaveProviderStats as restFetchAndSaveProviderStats } from "./statusEndpointHandlers/rest"; +import { sequelize } from "@src/db/dbConnection"; +import { toUTC } from "@src/shared/utils/date"; +import { ProviderStatusInfo, ProviderVersionEndpointResponseType } from "./statusEndpointHandlers/types"; +import { isSameDay } from "date-fns"; +import { fetchProviderStatusFromGRPC } from "./statusEndpointHandlers/grpc"; +import { fetchProviderStatusFromREST } from "./statusEndpointHandlers/rest"; const ConcurrentStatusCall = 10; const StatusCallTimeout = 10_000; // 10 seconds @@ -16,7 +20,10 @@ export async function syncProvidersInfo() { deletedHeight: null }, include: [{ model: ProviderSnapshot, as: "lastSnapshot" }], - order: [["isOnline", "DESC"]] + order: [ + ["isOnline", "DESC"], + ["uptime30d", "DESC"] + ] }); const httpsAgent = new https.Agent({ @@ -28,103 +35,160 @@ export async function syncProvidersInfo() { providers, ConcurrentStatusCall, asyncify(async (provider: Provider) => { + let providerStatus: ProviderStatusInfo | null = null; + let errorMessage: string | null = null; + let akashVersion: string | null = null; + let cosmosVersion: string | null = null; + try { const versionResponse = await axios.get(provider.hostUri + "/version", { httpsAgent: httpsAgent, timeout: StatusCallTimeout }); - const akashVersion = semver.valid(versionResponse.data.akash.version); - const cosmosVersion = semver.valid( + akashVersion = semver.valid(versionResponse.data.akash.version); + cosmosVersion = semver.valid( "cosmosSdkVersion" in versionResponse.data.akash ? versionResponse.data.akash.cosmosSdkVersion : versionResponse.data.akash.cosmos_sdk_version ); if (akashVersion && semver.gte(akashVersion, "0.5.0-0")) { - await grpcFetchAndSaveProviderStats(provider, cosmosVersion, akashVersion, StatusCallTimeout); + providerStatus = await fetchProviderStatusFromGRPC(provider, StatusCallTimeout); } else { - await restFetchAndSaveProviderStats(provider, cosmosVersion, akashVersion, StatusCallTimeout); + providerStatus = await fetchProviderStatusFromREST(provider, StatusCallTimeout); } } catch (err) { - const checkDate = new Date(); - const errorMessage = err?.message?.toString() ?? err?.toString(); + errorMessage = err?.message?.toString() ?? err?.toString(); + } - await Provider.update( - { - isOnline: false, - lastCheckDate: checkDate, - error: errorMessage, - akashVersion: null, - cosmosSdkVersion: null, - deploymentCount: null, - leaseCount: null, - activeCPU: null, - activeGPU: null, - activeMemory: null, - activeStorage: null, - pendingCPU: null, - pendingGPU: null, - pendingMemory: null, - pendingStorage: null, - availableCPU: null, - availableGPU: null, - availableMemory: null, - availableStorage: null - }, - { - where: { owner: provider.owner } - } - ); + await saveProviderStatus(provider, providerStatus, akashVersion, cosmosVersion, errorMessage); - await ProviderSnapshot.create({ - owner: provider.owner, - isOnline: false, - error: errorMessage, - checkDate: checkDate - }); - } finally { - doneCount++; - console.log("Fetched provider info: " + doneCount + " / " + providers.length); - } + doneCount++; + console.log("Fetched provider info: " + doneCount + " / " + providers.length); }) ); console.log("Finished refreshing provider infos"); } -type ProviderVersionEndpointResponseType = - | { - akash: { version: string; commit: string; buildTags: string; go: string; cosmosSdkVersion: string }; - kube: { - major: string; - minor: string; - gitVersion: string; - gitCommit: string; - gitTreeState: string; - buildDate: string; - goVersion: string; - compiler: string; - platform: string; - }; +async function saveProviderStatus( + provider: Provider, + providerStatus: ProviderStatusInfo | null, + akashVersion: string | null, + cosmosVersion: string | null, + error: string | null +) { + await sequelize.transaction(async (t) => { + const checkDate = toUTC(new Date()); + + const createdSnapshot = await ProviderSnapshot.create( + { + owner: provider.owner, + isOnline: !!providerStatus, + isLastOfDay: true, + error: error, + checkDate: checkDate, + deploymentCount: providerStatus?.resources.deploymentCount, + leaseCount: providerStatus?.resources.leaseCount, + activeCPU: providerStatus?.resources.activeCPU, + activeGPU: providerStatus?.resources.activeGPU, + activeMemory: providerStatus?.resources.activeMemory, + activeStorage: providerStatus?.resources.activeStorage, + pendingCPU: providerStatus?.resources.pendingCPU, + pendingGPU: providerStatus?.resources.pendingGPU, + pendingMemory: providerStatus?.resources.pendingMemory, + pendingStorage: providerStatus?.resources.pendingStorage, + availableCPU: providerStatus?.resources.availableCPU, + availableGPU: providerStatus?.resources.availableGPU, + availableMemory: providerStatus?.resources.availableMemory, + availableStorage: providerStatus?.resources.availableStorage + }, + { transaction: t } + ); + + if (provider.lastSnapshot && isSameDay(provider.lastSnapshot.checkDate, checkDate)) { + await ProviderSnapshot.update( + { + isLastOfDay: false + }, + { + where: { id: provider.lastSnapshot.id }, + transaction: t + } + ); } - | { - akash: { - name: string; - server_name: string; - version: string; - commit: string; - build_tags: string; - go: string; - cosmos_sdk_version: string; - }; - kube: { - major: string; - minor: string; - gitVersion: string; - gitCommit: string; - gitTreeState: string; - buildDate: string; - goVersion: string; - compiler: string; - platform: string; - }; - }; + + await Provider.update( + { + lastSnapshotId: createdSnapshot.id, + isOnline: !!providerStatus, + error: error, + lastCheckDate: checkDate, + cosmosSdkVersion: cosmosVersion, + akashVersion: akashVersion, + deploymentCount: providerStatus?.resources.deploymentCount, + leaseCount: providerStatus?.resources.leaseCount, + activeCPU: providerStatus?.resources.activeCPU, + activeGPU: providerStatus?.resources.activeGPU, + activeMemory: providerStatus?.resources.activeMemory, + activeStorage: providerStatus?.resources.activeStorage, + pendingCPU: providerStatus?.resources.pendingCPU, + pendingGPU: providerStatus?.resources.pendingGPU, + pendingMemory: providerStatus?.resources.pendingMemory, + pendingStorage: providerStatus?.resources.pendingStorage, + availableCPU: providerStatus?.resources.availableCPU, + availableGPU: providerStatus?.resources.availableGPU, + availableMemory: providerStatus?.resources.availableMemory, + availableStorage: providerStatus?.resources.availableStorage + }, + { + where: { owner: provider.owner }, + transaction: t + } + ); + + if (providerStatus) { + for (const node of providerStatus.nodes) { + const providerSnapshotNode = await ProviderSnapshotNode.create( + { + snapshotId: createdSnapshot.id, + name: node.name, + cpuAllocatable: node.cpuAllocatable, + cpuAllocated: node.cpuAllocated, + memoryAllocatable: node.memoryAllocatable, + memoryAllocated: node.memoryAllocated, + ephemeralStorageAllocatable: node.ephemeralStorageAllocatable, + ephemeralStorageAllocated: node.ephemeralStorageAllocated, + capabilitiesStorageHDD: node.capabilitiesStorageHDD, + capabilitiesStorageSSD: node.capabilitiesStorageSSD, + capabilitiesStorageNVME: node.capabilitiesStorageNVME, + gpuAllocatable: node.gpuAllocatable, + gpuAllocated: node.gpuAllocated + }, + { transaction: t } + ); + + await ProviderSnapshotNodeCPU.bulkCreate( + node.cpus.map((cpuInfo) => ({ + snapshotNodeId: providerSnapshotNode.id, + vendor: cpuInfo.vendor, + model: cpuInfo.model, + vcores: cpuInfo.vcores + })), + { transaction: t } + ); + + await ProviderSnapshotNodeGPU.bulkCreate( + node.gpus.map((gpuInfo) => ({ + snapshotNodeId: providerSnapshotNode.id, + vendor: gpuInfo.vendor, + name: gpuInfo.name, + modelId: gpuInfo.modelId, + interface: gpuInfo.interface, + memorySize: gpuInfo.memorySize + })), + { transaction: t } + ); + } + } + }); +} diff --git a/indexer/src/providers/statusEndpointHandlers/grpc.ts b/indexer/src/providers/statusEndpointHandlers/grpc.ts index 9382f9456..b8356547c 100644 --- a/indexer/src/providers/statusEndpointHandlers/grpc.ts +++ b/indexer/src/providers/statusEndpointHandlers/grpc.ts @@ -1,15 +1,13 @@ -import { Provider, ProviderSnapshot, ProviderSnapshotNode, ProviderSnapshotNodeCPU, ProviderSnapshotNodeGPU } from "@shared/dbSchemas/akash"; -import { sequelize } from "@src/db/dbConnection"; -import { toUTC } from "@src/shared/utils/date"; +import { Provider } from "@shared/dbSchemas/akash"; import { parseDecimalKubernetesString, parseSizeStr } from "@src/shared/utils/files"; -import { isSameDay } from "date-fns"; import { createPromiseClient } from "@connectrpc/connect"; import { createGrpcTransport } from "@connectrpc/connect-node"; import { ProviderRPC } from "@src/proto/gen/akash/provider/v1/service_connect"; import { ResourcesMetric, Status } from "@src/proto/gen/akash/provider/v1/status_pb"; import { NodeResources } from "@src/proto/gen/akash/inventory/v1/resources_pb"; +import { ProviderStatusInfo } from "./types"; -export async function fetchAndSaveProviderStats(provider: Provider, cosmosSdkVersion: string, version: string, timeout: number) { +export async function fetchProviderStatusFromGRPC(provider: Provider, timeout: number): Promise { const data = await queryStatus(provider.hostUri, timeout); const activeResources = parseResources(data.cluster.inventory.reservations.active.resources); @@ -30,122 +28,55 @@ export async function fetchAndSaveProviderStats(provider: Provider, cosmosSdkVer storage: 0 } ); - const checkDate = toUTC(new Date()); - await sequelize.transaction(async (t) => { - const createdSnapshot = await ProviderSnapshot.create( - { - owner: provider.owner, - isOnline: true, - checkDate: checkDate, - isLastOfDay: true, - deploymentCount: data.manifest.deployments, - leaseCount: data.cluster.leases.active ?? 0, - activeCPU: activeResources.cpu, - activeGPU: activeResources.gpu, - activeMemory: activeResources.memory, - activeStorage: activeResources.storage, - pendingCPU: pendingResources.cpu, - pendingGPU: pendingResources.gpu, - pendingMemory: pendingResources.memory, - pendingStorage: pendingResources.storage, - availableCPU: availableResources.cpu, - availableGPU: availableResources.gpu, - availableMemory: availableResources.memory, - availableStorage: availableResources.storage - }, - { transaction: t } - ); - - if (provider.lastSnapshot && isSameDay(provider.lastSnapshot.checkDate, checkDate)) { - await ProviderSnapshot.update( - { - isLastOfDay: false - }, - { - where: { id: provider.lastSnapshot.id }, - transaction: t - } - ); - } - - await Provider.update( - { - lastSnapshotId: createdSnapshot.id, - isOnline: true, - error: null, - lastCheckDate: checkDate, - cosmosSdkVersion: cosmosSdkVersion, - akashVersion: version, - deploymentCount: data.manifest.deployments, - leaseCount: data.cluster.leases.active ?? 0, - activeCPU: activeResources.cpu, - activeGPU: activeResources.gpu, - activeMemory: activeResources.memory, - activeStorage: activeResources.storage, - pendingCPU: pendingResources.cpu, - pendingGPU: pendingResources.gpu, - pendingMemory: pendingResources.memory, - pendingStorage: pendingResources.storage, - availableCPU: availableResources.cpu, - availableGPU: availableResources.gpu, - availableMemory: availableResources.memory, - availableStorage: availableResources.storage - }, - { - where: { owner: provider.owner }, - transaction: t - } - ); - - for (const node of data.cluster.inventory.cluster.nodes) { + return { + resources: { + deploymentCount: data.manifest.deployments, + leaseCount: data.cluster.leases.active ?? 0, + activeCPU: activeResources.cpu, + activeGPU: activeResources.gpu, + activeMemory: activeResources.memory, + activeStorage: activeResources.storage, + pendingCPU: pendingResources.cpu, + pendingGPU: pendingResources.gpu, + pendingMemory: pendingResources.memory, + pendingStorage: pendingResources.storage, + availableCPU: availableResources.cpu, + availableGPU: availableResources.gpu, + availableMemory: availableResources.memory, + availableStorage: availableResources.storage + }, + nodes: data.cluster.inventory.cluster.nodes.map((node) => { const parsedResources = parseNodeResources(node.resources); - const providerSnapshotNode = await ProviderSnapshotNode.create( - { - snapshotId: createdSnapshot.id, - name: node.name, - cpuAllocatable: parsedResources.allocatableCPU, - cpuAllocated: parsedResources.allocatedCPU, - memoryAllocatable: parsedResources.allocatableMemory, - memoryAllocated: parsedResources.allocatedMemory, - ephemeralStorageAllocatable: parsedResources.allocatableStorage, - ephemeralStorageAllocated: parsedResources.allocatedStorage, - capabilitiesStorageHDD: node.capabilities.storageClasses.includes("beta1"), - capabilitiesStorageSSD: node.capabilities.storageClasses.includes("beta2"), - capabilitiesStorageNVME: node.capabilities.storageClasses.includes("beta3"), - gpuAllocatable: parsedResources.allocatableGPU, - gpuAllocated: parsedResources.allocatedGPU - }, - { transaction: t } - ); - - for (const cpuInfo of node.resources.cpu.info) { - await ProviderSnapshotNodeCPU.create( - { - snapshotNodeId: providerSnapshotNode.id, - vendor: cpuInfo.vendor, - model: cpuInfo.model, - vcores: cpuInfo.vcores - }, - { transaction: t } - ); - } - for (const gpuInfo of node.resources.gpu.info) { - await ProviderSnapshotNodeGPU.create( - { - snapshotNodeId: providerSnapshotNode.id, - vendor: gpuInfo.vendor, - name: gpuInfo.name, - modelId: gpuInfo.modelid, - interface: gpuInfo.interface, - memorySize: gpuInfo.memorySize // TODO: Change type to bytes? - }, - { transaction: t } - ); - } - } - }); + return { + name: node.name, + cpuAllocatable: parsedResources.allocatableCPU, + cpuAllocated: parsedResources.allocatedCPU, + memoryAllocatable: parsedResources.allocatableMemory, + memoryAllocated: parsedResources.allocatedMemory, + ephemeralStorageAllocatable: parsedResources.allocatableStorage, + ephemeralStorageAllocated: parsedResources.allocatedStorage, + capabilitiesStorageHDD: node.capabilities.storageClasses.includes("beta1"), + capabilitiesStorageSSD: node.capabilities.storageClasses.includes("beta2"), + capabilitiesStorageNVME: node.capabilities.storageClasses.includes("beta3"), + gpuAllocatable: parsedResources.allocatableGPU, + gpuAllocated: parsedResources.allocatedGPU, + cpus: node.resources.cpu.info.map((cpuInfo) => ({ + vendor: cpuInfo.vendor, + model: cpuInfo.model, + vcores: cpuInfo.vcores + })), + gpus: node.resources.gpu.info.map((gpuInfo) => ({ + vendor: gpuInfo.vendor, + name: gpuInfo.name, + modelId: gpuInfo.modelid, + interface: gpuInfo.interface, + memorySize: gpuInfo.memorySize // TODO: Change type to bytes? + })) + }; + }) + }; } async function queryStatus(hostUri: string, timeout: number): Promise { @@ -155,6 +86,7 @@ async function queryStatus(hostUri: string, timeout: number): Promise { baseUrl: url, httpVersion: "2", nodeOptions: { rejectUnauthorized: false }, + defaultTimeoutMs: timeout, interceptors: [] }); const client = createPromiseClient(ProviderRPC, transport); @@ -187,10 +119,14 @@ function parseNodeResources(resources: NodeResources) { function getAvailableResources(resources: NodeResources) { const parsedResources = parseNodeResources(resources); + + // Setting minimum to 0 to prevent negative values due to overcommit + // https://github.com/akash-network/docs/blob/master/operator/provider/README.md#cluster-resources-overcommit + return { - cpu: parsedResources.allocatableCPU - parsedResources.allocatedCPU, - memory: parsedResources.allocatableMemory - parsedResources.allocatedMemory, - storage: parsedResources.allocatableStorage - parsedResources.allocatedStorage, - gpu: parsedResources.allocatableGPU - parsedResources.allocatedGPU + cpu: Math.max(0, parsedResources.allocatableCPU - parsedResources.allocatedCPU), + memory: Math.max(0, parsedResources.allocatableMemory - parsedResources.allocatedMemory), + storage: Math.max(0, parsedResources.allocatableStorage - parsedResources.allocatedStorage), + gpu: Math.max(0, parsedResources.allocatableGPU - parsedResources.allocatedGPU) }; } diff --git a/indexer/src/providers/statusEndpointHandlers/rest.ts b/indexer/src/providers/statusEndpointHandlers/rest.ts index 653a4fd99..1ac3d1746 100644 --- a/indexer/src/providers/statusEndpointHandlers/rest.ts +++ b/indexer/src/providers/statusEndpointHandlers/rest.ts @@ -1,11 +1,9 @@ -import { Provider, ProviderSnapshot } from "@shared/dbSchemas/akash"; -import { sequelize } from "@src/db/dbConnection"; -import { toUTC } from "@src/shared/utils/date"; +import { Provider } from "@shared/dbSchemas/akash"; import axios from "axios"; -import { isSameDay } from "date-fns"; import https from "https"; +import { ProviderStatusInfo } from "./types"; -export async function fetchAndSaveProviderStats(provider: Provider, cosmosSdkVersion: string, version: string, timeout: number) { +export async function fetchProviderStatusFromREST(provider: Provider, timeout: number): Promise { const httpsAgent = new https.Agent({ rejectUnauthorized: false }); @@ -20,74 +18,26 @@ export async function fetchAndSaveProviderStats(provider: Provider, cosmosSdkVer const activeResources = sumResources(response.data.cluster.inventory.active); const pendingResources = sumResources(response.data.cluster.inventory.pending); const availableResources = sumResources(response.data.cluster.inventory.available); - const checkDate = toUTC(new Date()); - await sequelize.transaction(async (t) => { - const createdSnapshot = await ProviderSnapshot.create( - { - owner: provider.owner, - isOnline: true, - isLastOfDay: true, - checkDate: checkDate, - deploymentCount: response.data.manifest.deployments, - leaseCount: response.data.cluster.leases, - activeCPU: activeResources.cpu, - activeGPU: activeResources.gpu, - activeMemory: activeResources.memory, - activeStorage: activeResources.storage, - pendingCPU: pendingResources.cpu, - pendingGPU: pendingResources.gpu, - pendingMemory: pendingResources.memory, - pendingStorage: pendingResources.storage, - availableCPU: availableResources.cpu, - availableGPU: availableResources.gpu, - availableMemory: availableResources.memory, - availableStorage: availableResources.storage - }, - { transaction: t } - ); - - if (provider.lastSnapshot && isSameDay(provider.lastSnapshot.checkDate, checkDate)) { - await ProviderSnapshot.update( - { - isLastOfDay: false - }, - { - where: { id: provider.lastSnapshot.id }, - transaction: t - } - ); - } - - await Provider.update( - { - lastSnapshotId: createdSnapshot.id, - isOnline: true, - error: null, - lastCheckDate: checkDate, - cosmosSdkVersion: cosmosSdkVersion, - akashVersion: version, - deploymentCount: response.data.manifest.deployments, - leaseCount: response.data.cluster.leases, - activeCPU: activeResources.cpu, - activeGPU: activeResources.gpu, - activeMemory: activeResources.memory, - activeStorage: activeResources.storage, - pendingCPU: pendingResources.cpu, - pendingGPU: pendingResources.gpu, - pendingMemory: pendingResources.memory, - pendingStorage: pendingResources.storage, - availableCPU: availableResources.cpu, - availableGPU: availableResources.gpu, - availableMemory: availableResources.memory, - availableStorage: availableResources.storage - }, - { - where: { owner: provider.owner }, - transaction: t - } - ); - }); + return { + resources: { + deploymentCount: response.data.manifest.deployments, + leaseCount: response.data.cluster.leases, + activeCPU: activeResources.cpu, + activeGPU: activeResources.gpu, + activeMemory: activeResources.memory, + activeStorage: activeResources.storage, + pendingCPU: pendingResources.cpu, + pendingGPU: pendingResources.gpu, + pendingMemory: pendingResources.memory, + pendingStorage: pendingResources.storage, + availableCPU: availableResources.cpu, + availableGPU: availableResources.gpu, + availableMemory: availableResources.memory, + availableStorage: availableResources.storage + }, + nodes: [] + }; } function sumResources(resources) { diff --git a/indexer/src/providers/statusEndpointHandlers/types.ts b/indexer/src/providers/statusEndpointHandlers/types.ts new file mode 100644 index 000000000..a0f5b2884 --- /dev/null +++ b/indexer/src/providers/statusEndpointHandlers/types.ts @@ -0,0 +1,74 @@ +export type ProviderStatusInfo = { + resources: { + deploymentCount: number; + leaseCount: number; + activeCPU: number; + activeGPU: number; + activeMemory: number; + activeStorage: number; + pendingCPU: number; + pendingGPU: number; + pendingMemory: number; + pendingStorage: number; + availableCPU: number; + availableGPU: number; + availableMemory: number; + availableStorage: number; + }; + + nodes: { + name: string; + cpuAllocatable: number; + cpuAllocated: number; + memoryAllocatable: number; + memoryAllocated: number; + ephemeralStorageAllocatable: number; + ephemeralStorageAllocated: number; + capabilitiesStorageHDD: boolean; + capabilitiesStorageSSD: boolean; + capabilitiesStorageNVME: boolean; + gpuAllocatable: number; + gpuAllocated: number; + + cpus: { vendor: string; model: string; vcores: number }[]; + gpus: { vendor: string; name: string; modelId: string; interface: string; memorySize: string }[]; + }[]; +}; + +export type ProviderVersionEndpointResponseType = + | { + akash: { version: string; commit: string; buildTags: string; go: string; cosmosSdkVersion: string }; + kube: { + major: string; + minor: string; + gitVersion: string; + gitCommit: string; + gitTreeState: string; + buildDate: string; + goVersion: string; + compiler: string; + platform: string; + }; + } + | { + akash: { + name: string; + server_name: string; + version: string; + commit: string; + build_tags: string; + go: string; + cosmos_sdk_version: string; + }; + kube: { + major: string; + minor: string; + gitVersion: string; + gitCommit: string; + gitTreeState: string; + buildDate: string; + goVersion: string; + compiler: string; + platform: string; + }; + };