Skip to content

Commit

Permalink
add queries + support GPU utilization in /graph-data
Browse files Browse the repository at this point in the history
  • Loading branch information
0xnirmal committed Dec 23, 2024
1 parent 5229083 commit b3f0a95
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 5 deletions.
4 changes: 3 additions & 1 deletion apps/api/src/caching/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,5 +85,7 @@ export const cacheKeys = {
getTestnetVersion: "getTestnetVersion",
getSandboxVersion: "getSandboxVersion",
getGpuModels: "getGpuModels",
getTrialProviders: "getTrialProviders"
getTrialProviders: "getTrialProviders",
getGpuUtilization: "getGpuUtilization",
getGpuBreakdown: "getGpuBreakdown"
};
163 changes: 163 additions & 0 deletions apps/api/src/services/db/gpuBreakdownService.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import { cacheKeys, cacheResponse } from "@src/caching/helpers";
import { chainDb } from "@src/db/dbConnection";
import { QueryTypes } from "sequelize";

/**
 * Shape of one row produced by the daily utilization query in `getGpuUtilization`.
 *
 * Field names mirror the column aliases of that query's SELECT list, so they
 * must be kept in sync with the SQL.
 */
type GpuUtilizationData = {
  /** The `"day"."date"` value the row is grouped by. */
  date: Date;
  /** (active + pending) CPU as a percentage of active + pending + available CPU, rounded to 2 decimals. */
  cpuUtilization: number;
  /** Sum of active, pending and available CPU for the day. */
  cpu: number;
  /** (active + pending) GPU as a percentage of active + pending + available GPU, rounded to 2 decimals. */
  gpuUtilization: number;
  /** Sum of active, pending and available GPU for the day. */
  gpu: number;
  /**
   * Number of joined provider rows for the day (`COUNT(*) as provider_count`).
   * NOTE(review): COUNT yields a PostgreSQL bigint, which the driver may hand back
   * as a string rather than a number — confirm before doing arithmetic on it.
   */
  provider_count: number;
  /**
   * Number of distinct node ids for the day (`COUNT(DISTINCT "nodeId") as node_count`).
   * NOTE(review): same bigint caveat as `provider_count`.
   */
  node_count: number;
};

/**
 * One entry of the GPU breakdown: aggregated figures for a single
 * (day, vendor, model) combination, in camelCase form for API consumers.
 */
type GpuBreakdownData = {
  /** Day the figures belong to. */
  date: Date;
  /** GPU vendor name; the breakdown query substitutes 'Unknown' when it is missing. */
  vendor: string;
  /** GPU model name; the breakdown query substitutes 'Unknown' when it is missing. */
  model: string;
  /** Count of distinct provider host URIs contributing to this entry. */
  providerCount: number;
  /** Count of distinct snapshot nodes contributing to this entry. */
  nodeCount: number;
  /** Count of GPU rows matched for this entry. */
  totalGpus: number;
  /** Rounded number of GPUs reported as allocated for this entry. */
  leasedGpus: number;
  /** Allocated GPUs as a percentage of `totalGpus`. */
  gpuUtilization: number;
};

/**
 * Returns the daily GPU utilization series in the graph-data shape
 * (`currentValue`, `compareValue`, `snapshots`).
 *
 * The query keeps, per provider host URI and calendar day, the most recent
 * snapshot flagged `isLastSuccessOfDay` that has at least one node with
 * allocatable GPUs, then aggregates those rows per `"day"` row. Only the
 * `date` and `gpuUtilization` columns are consumed by the mapping below.
 *
 * The result goes through `cacheResponse` under `cacheKeys.getGpuUtilization`
 * with a 300-second duration argument; the meaning of the trailing `true`
 * flag is defined by `cacheResponse` and is not visible from this file.
 *
 * NOTE(review): the LEFT JOIN on "providerSnapshotNodeGPU" is never referenced,
 * and because of the DISTINCT ON only one node row survives per provider/day,
 * so `node_count` looks like it cannot exceed the provider count — confirm intent.
 *
 * @returns `currentValue` is the last day's utilization, `compareValue` the
 *   previous day's (each 0 when that day is absent), and `snapshots` the full
 *   ascending series of `{ date, value }` points.
 */
export async function getGpuUtilization() {
  const loadUtilizationSeries = async () => {
    const rows = await chainDb.query<GpuUtilizationData>(
      `SELECT
d."date",
ROUND(
COALESCE((SUM("activeCPU") + SUM("pendingCPU")) * 100.0 /
NULLIF(SUM("activeCPU") + SUM("pendingCPU") + SUM("availableCPU"), 0), 0),
2
)::float AS "cpuUtilization",
COALESCE(SUM("activeCPU") + SUM("pendingCPU") + SUM("availableCPU"), 0)::integer AS "cpu",
ROUND(
COALESCE((SUM("activeGPU") + SUM("pendingGPU")) * 100.0 /
NULLIF(SUM("activeGPU") + SUM("pendingGPU") + SUM("availableGPU"), 0), 0),
2
)::float AS "gpuUtilization",
COALESCE(SUM("activeGPU") + SUM("pendingGPU") + SUM("availableGPU"), 0)::integer AS "gpu",
COUNT(*) as provider_count,
COALESCE(COUNT(DISTINCT "nodeId"), 0) as node_count
FROM "day" d
INNER JOIN (
SELECT DISTINCT ON("hostUri",DATE("checkDate"))
DATE("checkDate") AS date,
ps."activeCPU", ps."pendingCPU", ps."availableCPU",
ps."activeGPU", ps."pendingGPU", ps."availableGPU",
ps."isOnline",
n.id as "nodeId"
FROM "providerSnapshot" ps
INNER JOIN "provider" ON "provider"."owner"=ps."owner"
INNER JOIN "providerSnapshotNode" n ON n."snapshotId"=ps.id AND n."gpuAllocatable" > 0
LEFT JOIN "providerSnapshotNodeGPU" gpu ON gpu."snapshotNodeId" = n.id
WHERE ps."isLastSuccessOfDay" = TRUE
ORDER BY "hostUri",DATE("checkDate"),"checkDate" DESC
) "dailyProviderStats"
ON DATE(d."date")="dailyProviderStats"."date"
GROUP BY d."date"
ORDER BY d."date" ASC`,
      {
        type: QueryTypes.SELECT
      }
    );

    // Project each daily row down to the single metric exposed to the graph.
    const snapshots = rows.map(({ date, gpuUtilization }) => ({
      date,
      value: gpuUtilization
    }));

    // Rows are ordered by date ascending, so the tail holds the newest days.
    const newest = snapshots[snapshots.length - 1];
    const previous = snapshots[snapshots.length - 2];

    return {
      currentValue: newest?.value ?? 0,
      compareValue: previous?.value ?? 0,
      snapshots
    };
  };

  return await cacheResponse(
    60 * 5, // 5 minutes
    cacheKeys.getGpuUtilization,
    loadUtilizationSeries,
    true
  );
}

/**
 * Returns, for every day, GPU figures broken down by vendor and model.
 *
 * The query keeps, per provider host URI and calendar day, the most recent
 * snapshot flagged `isLastSuccessOfDay`, joins its nodes that have allocatable
 * GPUs and their GPU rows, and groups by day, vendor and model. A node's
 * `gpuAllocated` is spread evenly across that node's GPU rows, so summing per
 * group apportions the node's leased GPUs to each vendor/model.
 *
 * Aggregate columns are passed through `Number()` before being returned:
 * `COUNT(...)` produces a PostgreSQL bigint and the utilization is cast to
 * `numeric(10,2)`, and both of those types are typically delivered by the
 * driver as strings. Without the coercion the returned objects would carry
 * strings in fields declared as `number`.
 *
 * The result goes through `cacheResponse` under `cacheKeys.getGpuBreakdown`
 * with a 300-second duration argument; the meaning of the trailing `true`
 * flag is defined by `cacheResponse` and is not visible from this file.
 *
 * @returns One entry per (day, vendor, model), ordered by day ascending, then
 *   vendor, then model.
 */
export async function getGpuBreakdownByVendorAndModel(): Promise<GpuBreakdownData[]> {
  return await cacheResponse(
    60 * 5, // 5 minutes
    cacheKeys.getGpuBreakdown,
    async () => {
      const result = await chainDb.query<{
        date: Date;
        vendor: string;
        model: string;
        // bigint / numeric columns may arrive as strings, hence the wider types.
        provider_count: number | string;
        node_count: number | string;
        total_gpus: number | string;
        leased_gpus: number | string;
        gpuUtilization: number | string;
      }>(
        `SELECT
d."date",
COALESCE(gpu."vendor", 'Unknown') as "vendor",
COALESCE(gpu."name", 'Unknown') as "model",
COALESCE(COUNT(DISTINCT "dailyProviderStats"."hostUri"), 0) as provider_count,
COALESCE(COUNT(DISTINCT n.id), 0) as node_count,
COALESCE(COUNT(gpu.id), 0) as total_gpus,
COALESCE(CAST(ROUND(SUM(
CAST(n."gpuAllocated" as float) /
NULLIF((SELECT COUNT(*)
FROM "providerSnapshotNodeGPU" subgpu
WHERE subgpu."snapshotNodeId" = n.id), 0)
)) as int), 0) as leased_gpus,
CAST(COALESCE(
SUM(
CAST(n."gpuAllocated" as float) /
NULLIF((SELECT COUNT(*)
FROM "providerSnapshotNodeGPU" subgpu
WHERE subgpu."snapshotNodeId" = n.id), 0)
) * 100.0 / NULLIF(COUNT(gpu.id), 0)
, 0) as numeric(10,2)) as "gpuUtilization"
FROM "day" d
INNER JOIN (
SELECT DISTINCT ON("hostUri", DATE("checkDate"))
ps.id as "snapshotId",
"hostUri",
DATE("checkDate") AS date,
ps."isOnline"
FROM "providerSnapshot" ps
INNER JOIN "provider" ON "provider"."owner" = ps."owner"
WHERE ps."isLastSuccessOfDay" = TRUE
ORDER BY "hostUri", DATE("checkDate"), "checkDate" DESC
) "dailyProviderStats" ON DATE(d."date") = "dailyProviderStats"."date"
INNER JOIN "providerSnapshotNode" n ON n."snapshotId" = "dailyProviderStats"."snapshotId" AND n."gpuAllocatable" > 0
LEFT JOIN "providerSnapshotNodeGPU" gpu ON gpu."snapshotNodeId" = n.id
GROUP BY d."date", gpu."vendor", gpu."name"
ORDER BY d."date" ASC, gpu."vendor", gpu."name"`,
        {
          type: QueryTypes.SELECT
        }
      );

      // Rename snake_case columns to the camelCase API shape and normalise
      // every aggregate to a real JS number (every column is COALESCEd in SQL,
      // so no nulls are expected here).
      return result.map(row => ({
        date: row.date,
        vendor: row.vendor,
        model: row.model,
        providerCount: Number(row.provider_count),
        nodeCount: Number(row.node_count),
        totalGpus: Number(row.total_gpus),
        leasedGpus: Number(row.leased_gpus),
        gpuUtilization: Number(row.gpuUtilization)
      }));
    },
    true
  );
}

/**
 * Returns only the breakdown entries that belong to the most recent day
 * present in `getGpuBreakdownByVendorAndModel()`.
 *
 * The search for the newest day starts from the Unix epoch, so an empty
 * breakdown yields an empty array.
 */
export async function getLatestGpuBreakdown(): Promise<GpuBreakdownData[]> {
  const entries = await getGpuBreakdownByVendorAndModel();

  // Track the newest date seen so far; an entry replaces it unless the
  // current holder is strictly later.
  let newest = new Date(0);
  for (const { date } of entries) {
    if (!(newest > date)) {
      newest = date;
    }
  }

  return entries.filter(entry => entry.date.getTime() === newest.getTime());
}
11 changes: 7 additions & 4 deletions apps/api/src/services/db/statsService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { cacheKeys, cacheResponse } from "@src/caching/helpers";
import { chainDb } from "@src/db/dbConnection";
import { ProviderActiveLeasesStats, ProviderStats, ProviderStatsKey } from "@src/types/graph";
import { env } from "@src/utils/env";
import { getGpuUtilization } from "./gpuBreakdownService";

type GraphData = {
currentValue: number;
Expand Down Expand Up @@ -90,7 +91,8 @@ type AuthorizedGraphDataName =
| "activeCPU"
| "activeGPU"
| "activeMemory"
| "activeStorage";
| "activeStorage"
| "gpuUtilization";

export const AuthorizedGraphDataNames: AuthorizedGraphDataName[] = [
"dailyUAktSpent",
Expand All @@ -105,16 +107,15 @@ export const AuthorizedGraphDataNames: AuthorizedGraphDataName[] = [
"activeCPU",
"activeGPU",
"activeMemory",
"activeStorage"
"activeStorage",
"gpuUtilization"
];

/**
 * Type guard that reports whether `x` is one of the names listed in
 * `AuthorizedGraphDataNames`, narrowing it to `AuthorizedGraphDataName` when so.
 */
export function isValidGraphDataName(x: string): x is AuthorizedGraphDataName {
  // Widen the list to plain strings so the lookup needs no cast on the argument.
  const allowedNames: readonly string[] = AuthorizedGraphDataNames;
  return allowedNames.includes(x);
}

export async function getGraphData(dataName: AuthorizedGraphDataName): Promise<GraphData> {
console.log("getGraphData: " + dataName);

let attributes: (keyof Block)[] = [];
let isRelative = false;
let getter: (block: Block) => number = null;
Expand Down Expand Up @@ -144,6 +145,8 @@ export async function getGraphData(dataName: AuthorizedGraphDataName): Promise<G
attributes = ["activeEphemeralStorage", "activePersistentStorage"];
getter = (block: Block) => block.activeEphemeralStorage + block.activePersistentStorage;
break;
case "gpuUtilization":
return await getGpuUtilization();
default:
attributes = [dataName];
getter = (block: Block) => block[dataName];
Expand Down

0 comments on commit b3f0a95

Please sign in to comment.