From 68c0d88d489dc3ad95cc3697391b9ace1d62b58e Mon Sep 17 00:00:00 2001
From: "Andres D. Molins"
Date: Mon, 2 Dec 2024 21:48:53 +0100
Subject: [PATCH] Fix: Added GPU resources to the main pool and exposed it on
 the endpoint.

---
 src/aleph/vm/conf.py                   |  7 ++
 src/aleph/vm/orchestrator/resources.py | 84 ++++------------------
 src/aleph/vm/pool.py                   | 10 +++
 src/aleph/vm/resources.py              | 97 ++++++++++++++++++++++++++
 4 files changed, 127 insertions(+), 71 deletions(-)
 create mode 100644 src/aleph/vm/resources.py

diff --git a/src/aleph/vm/conf.py b/src/aleph/vm/conf.py
index 18add717..264be819 100644
--- a/src/aleph/vm/conf.py
+++ b/src/aleph/vm/conf.py
@@ -275,6 +275,11 @@ class Settings(BaseSettings):
 
     CONFIDENTIAL_SESSION_DIRECTORY: Path = Field(None, description="Default to EXECUTION_ROOT/sessions")
 
+    ENABLE_GPU_SUPPORT: bool = Field(
+        default=False,
+        description="Enable GPU pass-through support to VMs, only allowed for QEmu hypervisor",
+    )
+
     # Tests on programs
 
     FAKE_DATA_PROGRAM: Path | None = None
@@ -391,7 +396,9 @@ def check(self):
             # assert check_amd_sev_snp_supported(), "SEV-SNP feature isn't enabled, enable it in BIOS"
             assert self.ENABLE_QEMU_SUPPORT, "Qemu Support is needed for confidential computing and it's disabled, "
             "enable it setting the env variable `ENABLE_QEMU_SUPPORT=True` in configuration"
+        if self.ENABLE_GPU_SUPPORT:
+            assert self.ENABLE_QEMU_SUPPORT, "Qemu Support is needed for GPU support and it's disabled, enable it setting the env variable `ENABLE_QEMU_SUPPORT=True` in configuration"
 
     def setup(self):
         """Setup the environment defined by the settings.
         Call this method after loading the settings."""
diff --git a/src/aleph/vm/orchestrator/resources.py b/src/aleph/vm/orchestrator/resources.py
index 1818d23d..d802817e 100644
--- a/src/aleph/vm/orchestrator/resources.py
+++ b/src/aleph/vm/orchestrator/resources.py
@@ -1,19 +1,18 @@
 import math
-import subprocess
 from datetime import datetime, timezone
-from enum import Enum
 from functools import lru_cache
-from typing import List, Literal, Optional
+from typing import List, Optional
 
 import cpuinfo
 import psutil
 from aiohttp import web
 from aleph_message.models import ItemHash
-from aleph_message.models.abstract import HashableModel
 from aleph_message.models.execution.environment import CpuProperties
-from pydantic import BaseModel, Extra, Field
+from pydantic import BaseModel, Field
 
 from aleph.vm.conf import settings
+from aleph.vm.pool import VmPool
+from aleph.vm.resources import GpuProperties
 from aleph.vm.sevclient import SevClient
 from aleph.vm.utils import (
     check_amd_sev_es_supported,
@@ -73,34 +72,7 @@ class UsagePeriod(BaseModel):
     duration_seconds: float
 
 
-class GpuDeviceClass(str, Enum):
-    VGA_COMPATIBLE_CONTROLLER = "0300"
-    _3D_CONTROLLER = "0302"
-
-
-class GpuProperties(BaseModel):
-    """GPU properties."""
-
-    vendor: str = Field(description="GPU vendor name")
-    device_name: str = Field(description="GPU vendor card name")
-    device_class: GpuDeviceClass = Field(
-        description="GPU device class. "
-        "Look at https://admin.pci-ids.ucw.cz/read/PD/03"
-    )
-    device_id: str = Field(description="GPU vendor & device ids")
-
-    class Config:
-        extra = Extra.forbid
-
-
-def is_gpu_device_class(device_class: str) -> bool:
-    try:
-        GpuDeviceClass(device_class)
-        return True
-    except ValueError:
-        return False
-
-
 class MachineProperties(BaseModel):
     cpu: CpuProperties
     gpu: Optional[List[GpuProperties]]
+    available_gpus: Optional[List[GpuProperties]]
@@ -114,48 +86,16 @@ class MachineUsage(BaseModel):
     active: bool = True
 
 
-def parse_gpu_device_info(line) -> Optional[GpuProperties]:
-    """Parse GPU device info from a line of lspci output."""
-
-    device = line.split(' "', maxsplit=1)[1]
-    device_class, device_vendor, device_info = device.split('" "', maxsplit=2)
-    device_class = device_class.split("[", maxsplit=1)[1][:-1]
-    vendor, vendor_id = device_vendor.split(" [", maxsplit=1)
-    device_name = device_info.split('"', maxsplit=1)[0]
-    device_name, model_id = device_name.split(" [", maxsplit=1)
-    device_id = f"{vendor_id[:-1]}:{model_id[:-1]}"
-
-    return (
-        GpuProperties(
-            vendor=vendor,
-            device_name=device_name,
-            device_class=device_class,
-            device_id=device_id,
-        )
-        if is_gpu_device_class(device_class)
-        else None
-    )
-
-
-def get_gpu_info() -> Optional[List[GpuProperties]]:
-    """Get GPU info using lspci command."""
-
-    result = subprocess.run(["lspci", "-mmnnn"], capture_output=True, text=True, check=True)
-    gpu_devices = list(
-        {device for line in result.stdout.split("\n") if line and (device := parse_gpu_device_info(line)) is not None}
-    )
-    return gpu_devices if gpu_devices else None
-
-
-@lru_cache
-def get_machine_properties() -> MachineProperties:
+def get_machine_properties(request: web.Request) -> MachineProperties:
     """Fetch machine properties such as architecture, CPU vendor, ...
     These should not change while the supervisor is running.
 
     In the future, some properties may have to be fetched from within a VM.
     """
     cpu_info = cpuinfo.get_cpu_info()  # Slow
-    gpu_info = get_gpu_info()
+    pool: VmPool = request.app["vm_pool"]
+    gpus = pool.gpus
+    available_gpus = pool.get_available_gpus()
     return MachineProperties(
         cpu=CpuProperties(
             architecture=cpu_info.get("raw_arch_string", cpu_info.get("arch_string_raw")),
@@ -171,12 +111,13 @@ def get_machine_properties() -> MachineProperties:
                 )
             ),
         ),
-        gpu=gpu_info,
+        gpu=gpus,
+        available_gpus=available_gpus,
     )
 
 
 @cors_allow_all
-async def about_system_usage(_: web.Request):
+async def about_system_usage(request: web.Request):
     """Public endpoint to expose information about the system usage."""
 
     period_start = datetime.now(timezone.utc).replace(second=0, microsecond=0)
@@ -198,7 +139,7 @@ async def about_system_usage(request: web.Request):
             start_timestamp=period_start,
             duration_seconds=60,
         ),
-        properties=get_machine_properties(),
+        properties=get_machine_properties(request),
     )
 
     return web.json_response(text=usage.json(exclude_none=True))
diff --git a/src/aleph/vm/pool.py b/src/aleph/vm/pool.py
index 58a3e6fa..37c9ea7b 100644
--- a/src/aleph/vm/pool.py
+++ b/src/aleph/vm/pool.py
@@ -18,6 +18,7 @@
 from aleph.vm.controllers.firecracker.snapshot_manager import SnapshotManager
 from aleph.vm.network.hostnetwork import Network, make_ipv6_allocator
 from aleph.vm.orchestrator.metrics import get_execution_records
+from aleph.vm.resources import GpuProperties, get_gpu_info
 from aleph.vm.systemd import SystemDManager
 from aleph.vm.utils import get_message_executable_content
 from aleph.vm.vm_type import VmType
@@ -41,6 +42,7 @@ class VmPool:
     snapshot_manager: SnapshotManager | None = None
     systemd_manager: SystemDManager
     creation_lock: asyncio.Lock
+    gpus: list[GpuProperties] = []
 
     def __init__(self, loop: asyncio.AbstractEventLoop):
         self.executions = {}
@@ -78,6 +80,10 @@ def setup(self) -> None:
             logger.debug("Initializing SnapshotManager ...")
             self.snapshot_manager.run_in_thread()
 
+        if settings.ENABLE_GPU_SUPPORT:
+            logger.debug("Detecting GPU devices ...")
+            self.gpus = get_gpu_info() or []
+
     def teardown(self) -> None:
         """Stop the VM pool and the network properly."""
         if self.network:
@@ -281,6 +287,10 @@ def get_instance_executions(self) -> Iterable[VmExecution]:
         )
         return executions or []
 
+    def get_available_gpus(self) -> Iterable[GpuProperties]:
+        # GPU-to-VM assignment is not tracked yet, so every detected GPU is available.
+        return self.gpus
+
     def get_executions_by_sender(self, payment_type: PaymentType) -> dict[str, dict[str, list[VmExecution]]]:
         """Return all executions of the given type, grouped by sender and by chain."""
         executions_by_sender: dict[str, dict[str, list[VmExecution]]] = {}
diff --git a/src/aleph/vm/resources.py b/src/aleph/vm/resources.py
new file mode 100644
index 00000000..1c0b5a27
--- /dev/null
+++ b/src/aleph/vm/resources.py
@@ -0,0 +1,97 @@
+import subprocess
+from enum import Enum
+from typing import List, Optional
+
+from pydantic import BaseModel, Extra, Field
+
+
+class GpuDeviceClass(str, Enum):
+    VGA_COMPATIBLE_CONTROLLER = "0300"
+    _3D_CONTROLLER = "0302"
+
+
+class GpuProperties(BaseModel):
+    """GPU properties."""
+
+    vendor: str = Field(description="GPU vendor name")
+    device_name: str = Field(description="GPU vendor card name")
+    device_class: GpuDeviceClass = Field(
+        description="GPU device class. Look at https://admin.pci-ids.ucw.cz/read/PD/03"
+    )
+    pci_host: str = Field(description="Host PCI bus for this device")
+    device_id: str = Field(description="GPU vendor & device ids")
+
+    class Config:
+        extra = Extra.forbid
+
+
+def is_gpu_device_class(device_class: str) -> bool:
+    try:
+        GpuDeviceClass(device_class)
+        return True
+    except ValueError:
+        return False
+
+
+def get_vendor_name(vendor_id: str) -> str:
+    match vendor_id:
+        case "10de":
+            return "NVIDIA"
+        case "1002":
+            return "AMD"
+        case "8086":
+            return "Intel"
+        case _:
+            raise ValueError("Device vendor not compatible")
+
+
+def is_kernel_enabled_gpu(pci_host: str) -> bool:
+    # Get detailed info about Kernel drivers used by this device.
+    # Needs to use specifically only the kernel driver vfio-pci to be compatible for QEmu virtualization
+    result = subprocess.run(["lspci", "-s", pci_host, "-nnk"], capture_output=True, text=True, check=True)
+    details = result.stdout.split("\n")
+    if "\tKernel driver in use: vfio-pci" in details:
+        return True
+
+    return False
+
+
+def parse_gpu_device_info(line: str) -> Optional[GpuProperties]:
+    """Parse GPU device info from a line of lspci output."""
+
+    pci_host, device = line.split(' "', maxsplit=1)
+
+    if not is_kernel_enabled_gpu(pci_host):
+        return None
+
+    device_class, device_vendor, device_info = device.split('" "', maxsplit=2)
+    device_class = device_class.split("[", maxsplit=1)[1][:-1]
+
+    if not is_gpu_device_class(device_class):
+        return None
+
+    vendor, vendor_id = device_vendor.split(" [", maxsplit=1)
+    vendor_id = vendor_id[:-1]
+    vendor_name = get_vendor_name(vendor_id)
+    device_name = device_info.split('"', maxsplit=1)[0]
+    device_name, model_id = device_name.split(" [", maxsplit=1)
+    model_id = model_id[:-1]
+    device_id = f"{vendor_id}:{model_id}"
+
+    return GpuProperties(
+        vendor=vendor_name,
+        device_name=device_name,
+        device_class=device_class,
+        pci_host=pci_host,
+        device_id=device_id,
+    )
+
+
+def get_gpu_info() -> Optional[List[GpuProperties]]:
+    """Get GPU info using lspci command."""
+
+    result = subprocess.run(["lspci", "-mmnnn"], capture_output=True, text=True, check=True)
+    gpu_devices = [
+        device for line in result.stdout.split("\n") if line and (device := parse_gpu_device_info(line)) is not None
+    ]
+    return gpu_devices if gpu_devices else None