From a9f85ebbb7311f74ce900aee543f39ed4dfc3581 Mon Sep 17 00:00:00 2001 From: "Andres D. Molins" Date: Tue, 3 Dec 2024 22:35:45 +0100 Subject: [PATCH] Fix: Solved code quality issues --- .../vm/controllers/firecracker/executable.py | 5 ----- src/aleph/vm/controllers/qemu/instance.py | 11 ++++------- .../vm/controllers/qemu_confidential/instance.py | 5 +---- src/aleph/vm/hypervisors/qemu/qemuvm.py | 4 ++-- .../vm/hypervisors/qemu_confidential/qemuvm.py | 2 +- src/aleph/vm/models.py | 14 ++++++-------- src/aleph/vm/pool.py | 16 +++++++--------- src/aleph/vm/resources.py | 6 ++++++ 8 files changed, 27 insertions(+), 36 deletions(-) diff --git a/src/aleph/vm/controllers/firecracker/executable.py b/src/aleph/vm/controllers/firecracker/executable.py index 840dcb9b..cbbad03c 100644 --- a/src/aleph/vm/controllers/firecracker/executable.py +++ b/src/aleph/vm/controllers/firecracker/executable.py @@ -72,11 +72,6 @@ class HostVolume: read_only: bool -@dataclass -class HostGPU: - pci_host: str - - @dataclass class BaseConfiguration: vm_hash: ItemHash diff --git a/src/aleph/vm/controllers/qemu/instance.py b/src/aleph/vm/controllers/qemu/instance.py index 51392c5f..715be2bf 100644 --- a/src/aleph/vm/controllers/qemu/instance.py +++ b/src/aleph/vm/controllers/qemu/instance.py @@ -5,7 +5,7 @@ from asyncio import Task from asyncio.subprocess import Process from pathlib import Path -from typing import Generic, TypeVar, List +from typing import Generic, TypeVar import psutil from aleph_message.models import ItemHash @@ -17,20 +17,20 @@ from aleph.vm.controllers.configuration import ( Configuration, HypervisorType, + QemuGPU, QemuVMConfiguration, QemuVMHostVolume, - QemuGPU, save_controller_configuration, ) from aleph.vm.controllers.firecracker.executable import ( AlephFirecrackerResources, VmSetupError, - HostGPU, ) from aleph.vm.controllers.interface import AlephVmControllerInterface from aleph.vm.controllers.qemu.cloudinit import CloudInitMixin from aleph.vm.network.firewall import teardown_nftables_for_vm from aleph.vm.network.interfaces import TapInterface +from aleph.vm.resources import HostGPU from aleph.vm.storage import get_rootfs_base_path from aleph.vm.utils import HostNotFoundError, ping, run_in_subprocess @@ -204,10 +204,7 @@ async def configure(self): ) for volume in self.resources.volumes ], - gpus=[ - QemuGPU(pci_host=gpu.pci_host) - for gpu in self.resources.gpus - ] + gpus=[QemuGPU(pci_host=gpu.pci_host) for gpu in self.resources.gpus], ) configuration = Configuration( diff --git a/src/aleph/vm/controllers/qemu_confidential/instance.py b/src/aleph/vm/controllers/qemu_confidential/instance.py index 2908e87e..37986b10 100644 --- a/src/aleph/vm/controllers/qemu_confidential/instance.py +++ b/src/aleph/vm/controllers/qemu_confidential/instance.py @@ -127,10 +127,7 @@ async def configure(self): ) for volume in self.resources.volumes ], - gpus=[ - QemuGPU(pci_host=gpu.pci_host) - for gpu in self.resources.gpus - ] + gpus=[QemuGPU(pci_host=gpu.pci_host) for gpu in self.resources.gpus], ) configuration = Configuration( diff --git a/src/aleph/vm/hypervisors/qemu/qemuvm.py b/src/aleph/vm/hypervisors/qemu/qemuvm.py index d6c9274d..9b95f187 100644 --- a/src/aleph/vm/hypervisors/qemu/qemuvm.py +++ b/src/aleph/vm/hypervisors/qemu/qemuvm.py @@ -7,7 +7,7 @@ import qmp from systemd import journal -from aleph.vm.controllers.configuration import QemuVMConfiguration, QemuGPU +from aleph.vm.controllers.configuration import QemuGPU, QemuVMConfiguration from aleph.vm.controllers.qemu.instance import logger @@ -107,7 +107,7 @@ async def start( # Use host-phys-bits-limit argument for GPU support. TODO: Investigate how to get the correct bits size # "-cpu", - "host,host-phys-bits-limit=0x28" + "host,host-phys-bits-limit=0x28", # Uncomment for debug # "-serial", "telnet:localhost:4321,server,nowait", # "-snapshot", # Do not save anything to disk diff --git a/src/aleph/vm/hypervisors/qemu_confidential/qemuvm.py b/src/aleph/vm/hypervisors/qemu_confidential/qemuvm.py index 868c12c1..1ef33e40 100644 --- a/src/aleph/vm/hypervisors/qemu_confidential/qemuvm.py +++ b/src/aleph/vm/hypervisors/qemu_confidential/qemuvm.py @@ -107,7 +107,7 @@ async def start( # AMD have different methods) and properly boot. # Use host-phys-bits-limit argument for GPU support. TODO: Investigate how to get the correct bits size "-cpu", - "host,host-phys-bits-limit=0x28" + "host,host-phys-bits-limit=0x28", # Uncomment following for debug # "-serial", "telnet:localhost:4321,server,nowait", # "-snapshot", # Do not save anything to disk diff --git a/src/aleph/vm/models.py b/src/aleph/vm/models.py index 97e846d6..b22c3d39 100644 --- a/src/aleph/vm/models.py +++ b/src/aleph/vm/models.py @@ -16,7 +16,7 @@ from aleph_message.models.execution.environment import HypervisorType from aleph.vm.conf import settings -from aleph.vm.controllers.firecracker.executable import AlephFirecrackerExecutable, HostGPU +from aleph.vm.controllers.firecracker.executable import AlephFirecrackerExecutable from aleph.vm.controllers.firecracker.instance import AlephInstanceResources from aleph.vm.controllers.firecracker.program import ( AlephFirecrackerProgram, @@ -38,7 +38,7 @@ ) from aleph.vm.orchestrator.pubsub import PubSub from aleph.vm.orchestrator.vm import AlephFirecrackerInstance -from aleph.vm.resources import GpuDevice +from aleph.vm.resources import GpuDevice, HostGPU from aleph.vm.systemd import SystemDManager from aleph.vm.utils import create_task_log_exceptions, dumps_for_json @@ -70,7 +70,9 @@ class VmExecution: vm_hash: ItemHash original: ExecutableContent message: ExecutableContent - resources: AlephProgramResources | AlephInstanceResources | AlephQemuResources | AlephQemuConfidentialInstance | None = None + resources: ( + AlephProgramResources | AlephInstanceResources | AlephQemuResources | AlephQemuConfidentialInstance | None + ) = None vm: AlephFirecrackerExecutable | AlephQemuInstance | AlephQemuConfidentialInstance | None = None gpus: List[HostGPU] @@ -224,11 +226,7 @@ def prepare_gpus(self, available_gpus: List[GpuDevice]) -> None: for gpu in self.message.requirements.gpu: for available_gpu in available_gpus: if available_gpu.device_id == gpu.device_id: - gpus.append( - HostGPU( - pci_host=available_gpu.pci_host - ) - ) + gpus.append(HostGPU(pci_host=available_gpu.pci_host)) break self.gpus = gpus diff --git a/src/aleph/vm/pool.py b/src/aleph/vm/pool.py index 8b7cbef2..a4f2f2f9 100644 --- a/src/aleph/vm/pool.py +++ b/src/aleph/vm/pool.py @@ -19,7 +19,7 @@ from aleph.vm.controllers.firecracker.snapshot_manager import SnapshotManager from aleph.vm.network.hostnetwork import Network, make_ipv6_allocator from aleph.vm.orchestrator.metrics import get_execution_records -from aleph.vm.resources import GpuDevice, get_gpu_devices +from aleph.vm.resources import GpuDevice, HostGPU, get_gpu_devices from aleph.vm.systemd import SystemDManager from aleph.vm.utils import get_message_executable_content from aleph.vm.vm_type import VmType @@ -297,14 +297,12 @@ def get_instance_executions(self) -> Iterable[VmExecution]: return executions or [] def get_available_gpus(self) -> List[GpuDevice]: - available_gpus = ( - gpu - for gpu in self.gpus - for _, execution in self.executions.items() - if (isinstance(execution.resources, AlephQemuResources) or isinstance(execution.resources, AlephQemuConfidentialResources)) and not execution.uses_device_gpu(gpu.pci_host) - ) - - return available_gpus or [] + available_gpus = [] + for gpu in self.gpus: + for _, execution in self.executions.items(): + if not execution.uses_gpu(gpu.pci_host): + available_gpus.append(gpu) + return available_gpus def get_executions_by_sender(self, payment_type: PaymentType) -> dict[str, dict[str, list[VmExecution]]]: """Return all executions of the given type, grouped by sender and by chain.""" diff --git a/src/aleph/vm/resources.py b/src/aleph/vm/resources.py index 5532c226..b237dc0e 100644 --- a/src/aleph/vm/resources.py +++ b/src/aleph/vm/resources.py @@ -1,4 +1,5 @@ import subprocess +from dataclasses import dataclass from enum import Enum from typing import List, Optional @@ -6,6 +7,11 @@ from pydantic import Extra, Field +@dataclass +class HostGPU: + pci_host: str + + class GpuDeviceClass(str, Enum): VGA_COMPATIBLE_CONTROLLER = "0300" _3D_CONTROLLER = "0302"