Skip to content

Commit

Permalink
Fix: Solved compilation issue and fixed gpu logic.
Browse files Browse the repository at this point in the history
  • Loading branch information
nesitor committed Dec 5, 2024
1 parent 24516a2 commit 891ed73
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 10 deletions.
2 changes: 1 addition & 1 deletion packaging/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ debian-package-code:
cp -r ../examples/data ./aleph-vm/opt/aleph-vm/examples/data
mkdir -p ./aleph-vm/opt/aleph-vm/examples/volumes
# Pinning the protobuf dependency version to avoid CI errors, as version 5.29.0 has a compilation issue
pip3 install --progress-bar off --target ./aleph-vm/opt/aleph-vm/ 'aleph-message==0.5.0' 'eth-account==0.10' 'sentry-sdk==1.31.0' 'qmp==1.1.0' 'aleph-superfluid~=0.2.1' 'sqlalchemy[asyncio]>=2.0' 'aiosqlite==0.19.0' 'alembic==1.13.1' 'aiohttp_cors==0.7.0' 'pyroute2==0.7.12' 'python-cpuid==0.1.0' 'solathon==1.0.2' 'protobuf==5.28.3'
pip3 install --progress-bar off --target ./aleph-vm/opt/aleph-vm/ 'aleph-message@git+https://github.com/aleph-im/aleph-message@andres-feature-add_gpu_requirement' 'eth-account==0.10' 'sentry-sdk==1.31.0' 'qmp==1.1.0' 'aleph-superfluid~=0.2.1' 'sqlalchemy[asyncio]>=2.0' 'aiosqlite==0.19.0' 'alembic==1.13.1' 'aiohttp_cors==0.7.0' 'pyroute2==0.7.12' 'python-cpuid==0.1.0' 'solathon==1.0.2' 'protobuf==5.28.3'
python3 -m compileall ./aleph-vm/opt/aleph-vm/

debian-package-resources: firecracker-bins vmlinux download-ipfs-kubo target/bin/sevctl
Expand Down
16 changes: 9 additions & 7 deletions src/aleph/vm/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
ItemHash,
ProgramContent,
)
from aleph_message.models.execution.environment import HypervisorType
from aleph_message.models.execution.environment import HypervisorType, GpuProperties

from aleph.vm.conf import settings
from aleph.vm.controllers.firecracker.executable import AlephFirecrackerExecutable
Expand Down Expand Up @@ -74,7 +74,7 @@ class VmExecution:
AlephProgramResources | AlephInstanceResources | AlephQemuResources | AlephQemuConfidentialInstance | None
) = None
vm: AlephFirecrackerExecutable | AlephQemuInstance | AlephQemuConfidentialInstance | None = None
gpus: List[HostGPU]
gpus: List[HostGPU] = []

times: VmExecutionTimes

Expand Down Expand Up @@ -223,11 +223,13 @@ async def prepare(self) -> None:

def prepare_gpus(self, available_gpus: List[GpuDevice]) -> None:
gpus = []
for gpu in self.message.requirements.gpu:
for available_gpu in available_gpus:
if available_gpu.device_id == gpu.device_id:
gpus.append(HostGPU(pci_host=available_gpu.pci_host))
break
if self.message.requirements and self.message.requirements.gpu:
for gpu in self.message.requirements.gpu:
gpu = GpuProperties.parse_obj(gpu)
for available_gpu in available_gpus:
if available_gpu.device_id == gpu.device_id:
gpus.append(HostGPU(pci_host=available_gpu.pci_host))
break
self.gpus = gpus

def uses_gpu(self, pci_host: str) -> bool:
Expand Down
8 changes: 6 additions & 2 deletions src/aleph/vm/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,13 @@ def get_instance_executions(self) -> Iterable[VmExecution]:
def get_available_gpus(self) -> List[GpuDevice]:
available_gpus = []
for gpu in self.gpus:
used = False
for _, execution in self.executions.items():
if not execution.uses_gpu(gpu.pci_host):
available_gpus.append(gpu)
if execution.uses_gpu(gpu.pci_host):
used = True
break
if not used:
available_gpus.append(gpu)
return available_gpus

def get_executions_by_sender(self, payment_type: PaymentType) -> dict[str, dict[str, list[VmExecution]]]:
Expand Down

0 comments on commit 891ed73

Please sign in to comment.