Skip to content

Commit

Permalink
Fix CI slowness and correct execution tests (#686)
Browse files Browse the repository at this point in the history
* Problem: Execution tests were very slow

Solution: This was due to an import in the test app that is somehow
very slow but only during testing.

Haven't figured out why it is slow, but have implemented a workaround
that delays the import so it's not hit during the tests.

* Fix: the 'real' execution tests were testing the fake VM

This was due to settings contamination, which made it run the FAKE_DATA_PROGRAM instead of the real one.

Also correct some things that made the test not run (use load_updated_message
instead of get_message).

* Correct the Workflow name

It was the same name as another workflow, which caused issues in GitHub.

* Execution tests were failing on Python 3.12

Due to a change in the behaviour of unix_socket.wait_closed

* Symlinks don't work, so make a copy instead

* add vm-connector in test runner

* Increase timeout for ci

* Update comment src/aleph/vm/hypervisors/firecracker/microvm.py


Co-authored-by: Hugo Herter <[email protected]>
  • Loading branch information
olethanh and hoh authored Sep 2, 2024
1 parent b74d05f commit 46063fb
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 16 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/test-using-pytest.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "Test on DigitalOcean Droplets"
name: "py.test and linting"

on:
push
Expand All @@ -7,6 +7,12 @@ jobs:
tests-python:
name: "Test Python code"
runs-on: ubuntu-22.04
services:
# Run vm connector for the execution tests
vm-connector:
image: alephim/vm-connector:alpha
ports:
- 4021:4021

steps:
- uses: actions/checkout@v4
Expand Down
6 changes: 5 additions & 1 deletion examples/example_fastapi/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from pydantic import BaseModel, HttpUrl
from starlette.responses import JSONResponse

from aleph.sdk.chains.ethereum import get_fallback_account
from aleph.sdk.chains.remote import RemoteAccount
from aleph.sdk.client import AlephHttpClient, AuthenticatedAlephHttpClient
from aleph.sdk.query.filters import MessageFilter
Expand Down Expand Up @@ -292,6 +291,7 @@ async def post_with_remote_account():
@app.post("/post_a_message_local_account")
async def post_with_local_account():
"""Post a message on the Aleph.im network using a local private key."""
from aleph.sdk.chains.ethereum import get_fallback_account

account = get_fallback_account()

Expand Down Expand Up @@ -326,6 +326,8 @@ async def post_with_local_account():

@app.post("/post_a_file")
async def post_a_file():
from aleph.sdk.chains.ethereum import get_fallback_account

account = get_fallback_account()
file_path = Path(__file__).absolute()
async with AuthenticatedAlephHttpClient(
Expand All @@ -351,6 +353,8 @@ async def post_a_file():
async def sign_a_message():
"""Sign a message using a locally managed account within the virtual machine."""
# FIXME: Broken, fixing this depends on https://github.com/aleph-im/aleph-sdk-python/pull/120
from aleph.sdk.chains.ethereum import get_fallback_account

account = get_fallback_account()
message = {"hello": "world", "chain": "ETH"}
signed_message = await account.sign_message(message)
Expand Down
1 change: 1 addition & 0 deletions runtimes/aleph-debian-11-python/init1.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ async def setup_code_asgi(code: bytes, encoding: Encoding, entrypoint: str) -> A
module = __import__(module_name)
for level in module_name.split(".")[1:]:
module = getattr(module, level)
logger.debug("import done")
app = getattr(module, app_name)
elif encoding == Encoding.plain:
# Execute the code and extract the entrypoint
Expand Down
18 changes: 16 additions & 2 deletions src/aleph/vm/hypervisors/firecracker/microvm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import asyncio
import errno
import json
import logging
import os.path
Expand Down Expand Up @@ -318,7 +319,8 @@ def enable_rootfs(self, path_on_host: Path) -> Path:
def enable_file_rootfs(self, path_on_host: Path) -> Path:
"""Make a rootfs available to the VM.
Creates a symlink to the rootfs file if jailer is in use.
If jailer is in use, try to create a hardlink.
If it is not possible to create a link because the directories are on separate devices, make a copy instead.
"""
if self.use_jailer:
rootfs_filename = Path(path_on_host).name
Expand All @@ -327,6 +329,13 @@ def enable_file_rootfs(self, path_on_host: Path) -> Path:
os.link(path_on_host, f"{self.jailer_path}/{jailer_path_on_host}")
except FileExistsError:
logger.debug(f"File {jailer_path_on_host} already exists")
except OSError as err:
if err.errno == errno.EXDEV:
# Invalid cross-device link: cannot make hard link between partition.
# In this case, copy the file instead:
shutil.copyfile(path_on_host, f"{self.jailer_path}/{jailer_path_on_host}")
else:
raise
return Path(jailer_path_on_host)
else:
return path_on_host
Expand Down Expand Up @@ -489,7 +498,12 @@ async def teardown(self):
if self._unix_socket:
logger.debug("Closing unix socket")
self._unix_socket.close()
await self._unix_socket.wait_closed()
try:
await asyncio.wait_for(self._unix_socket.wait_closed(), 2)
except asyncio.TimeoutError:
# In Python < 3.11 wait_closed() was broken and returned immediatly
# It is supposedly fixed in Python 3.12.1, but it hangs indefinitely during tests.
logger.info("f{self} unix socket closing timeout")

logger.debug("Removing files")
if self.config_file_path:
Expand Down
1 change: 1 addition & 0 deletions src/aleph/vm/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,7 @@ async def get_message(ref: str) -> Union[ProgramMessage, InstanceMessage]:
cache_path = settings.FAKE_INSTANCE_MESSAGE
elif settings.FAKE_DATA_PROGRAM:
cache_path = settings.FAKE_DATA_MESSAGE
logger.debug("Using the fake data message")
else:
cache_path = (Path(settings.MESSAGE_CACHE) / ref).with_suffix(".json")
url = f"{settings.CONNECTOR_URL}/download/message/{ref}"
Expand Down
36 changes: 24 additions & 12 deletions tests/supervisor/test_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,35 @@
import pytest
from aleph_message.models import ItemHash

from aleph.vm.conf import settings
from aleph.vm.conf import Settings, settings
from aleph.vm.controllers.firecracker import AlephFirecrackerProgram
from aleph.vm.models import VmExecution
from aleph.vm.orchestrator import metrics
from aleph.vm.orchestrator.messages import load_updated_message
from aleph.vm.storage import get_message


@pytest.mark.asyncio
async def test_create_execution():
async def test_create_execution(mocker):
"""
Create a new VM execution and check that it starts properly.
"""
mock_settings = Settings()
mocker.patch("aleph.vm.conf.settings", new=mock_settings)
mocker.patch("aleph.vm.storage.settings", new=mock_settings)
mocker.patch("aleph.vm.controllers.firecracker.executable.settings", new=mock_settings)
mocker.patch("aleph.vm.controllers.firecracker.program.settings", new=mock_settings)

settings.FAKE_DATA_PROGRAM = settings.BENCHMARK_FAKE_DATA_PROGRAM
settings.ALLOW_VM_NETWORKING = False
settings.USE_JAILER = False
mock_settings.FAKE_DATA_PROGRAM = mock_settings.BENCHMARK_FAKE_DATA_PROGRAM
mock_settings.ALLOW_VM_NETWORKING = False
mock_settings.USE_JAILER = False

logging.basicConfig(level=logging.DEBUG)
settings.PRINT_SYSTEM_LOGS = True
mock_settings.PRINT_SYSTEM_LOGS = True

# Ensure that the settings are correct and required files present.
settings.setup()
settings.check()
mock_settings.setup()
mock_settings.check()

# The database is required for the metrics and is currently not optional.
engine = metrics.setup_engine()
Expand Down Expand Up @@ -57,6 +63,7 @@ async def test_create_execution():
await execution.stop()


# This test depends on having a vm-connector running on port 4021
@pytest.mark.asyncio
async def test_create_execution_online(vm_hash: ItemHash = None):
"""
Expand All @@ -73,29 +80,34 @@ async def test_create_execution_online(vm_hash: ItemHash = None):
engine = metrics.setup_engine()
await metrics.create_tables(engine)

message = await get_message(ref=vm_hash)
message, original_message = await load_updated_message(vm_hash)

execution = VmExecution(
vm_hash=vm_hash,
message=message.content,
original=message.content,
original=original_message.content,
snapshot_manager=None,
systemd_manager=None,
persistent=False,
)

# Downloading the resources required may take some time, limit it to 10 seconds
await asyncio.wait_for(execution.prepare(), timeout=30)
# Downloading the resources required may take some time, limit it to 120 seconds
# since it is a bit slow in GitHub Actions
await asyncio.wait_for(execution.prepare(), timeout=120)

vm = execution.create(vm_id=3, tap_interface=None)

# Test that the VM is created correctly. It is not started yet.
assert isinstance(vm, AlephFirecrackerProgram)
vm.enable_console = True
vm.fvm.enable_log = True
assert vm.vm_id == 3

await execution.start()
await execution.stop()


# This test depends on having a vm-connector running on port 4021
@pytest.mark.asyncio
async def test_create_execution_legacy():
"""
Expand Down

0 comments on commit 46063fb

Please sign in to comment.