Skip to content

Commit

Permalink
Merge branch 'main' into fix/hanging-bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
dekkers committed Sep 11, 2024
2 parents 1ef26e1 + 12fdb44 commit f6a8675
Show file tree
Hide file tree
Showing 60 changed files with 2,118 additions and 404 deletions.
27 changes: 13 additions & 14 deletions boefjes/boefjes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
from boefjes.clients.bytes_client import BytesAPIClient
from boefjes.clients.scheduler_client import SchedulerAPIClient, TaskStatus
from boefjes.config import settings
from boefjes.dependencies.plugins import PluginService, get_plugin_service
from boefjes.job_handler import get_environment_settings, get_octopoes_api_connector
from boefjes.job_models import BoefjeMeta
from boefjes.local_repository import LocalPluginRepository, get_local_repository
from boefjes.models import PluginType
from boefjes.plugins.models import _default_mime_types
from octopoes.models import Reference
from octopoes.models.exception import ObjectNotFoundException
Expand Down Expand Up @@ -88,14 +89,15 @@ async def root():
def boefje_input(
task_id: UUID,
scheduler_client: SchedulerAPIClient = Depends(get_scheduler_client),
local_repository: LocalPluginRepository = Depends(get_local_repository),
plugin_service: PluginService = Depends(get_plugin_service),
):
task = get_task(task_id, scheduler_client)

if task.status is not TaskStatus.RUNNING:
raise HTTPException(status_code=403, detail="Task does not have status running")

boefje_meta = create_boefje_meta(task, local_repository)
plugin = plugin_service.by_plugin_id(task.data.boefje.id, task.data.organization)
boefje_meta = create_boefje_meta(task, plugin)

output_url = str(settings.api).rstrip("/") + f"/api/v0/tasks/{task_id}"
return BoefjeInput(task_id=task_id, output_url=output_url, boefje_meta=boefje_meta)
Expand All @@ -107,22 +109,23 @@ def boefje_output(
boefje_output: BoefjeOutput,
scheduler_client: SchedulerAPIClient = Depends(get_scheduler_client),
bytes_client: BytesAPIClient = Depends(get_bytes_client),
local_repository: LocalPluginRepository = Depends(get_local_repository),
plugin_service: PluginService = Depends(get_plugin_service),
):
task = get_task(task_id, scheduler_client)

if task.status is not TaskStatus.RUNNING:
raise HTTPException(status_code=403, detail="Task does not have status running")

boefje_meta = create_boefje_meta(task, local_repository)
plugin = plugin_service.by_plugin_id(task.data.boefje.id, task.data.organization)
boefje_meta = create_boefje_meta(task, plugin)
boefje_meta.started_at = task.modified_at
boefje_meta.ended_at = datetime.now(timezone.utc)

bytes_client.login()
bytes_client.save_boefje_meta(boefje_meta)

if boefje_output.files:
mime_types = _default_mime_types(task.data.boefje)
mime_types = _default_mime_types(boefje_meta.boefje).union(plugin.produces)
for file in boefje_output.files:
raw = base64.b64decode(file.content)
# when supported, also save file.name to Bytes
Expand All @@ -148,14 +151,10 @@ def get_task(task_id, scheduler_client):
return task


def create_boefje_meta(task, local_repository):
boefje = task.data.boefje
boefje_resource = local_repository.by_id(boefje.id)
environment = get_environment_settings(task.data, boefje_resource.schema)

def create_boefje_meta(task, plugin: PluginType) -> BoefjeMeta:
organization = task.data.organization
input_ooi = task.data.input_ooi
arguments = {"oci_arguments": boefje_resource.oci_arguments}
arguments = {"oci_arguments": plugin.oci_arguments}

if input_ooi:
reference = Reference.from_str(input_ooi)
Expand All @@ -168,10 +167,10 @@ def create_boefje_meta(task, local_repository):

boefje_meta = BoefjeMeta(
id=task.id,
boefje=boefje,
boefje=task.data.boefje,
input_ooi=input_ooi,
arguments=arguments,
organization=organization,
environment=environment,
environment=get_environment_settings(task.data, plugin.schema),
)
return boefje_meta
15 changes: 14 additions & 1 deletion boefjes/boefjes/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,18 @@
import structlog
from httpx import HTTPError
from pydantic import ValidationError
from sqlalchemy.orm import sessionmaker

from boefjes.clients.scheduler_client import SchedulerAPIClient, SchedulerClientInterface, Task, TaskStatus
from boefjes.config import Settings
from boefjes.dependencies.plugins import PluginService
from boefjes.job_handler import BoefjeHandler, NormalizerHandler, bytes_api_client
from boefjes.local import LocalBoefjeJobRunner, LocalNormalizerJobRunner
from boefjes.local_repository import get_local_repository
from boefjes.runtime_interfaces import Handler, WorkerManager
from boefjes.sql.config_storage import create_config_storage
from boefjes.sql.db import get_engine
from boefjes.sql.plugin_storage import create_plugin_storage

logger = structlog.get_logger(__name__)

Expand Down Expand Up @@ -256,9 +261,17 @@ def _start_working(

def get_runtime_manager(settings: Settings, queue: WorkerManager.Queue, log_level: str) -> WorkerManager:
local_repository = get_local_repository()

session = sessionmaker(bind=get_engine())()
plugin_service = PluginService(
create_plugin_storage(session),
create_config_storage(session),
local_repository,
)

item_handler: Handler
if queue is WorkerManager.Queue.BOEFJES:
item_handler = BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository, bytes_api_client)
item_handler = BoefjeHandler(LocalBoefjeJobRunner(local_repository), plugin_service, bytes_api_client)
else:
item_handler = NormalizerHandler(
LocalNormalizerJobRunner(local_repository), bytes_api_client, settings.scan_profile_whitelist
Expand Down
23 changes: 16 additions & 7 deletions boefjes/boefjes/clients/bytes_client.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import typing
import uuid
from base64 import b64encode
from collections.abc import Callable, Set
from functools import wraps
from typing import Any
Expand Down Expand Up @@ -99,17 +100,25 @@ def get_normalizer_meta(self, normalizer_meta_id: uuid.UUID) -> NormalizerMeta:

@retry_with_login
def save_raw(self, boefje_meta_id: str, raw: str | bytes, mime_types: Set[str] = frozenset()) -> UUID:
headers = {"content-type": "application/octet-stream"}
headers.update(self.headers)
file_name = "raw" # The name provides a key for all ids returned, so this is arbitrary as we only upload 1 file

response = self._session.post(
"/bytes/raw",
content=raw,
headers=headers,
params={"mime_types": list(mime_types), "boefje_meta_id": boefje_meta_id},
json={
"files": [
{
"name": file_name,
"content": b64encode(raw if isinstance(raw, bytes) else raw.encode()).decode(),
"tags": list(mime_types),
}
]
},
headers=self.headers,
params={"boefje_meta_id": str(boefje_meta_id)},
)

self._verify_response(response)
return UUID(response.json()["id"])

return UUID(response.json()[file_name])

@retry_with_login
def get_raw(self, raw_data_id: str) -> bytes:
Expand Down
2 changes: 1 addition & 1 deletion boefjes/boefjes/dependencies/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def _set_plugin_enabled(self, plugin: PluginType, organisation_id: str) -> Plugi
return plugin


def get_plugin_service(organisation_id: str) -> Iterator[PluginService]:
def get_plugin_service() -> Iterator[PluginService]:
def closure(session: Session):
return PluginService(
create_plugin_storage(session),
Expand Down
41 changes: 31 additions & 10 deletions boefjes/boefjes/job_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@

from boefjes.clients.bytes_client import BytesAPIClient
from boefjes.config import settings
from boefjes.dependencies.plugins import PluginService
from boefjes.docker_boefjes_runner import DockerBoefjesRunner
from boefjes.job_models import BoefjeMeta, NormalizerMeta
from boefjes.local_repository import LocalPluginRepository
from boefjes.plugins.models import _default_mime_types
from boefjes.runtime_interfaces import BoefjeJobRunner, Handler, NormalizerJobRunner
from boefjes.storage.interfaces import SettingsNotConformingToSchema
Expand Down Expand Up @@ -79,26 +79,30 @@ class BoefjeHandler(Handler):
def __init__(
self,
job_runner: BoefjeJobRunner,
local_repository: LocalPluginRepository,
plugin_service: PluginService,
bytes_client: BytesAPIClient,
):
self.job_runner = job_runner
self.local_repository = local_repository
self.plugin_service = plugin_service
self.bytes_client = bytes_client

def handle(self, boefje_meta: BoefjeMeta) -> None:
logger.info("Handling boefje %s[task_id=%s]", boefje_meta.boefje.id, str(boefje_meta.id))

# Check if this boefje is container-native, if so, continue using the Docker boefjes runner
boefje_resource = self.local_repository.by_id(boefje_meta.boefje.id)
if boefje_resource.oci_image:
plugin = self.plugin_service.by_plugin_id(boefje_meta.boefje.id, boefje_meta.organization)

if plugin.type != "boefje":
raise ValueError("Plugin id does not belong to a boefje")

if plugin.oci_image:
logger.info(
"Delegating boefje %s[task_id=%s] to Docker runner with OCI image [%s]",
boefje_meta.boefje.id,
str(boefje_meta.id),
boefje_resource.oci_image,
plugin.oci_image,
)
docker_runner = DockerBoefjesRunner(boefje_resource, boefje_meta)
docker_runner = DockerBoefjesRunner(plugin, boefje_meta)
return docker_runner.run()

if boefje_meta.input_ooi:
Expand All @@ -112,10 +116,10 @@ def handle(self, boefje_meta: BoefjeMeta) -> None:

boefje_meta.arguments["input"] = ooi.serialize()

boefje_meta.runnable_hash = boefje_resource.runnable_hash
boefje_meta.environment = get_environment_settings(boefje_meta, boefje_resource.schema)
boefje_meta.runnable_hash = plugin.runnable_hash
boefje_meta.environment = get_environment_settings(boefje_meta, plugin.schema)

mime_types = _default_mime_types(boefje_meta.boefje)
mime_types = _default_mime_types(boefje_meta.boefje).union(plugin.produces)

logger.info("Starting boefje %s[%s]", boefje_meta.boefje.id, str(boefje_meta.id))

Expand Down Expand Up @@ -222,6 +226,23 @@ def handle(self, normalizer_meta: NormalizerMeta) -> None:
)
)

if (
normalizer_meta.raw_data.boefje_meta.input_ooi # No input OOI means no deletion propagation
and not (results.observations or results.declarations or results.affirmations)
):
# There were no results found, which we still need to signal to Octopoes for deletion propagation

connector.save_observation(
Observation(
method=normalizer_meta.normalizer.id,
source=Reference.from_str(normalizer_meta.raw_data.boefje_meta.input_ooi),
source_method=normalizer_meta.raw_data.boefje_meta.boefje.id,
task_id=normalizer_meta.id,
valid_time=normalizer_meta.raw_data.boefje_meta.ended_at,
result=[],
)
)

corrected_scan_profiles = []
for profile in results.scan_profiles:
profile.level = ScanLevel(
Expand Down
8 changes: 8 additions & 0 deletions boefjes/tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from pathlib import Path
from unittest import mock

import boefjes.api
from boefjes.clients.scheduler_client import TaskStatus
from boefjes.dependencies.plugins import PluginService
from boefjes.local_repository import get_local_repository
from tests.conftest import MockSchedulerClient
from tests.loading import get_dummy_data

Expand All @@ -26,6 +29,11 @@ def test_boefje_input_running(api, tmp_path):
task = scheduler_client.pop_item("boefje")
scheduler_client.patch_task(task.id, TaskStatus.RUNNING)
api.app.dependency_overrides[boefjes.api.get_scheduler_client] = lambda: scheduler_client
api.app.dependency_overrides[boefjes.api.get_plugin_service] = lambda: PluginService(
mock.MagicMock(),
mock.MagicMock(),
get_local_repository(),
)

boefjes.api.get_environment_settings = lambda *_: {}
response = api.get("/api/v0/tasks/70da7d4f-f41f-4940-901b-d98a92e9014b")
Expand Down
15 changes: 13 additions & 2 deletions boefjes/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@

import pytest

from boefjes.dependencies.plugins import PluginService
from boefjes.job_handler import BoefjeHandler
from boefjes.job_models import BoefjeMeta, InvalidReturnValueNormalizer, NormalizerMeta
from boefjes.local import LocalBoefjeJobRunner, LocalNormalizerJobRunner
from boefjes.local_repository import LocalPluginRepository
from boefjes.models import Bit, Boefje, Normalizer, PluginType
from boefjes.runtime_interfaces import JobRuntimeError
from boefjes.sql.config_storage import create_config_storage
from boefjes.sql.plugin_storage import create_plugin_storage
from tests.loading import get_dummy_data


Expand Down Expand Up @@ -106,11 +109,19 @@ def test_handle_boefje_with_exception(self, mock_get_octopoes_api_connector, moc
arguments={},
organization="_dev",
)

local_repository = LocalPluginRepository(Path(__file__).parent / "modules")

mock_session = mock.MagicMock()
mock_session.query.all.return_value = []

plugin_service = PluginService(
create_plugin_storage(mock_session),
create_config_storage(mock_session),
local_repository,
)

with pytest.raises(RuntimeError): # Bytes still saves exceptions before they are reraised
BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository, mock_bytes_api_client).handle(meta)
BoefjeHandler(LocalBoefjeJobRunner(local_repository), plugin_service, mock_bytes_api_client).handle(meta)

mock_bytes_api_client.save_boefje_meta.assert_called_once_with(meta)
mock_bytes_api_client.save_raw.assert_called_once()
Expand Down
15 changes: 14 additions & 1 deletion boefjes/tools/run_boefje.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
from pathlib import Path

import click
from sqlalchemy.orm import sessionmaker

from boefjes.dependencies.plugins import PluginService
from boefjes.sql.config_storage import create_config_storage
from boefjes.sql.db import get_engine
from boefjes.sql.plugin_storage import create_plugin_storage

sys.path.append(str(Path(__file__).resolve().parent.parent))

Expand All @@ -31,7 +37,14 @@ def run_boefje(start_pdb, organization_code, boefje_id, input_ooi):

local_repository = get_local_repository()

handler = BoefjeHandler(LocalBoefjeJobRunner(local_repository), local_repository, bytes_api_client)
session = sessionmaker(bind=get_engine())()
plugin_service = PluginService(
create_plugin_storage(session),
create_config_storage(session),
local_repository,
)

handler = BoefjeHandler(LocalBoefjeJobRunner(local_repository), plugin_service, bytes_api_client)
try:
handler.handle(meta)
except Exception:
Expand Down
14 changes: 12 additions & 2 deletions bytes/bytes/api/models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
from pydantic import BaseModel
from pydantic import BaseModel, Field


class RawResponse(BaseModel):
status: str
message: str
id: str | None = None
ids: list[str] | None = None


class File(BaseModel):
name: str
content: str = Field(..., contentEncoding="base64")
tags: list[str] = Field(default_factory=list)


class BoefjeOutput(BaseModel):
files: list[File] = Field(default_factory=list)
Loading

0 comments on commit f6a8675

Please sign in to comment.