Skip to content

Commit

Permalink
Merge branch 'main' into poc/mula/combined-schedulers
Browse files Browse the repository at this point in the history
* main: (64 commits)
  Bug fix: KAT-alogus parameter is now organization member instead of organization code (#3895)
  Remove sigrid workflows (#3920)
  Updated packages (#3898)
  Fix mula migrations Debian package (#3919)
  Adds loggers to report flow (#3872)
  Add additional check if task already run for report scheduler (#3900)
  Create separate finding for Microsoft RDP port (#3882)
  fix: 🐛 allow boefje completion with 404 (#3893)
  Feature/improve rename bulk modal (#3885)
  Update scheduler folder structure (#3883)
  Translations update from Hosted Weblate (#3870)
  Increase max number of PostgreSQL connections (#3889)
  Fix for task id as valid UUID (#3744)
  Add `auto_calculate_deadline` attribute to Scheduler (#3869)
  Ignore specific url parameters when following location headers (#3856)
  Let mailserver inherit l1 (#3704)
  Change plugins enabling in report flow to checkboxes (#3747)
  Fix rocky katalogus tests and delete unused fixtures (#3884)
  Enable/disable scheduled reports (#3871)
  optimize locking in katalogus.py, reuse available data (#3752)
  ...
  • Loading branch information
jpbruinsslot committed Dec 2, 2024
2 parents 9ccecba + 05f4ab9 commit d602c08
Show file tree
Hide file tree
Showing 293 changed files with 11,157 additions and 6,580 deletions.
3 changes: 0 additions & 3 deletions .env-dist
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ BYTES_DB_URI=postgresql://${BYTES_DB_USER}:${BYTES_DB_PASSWORD}@postgres:5432/${
# --- Octopoes --- #
# See `octopoes/octopoes/config/settings.py`

# Number of Celery workers (for the Octopoes API worker) that need to be started
CELERY_WORKER_CONCURRENCY=${CELERY_WORKER_CONCURRENCY:-4}

# --- Mula --- #
# See `mula/scheduler/config/settings.py`

Expand Down
2 changes: 1 addition & 1 deletion .env-prod
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ SCHEDULER_API=http://scheduler:8000
KEIKO_API=http://keiko:8000
KATALOGUS_API=http://katalogus:8000
XTDB_URI=http://crux:3000
BOEFJE_API=http://boefje:8000
BOEFJES_API=http://boefje:8000

# Bytes uses JWT for authentication
BYTES_API=http://bytes:8000
Expand Down
20 changes: 0 additions & 20 deletions .github/workflows/sigrid-publish.yml

This file was deleted.

23 changes: 0 additions & 23 deletions .github/workflows/sigrid-pullrequest.yml

This file was deleted.

2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,4 @@ An OpenKAT installation requires user accounts for users to be able to log in. T
Security
========

OpenKAT is designed to be secure by default in its production setup. In the development setup some debugging flags are enabled by default and it will not include TLS out of the box. To set up a secure production OpenKAT install, please follow the `Production setup guidelines <https://docs.openkat.nl/installation_and_deployment/install.html#production-environments>`_ and `Hardening guidelines <https://docs.openkat.nl/installation_and_deployment/hardening.html>`_.
OpenKAT is designed to be secure by default in its production setup. In the development setup some debugging flags are enabled by default and it will not include TLS out of the box. To set up a secure production OpenKAT install, please follow the `Production setup guidelines <https://docs.openkat.nl/installation-and-deployment/install.html#production-environments>`_ and `Hardening guidelines <https://docs.openkat.nl/installation-and-deployment/hardening.html>`_.
2 changes: 1 addition & 1 deletion boefjes/boefjes/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def _fill_queue(self, task_queue: Queue, queue_type: WorkerManager.Queue):

# We do not target a specific queue since we start one runtime for all organisations
# and queue ids contain the organisation_id
queues = [q for q in queues if q.id.startswith(queue_type.value)]
queues = [q for q in queues if q.id.startswith(queue_type.value) and q.size > 0]

logger.debug("Found queues: %s", [queue.id for queue in queues])

Expand Down
2 changes: 2 additions & 0 deletions boefjes/boefjes/clients/bytes_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import structlog
from httpx import Client, HTTPStatusError, HTTPTransport, Response

from boefjes.config import settings
from boefjes.job_models import BoefjeMeta, NormalizerMeta, RawDataMeta

BYTES_API_CLIENT_VERSION = "0.3"
Expand Down Expand Up @@ -38,6 +39,7 @@ def __init__(self, base_url: str, username: str, password: str):
base_url=base_url,
headers={"User-Agent": f"bytes-api-client/{BYTES_API_CLIENT_VERSION}"},
transport=(HTTPTransport(retries=6)),
timeout=settings.outgoing_request_timeout,
)

self.credentials = {"username": username, "password": password}
Expand Down
5 changes: 4 additions & 1 deletion boefjes/boefjes/clients/scheduler_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from httpx import Client, HTTPTransport, Response
from pydantic import BaseModel, TypeAdapter

from boefjes.config import settings
from boefjes.job_models import BoefjeMeta, NormalizerMeta


Expand Down Expand Up @@ -57,7 +58,9 @@ def push_item(self, p_item: Task) -> None:

class SchedulerAPIClient(SchedulerClientInterface):
def __init__(self, base_url: str):
self._session = Client(base_url=base_url, transport=HTTPTransport(retries=6))
self._session = Client(
base_url=base_url, transport=HTTPTransport(retries=6), timeout=settings.outgoing_request_timeout
)

@staticmethod
def _verify_response(response: Response) -> None:
Expand Down
2 changes: 2 additions & 0 deletions boefjes/boefjes/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ class Settings(BaseSettings):

logging_format: Literal["text", "json"] = Field("text", description="Logging format")

outgoing_request_timeout: int = Field(30, description="Timeout for outgoing HTTP requests")

model_config = SettingsConfigDict(env_prefix="BOEFJES_")

@classmethod
Expand Down
38 changes: 17 additions & 21 deletions boefjes/boefjes/dependencies/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
from boefjes.storage.interfaces import (
ConfigStorage,
DuplicatePlugin,
IntegrityError,
NotFound,
PluginNotFound,
PluginStorage,
SettingsNotConformingToSchema,
UniqueViolation,
)

logger = structlog.get_logger(__name__)
Expand Down Expand Up @@ -49,9 +49,9 @@ def get_all(self, organisation_id: str) -> list[PluginType]:
return [self._set_plugin_enabled(plugin, organisation_id) for plugin in all_plugins.values()]

def _get_all_without_enabled(self) -> dict[str, PluginType]:
all_plugins = {plugin.id: plugin for plugin in self.local_repo.get_all()}
all_plugins = {plugin.id: plugin for plugin in self.plugin_storage.get_all()}

for plugin in self.plugin_storage.get_all():
for plugin in self.local_repo.get_all(): # Local plugins take precedence
all_plugins[plugin.id] = plugin

return all_plugins
Expand Down Expand Up @@ -94,7 +94,7 @@ def clone_settings_to_organisation(self, from_organisation: str, to_organisation
self.set_enabled_by_id(plugin_id, to_organisation, enabled=True)

def upsert_settings(self, settings: dict, organisation_id: str, plugin_id: str):
self._assert_settings_match_schema(settings, plugin_id)
self._assert_settings_match_schema(settings, plugin_id, organisation_id)
self._put_boefje(plugin_id)

return self.config_storage.upsert(organisation_id, plugin_id, settings=settings)
Expand All @@ -113,29 +113,25 @@ def create_boefje(self, boefje: Boefje) -> None:
try:
with self.plugin_storage as storage:
storage.create_boefje(boefje)
except IntegrityError as error:
raise DuplicatePlugin(self._translate_duplicate_plugin(error.message))
except UniqueViolation as error:
raise DuplicatePlugin(error.field)
except KeyError:
try:
with self.plugin_storage as storage:
storage.create_boefje(boefje)
except IntegrityError as error:
raise DuplicatePlugin(self._translate_duplicate_plugin(error.message))

def _translate_duplicate_plugin(self, error_message):
translations = {"boefje_plugin_id": "id", "boefje_name": "name"}
return next((value for key, value in translations.items() if key in error_message), None)
except UniqueViolation as error:
raise DuplicatePlugin(error.field)

def create_normalizer(self, normalizer: Normalizer) -> None:
try:
self.local_repo.by_id(normalizer.id)
raise DuplicatePlugin("id")
raise DuplicatePlugin(field="id")
except KeyError:
try:
plugin = self.local_repo.by_name(normalizer.name)

if plugin.types == "normalizer":
raise DuplicatePlugin("name")
raise DuplicatePlugin(field="name")
else:
self.plugin_storage.create_normalizer(normalizer)
except KeyError:
Expand Down Expand Up @@ -177,12 +173,12 @@ def delete_settings(self, organisation_id: str, plugin_id: str):
# We don't check the schema anymore because we can provide entries through the global environment as well

def schema(self, plugin_id: str) -> dict | None:
try:
boefje = self.plugin_storage.boefje_by_id(plugin_id)
plugin = self._get_all_without_enabled().get(plugin_id)

return boefje.boefje_schema
except PluginNotFound:
return self.local_repo.schema(plugin_id)
if plugin is None or not isinstance(plugin, Boefje):
return None

return plugin.boefje_schema

def cover(self, plugin_id: str) -> Path:
try:
Expand Down Expand Up @@ -212,8 +208,8 @@ def set_enabled_by_id(self, plugin_id: str, organisation_id: str, enabled: bool)

self.config_storage.upsert(organisation_id, plugin_id, enabled=enabled)

def _assert_settings_match_schema(self, all_settings: dict, plugin_id: str):
schema = self.schema(plugin_id)
def _assert_settings_match_schema(self, all_settings: dict, plugin_id: str, organisation_id: str):
schema = self.by_plugin_id(plugin_id, organisation_id).boefje_schema

if schema: # No schema means that there is nothing to assert
try:
Expand Down
31 changes: 24 additions & 7 deletions boefjes/boefjes/job_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import traceback
from collections.abc import Callable
from datetime import datetime, timezone
from functools import cache
from typing import cast

import httpx
Expand Down Expand Up @@ -33,26 +34,42 @@


def get_octopoes_api_connector(org_code: str) -> OctopoesAPIConnector:
return OctopoesAPIConnector(str(settings.octopoes_api), org_code)
return OctopoesAPIConnector(str(settings.octopoes_api), org_code, timeout=settings.outgoing_request_timeout)


@cache
def boefje_env_variables() -> dict:
"""
Return all environment variables that start with BOEFJE_. The returned
keys have the BOEFJE_ prefix removed.
"""

boefje_variables = {}
for key, value in os.environ.items():
if key.startswith("BOEFJE_"):
boefje_variables[key.removeprefix("BOEFJE_")] = value

return boefje_variables


def get_system_env_settings_for_boefje(allowed_keys: list[str]) -> dict:
return {key: value for key, value in boefje_env_variables().items() if key in allowed_keys}


def get_environment_settings(boefje_meta: BoefjeMeta, schema: dict | None = None) -> dict[str, str]:
try:
katalogus_api = str(settings.katalogus_api).rstrip("/")
response = httpx.get(
f"{katalogus_api}/v1/organisations/{boefje_meta.organization}/{boefje_meta.boefje.id}/settings", timeout=30
f"{katalogus_api}/v1/organisations/{boefje_meta.organization}/{boefje_meta.boefje.id}/settings",
timeout=settings.outgoing_request_timeout,
)
response.raise_for_status()
except HTTPError:
logger.exception("Error getting environment settings")
raise

allowed_keys = schema.get("properties", []) if schema else []
new_env = {
key.split("BOEFJE_", 1)[1]: value
for key, value in os.environ.items()
if key.startswith("BOEFJE_") and key in allowed_keys
}
new_env = get_system_env_settings_for_boefje(allowed_keys)

settings_from_katalogus = response.json()

Expand Down
2 changes: 1 addition & 1 deletion boefjes/boefjes/plugins/kat_answer_parser/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]:

bit_id = data["schema"].removeprefix("/bit/")

yield Config(ooi=input_ooi["primary_key"], bit_id=bit_id, config=data["answer"])
yield Config(ooi=data["answer_ooi"], bit_id=bit_id, config=data["answer"])
4 changes: 2 additions & 2 deletions boefjes/boefjes/plugins/kat_fierce/boefje.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"id": "fierce",
"name": "Fierce",
"description": "Perform DNS reconnaissance using Fierce. Helps to locate non-contiguous IP space and hostnames against specified hostnames. No exploitation is performed.",
"description": "Perform DNS reconnaissance using Fierce. Helps to locate non-contiguous IP space and hostnames against specified hostnames. No exploitation is performed. Beware if your DNS is managed by an external party. This boefjes performs a brute force attack against the name server.",
"consumes": [
"Hostname"
],
"scan_level": 1
"scan_level": 3
}
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@
"impact": "System administrator ports should only be reachable from safe and known locations to reduce attack surface.",
"recommendation": "Determine if this port should be reachable from the identified location. Limit access to reduce the attack surface if necessary."
},
"KAT-REMOTE-DESKTOP-PORT": {
"description": "An open Microsoft Remote Desktop Protocol (RDP) port was detected.",
"source": "https://www.cloudflare.com/en-gb/learning/access-management/rdp-security-risks/",
"risk": "medium",
"impact": "Remote desktop ports are often the root cause in ransomware attacks, due to weak password usage, outdated software or insecure configurations.",
"recommendation": "Disable the Microsoft RDP service on port 3389 if this is publicly reachable. Add additional security layers, such as VPN access if these ports do require to be enabled to limit the attack surface."
},
"KAT-OPEN-DATABASE-PORT": {
"description": "A database port is open.",
"source": "https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"id": "kat_manual_ooi",
"name": "Manual OOI normalizer",
"description": "Parses manually added objects.",
"consumes": [
"manual/ooi"
],
Expand Down
2 changes: 1 addition & 1 deletion boefjes/boefjes/plugins/kat_shodan_internetdb/boefje.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"id": "shodan_internetdb",
"name": "'Shodan InternetDB",
"name": "Shodan InternetDB",
"description": "Use Shodan InternetDB to find open ports with vulnerabilities that are found on an IP.",
"consumes": [
"IPAddressV4",
Expand Down
7 changes: 4 additions & 3 deletions boefjes/boefjes/plugins/kat_shodan_internetdb/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ipaddress import ip_address

import requests
import httpx

from boefjes.job_models import BoefjeMeta

Expand All @@ -12,7 +12,8 @@ def run(boefje_meta: BoefjeMeta) -> list[tuple[set, bytes | str]]:
ip = boefje_meta.arguments["input"]["address"]
if ip_address(ip).is_private:
return [({"info/boefje"}, "Skipping private IP address")]
response = requests.get(f"https://internetdb.shodan.io/{ip}", timeout=REQUEST_TIMEOUT)
response.raise_for_status()
response = httpx.get(f"https://internetdb.shodan.io/{ip}", timeout=REQUEST_TIMEOUT)
if response.status_code != httpx.codes.NOT_FOUND:
response.raise_for_status()

return [(set(), response.content)]
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"id": "kat_shodan_internetdb_normalize",
"name": "Shodan InternetDB normalizer",
"description": "Parses Shodan InternetDB into findings.",
"consumes": [
"boefje/shodan_internetdb"
],
Expand Down
4 changes: 2 additions & 2 deletions boefjes/boefjes/plugins/pdio_subfinder/boefje.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"id": "pdio-subfinder",
"name": "Subfinder",
"description": "A subdomain discovery tool. (projectdiscovery.io)",
"description": "A subdomain discovery tool. (projectdiscovery.io). Returns valid subdomains for websites using passive online sources. Beware that many of the online sources require their own API key to get more accurate data.",
"consumes": [
"Hostname"
],
"environment_keys": [
"SUBFINDER_RATE_LIMIT",
"SUBFINDER_VERSION"
],
"scan_level": 2
"scan_level": 1
}
1 change: 1 addition & 0 deletions boefjes/boefjes/plugins/pdio_subfinder/normalizer.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"id": "pdio-subfinder-normalizer",
"name": "PDIO subfinder",
"description": "Parses ProjectDiscovery subfinder data for finding subdomains.",
"consumes": [
"boefje/pdio-subfinder"
],
Expand Down
4 changes: 2 additions & 2 deletions boefjes/boefjes/sql/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sqlalchemy.orm import Session
from typing_extensions import Self

from boefjes.storage.interfaces import IntegrityError, StorageError
from boefjes.storage.interfaces import IntegrityError, StorageError, UniqueViolation

logger = structlog.get_logger(__name__)

Expand Down Expand Up @@ -40,7 +40,7 @@ def __exit__(self, exc_type: type[Exception], exc_value: str, exc_traceback: str
self.session.commit()
except exc.IntegrityError as e:
if isinstance(e.orig, errors.UniqueViolation):
raise IntegrityError(str(e.orig))
raise UniqueViolation(str(e.orig))
raise IntegrityError("An integrity error occurred") from e
except exc.DatabaseError as e:
raise StorageError("A storage error occurred") from e
Expand Down
Loading

0 comments on commit d602c08

Please sign in to comment.