Skip to content

Commit

Permalink
Merge branch 'main' into feature/add-logging-to-reports
Browse files Browse the repository at this point in the history
  • Loading branch information
underdarknl authored Nov 28, 2024
2 parents 167f450 + dad9c3d commit 915e2cc
Show file tree
Hide file tree
Showing 117 changed files with 2,852 additions and 1,211 deletions.
2 changes: 1 addition & 1 deletion boefjes/boefjes/plugins/kat_answer_parser/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ def run(input_ooi: dict, raw: bytes) -> Iterable[NormalizerOutput]:

bit_id = data["schema"].removeprefix("/bit/")

yield Config(ooi=input_ooi["primary_key"], bit_id=bit_id, config=data["answer"])
yield Config(ooi=data["answer_ooi"], bit_id=bit_id, config=data["answer"])
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@
"impact": "System administrator ports should only be reachable from safe and known locations to reduce attack surface.",
"recommendation": "Determine if this port should be reachable from the identified location. Limit access to reduce the attack surface if necessary."
},
"KAT-REMOTE-DESKTOP-PORT": {
"description": "An open Microsoft Remote Desktop Protocol (RDP) port was detected.",
"source": "https://www.cloudflare.com/en-gb/learning/access-management/rdp-security-risks/",
"risk": "medium",
"impact": "Remote desktop ports are often the root cause in ransomware attacks, due to weak password usage, outdated software or insecure configurations.",
"recommendation": "Disable the Microsoft RDP service on port 3389 if this is publicly reachable. Add additional security layers, such as VPN access if these ports do require to be enabled to limit the attack surface."
},
"KAT-OPEN-DATABASE-PORT": {
"description": "A database port is open.",
"source": "https://en.wikipedia.org/wiki/List_of_TCP_and_UDP_port_numbers",
Expand Down
7 changes: 4 additions & 3 deletions boefjes/boefjes/plugins/kat_shodan_internetdb/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from ipaddress import ip_address

import requests
import httpx

from boefjes.job_models import BoefjeMeta

Expand All @@ -12,7 +12,8 @@ def run(boefje_meta: BoefjeMeta) -> list[tuple[set, bytes | str]]:
ip = boefje_meta.arguments["input"]["address"]
if ip_address(ip).is_private:
return [({"info/boefje"}, "Skipping private IP address")]
response = requests.get(f"https://internetdb.shodan.io/{ip}", timeout=REQUEST_TIMEOUT)
response.raise_for_status()
response = httpx.get(f"https://internetdb.shodan.io/{ip}", timeout=REQUEST_TIMEOUT)
if response.status_code != httpx.codes.NOT_FOUND:
response.raise_for_status()

return [(set(), response.content)]
4 changes: 2 additions & 2 deletions boefjes/tests/plugins/test_answer_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ def test_config_yielded(normalizer_runner):
normalizer_runner.run(meta, bytes(raw, "UTF-8"))

with pytest.raises(ValidationError):
raw = '{"schema": "/bit/port-classification-ip", "answer": [{"key": "test"}]}'
raw = '{"schema": "/bit/port-classification-ip", "answer": [{"key": "test"}], "answer_ooi": "Network|internet"}'
normalizer_runner.run(meta, bytes(raw, "UTF-8"))

raw = '{"schema": "/bit/port-classification-ip", "answer": {"key": "test"}}'
raw = '{"schema": "/bit/port-classification-ip", "answer": {"key": "test"}, "answer_ooi": "Network|internet"}'
output = normalizer_runner.run(meta, bytes(raw, "UTF-8"))

assert len(output.observations) == 1
Expand Down
3 changes: 3 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ services:
test: ["CMD", "gosu", "postgres", "pg_isready"]
interval: 10s
retries: 10
# Django runserver does not limit the number of threads. We need to increase
# the maximum number of connection to make sure that we don't hit the limit.
command: -c max_connections=500
volumes:
- postgres-data:/var/lib/postgresql/data
- ./init-user-db.sh:/docker-entrypoint-initdb.d/init-user-db.sh
Expand Down
78 changes: 46 additions & 32 deletions docs/source/developer-documentation/mula.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ The scheduler is responsible for scheduling the execution of tasks. The
execution of those tasks are being prioritized / scored by a ranker. The tasks
are then pushed onto a priority queue.

Within the project of KAT, the scheduler is tasked with scheduling boefje and
normalizer tasks.
Within the project of KAT, the scheduler is tasked with scheduling boefje,
normalizer, and report tasks.

## Architecture

Expand All @@ -20,12 +20,12 @@ rankers.

### Stack, packages and libraries

| Name | Version | Description |
| ---------- | -------- | ------------------- |
| Python | ^3.8 | |
| FastAPI | ^0.109.0 | Used for api server |
| SQLAlchemy | ^2.0.23 | |
| pydantic | ^2.5.2 | |
| Name | Version | Description |
| ---------- | --------- | ------------------- |
| Python | ^3.10 | |
| FastAPI | ^0.1115.2 | Used for api server |
| SQLAlchemy | ^2.0.23 | |
| pydantic | ^2.7.2 | |

### External services

Expand All @@ -41,36 +41,34 @@ The scheduler interfaces with the following services:
### Project structure

```
$ tree -L 3 --dirsfirst
.
├── docs/ # additional documentation
├── scheduler/ # scheduler python module
│   ├── config # application settings configuration
│   ├── connectors # external service connectors
│   │   ├── listeners # channel/socket listeners
│   │   ├── services # rest api connectors
│   │   └── __init__.py
│   ├── context/ # shared application context
│   ├── models/ # internal model definitions
│   ├── queues/ # priority queue
│   ├── rankers/ # priority/score calculations
│   ├── storage/ # data abstraction layer
│   ├── schedulers/ # schedulers
│   ├── server/ # http rest api server
│   ├── utils/ # common utility functions
│ ├── clients/ # external service clients
│ │ ├── amqp/ # rabbitmq clients
│ │ └── http/ # http clients
│ ├── context/ # shared application context, and configuration
│ ├── models/ # internal model definitions
│ ├── schedulers/ # schedulers
│ │ ├── queue/ # priority queue
│ │ ├── rankers/ # priority/score calculations
│ │ └── schedulers/ # schedulers implementations
│ ├── server/ # http rest api server
│ ├── storage/ # data abstraction layer
│ │ ├── migrations/ # database migrations
│ │ └── stores/ # data stores
│ ├── utils/ # common utility functions
│   ├── __init__.py
│   ├── __main__.py
│   ├── app.py # kat scheduler app implementation
│   ├── app.py # OpenKAT scheduler app implementation
│   └── version.py # version information
└─── tests/
   ├── factories/
   ├── integration/
   ├── mocks/
   ├── scripts/
   ├── simulation/
   ├── unit/
   ├── utils/
   └── __init__.py
└─── tests/ # test suite
   ├── factories/ # factories for test data
   ├── integration/ # integration tests
   ├── mocks/ # mocks for testing
   ├── simulation/ # simulation tests
   ├── unit/ # unit tests
   └── utils/ # utility functions for tests
```

## Running / Developing
Expand All @@ -93,6 +91,22 @@ scheduler. See the environment settings section under Installation and Deploymen
$ docker compose up --build -d scheduler
```

### Migrations

Creating a migration:

```
# Run migrations
make revid=0008 m="add_task_schedule" migrations
```

Sometimes it is helpful to run the migrations in a clean environment:

```
docker system prune
docker volume prune --force --all
```

## Testing

```
Expand Down
6 changes: 3 additions & 3 deletions mula/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ cov: ## Generate a test coverage report

sql: ## Generate raw sql for the migrations.
docker compose run --rm scheduler \
alembic --config /app/scheduler/scheduler/alembic.ini \
alembic --config /app/scheduler/scheduler/storage/migrations/alembic.ini \
upgrade $(rev1):$(rev2) --sql

migrations: ## Create migration.
Expand All @@ -84,14 +84,14 @@ else ifeq ($(revid),)
$(HIDE) (echo "ERROR: Specify a message with m={message} and a rev-id with revid={revid} (e.g. 0001 etc.)"; exit 1)
else
docker compose run --rm scheduler \
alembic --config /app/scheduler/scheduler/alembic.ini \
alembic --config /app/scheduler/scheduler/storage/migrations/alembic.ini \
revision --autogenerate \
-m "$(m)" --rev-id "$(revid)"
endif

migrate: ## Run migrations using alembic.
docker compose run scheduler \
alembic --config /app/scheduler/scheduler/alembic.ini \
alembic --config /app/scheduler/scheduler/storage/migrations/alembic.ini \
upgrade head

##
Expand Down
2 changes: 1 addition & 1 deletion mula/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -e
shopt -s nocasematch

if [ "$DATABASE_MIGRATION" = "1" ] || [[ $DATABASE_MIGRATION == "true" ]]; then
python -m alembic --config /app/scheduler/scheduler/alembic.ini upgrade head
python -m alembic --config /app/scheduler/scheduler/storage/migrations/alembic.ini upgrade head
fi

exec "$@"
9 changes: 4 additions & 5 deletions mula/scheduler/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
import structlog
from opentelemetry import trace

from scheduler import context, schedulers, server
from scheduler.connectors.errors import ExternalServiceError
from scheduler import clients, context, schedulers, server
from scheduler.utils import thread

tracer = trace.get_tracer(__name__)
Expand Down Expand Up @@ -83,7 +82,7 @@ def monitor_organisations(self) -> None:
}
try:
orgs = self.ctx.services.katalogus.get_organisations()
except ExternalServiceError:
except clients.errors.ExternalServiceError:
self.logger.exception("Failed to get organisations from Katalogus")
return

Expand Down Expand Up @@ -117,7 +116,7 @@ def monitor_organisations(self) -> None:
for org_id in additions:
try:
org = self.ctx.services.katalogus.get_organisation(org_id)
except ExternalServiceError as e:
except clients.errors.ExternalServiceError as e:
self.logger.error("Failed to get organisation from Katalogus", error=e, org_id=org_id)
continue

Expand Down Expand Up @@ -166,7 +165,7 @@ def start_schedulers(self) -> None:
# Initialize the schedulers
try:
orgs = self.ctx.services.katalogus.get_organisations()
except ExternalServiceError as e:
except clients.errors.ExternalServiceError as e:
self.logger.error("Failed to get organisations from Katalogus", error=e)
return

Expand Down
6 changes: 6 additions & 0 deletions mula/scheduler/clients/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from .amqp.raw_data import RawData
from .amqp.scan_profile import ScanProfileMutation
from .http.external.bytes import Bytes
from .http.external.katalogus import Katalogus
from .http.external.octopoes import Octopoes
from .http.external.rocky import Rocky
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions mula/scheduler/clients/http/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .client import HTTPClient
from .service import HTTPService
77 changes: 77 additions & 0 deletions mula/scheduler/clients/http/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import socket
import time
from collections.abc import Callable

import httpx
import structlog


class HTTPClient:
"""A class that provides methods to check if a host is available and healthy."""

def __init__(self):
self.logger = structlog.get_logger(self.__class__.__name__)

def is_host_available(self, hostname: str, port: int) -> bool:
"""Check if the host is available.
Args:
hostname: A string representing the hostname.
port: An integer representing the port number.
Returns:
A boolean
"""
try:
socket.create_connection((hostname, port))
return True
except OSError:
return False

def is_host_healthy(self, host: str, health_endpoint: str) -> bool:
"""Check if host is healthy by inspecting the host's health endpoint.
Args:
host: A string representing the hostname.
health_endpoint: A string representing the health endpoint.
Returns:
A boolean
"""
try:
url = f"{host}/{health_endpoint}"
response = httpx.get(url, timeout=5)
healthy = response.json().get("healthy")
return healthy
except httpx.HTTPError as exc:
self.logger.warning("Exception: %s", exc)
return False

def retry(self, func: Callable, *args, **kwargs) -> bool:
"""Retry a function until it returns True.
Args:
func: A python callable that needs to be retried.
Returns:
A boolean signifying whether or not the func was executed successfully.
"""
for i in range(10):
if func(*args, **kwargs):
self.logger.info(
"Function %s, executed successfully. Retry count: %d",
func.__name__,
i,
name=func.__name__,
args=args,
kwargs=kwargs,
)
return True

self.logger.warning(
"Function %s, failed. Retry count: %d", func.__name__, i, name=func.__name__, args=args, kwargs=kwargs
)

time.sleep(10)

return False
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@

import httpx

from scheduler.connectors.errors import ExternalServiceResponseError, exception_handler
from scheduler.clients.errors import ExternalServiceResponseError, exception_handler
from scheduler.clients.http import HTTPService
from scheduler.models import BoefjeMeta

from .services import HTTPService

ClientSessionMethod = Callable[..., Any]


Expand Down
Loading

0 comments on commit 915e2cc

Please sign in to comment.