Skip to content

Commit

Permalink
Merge branch 'feature/disable-scheduled-reports' of https://github.co…
Browse files Browse the repository at this point in the history
…m/minvws/nl-kat-coordination into feature/disable-scheduled-reports
  • Loading branch information
madelondohmen committed Nov 25, 2024
2 parents ce7ebc3 + 4ee4428 commit 442f6b5
Show file tree
Hide file tree
Showing 24 changed files with 567 additions and 248 deletions.
3 changes: 0 additions & 3 deletions .env-dist
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ BYTES_DB_URI=postgresql://${BYTES_DB_USER}:${BYTES_DB_PASSWORD}@postgres:5432/${
# --- Octopoes --- #
# See `octopoes/octopoes/config/settings.py`

# Number of Celery workers (for the Octopoes API worker) that need to be started
CELERY_WORKER_CONCURRENCY=${CELERY_WORKER_CONCURRENCY:-4}

# --- Mula --- #
# See `mula/scheduler/config/settings.py`

Expand Down
4 changes: 2 additions & 2 deletions boefjes/boefjes/plugins/kat_fierce/boefje.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"id": "fierce",
"name": "Fierce",
"description": "Perform DNS reconnaissance using Fierce. Helps to locate non-contiguous IP space and hostnames against specified hostnames. No exploitation is performed.",
"description": "Perform DNS reconnaissance using Fierce. Helps to locate non-contiguous IP space and hostnames against specified hostnames. No exploitation is performed. Beware if your DNS is managed by an external party. This boefjes performs a brute force attack against the name server.",
"consumes": [
"Hostname"
],
"scan_level": 1
"scan_level": 3
}
4 changes: 2 additions & 2 deletions boefjes/boefjes/plugins/pdio_subfinder/boefje.json
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
{
"id": "pdio-subfinder",
"name": "Subfinder",
"description": "A subdomain discovery tool. (projectdiscovery.io)",
"description": "A subdomain discovery tool. (projectdiscovery.io). Returns valid subdomains for websites using passive online sources. Beware that many of the online sources require their own API key to get more accurate data.",
"consumes": [
"Hostname"
],
"environment_keys": [
"SUBFINDER_RATE_LIMIT",
"SUBFINDER_VERSION"
],
"scan_level": 2
"scan_level": 1
}
109 changes: 57 additions & 52 deletions mula/scheduler/connectors/services/katalogus.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,90 +44,95 @@ def __init__(self, host: str, source: str, timeout: int, pool_connections: int,

def flush_caches(self) -> None:
self.flush_plugin_cache()
self.flush_normalizer_cache()
self.flush_boefje_cache()
self.flush_boefje_cache(self.plugin_cache)
self.flush_normalizer_cache(self.plugin_cache)

def flush_plugin_cache(self) -> None:
def flush_plugin_cache(self):
self.logger.debug("Flushing the katalogus plugin cache for organisations")

plugin_cache: dict = {}
orgs = self.get_organisations()
for org in orgs:
plugin_cache.setdefault(org.id, {})

plugins = self.get_plugins_by_organisation(org.id)
plugin_cache[org.id] = {plugin.id: plugin for plugin in plugins if plugin.enabled}

with self.plugin_cache_lock:
# First, we reset the cache, to make sure we won't get any ExpiredError
self.plugin_cache.expiration_enabled = False
self.plugin_cache.reset()

orgs = self.get_organisations()
for org in orgs:
self.plugin_cache.setdefault(org.id, {})

plugins = self.get_plugins_by_organisation(org.id)
self.plugin_cache[org.id] = {plugin.id: plugin for plugin in plugins if plugin.enabled}

self.plugin_cache.cache = plugin_cache
self.plugin_cache.expiration_enabled = True

self.logger.debug("Flushed the katalogus plugin cache for organisations")

def flush_boefje_cache(self) -> None:
def flush_boefje_cache(self, plugins=None) -> None:
"""boefje.consumes -> plugin type boefje"""
self.logger.debug("Flushing the katalogus boefje type cache for organisations")

with self.boefje_cache_lock:
# First, we reset the cache, to make sure we won't get any ExpiredError
self.boefje_cache.expiration_enabled = False
self.boefje_cache.reset()

orgs = self.get_organisations()
for org in orgs:
self.boefje_cache[org.id] = {}
boefje_cache: dict = {}
orgs = self.get_organisations()
for org in orgs:
boefje_cache.setdefault(org.id, {})

for plugin in self.get_plugins_by_organisation(org.id):
if plugin.type != "boefje":
continue
org_plugins = plugins[org.id].values() if plugins else self.get_plugins_by_organisation(org.id)
for plugin in org_plugins:
if plugin.type != "boefje":
continue

if plugin.enabled is False:
continue
if plugin.enabled is False:
continue

if not plugin.consumes:
continue
if not plugin.consumes:
continue

# NOTE: backwards compatibility, when it is a boefje the
# consumes field is a string field.
if isinstance(plugin.consumes, str):
self.boefje_cache[org.id].setdefault(plugin.consumes, []).append(plugin)
continue
# NOTE: backwards compatibility, when it is a boefje the
# consumes field is a string field.
if isinstance(plugin.consumes, str):
boefje_cache[org.id].setdefault(plugin.consumes, []).append(plugin)
continue

for type_ in plugin.consumes:
self.boefje_cache[org.id].setdefault(type_, []).append(plugin)
for type_ in plugin.consumes:
boefje_cache[org.id].setdefault(type_, []).append(plugin)

with self.boefje_cache_lock:
# First, we reset the cache, to make sure we won't get any ExpiredError
self.boefje_cache.expiration_enabled = False
self.boefje_cache.reset()
self.boefje_cache.cache = boefje_cache
self.boefje_cache.expiration_enabled = True

self.logger.debug("Flushed the katalogus boefje type cache for organisations")

def flush_normalizer_cache(self) -> None:
def flush_normalizer_cache(self, plugins=None) -> None:
"""normalizer.consumes -> plugin type normalizer"""
self.logger.debug("Flushing the katalogus normalizer type cache for organisations")

with self.normalizer_cache_lock:
# First, we reset the cache, to make sure we won't get any ExpiredError
self.normalizer_cache.expiration_enabled = False
self.normalizer_cache.reset()
normalizer_cache: dict = {}
orgs = self.get_organisations()
for org in orgs:
normalizer_cache.setdefault(org.id, {})

orgs = self.get_organisations()
for org in orgs:
self.normalizer_cache[org.id] = {}
org_plugins = plugins[org.id].values() if plugins else self.get_plugins_by_organisation(org.id)
for plugin in org_plugins:
if plugin.type != "normalizer":
continue

for plugin in self.get_plugins_by_organisation(org.id):
if plugin.type != "normalizer":
continue
if plugin.enabled is False:
continue

if plugin.enabled is False:
continue
if not plugin.consumes:
continue

if not plugin.consumes:
continue

for type_ in plugin.consumes:
self.normalizer_cache[org.id].setdefault(type_, []).append(plugin)
for type_ in plugin.consumes:
normalizer_cache[org.id].setdefault(type_, []).append(plugin)

with self.normalizer_cache_lock:
# First, we reset the cache, to make sure we won't get any ExpiredError
self.normalizer_cache.expiration_enabled = False
self.normalizer_cache.reset()
self.normalizer_cache.cache = normalizer_cache
self.normalizer_cache.expiration_enabled = True

self.logger.debug("Flushed the katalogus normalizer type cache for organisations")
Expand Down
4 changes: 2 additions & 2 deletions mula/scheduler/rankers/boefje.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ def rank(self, obj: Any) -> int:
grace_period = timedelta(seconds=self.ctx.config.pq_grace_period)

# New tasks that have not yet run before
if obj.prior_tasks is None or not obj.prior_tasks:
if obj.latest_task is None or not obj.latest_task:
return 2

# Make sure that we don't have tasks that are still in the grace period
time_since_grace_period = ((datetime.now(timezone.utc) - obj.prior_tasks[0].modified_at) - grace_period).seconds
time_since_grace_period = ((datetime.now(timezone.utc) - obj.latest_task.modified_at) - grace_period).seconds
if time_since_grace_period < 0:
return -1

Expand Down
4 changes: 2 additions & 2 deletions mula/scheduler/schedulers/boefje.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,8 +562,8 @@ def push_boefje_task(self, boefje_task: BoefjeTask, caller: str = "") -> None:
)
return

prior_tasks = self.ctx.datastores.task_store.get_tasks_by_hash(boefje_task.hash)
score = self.priority_ranker.rank(SimpleNamespace(prior_tasks=prior_tasks, task=boefje_task))
latest_task = self.ctx.datastores.task_store.get_latest_task_by_hash(boefje_task.hash)
score = self.priority_ranker.rank(SimpleNamespace(latest_task=latest_task, task=boefje_task))

task = Task(
id=boefje_task.id,
Expand Down
6 changes: 3 additions & 3 deletions mula/tests/integration/test_boefje_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,10 +577,10 @@ def test_push_task_no_ooi(self):
@mock.patch("scheduler.schedulers.BoefjeScheduler.has_boefje_permission_to_run")
@mock.patch("scheduler.schedulers.BoefjeScheduler.has_boefje_task_grace_period_passed")
@mock.patch("scheduler.schedulers.BoefjeScheduler.is_item_on_queue_by_hash")
@mock.patch("scheduler.context.AppContext.datastores.task_store.get_tasks_by_hash")
@mock.patch("scheduler.context.AppContext.datastores.task_store.get_latest_task_by_hash")
def test_push_task_queue_full(
self,
mock_get_tasks_by_hash,
mock_get_latest_task_by_hash,
mock_is_item_on_queue_by_hash,
mock_has_boefje_task_grace_period_passed,
mock_has_boefje_permission_to_run,
Expand All @@ -606,7 +606,7 @@ def test_push_task_queue_full(
mock_has_boefje_task_started_running.return_value = False
mock_has_boefje_task_grace_period_passed.return_value = True
mock_is_item_on_queue_by_hash.return_value = False
mock_get_tasks_by_hash.return_value = None
mock_get_latest_task_by_hash.return_value = None
self.mock_get_plugin.return_value = PluginFactory(scan_level=0, consumes=[ooi.object_type])

# Act
Expand Down
2 changes: 2 additions & 0 deletions octopoes/octopoes/config/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@
result_accept_content = ["application/json", "application/x-python-serialize"]

task_queues = (Queue(QUEUE_NAME_OCTOPOES),)

worker_concurrency = settings.workers
2 changes: 2 additions & 0 deletions octopoes/octopoes/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ class Settings(BaseSettings):

outgoing_request_timeout: int = Field(30, description="Timeout for outgoing HTTP requests")

workers: int = Field(4, description="Number of Octopoes Celery workers")

model_config = SettingsConfigDict(env_prefix="OCTOPOES_")

@classmethod
Expand Down
37 changes: 27 additions & 10 deletions rocky/reports/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,16 @@ class ReportScheduleStartDateChoiceForm(BaseRockyForm):
)


class ReportRecurrenceChoiceForm(BaseRockyForm):
choose_recurrence = forms.ChoiceField(
label="",
required=False,
widget=forms.RadioSelect(attrs={"class": "submit-on-click"}),
choices=(("once", _("No, just once")), ("repeat", _("Yes, repeat"))),
initial="once",
)


class ReportScheduleStartDateForm(BaseRockyForm):
start_date = forms.DateField(
label=_("Start date"),
Expand All @@ -47,25 +57,32 @@ class ReportScheduleStartDateForm(BaseRockyForm):
input_formats=["%Y-%m-%d"],
)


class ReportRecurrenceChoiceForm(BaseRockyForm):
choose_recurrence = forms.ChoiceField(
label="",
required=False,
widget=forms.RadioSelect(attrs={"class": "submit-on-click"}),
choices=(("once", _("No, just once")), ("repeat", _("Yes, repeat"))),
initial="once",
start_time = forms.TimeField(
label=_("Start time (UTC)"),
widget=forms.TimeInput(format="%H:%M", attrs={"form": "generate_report"}),
initial=lambda: datetime.now(tz=timezone.utc).time(),
required=True,
input_formats=["%H:%M"],
)


class ReportScheduleRecurrenceForm(BaseRockyForm):
recurrence = forms.ChoiceField(
label=_("Recurrence"),
required=True,
widget=forms.Select(attrs={"form": "generate_report"}),
choices=[("daily", _("Daily")), ("weekly", _("Weekly")), ("monthly", _("Monthly")), ("yearly", _("Yearly"))],
)

def clean(self):
cleaned_data = super().clean()
start_date = cleaned_data.get("start_date")
start_time = cleaned_data.get("start_time")

if start_date and start_time:
start_datetime = datetime.combine(start_date, start_time)
cleaned_data["start_datetime"] = start_datetime

return cleaned_data


class CustomReportScheduleForm(BaseRockyForm):
start_date = forms.DateField(
Expand Down
5 changes: 5 additions & 0 deletions rocky/reports/report_types/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ class ReportPlugins(TypedDict):
optional: set[str]


class SubReportPlugins(TypedDict):
required: list[str]
optional: list[str]


def report_plugins_union(report_types: list[type["BaseReport"]]) -> ReportPlugins:
"""Take the union of the required and optional plugin sets and remove optional plugins that are required"""

Expand Down
2 changes: 1 addition & 1 deletion rocky/reports/report_types/findings_report/report.html
Original file line number Diff line number Diff line change
Expand Up @@ -81,5 +81,5 @@ <h5>{% translate "Description" %}</h5>
</div>
</div>
{% else %}
<p>{% translate "No findings have been found." %}</p>
<p>{% translate "No findings have been identified yet." %}</p>
{% endif %}
14 changes: 7 additions & 7 deletions rocky/reports/templates/partials/export_report_settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@ <h2>{% translate "Report schedule" %}</h2>
{% include "partials/form/fieldset.html" with fields=report_schedule_form_recurrence_choice %}

{% if is_scheduled_report %}
<p>
{% blocktranslate trimmed %}
Please choose a start date, time and recurrence for scheduling your report(s).
If you select a date on the 28th-31st of the month, it will always be scheduled on the last day of the month.
{% endblocktranslate %}
</p>
<p>
{% blocktranslate trimmed %}
The date you select will be the reference date for the data set for your report.
Please allow for up to 24 hours for your report to be ready.
{% endblocktranslate %}
</p>
<div class="column-2">
<div>
{% include "partials/form/fieldset.html" with fields=report_schedule_form_start_date %}

</div>
<div>
{% include "partials/form/fieldset.html" with fields=report_schedule_form_recurrence %}
{% include "partials/form/fieldset.html" with fields=report_schedule_form_start_date %}

</div>
</div>
{% endif %}
</form>
Expand Down
Loading

0 comments on commit 442f6b5

Please sign in to comment.