def run(normalizer_meta: NormalizerMeta, raw: bytes | str) -> Iterable[OOI]:
    """Detect website software with Wappalyzer from a stored HTTP response.

    ``raw`` is split on the first blank line (``\\n\\n``): the part before it is parsed as a JSON
    document describing the response, the part after it is treated as the response body.

    Args:
        normalizer_meta: Job metadata; the hostname is taken from ``website.hostname`` of the
            tokenized input OOI reference.
        raw: The ``<json>\\n\\n<body>`` payload. A ``str`` is encoded as UTF-8 before parsing.

    Yields:
        For every technology Wappalyzer reports: a ``Software`` object followed by a
        ``SoftwareInstance`` linking it to the hostname.

    Raises:
        ValueError: If ``raw`` contains no blank-line separator or the JSON part is malformed.
        KeyError: If the JSON part lacks ``response.url`` or ``response.headers``.
    """
    pk = normalizer_meta.raw_data.boefje_meta.input_ooi
    tokenized_hostname = Reference.from_str(pk).tokenized["website"]["hostname"]
    hostname = Hostname(
        network=Network(name=tokenized_hostname["network"]["name"]).reference, name=tokenized_hostname["name"]
    )

    # The signature admits str, but the separator below is bytes; normalise once up front.
    if isinstance(raw, str):
        raw = raw.encode()

    raw_response, body_bytes = raw.split(b"\n\n", 1)
    response_object = json.loads(raw_response)
    response = response_object["response"]
    url = response["url"]
    headers = response["headers"]

    # The encoding is recorded inside the "response" object; the top-level lookup is kept only
    # as a fallback for payloads that might carry it there.
    encoding = response.get("encoding") or response_object.get("encoding") or "utf-8"
    try:
        body = body_bytes.decode(encoding, "replace")
    except LookupError:
        # The server advertised a codec Python does not know; fall back instead of aborting.
        body = body_bytes.decode("utf-8", "replace")

    wappalyzer = Wappalyzer.latest()
    web_page = WebPage(url, body, headers)
    results = wappalyzer.analyze_with_versions_and_categories(web_page)

    for name, data in results.items():
        # A technology can be detected without any version; indexing an empty list would raise
        # IndexError and abort the whole normalizer.
        versions = data.get("versions") or []
        # NOTE(review): assumes Software.version accepts None — confirm against the model.
        version = versions[0] if versions else None

        software = Software(name=name, version=version)
        software_instance = SoftwareInstance(ooi=hostname.reference, software=software.reference)
        yield software
        yield software_instance
# todo: perhaps also implement response.history?
def create_response_object(response: requests.Response) -> dict:
    """Summarise a response and the request that produced it as plain dicts and scalars.

    Args:
        response: The response to describe; its ``request`` attribute must be set.

    Returns:
        A dict with two keys:
        ``"response"`` — url, status_code, headers, cookies, is_redirect and encoding;
        ``"request"`` — url, method and headers of the originating request.
    """
    request = response.request

    return {
        "response": {
            "url": response.url,
            "status_code": response.status_code,
            "headers": dict(response.headers),
            # get_dict() instead of dict(): item access on a RequestsCookieJar raises
            # CookieConflictError when two cookies share a name (different domain/path).
            "cookies": response.cookies.get_dict(),
            "is_redirect": response.is_redirect,
            "encoding": response.encoding,
        },
        "request": {
            "url": request.url,
            "method": request.method,
            "headers": dict(request.headers),
        },
    }
mode 100644 index 3acab43877a..00000000000 --- a/boefjes/boefjes/plugins/kat_website_software/description.md +++ /dev/null @@ -1,3 +0,0 @@ -Scan for software on websites using Wappalyzer - -**Cat name**: Floortje diff --git a/boefjes/boefjes/plugins/kat_website_software/main.py b/boefjes/boefjes/plugins/kat_website_software/main.py deleted file mode 100644 index f7dbebb77a3..00000000000 --- a/boefjes/boefjes/plugins/kat_website_software/main.py +++ /dev/null @@ -1,31 +0,0 @@ -import platform - -import docker - -from boefjes.job_models import BoefjeMeta - -# FIXME: We should build a multi-platform image -if platform.machine() in ["arm64", "aarch64"]: - WAPPALYZER_IMAGE = "noamblitz/wappalyzer:MacM1" -else: - WAPPALYZER_IMAGE = "noamblitz/wappalyzer:latest" - - -def run_wappalyzer(url: str) -> str: - client = docker.from_env() - - return client.containers.run(WAPPALYZER_IMAGE, ["wappalyzer", url], remove=True).decode() - - -def run(boefje_meta: BoefjeMeta) -> list[tuple[set, bytes | str]]: - input_ = boefje_meta.arguments["input"] - - hostname = input_["netloc"]["name"] - path = input_["path"] - scheme = input_["scheme"] - - url = f"{scheme}://{hostname}{path}" - - results = run_wappalyzer(url) - - return [(set(), results)] diff --git a/boefjes/boefjes/plugins/kat_website_software/normalize.py b/boefjes/boefjes/plugins/kat_website_software/normalize.py deleted file mode 100644 index e85b01b6357..00000000000 --- a/boefjes/boefjes/plugins/kat_website_software/normalize.py +++ /dev/null @@ -1,45 +0,0 @@ -import json -from collections.abc import Iterable - -from boefjes.job_models import NormalizerMeta -from octopoes.models import OOI, Reference -from octopoes.models.ooi.network import Network -from octopoes.models.ooi.software import Software, SoftwareInstance -from octopoes.models.ooi.web import URL - - -def run(normalizer_meta: NormalizerMeta, raw: bytes | str) -> Iterable[OOI]: - results = json.loads(raw) - boefje_meta = normalizer_meta.raw_data.boefje_meta - - 
input_ = boefje_meta.arguments["input"] - hostname = input_["netloc"]["name"] - path = input_["path"] - scheme = input_["scheme"] - url = f"{scheme}://{hostname}{path}" - - pk = boefje_meta.input_ooi - hostname_reference = Reference.from_str(pk) - - original_url_status = results["urls"][url]["status"] - - if 300 <= original_url_status < 400: - # The requested url was redirected, so only return the new url instance. If needed we rescan the new url. - results["urls"].pop(url) - - for redirected_url in results["urls"]: - yield URL( - network=Network(name=hostname_reference.tokenized.netloc.network.name).reference, raw=redirected_url - ) - - return - - for technology in results["technologies"]: - s = Software( - name=technology["name"], - version=technology["version"], - cpe=technology["cpe"], - ) - si = SoftwareInstance(ooi=hostname_reference, software=s.reference) - yield s - yield si diff --git a/boefjes/debian/rules b/boefjes/debian/rules index a9df3889bc0..ce5a950e9cf 100755 --- a/boefjes/debian/rules +++ b/boefjes/debian/rules @@ -22,7 +22,10 @@ override_dh_fixperms: chmod 755 $(DESTDIR)/usr/bin/update-katalogus-db override_dh_virtualenv: - dh_virtualenv $(DH_VENV_ARGS) + grep -v git+https:// requirements.txt > /tmp/requirements-nogit.txt + grep git+https:// requirements.txt > /tmp/requirements-git.txt + dh_virtualenv --requirements=/tmp/requirements-nogit.txt $(DH_VENV_ARGS) + $(DH_VENV_DIR)/bin/python -m pip install -r /tmp/requirements-git.txt $(DH_VENV_DIR)/bin/python -m pip install gunicorn==20.1.0 cd /octopoes && /usr/bin/python3 setup.py bdist_wheel diff --git a/boefjes/poetry.lock b/boefjes/poetry.lock index 5e0408a94e1..dcfb37cb477 100644 --- a/boefjes/poetry.lock +++ b/boefjes/poetry.lock @@ -2144,15 +2144,13 @@ files = [ defusedxml = ["defusedxml (>=0.6.0)"] [[package]] -name = "python-wappalyzer" -version = "0.3.1" +name = "python-Wappalyzer" +version = "0.4.0" description = "Python implementation of the Wappalyzer web application detection utility" 
optional = false python-versions = "*" -files = [ - {file = "python-Wappalyzer-0.3.1.tar.gz", hash = "sha256:28fc8d5b8ace221aad7c5729b923976af53c5b7116fd0ddc452a0dcaeaf4b831"}, - {file = "python_Wappalyzer-0.3.1-py3-none-any.whl", hash = "sha256:0c76e4bbc1e782795f2ccda627add6366153cd53d8f8eb5a5b62431c7c4ecdfe"}, -] +files = [] +develop = false [package.dependencies] aiohttp = "*" @@ -2162,6 +2160,16 @@ httpretty = "*" lxml = "*" requests = "*" +[package.extras] +dev = ["mypy (>=0.812)", "pytest", "pytest-asyncio", "tox"] +docs = ["docutils", "pydoctor"] + +[package.source] +type = "git" +url = "https://github.com/chorsley/python-Wappalyzer.git" +reference = "0.4.0" +resolved_reference = "ac651718af77804e52b826944933be831d491387" + [[package]] name = "pywin32" version = "306" diff --git a/boefjes/pyproject.toml b/boefjes/pyproject.toml index baa278027a9..8829e23c978 100644 --- a/boefjes/pyproject.toml +++ b/boefjes/pyproject.toml @@ -49,10 +49,9 @@ shodan = "1.25.0" cryptography = "^42.0.1" # required by kat_webpage_analysis forcediphttpsadapter = "1.1.0" +python-wappalyzer = {git = "https://github.com/chorsley/python-Wappalyzer.git", rev = "0.4.0"} # required by kat_webpage_analysis (forcediphttpsadapter) urllib3 = "^2.1.0" -# required by kat_website_software -python-Wappalyzer = "0.3.1" # required by kat_wpscan wpscan-out-parse = "1.9.3" # required by kat_sec_txt diff --git a/boefjes/requirements-dev.txt b/boefjes/requirements-dev.txt index 32b0819a6c0..b70a639af25 100644 --- a/boefjes/requirements-dev.txt +++ b/boefjes/requirements-dev.txt @@ -1068,9 +1068,7 @@ python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "4.0" \ --hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a python-libnmap==0.7.3 ; python_version >= "3.10" and python_version < "4.0" \ --hash=sha256:d03629256c2ee9ab37390c28d4c4c2ae9637cd0861dd8ab9e0f32779545936c0 -python-wappalyzer==0.3.1 ; python_version >= "3.10" and python_version < "4.0" \ - 
--hash=sha256:0c76e4bbc1e782795f2ccda627add6366153cd53d8f8eb5a5b62431c7c4ecdfe \ - --hash=sha256:28fc8d5b8ace221aad7c5729b923976af53c5b7116fd0ddc452a0dcaeaf4b831 +python-wappalyzer @ git+https://github.com/chorsley/python-Wappalyzer.git@ac651718af77804e52b826944933be831d491387 ; python_version >= "3.10" and python_version < "4.0" pywin32==306 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "win32" \ --hash=sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d \ --hash=sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65 \ diff --git a/boefjes/requirements.txt b/boefjes/requirements.txt index ad023767764..ed07a859c42 100644 --- a/boefjes/requirements.txt +++ b/boefjes/requirements.txt @@ -1050,9 +1050,7 @@ python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "4.0" \ --hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a python-libnmap==0.7.3 ; python_version >= "3.10" and python_version < "4.0" \ --hash=sha256:d03629256c2ee9ab37390c28d4c4c2ae9637cd0861dd8ab9e0f32779545936c0 -python-wappalyzer==0.3.1 ; python_version >= "3.10" and python_version < "4.0" \ - --hash=sha256:0c76e4bbc1e782795f2ccda627add6366153cd53d8f8eb5a5b62431c7c4ecdfe \ - --hash=sha256:28fc8d5b8ace221aad7c5729b923976af53c5b7116fd0ddc452a0dcaeaf4b831 +python-wappalyzer @ git+https://github.com/chorsley/python-Wappalyzer.git@ac651718af77804e52b826944933be831d491387 ; python_version >= "3.10" and python_version < "4.0" pywin32==306 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "win32" \ --hash=sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d \ --hash=sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65 \ diff --git a/boefjes/tests/examples/body-page-analysis-normalize.json b/boefjes/tests/examples/body-page-analysis-normalize.json new file mode 100644 index 00000000000..f85c7cfd126 --- /dev/null +++ 
b/boefjes/tests/examples/body-page-analysis-normalize.json @@ -0,0 +1,65 @@ +{ + "id": "312b968d-0453-48fd-8e7b-ecfcb757dc7e", + "raw_data": { + "id": "e20e3de6-4305-4344-bfcf-a8b9ecc76ccd", + "boefje_meta": { + "id": "a8d1830b-3e2e-4dab-928e-4493a9710ff1", + "boefje": { + "id": "webpage-analysis" + }, + "organization": "_dev", + "input_ooi": "HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/", + "arguments": { + "input": { + "object_type": "HTTPResource", + "scan_profile": "reference=Reference('HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/') level=4 scan_profile_type='inherited'", + "primary_key": "HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/", + "website": { + "ip_service": { + "ip_port": { + "address": { + "network": { + "name": "internet" + }, + "address": "134.209.85.72" + }, + "protocol": "tcp", + "port": "443" + }, + "service": { + "name": "https" + } + }, + "hostname": { + "network": { + "name": "internet" + }, + "name": "mispo.es" + } + }, + "web_url": { + "scheme": "https", + "netloc": { + "network": { + "name": "internet" + }, + "name": "mispo.es" + }, + "port": "443", + "path": "/" + }, + "redirects_to": "None" + } + } + }, + "mime_types": [ + { + "value": "openkat-http/response" + } + ] + }, + "normalizer": { + "id": "kat_wappalyzer_normalize", + "version": null + } +} diff --git a/boefjes/tests/examples/download_page_analysis.raw b/boefjes/tests/examples/download_page_analysis.raw new file mode 100644 index 00000000000..52b5fd1fa9b --- /dev/null +++ b/boefjes/tests/examples/download_page_analysis.raw @@ -0,0 +1,68 @@ +{ + "response": { + "url": "https://mispo.es/", + "status_code": 200, + "headers": { + "Server": "nginx/1.18.0", + "Date": "Tue, 26 Mar 2024 13:59:01 GMT", + "Content-Type": "text/html", + "Last-Modified": "Fri, 18 Feb 2022 09:21:01 GMT", + "Transfer-Encoding": "chunked", + "Connection": 
"keep-alive", + "ETag": "W/\"620f64fd-72f\"", + "Content-Security-Policy": "default-src * 'unsafe-inline' 'unsafe-eval'; script-src * 'unsafe-inline' 'unsafe-eval'; connect-src * 'unsafe-inline'; img-src * data: blob: 'unsafe-inline'; frame-src *; style-src * 'unsafe-inline';", + "Content-Encoding": "gzip" + }, + "cookies": {}, + "is_redirect": false + }, + "request": { + "url": "https://mispo.es/", + "method": "GET", + "headers": { + "User-Agent": "OpenKAT", + "Accept-Encoding": "gzip, deflate", + "Accept": "*/*", + "Connection": "keep-alive" + } + } +} + + + + + + + + Mispoes! + + + + + + + +
+

Mispoes!

+
+

+ Miauw miauw miauw +

+ + + + +
+
def get_dummy_data(filename: str) -> bytes:
    """Return the raw bytes of ``filename`` from the ``tests/examples`` directory."""
    examples_dir = BASE_DIR.parent / "tests" / "examples"
    return (examples_dir / filename).read_bytes()
self.assertIn("openkat-http/headers", output[1][0]) self.assertIn("openkat-http/body", output[2][0]) @@ -42,6 +45,9 @@ def test_website_analysis_for_image(self, do_request_mock: MagicMock): mock_response = Response() mock_response._content = bytes(get_dummy_data("cat_image")) + mock_response.request = MagicMock(spec=PreparedRequest()) + mock_response.request.url = "" + mock_response.request.method = "GET" mock_response.headers = CaseInsensitiveDict(json.loads(get_dummy_data("download_image_headers.json"))) do_request_mock.return_value = mock_response diff --git a/boefjes/tests/test_wappalizer.py b/boefjes/tests/test_wappalizer.py deleted file mode 100644 index bf69678b79e..00000000000 --- a/boefjes/tests/test_wappalizer.py +++ /dev/null @@ -1,52 +0,0 @@ -from unittest import TestCase - -from pydantic import parse_obj_as - -from boefjes.job_handler import serialize_ooi -from boefjes.plugins.kat_website_software.normalize import run -from octopoes.models.types import OOIType -from tests.loading import get_boefje_meta, get_dummy_data, get_normalizer_meta - - -class WappalizerNormalizerTest(TestCase): - def test_only_yield_redirected_url_when_redirected(self): - input_ooi = parse_obj_as( - OOIType, - { - "object_type": "HostnameHTTPURL", - "network": "Network|internet", - "scheme": "https", - "port": 443, - "path": "/", - "netloc": "Hostname|internet|web.site", - }, - ) - boefje_meta = get_boefje_meta(input_ooi=input_ooi.reference) - boefje_meta.arguments["input"] = serialize_ooi(input_ooi) - - output = [x for x in run(get_normalizer_meta(boefje_meta), get_dummy_data("raw/wappalizer_redirected.json"))] - - self.assertEqual(2, len(output)) - self.assertEqual("URL|internet|https://mid.url/", str(output[0])) - self.assertEqual("URL|internet|https://redirected.url/", str(output[1])) - - def test_yield_software_when_not_redirected(self): - input_ooi = parse_obj_as( - OOIType, - { - "object_type": "HostnameHTTPURL", - "network": "Network|internet", - "scheme": "https", - 
"port": 443, - "path": "/", - "netloc": "Hostname|internet|redirected.url", - }, - ) - boefje_meta = get_boefje_meta(input_ooi=input_ooi.reference) - boefje_meta.arguments["input"] = serialize_ooi(input_ooi) - output = [x for x in run(get_normalizer_meta(boefje_meta), get_dummy_data("raw/wappalizer.json"))] - - self.assertEqual(4, len(output)) - self.assertEqual("Software|Hugo|0.104.0|", str(output[0])) - self.assertEqual("HostnameHTTPURL|https|internet|redirected.url|443|/", str(output[1].ooi)) - self.assertEqual("Software|Hugo|0.104.0|", str(output[1].software)) diff --git a/boefjes/tests/test_wappalyzer_normalizer.py b/boefjes/tests/test_wappalyzer_normalizer.py new file mode 100644 index 00000000000..421616f2b7c --- /dev/null +++ b/boefjes/tests/test_wappalyzer_normalizer.py @@ -0,0 +1,23 @@ +from pathlib import Path +from unittest import TestCase + +from boefjes.job_models import NormalizerMeta +from boefjes.katalogus.local_repository import LocalPluginRepository +from boefjes.local import LocalNormalizerJobRunner +from tests.loading import get_dummy_data + + +class WappalyzerNormalizerTest(TestCase): + def test_page_analyzer_normalizer(self): + meta = NormalizerMeta.model_validate_json(get_dummy_data("body-page-analysis-normalize.json")) + local_repository = LocalPluginRepository(Path(__file__).parent.parent / "boefjes" / "plugins") + + runner = LocalNormalizerJobRunner(local_repository) + output = runner.run(meta, get_dummy_data("download_page_analysis.raw")) + + results = output.observations[0].results + self.assertEqual(6, len(results)) + self.assertCountEqual( + ["Software|jQuery Migrate|1.0.0|", "Software|jQuery|3.6.0|", "Software|Bootstrap|3.3.7|"], + [o.primary_key for o in results if o.object_type == "Software"], + )