Skip to content

Commit

Permalink
Replace Wappalyzer (#2727)
Browse files Browse the repository at this point in the history
Co-authored-by: Jan Klopper <[email protected]>
Co-authored-by: Jeroen Dekkers <[email protected]>
  • Loading branch information
3 people committed Apr 8, 2024
1 parent 8bf1c06 commit 28efb66
Show file tree
Hide file tree
Showing 24 changed files with 257 additions and 167 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/boefjes_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
run: python3 -m pip install --upgrade pip

- name: Install dev requirements
run: pip install -r requirements-dev.txt
run: grep -v git+https:// requirements-dev.txt | pip install -r /dev/stdin && grep git+https:// requirements-dev.txt | pip install -r /dev/stdin
working-directory: ./boefjes

- name: Install requirements
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-rdo-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ jobs:
run: python3.10 -m venv /var/www/html/.venv

- name: Install requirements
run: source .venv/bin/activate; pip install --upgrade pip; find . -name requirements.txt | xargs -L 1 pip install -r; pip install ${{ github.workspace }}/octopoes/dist/octopoes*.whl
run: source .venv/bin/activate; pip install --upgrade pip; grep -v git+https:// requirements.txt | pip install -r /dev/stdin ; grep git+https:// requirements.txt | pip install -r /dev/stdin; pip install ${{ github.workspace }}/octopoes/dist/octopoes*.whl
working-directory: /var/www/html

- name: Create archive
Expand Down
10 changes: 8 additions & 2 deletions boefjes/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,14 @@ COPY boefjes/requirements-dev.txt boefjes/requirements.txt .

RUN --mount=type=cache,target=/root/.cache \
pip install --upgrade pip \
&& pip install -r requirements.txt \
&& if [ "$ENVIRONMENT" = "dev" ]; then pip install -r requirements-dev.txt; fi
&& if [ "$ENVIRONMENT" = "dev" ]; \
then \
grep -v git+https:// requirements-dev.txt | pip install -r /dev/stdin ; \
grep git+https:// requirements-dev.txt | pip install -r /dev/stdin ; \
else \
grep -v git+https:// requirements.txt | pip install -r /dev/stdin ;\
grep git+https:// requirements.txt | pip install -r /dev/stdin ; \
fi

FROM dev

Expand Down
33 changes: 33 additions & 0 deletions boefjes/boefjes/plugins/kat_wappalyzer/normalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import json
from collections.abc import Iterable

from Wappalyzer import Wappalyzer, WebPage

from boefjes.job_models import NormalizerMeta
from octopoes.models import OOI, Reference
from octopoes.models.ooi.dns.zone import Hostname
from octopoes.models.ooi.network import Network
from octopoes.models.ooi.software import Software, SoftwareInstance


def run(normalizer_meta: NormalizerMeta, raw: bytes | str) -> Iterable[OOI]:
    """Detect website software in a stored HTTP response using Wappalyzer.

    The raw payload is expected to be a JSON metadata object, a blank line
    (``\\n\\n``) and then the undecoded response body.

    Args:
        normalizer_meta: Job metadata; the input OOI primary key is read from
            ``raw_data.boefje_meta.input_ooi``.
        raw: The raw payload described above. A ``str`` payload is encoded to
            UTF-8 bytes before it is split.

    Yields:
        For every technology Wappalyzer reports: a ``Software`` OOI followed by
        a ``SoftwareInstance`` that links it to the hostname of the input OOI.

    Raises:
        ValueError: If the payload has no ``\\n\\n`` separator, or the metadata
            part is not valid JSON (``json.JSONDecodeError`` is a subclass).
        KeyError: If the metadata lacks ``response.url`` or ``response.headers``.
    """
    pk = normalizer_meta.raw_data.boefje_meta.input_ooi
    tokenized_hostname = Reference.from_str(pk).tokenized["website"]["hostname"]
    hostname = Hostname(
        network=Network(name=tokenized_hostname["network"]["name"]).reference,
        name=tokenized_hostname["name"],
    )

    # The signature allows str, but the separator below is bytes.
    if isinstance(raw, str):
        raw = raw.encode()

    raw_response, body = raw.split(b"\n\n", 1)
    response_object = json.loads(raw_response)
    response_meta = response_object["response"]
    url = response_meta["url"]
    headers = response_meta["headers"]

    # The encoding is stored inside the "response" object, not at the top level.
    encoding = response_meta.get("encoding") or "utf-8"
    try:
        body_text = body.decode(encoding, "replace")
    except LookupError:
        # The server declared a codec name Python does not know; fall back.
        body_text = body.decode("utf-8", "replace")

    wappalyzer = Wappalyzer.latest()
    web_page = WebPage(url, body_text, headers)
    results = wappalyzer.analyze_with_versions_and_categories(web_page)

    for name, data in results.items():
        # A technology can be detected without any version information.
        versions = data.get("versions") or []
        # NOTE(review): assumes Software.version accepts None - confirm against the model.
        software = Software(name=name, version=versions[0] if versions else None)
        software_instance = SoftwareInstance(ooi=hostname.reference, software=software.reference)
        yield software
        yield software_instance
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"id": "kat_website_software_normalize",
"id": "kat_wappalyzer_normalize",
"consumes": [
"boefje/website-software"
"openkat-http/response"
],
"produces": [
"Software",
Expand Down
3 changes: 2 additions & 1 deletion boefjes/boefjes/plugins/kat_webpage_analysis/boefje.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
"HTTPResource"
],
"produces": [
"openkat-http/full",
"openkat-http/response",
"openkat-http/headers",
"openkat-http/body",

"application/javascript",
"application/javascript",
Expand Down
24 changes: 23 additions & 1 deletion boefjes/boefjes/plugins/kat_webpage_analysis/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,35 @@ def run(boefje_meta: BoefjeMeta) -> list[tuple[set, bytes | str]]:
if content_type[0] in ALLOWED_CONTENT_TYPES:
body_mimetypes.add(content_type[0])

# the full response is emitted as a JSON metadata object, a blank line, and then the raw (undecoded) body bytes, so binary data or a different encoding cannot corrupt it
response_dump = json.dumps(create_response_object(response))

return [
({"openkat-http/full"}, f"{response.headers}\n\n{response.content}"),
({"openkat-http/response"}, response_dump.encode() + b"\n\n" + response.content),
({"openkat-http/headers"}, json.dumps(dict(response.headers))),
(body_mimetypes, response.content),
]


# todo: perhaps also implement response.history?
def create_response_object(response: requests.Response) -> dict:
    """Summarise an HTTP response and the request that produced it as plain dicts.

    Args:
        response: The response to describe; its ``request`` attribute supplies
            the request part.

    Returns:
        A dict with two keys. ``"response"`` holds url, status_code, headers,
        cookies, is_redirect and encoding. ``"request"`` holds url, method and
        headers. Headers and cookies are copied into ordinary ``dict`` objects.
    """
    response_part = {
        "url": response.url,
        "status_code": response.status_code,
        "headers": dict(response.headers),
        "cookies": dict(response.cookies),
        "is_redirect": response.is_redirect,
        "encoding": response.encoding,
    }

    sent = response.request
    request_part = {
        "url": sent.url,
        "method": sent.method,
        "headers": dict(sent.headers),
    }

    return {"response": response_part, "request": request_part}


def do_request(hostname: str, session: Session, uri: str, useragent: str):
response = session.get(
uri,
Expand Down
9 changes: 0 additions & 9 deletions boefjes/boefjes/plugins/kat_website_software/boefje.json

This file was deleted.

Binary file not shown.
3 changes: 0 additions & 3 deletions boefjes/boefjes/plugins/kat_website_software/description.md

This file was deleted.

31 changes: 0 additions & 31 deletions boefjes/boefjes/plugins/kat_website_software/main.py

This file was deleted.

45 changes: 0 additions & 45 deletions boefjes/boefjes/plugins/kat_website_software/normalize.py

This file was deleted.

5 changes: 4 additions & 1 deletion boefjes/debian/rules
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ override_dh_fixperms:
chmod 755 $(DESTDIR)/usr/bin/update-katalogus-db

override_dh_virtualenv:
dh_virtualenv $(DH_VENV_ARGS)
grep -v git+https:// requirements.txt > /tmp/requirements-nogit.txt
grep git+https:// requirements.txt > /tmp/requirements-git.txt
dh_virtualenv --requirements=/tmp/requirements-nogit.txt $(DH_VENV_ARGS)
$(DH_VENV_DIR)/bin/python -m pip install -r /tmp/requirements-git.txt

$(DH_VENV_DIR)/bin/python -m pip install gunicorn==20.1.0
cd /octopoes && /usr/bin/python3 setup.py bdist_wheel
Expand Down
20 changes: 14 additions & 6 deletions boefjes/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions boefjes/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,9 @@ shodan = "1.25.0"
cryptography = "^42.0.1"
# required by kat_webpage_analysis
forcediphttpsadapter = "1.1.0"
python-wappalyzer = {git = "https://github.com/chorsley/python-Wappalyzer.git", rev = "0.4.0"}
# required by kat_webpage_analysis (forcediphttpsadapter)
urllib3 = "^2.1.0"
# required by kat_website_software
python-Wappalyzer = "0.3.1"
# required by kat_wpscan
wpscan-out-parse = "1.9.3"
# required by kat_sec_txt
Expand Down
4 changes: 1 addition & 3 deletions boefjes/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1068,9 +1068,7 @@ python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a
python-libnmap==0.7.3 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:d03629256c2ee9ab37390c28d4c4c2ae9637cd0861dd8ab9e0f32779545936c0
python-wappalyzer==0.3.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:0c76e4bbc1e782795f2ccda627add6366153cd53d8f8eb5a5b62431c7c4ecdfe \
--hash=sha256:28fc8d5b8ace221aad7c5729b923976af53c5b7116fd0ddc452a0dcaeaf4b831
python-wappalyzer @ git+https://github.com/chorsley/python-Wappalyzer.git@ac651718af77804e52b826944933be831d491387 ; python_version >= "3.10" and python_version < "4.0"
pywin32==306 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "win32" \
--hash=sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d \
--hash=sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65 \
Expand Down
4 changes: 1 addition & 3 deletions boefjes/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1050,9 +1050,7 @@ python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a
python-libnmap==0.7.3 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:d03629256c2ee9ab37390c28d4c4c2ae9637cd0861dd8ab9e0f32779545936c0
python-wappalyzer==0.3.1 ; python_version >= "3.10" and python_version < "4.0" \
--hash=sha256:0c76e4bbc1e782795f2ccda627add6366153cd53d8f8eb5a5b62431c7c4ecdfe \
--hash=sha256:28fc8d5b8ace221aad7c5729b923976af53c5b7116fd0ddc452a0dcaeaf4b831
python-wappalyzer @ git+https://github.com/chorsley/python-Wappalyzer.git@ac651718af77804e52b826944933be831d491387 ; python_version >= "3.10" and python_version < "4.0"
pywin32==306 ; python_version >= "3.10" and python_version < "4.0" and sys_platform == "win32" \
--hash=sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d \
--hash=sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65 \
Expand Down
65 changes: 65 additions & 0 deletions boefjes/tests/examples/body-page-analysis-normalize.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"id": "312b968d-0453-48fd-8e7b-ecfcb757dc7e",
"raw_data": {
"id": "e20e3de6-4305-4344-bfcf-a8b9ecc76ccd",
"boefje_meta": {
"id": "a8d1830b-3e2e-4dab-928e-4493a9710ff1",
"boefje": {
"id": "webpage-analysis"
},
"organization": "_dev",
"input_ooi": "HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/",
"arguments": {
"input": {
"object_type": "HTTPResource",
"scan_profile": "reference=Reference('HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/') level=4 scan_profile_type='inherited'",
"primary_key": "HTTPResource|internet|134.209.85.72|tcp|443|https|internet|mispo.es|https|internet|mispo.es|443|/",
"website": {
"ip_service": {
"ip_port": {
"address": {
"network": {
"name": "internet"
},
"address": "134.209.85.72"
},
"protocol": "tcp",
"port": "443"
},
"service": {
"name": "https"
}
},
"hostname": {
"network": {
"name": "internet"
},
"name": "mispo.es"
}
},
"web_url": {
"scheme": "https",
"netloc": {
"network": {
"name": "internet"
},
"name": "mispo.es"
},
"port": "443",
"path": "/"
},
"redirects_to": "None"
}
}
},
"mime_types": [
{
"value": "openkat-http/response"
}
]
},
"normalizer": {
"id": "kat_wappalyzer_normalize",
"version": null
}
}
Loading

0 comments on commit 28efb66

Please sign in to comment.