🎉 Improvements for wazuh importer #9248

Merged · 16 commits · Feb 6, 2024
46 changes: 45 additions & 1 deletion docs/content/en/integrations/parsers/file/wazuh.md
@@ -2,7 +2,51 @@
title: "Wazuh Scanner"
toc_hide: true
---
Import JSON report.

### File Types
The DefectDojo parser accepts a .json file from [Wazuh](https://wazuh.com). There are two ways to export findings from Wazuh; choose whichever you prefer.

- Export the Wazuh findings from the API and upload them to DefectDojo (see the sketch after this list). This is likely the easiest way, but it exports all known vulnerabilities at once: they cannot be filtered by client or any other category, so you will receive all vulnerabilities in one engagement. It also does not output the endpoint of a finding.
- Export the findings via the script [available here](https://github.com/quirinziessler/wazuh-findings-exporter). The script fetches the findings by Wazuh agent group and saves them as JSON, ready for upload. You will receive one file per group, allowing you to separate clients into engagements in DefectDojo. It also exports each endpoint's hostname and displays it in the DefectDojo UI.
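
For the API route, a minimal sketch of the export, assuming a Wazuh 4.x manager whose REST API exposes GET /vulnerability/{agent_id}; the host, credentials, and agent ID are placeholders:

~~~
import json

import requests

WAZUH_API = "https://wazuh.example.com:55000"  # hypothetical manager address
AGENT_ID = "001"  # hypothetical agent

# authenticate first; raw=true returns the bare JWT token
token = requests.post(
    f"{WAZUH_API}/security/user/authenticate?raw=true",
    auth=("wazuh-user", "wazuh-password"),  # placeholder credentials
    verify=False,
).text

# fetch the vulnerability inventory of a single agent
response = requests.get(
    f"{WAZUH_API}/vulnerability/{AGENT_ID}",
    headers={"Authorization": f"Bearer {token}"},
    verify=False,
)
response.raise_for_status()

# the body already has the data/affected_items shape shown below, so save it as-is
with open("wazuh_findings.json", "w") as f:
    json.dump(response.json(), f)
~~~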

Whichever method you choose, remember to adjust the maximum upload size via "DD_SCAN_FILE_MAX_SIZE" if your files are larger than the default limit of 100MB. Depending on the number and type of integrated devices, file sizes grow quickly. A rough pre-flight check is sketched below.
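
A minimal pre-flight sketch, assuming the exported file sits in the working directory and the default limit of 100MB:

~~~
import os

# DD_SCAN_FILE_MAX_SIZE is the DefectDojo-side limit in MB; 100 is the documented default
limit_mb = int(os.environ.get("DD_SCAN_FILE_MAX_SIZE", "100"))
size_mb = os.path.getsize("wazuh_findings.json") / (1024 * 1024)  # hypothetical file name
if size_mb > limit_mb:
    print(f"Export is {size_mb:.0f}MB; raise DD_SCAN_FILE_MAX_SIZE above {limit_mb}MB before importing")
~~~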

### Acceptable JSON Format
The parser expects a .json file structured as shown below.

~~~
{
    "data": {
        "affected_items": [
            {
                "architecture": "amd64",
                "condition": "Package less than 4.3.2",
                "cve": "CVE-1234-123123",
                "cvss2_score": 0,
                "cvss3_score": 5.5,
                "detection_time": "2023-02-08T13:55:10Z",
                "external_references": [
                    "https://nvd.nist.gov/vuln/detail/CVE-YYYY-XXXXX",
                    "https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-YYYY-XXXXX"
                ],
                "name": "asdf",
                "published": "2022-09-01",
                "severity": "Medium",
                "status": "VALID",
                "title": "CVE-YYYY-XXXXX affects asdf",
                "type": "PACKAGE",
                "updated": "2022-09-07",
                "version": "4.3.1"
            }
        ],
        "failed_items": [],
        "total_affected_items": 1,
        "total_failed_items": 0
    },
    "error": 0,
    "message": "All selected vulnerabilities were returned"
}
~~~

### Sample Scan Data
Sample Wazuh Scanner scans can be found [here](https://github.com/DefectDojo/django-DefectDojo/tree/master/unittests/scans/wazuh).
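
Once you have the export, it can be uploaded through DefectDojo's v2 import API. A minimal sketch, assuming a reachable instance, a valid API token, and an existing engagement; the URL, token, and engagement ID are placeholders:

~~~
import requests

DD_URL = "https://defectdojo.example.com"  # placeholder instance
headers = {"Authorization": "Token <your-api-token>"}  # placeholder token

with open("wazuh_findings.json", "rb") as f:
    response = requests.post(
        f"{DD_URL}/api/v2/import-scan/",
        headers=headers,
        data={"scan_type": "Wazuh", "engagement": 1},  # engagement ID is hypothetical
        files={"file": f},
    )
response.raise_for_status()
~~~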
2 changes: 1 addition & 1 deletion dojo/settings/settings.dist.py
@@ -1,4 +1,4 @@
# Django settings for DefectDojo

import os
from datetime import timedelta
from celery.schedules import crontab
@@ -1260,7 +1260,6 @@
    'NeuVector (compliance)': ['title', 'vuln_id_from_tool', 'description'],
    'Wpscan': ['title', 'description', 'severity'],
    'Popeye Scan': ['title', 'description'],
-    'Wazuh Scan': ['title'],
    'Nuclei Scan': ['title', 'cwe', 'severity'],
    'KubeHunter Scan': ['title', 'description'],
    'kube-bench Scan': ['title', 'vuln_id_from_tool', 'description'],
@@ -1473,6 +1472,7 @@
    'kube-bench Scan': DEDUPE_ALGO_HASH_CODE,
    'Threagile risks report': DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE,
    'Humble Json Importer': DEDUPE_ALGO_HASH_CODE,
+    'Wazuh Scan': DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL,
    'MSDefender Parser': DEDUPE_ALGO_HASH_CODE,
}

82 changes: 44 additions & 38 deletions dojo/tools/wazuh/parser.py
@@ -1,10 +1,10 @@
+import hashlib
import json
-from dojo.models import Finding
+from dojo.models import Finding, Endpoint


class WazuhParser(object):
    """
    Use Wazuh Vulnerability API to retrieve the findings
    The vulnerabilities with condition "Package unfixed" are skipped because there is no fix out yet.
    https://github.com/wazuh/wazuh/issues/14560
    """
@@ -18,54 +18,49 @@
    def get_description_for_scan_types(self, scan_type):
        return "Wazuh"

-    def get_findings(self, filename, test):
-        data = json.load(filename)
-        # Detect duplications
-        dupes = dict()
+    def get_findings(self, file, test):
+        data = json.load(file)

-        try:
-            vulnerability = data[next(iter(data.keys()))]["affected_items"]
-        except (KeyError, StopIteration):
-            return list()
+        if not data:
+            return []

-        if vulnerability is None:
-            return list()
+        # Detect duplications
+        dupes = dict()

-        for item in vulnerability:
+        # Loop through each element in the list
+        vulnerabilities = data.get("data", {}).get("affected_items", [])
+        for item in vulnerabilities:
            if (
                item["condition"] != "Package unfixed"
                and item["severity"] != "Untriaged"
            ):
-                id = item.get("cve")
+                cve = item.get("cve")
                package_name = item.get("name")
                package_version = item.get("version")
                description = item.get("condition")
-                if item.get("severity") == "Untriaged":
-                    severity = "Info"
-                else:
-                    severity = item.get("severity")
-                if item.get("status") == "VALID":
-                    active = True
-                else:
-                    active = False
+                severity = item.get("severity").capitalize()
                agent_ip = item.get("agent_ip")
                links = item.get("external_references")
-                title = (
-                    item.get("title") + " (version: " + package_version + ")"
-                )
-                severity = item.get("severity", "info").capitalize()
+                cvssv3_score = item.get("cvss3_score")
+                publish_date = item.get("published")
+                agent_name = item.get("agent_name")
+                agent_ip = item.get("agent_ip")
+                detection_time = item.get("detection_time")

                if links:
-                    references = ""
-                    for link in links:
-                        references += f"{link}\n"
+                    references = "\n".join(links)
                else:
                    references = None

-                if id and id.startswith("CVE"):
-                    vulnerability_id = id
-                else:
-                    vulnerability_id = None
+                title = (
+                    item.get("title") + " (version: " + package_version + ")"
+                )

-                dupe_key = title
+                if agent_name:
+                    dupe_key = title + cve + agent_name + package_name + package_version
+                else:
+                    dupe_key = title + cve + package_name + package_version
+                dupe_key = hashlib.sha256(dupe_key.encode('utf-8')).hexdigest()

                if dupe_key in dupes:
                    find = dupes[dupe_key]
@@ -77,14 +72,25 @@
                        test=test,
                        description=description,
                        severity=severity,
-                        active=active,
-                        mitigation="mitigation",
                        references=references,
                        static_finding=True,
                        component_name=package_name,
                        component_version=package_version,
+                        cvssv3_score=cvssv3_score,
+                        publish_date=publish_date,
+                        unique_id_from_tool=dupe_key,
+                        date=detection_time,
                    )
-                    if vulnerability_id:
-                        find.unsaved_vulnerability_ids = [vulnerability_id]
+
+                    # in some cases the agent_ip is not the perfect way on how to identify a host. Thus prefer the agent_name, if existant.
+                    if agent_name:
+                        find.unsaved_endpoints = [Endpoint(host=agent_name)]
+                    elif agent_ip:
+                        find.unsaved_endpoints = [Endpoint(host=agent_ip)]
+
+                    if id:
+                        find.unsaved_vulnerability_ids = cve

                    dupes[dupe_key] = find

        return list(dupes.values())
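
For illustration, the new deduplication key is a SHA-256 over title, CVE, agent, package, and version, so re-importing the same report reproduces the same unique_id_from_tool (which is what DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL matches on). A standalone sketch with made-up values mirroring the fixture below:

~~~
import hashlib

title = "CVE-1234-1234 affects curl (version: 1)"
cve, agent_name = "CVE-1234-1234", "agent-1"
package_name, package_version = "asdf", "1"

dupe_key = title + cve + agent_name + package_name + package_version
print(hashlib.sha256(dupe_key.encode("utf-8")).hexdigest())  # stable across re-imports
~~~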
29 changes: 29 additions & 0 deletions unittests/scans/wazuh/one_finding_with_endpoint.json
@@ -0,0 +1,29 @@
{
    "data": {
        "affected_items": [
            {
                "name": "asdf",
                "version": "1",
                "cve": "CVE-1234-1234",
                "cvss2_score": 0,
                "title": "CVE-1234-1234 affects curl",
                "published": "2023-12-07",
                "architecture": "amd64",
                "status": "VALID",
                "cvss3_score": 6.5,
                "external_references": [
                    "https://nvd.nist.gov/vuln/detail/CVE-1234-1234",
                    "https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-1234-1234"
                ],
                "updated": "2023-12-24",
                "severity": "Medium",
                "type": "PACKAGE",
                "detection_time": "2023-12-13T22:11:57+00:00",
                "condition": "Package less than 2",
                "agent_ip": "111.111.111.111",
                "agent_name": "agent-1"
            }
        ],
        "total_affected_items": 1
    }
}
21 changes: 20 additions & 1 deletion unittests/tools/test_wazuh_parser.py
@@ -20,7 +20,10 @@ def test_parse_one_finding(self):
                endpoint.clean()
        self.assertEqual(1, len(findings))
        self.assertEqual("Medium", finding.severity)
-        self.assertEqual("CVE-1234-123123", finding.unsaved_vulnerability_ids[0])
+        self.assertEqual("CVE-1234-123123", finding.unsaved_vulnerability_ids)
+        self.assertEqual("asdf", finding.component_name)
+        self.assertEqual("4.3.1", finding.component_version)
+        self.assertEqual(5.5, finding.cvssv3_score)

    def test_parse_many_finding(self):
        testfile = open("unittests/scans/wazuh/many_findings.json")
@@ -30,3 +33,19 @@ def test_parse_many_finding(self):
            for endpoint in finding.unsaved_endpoints:
                endpoint.clean()
        self.assertEqual(6, len(findings))
+
+    def test_parse_one_finding_with_endpoint(self):
+        testfile = open("unittests/scans/wazuh/one_finding_with_endpoint.json")
+        parser = WazuhParser()
+        findings = parser.get_findings(testfile, Test())
+        for finding in findings:
+            for endpoint in finding.unsaved_endpoints:
+                endpoint.clean()
+        self.assertEqual(1, len(findings))
+        self.assertEqual("Medium", finding.severity)
+        self.assertEqual("CVE-1234-1234", finding.unsaved_vulnerability_ids)
+        self.assertEqual(6.5, finding.cvssv3_score)
+        endpoint = finding.unsaved_endpoints[0]
+        self.assertEqual("agent-1", endpoint.host)
+        self.assertEqual("asdf", finding.component_name)
+        self.assertEqual("1", finding.component_version)