Release: Merge back 2.40.2 into dev from: master-into-dev/2.40.2-2.41.0-dev #11287

Merged · 18 commits · Nov 18, 2024
2 changes: 2 additions & 0 deletions .github/workflows/release-drafter.yml
@@ -48,6 +48,8 @@ jobs:
    steps:
      - name: Load OAS files from artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: oas-*

      - name: Upload Release Asset - OpenAPI Specification - YAML
        id: upload-release-asset-yaml
8 changes: 4 additions & 4 deletions docs/content/en/usage/features.md
@@ -244,7 +244,7 @@ The environment variable will override the settings in `settings.dist.py`, repla

The available algorithms are:

DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL
DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL (value for `DD_DEDUPLICATION_ALGORITHM_PER_PARSER`: `unique_id_from_tool`)
: The deduplication occurs based on
finding.unique_id_from_tool which is a unique technical
id existing in the source tool. Few scanners populate this
@@ -266,12 +266,12 @@ DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL
able to recognise that findings found in previous
scans are actually the same as the new findings.

DEDUPE_ALGO_HASH_CODE
DEDUPE_ALGO_HASH_CODE (value for `DD_DEDUPLICATION_ALGORITHM_PER_PARSER`: `hash_code`)
: The deduplication occurs based on finding.hash_code. The
hash_code itself is configurable for each scanner in
parameter `HASHCODE_FIELDS_PER_SCANNER`.

DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE
DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE (value for `DD_DEDUPLICATION_ALGORITHM_PER_PARSER`: `unique_id_from_tool_or_hash_code`)
: A finding is a duplicate with another if they have the same
unique_id_from_tool OR the same hash_code.

@@ -284,7 +284,7 @@ DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE
cross-parser deduplication


DEDUPE_ALGO_LEGACY
DEDUPE_ALGO_LEGACY (value for `DD_DEDUPLICATION_ALGORITHM_PER_PARSER`: `legacy`)
: This is the algorithm that was in place before the configuration
per parser was made possible, and it is also the default one for
backward compatibility reasons.
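
For illustration, a minimal sketch of how the per-parser override can be supplied; the scan-type names used as keys are hypothetical, only the algorithm values come from the list above:

```python
import json
import os

# Hypothetical value for DD_DEDUPLICATION_ALGORITHM_PER_PARSER: a JSON object
# mapping scan-type names (illustrative here) to one of the documented algorithms.
os.environ["DD_DEDUPLICATION_ALGORITHM_PER_PARSER"] = json.dumps({
    "Example SAST Scan": "unique_id_from_tool",
    "Example DAST Scan": "hash_code",
    "Example Combined Scan": "unique_id_from_tool_or_hash_code",
})

# settings.dist.py parses the variable as JSON and applies it per parser.
overrides = json.loads(os.environ["DD_DEDUPLICATION_ALGORITHM_PER_PARSER"])
print(overrides["Example DAST Scan"])  # -> hash_code
```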
2 changes: 1 addition & 1 deletion dojo/settings/.settings.dist.py.sha256sum
@@ -1 +1 @@
fc660db6c2f55181fd8515d9b13c75197d8272c5c635235f6f60e4b1fc77af04
01215b397651163c0403b028adb08b18fa83c4abb188b0536dfb9e43eddcd9cd
21 changes: 21 additions & 0 deletions dojo/settings/settings.dist.py
@@ -1301,6 +1301,12 @@ def saml2_attrib_map_format(dict):
if len(env("DD_HASHCODE_FIELDS_PER_SCANNER")) > 0:
env_hashcode_fields_per_scanner = json.loads(env("DD_HASHCODE_FIELDS_PER_SCANNER"))
for key, value in env_hashcode_fields_per_scanner.items():
if not isinstance(value, list):
msg = f"Fields definition '{value}' for hashcode calculation of '{key}' is not valid. It needs to be list of strings but it is {type(value)}."
raise TypeError(msg)
if not all(isinstance(field, str) for field in value):
msg = f"Fields for hashcode calculation for {key} are not valid. It needs to be list of strings. Some of fields are not string."
raise AttributeError(msg)
if key in HASHCODE_FIELDS_PER_SCANNER:
logger.info(f"Replacing {key} with value {value} (previously set to {HASHCODE_FIELDS_PER_SCANNER[key]}) from env var DD_HASHCODE_FIELDS_PER_SCANNER")
HASHCODE_FIELDS_PER_SCANNER[key] = value
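
As a rough standalone sketch of what the new checks accept and reject (the helper name and scanner key are illustrative, the validation mirrors the loop above):

```python
import json


def validate_hashcode_fields(raw_json):
    # Re-statement of the checks added above, outside of Django settings.
    for key, value in json.loads(raw_json).items():
        if not isinstance(value, list):
            raise TypeError(f"Fields definition '{value}' for '{key}' must be a list of strings.")
        if not all(isinstance(field, str) for field in value):
            raise AttributeError(f"Fields for '{key}' must all be strings.")


# Accepted: a list of field names per scanner.
validate_hashcode_fields('{"Example Scan": ["title", "cwe", "line"]}')

# Rejected: the value is a plain string, not a list -> TypeError at startup.
try:
    validate_hashcode_fields('{"Example Scan": "title"}')
except TypeError as exc:
    print(exc)
```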
@@ -1382,6 +1388,13 @@ def saml2_attrib_map_format(dict):
# Makes it possible to deduplicate on a technical id (same parser) and also on some functional fields (cross-parsers deduplication)
DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE = "unique_id_from_tool_or_hash_code"

DEDUPE_ALGOS = [
DEDUPE_ALGO_LEGACY,
DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL,
DEDUPE_ALGO_HASH_CODE,
DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE,
]

# Allows to deduplicate with endpoints if endpoints is not included in the hashcode.
# Possible values are: scheme, host, port, path, query, fragment, userinfo, and user. For a detailed description see https://hyperlink.readthedocs.io/en/latest/api.html#attributes.
# Example:
@@ -1532,6 +1545,9 @@ def saml2_attrib_map_format(dict):
if len(env("DD_DEDUPLICATION_ALGORITHM_PER_PARSER")) > 0:
env_dedup_algorithm_per_parser = json.loads(env("DD_DEDUPLICATION_ALGORITHM_PER_PARSER"))
for key, value in env_dedup_algorithm_per_parser.items():
if value not in DEDUPE_ALGOS:
msg = f"DEDUP algorithm '{value}' for '{key}' is not valid. Use one of following values: {', '.join(DEDUPE_ALGOS)}"
raise AttributeError(msg)
if key in DEDUPLICATION_ALGORITHM_PER_PARSER:
logger.info(f"Replacing {key} with value {value} (previously set to {DEDUPLICATION_ALGORITHM_PER_PARSER[key]}) from env var DD_DEDUPLICATION_ALGORITHM_PER_PARSER")
DEDUPLICATION_ALGORITHM_PER_PARSER[key] = value
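
A small sketch of the effect of the new DEDUPE_ALGOS guard; the parser name and helper function are illustrative, the check itself mirrors the code above:

```python
DEDUPE_ALGOS = [
    "legacy",
    "unique_id_from_tool",
    "hash_code",
    "unique_id_from_tool_or_hash_code",
]


def apply_override(current, overrides):
    # Simplified version of the per-parser override loop above.
    for key, value in overrides.items():
        if value not in DEDUPE_ALGOS:
            msg = f"DEDUP algorithm '{value}' for '{key}' is not valid. Use one of: {', '.join(DEDUPE_ALGOS)}"
            raise AttributeError(msg)
        current[key] = value
    return current


# A typo such as "hashcode" (instead of "hash_code") now fails fast at startup.
apply_override({}, {"Example Scan": "hashcode"})  # raises AttributeError
```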
@@ -1750,9 +1766,14 @@ def saml2_attrib_map_format(dict):
"ELSA": "https://linux.oracle.com/errata/&&.html", # e.g. https://linux.oracle.com/errata/ELSA-2024-12714.html
"ELBA": "https://linux.oracle.com/errata/&&.html", # e.g. https://linux.oracle.com/errata/ELBA-2024-7457.html
"RXSA": "https://errata.rockylinux.org/", # e.g. https://errata.rockylinux.org/RXSA-2024:4928
"C-": "https://hub.armosec.io/docs/", # e.g. https://hub.armosec.io/docs/c-0085
"AVD": "https://avd.aquasec.com/misconfig/", # e.g. https://avd.aquasec.com/misconfig/avd-ksv-01010
"KHV": "https://avd.aquasec.com/misconfig/kubernetes/", # e.g. https://avd.aquasec.com/misconfig/kubernetes/khv045
"CAPEC": "https://capec.mitre.org/data/definitions/&&.html", # e.g. https://capec.mitre.org/data/definitions/157.html
"CWE": "https://cwe.mitre.org/data/definitions/&&.html", # e.g. https://cwe.mitre.org/data/definitions/79.html
"TEMP": "https://security-tracker.debian.org/tracker/", # e.g. https://security-tracker.debian.org/tracker/TEMP-0841856-B18BAF
"DSA": "https://security-tracker.debian.org/tracker/", # e.g. https://security-tracker.debian.org/tracker/DSA-5791-1
"RLSA": "https://errata.rockylinux.org/", # e.g. https://errata.rockylinux.org/RLSA-2024:7001
}
# List of acceptable file types that can be uploaded to a given object via arbitrary file upload
FILE_UPLOAD_TYPES = env("DD_FILE_UPLOAD_TYPES")
2 changes: 2 additions & 0 deletions dojo/templatetags/display_tags.py
@@ -780,6 +780,8 @@ def vulnerability_url(vulnerability_id):

    for key in settings.VULNERABILITY_URLS:
        if vulnerability_id.upper().startswith(key):
            if key in ["AVD", "KHV", "C-"]:
                return settings.VULNERABILITY_URLS[key] + str(vulnerability_id.lower())
            if "&&" in settings.VULNERABILITY_URLS[key]:
                # Process specific keys specially if needed
                if key in ["CAPEC", "CWE"]:
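To show how the new prefixes resolve, a quick sketch of the lookup combined with the new URL entries above (trimmed to those entries; the example IDs come from the comments in settings.dist.py):

```python
VULNERABILITY_URLS = {
    "C-": "https://hub.armosec.io/docs/",
    "AVD": "https://avd.aquasec.com/misconfig/",
    "KHV": "https://avd.aquasec.com/misconfig/kubernetes/",
}


def vulnerability_url(vulnerability_id):
    # Simplified: the AVD/KHV/C- entries append the ID in lower case.
    for key, base in VULNERABILITY_URLS.items():
        if vulnerability_id.upper().startswith(key):
            if key in ["AVD", "KHV", "C-"]:
                return base + vulnerability_id.lower()
    return None


print(vulnerability_url("AVD-KSV-01010"))  # https://avd.aquasec.com/misconfig/avd-ksv-01010
print(vulnerability_url("KHV045"))         # https://avd.aquasec.com/misconfig/kubernetes/khv045
print(vulnerability_url("C-0085"))         # https://hub.armosec.io/docs/c-0085
```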
3 changes: 2 additions & 1 deletion dojo/tools/aws_prowler_v3plus/prowler_v4.py
@@ -37,7 +37,8 @@ def process_ocsf_json(self, file, test):
documentation = deserialized.get("remediation", {}).get("references", "")
documentation = str(documentation) + "\n" + str(deserialized.get("unmapped", {}).get("related_url", ""))
security_domain = deserialized.get("resources", [{}])[0].get("type", "")
timestamp = deserialized.get("event_time")
# Prowler v4.5.0 changed 'event_time' key in report with 'time_dt'
timestamp = deserialized.get("time_dt") or deserialized.get("event_time")
resource_arn = deserialized.get("resources", [{}])[0].get("uid", "")
resource_id = deserialized.get("resources", [{}])[0].get("name", "")
unique_id_from_tool = deserialized.get("finding_info", {}).get("uid", "")
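A tiny sketch of the timestamp fallback with two abbreviated OCSF records (the field values are made up):

```python
# Prowler >= 4.5.0 writes 'time_dt' ...
new_style = {"time_dt": "2024-11-01T12:00:00Z"}
# ... while older reports still carry 'event_time'.
old_style = {"event_time": "2024-10-01T08:30:00Z"}

for deserialized in (new_style, old_style):
    timestamp = deserialized.get("time_dt") or deserialized.get("event_time")
    print(timestamp)
# 2024-11-01T12:00:00Z
# 2024-10-01T08:30:00Z
```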
2 changes: 1 addition & 1 deletion dojo/tools/bearer_cli/parser.py
@@ -33,7 +33,7 @@ def get_findings(self, file, test):
finding = Finding(
title=bearerfinding["title"] + " in " + bearerfinding["filename"] + ":" + str(bearerfinding["line_number"]),
test=test,
description=bearerfinding["description"] + "\n Detected code snippet: \n" + bearerfinding["snippet"],
description=bearerfinding["description"] + "\n Detected code snippet: \n" + bearerfinding.get("snippet", bearerfinding.get("code_extract")),
severity=severity,
cwe=bearerfinding["cwe_ids"][0],
static_finding=True,
118 changes: 59 additions & 59 deletions dojo/tools/kubescape/parser.py
@@ -59,65 +59,65 @@ def get_findings(self, filename, test):
controls = results[0].get("controls", [])

for control in controls:
# This condition is true if the result doesn't contain the status for each control (old format)
retrocompatibility_condition = "status" not in control or "status" not in control["status"]
if retrocompatibility_condition or control["status"]["status"] == "failed":
control_name = control["name"]
if resource_type and resource_name and control_name:
title = f"{control_name} - {resource_type} {resource_name}"
else:
title = f"{control_name} - {resourceid}"
controlID = control["controlID"]

# Find control details
controlSummary = self.find_control_summary_by_id(data, controlID)
if controlSummary is None:
severity = "Info"
mitigation = ""
else:
severity = self.severity_mapper(controlSummary.get("scoreFactor", 0))
# Define mitigation if available
if "mitigation" in controlSummary:
mitigation = controlSummary["mitigation"]
for rule in control["rules"]:
if rule["status"] == "passed":
continue
# This condition is true if the result doesn't contain the status for each control (old format)
retrocompatibility_condition = "status" not in control or "status" not in control["status"]
if retrocompatibility_condition or control["status"]["status"] == "failed":
control_name = control["name"]
if resource_type and resource_name and control_name:
title = f"{control_name} - {resource_type} {resource_name}"
else:
mitigation = ""
title = f"{control_name} - {resourceid}"
controlID = control["controlID"]

armoLink = f"https://hub.armosec.io/docs/{controlID.lower()}"
description = "**Summary:** " + f"The ressource '{resourceid}' has failed the control '{control_name}'." + "\n"
if controlSummary is not None and "description" in controlSummary:
description += "**Description:** " + controlSummary["description"] + "\n"

# Define category if available
if controlSummary is not None and "category" in controlSummary and "subCategory" in controlSummary["category"]:
category_name = controlSummary["category"]["name"]
category_subname = controlSummary["category"]["subCategory"]["name"]
category = f"{category_name} > {category_subname}"
description += "**Category:** " + category + "\n"
elif controlSummary is not None and "category" in controlSummary and "name" in controlSummary["category"]:
category = controlSummary["category"]["name"]
description += "**Category:** " + category + "\n"

description += "View control details here: " + self.__hyperlink(armoLink)

steps_to_reproduce = "The following rules have failed :" + "\n"
steps_to_reproduce += "\t**Rules:** " + str(json.dumps(control["rules"], indent=4)) + "\n"

steps_to_reproduce += "Resource object may contain evidence:" + "\n"
steps_to_reproduce += "\t**Resource object:** " + str(json.dumps(resource["object"], indent=4))

references = armoLink

find = Finding(
title=textwrap.shorten(title, 150),
test=test,
description=description,
mitigation=mitigation,
steps_to_reproduce=steps_to_reproduce,
references=references,
severity=severity,
component_name=resourceid,
static_finding=True,
dynamic_finding=False,
)
findings.append(find)
# Find control details
controlSummary = self.find_control_summary_by_id(data, controlID)
if controlSummary is None:
severity = "Info"
mitigation = ""
else:
severity = self.severity_mapper(controlSummary.get("scoreFactor", 0))
# Define mitigation if available
if "mitigation" in controlSummary:
mitigation = controlSummary["mitigation"]
else:
mitigation = ""

description = "**Summary:** " + f"The ressource '{resourceid}' has failed the control '{control_name}'." + "\n"
if controlSummary is not None and "description" in controlSummary:
description += "**Description:** " + controlSummary["description"] + "\n"

# Define category if available
if controlSummary is not None and "category" in controlSummary and "subCategory" in controlSummary["category"]:
category_name = controlSummary["category"]["name"]
category_subname = controlSummary["category"]["subCategory"]["name"]
category = f"{category_name} > {category_subname}"
description += "**Category:** " + category + "\n"
elif controlSummary is not None and "category" in controlSummary and "name" in controlSummary["category"]:
category = controlSummary["category"]["name"]
description += "**Category:** " + category + "\n"

steps_to_reproduce = "The following rules have failed :" + "\n"
steps_to_reproduce += "\t**Rules:** " + str(json.dumps(control["rules"], indent=4)) + "\n"
steps_to_reproduce += "Resource object may contain evidence:" + "\n"
steps_to_reproduce += "\t**Resource object:** " + str(json.dumps(resource["object"], indent=4))

find = Finding(
title=textwrap.shorten(title, 150),
test=test,
description=description,
mitigation=mitigation,
steps_to_reproduce=steps_to_reproduce,
references=f"https://hub.armosec.io/docs/{controlID.lower()}",
severity=severity,
component_name=resourceid,
static_finding=True,
dynamic_finding=False,
)
if controlID is not None:
find.unsaved_vulnerability_ids = []
find.unsaved_vulnerability_ids.append(controlID)
findings.append(find)
return findings
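
For orientation, a hypothetical minimal shape of a failed kubescape control as the reworked loop consumes it; only fields referenced above are shown, and the control and rule names are made up:

```python
control = {
    "controlID": "C-0017",  # illustrative control ID
    "name": "Immutable container filesystem",
    "status": {"status": "failed"},
    "rules": [
        {"name": "immutable-container-filesystem", "status": "failed"},
        {"name": "some-other-rule", "status": "passed"},  # passed rules are now skipped
    ],
}

for rule in control["rules"]:
    if rule["status"] == "passed":
        continue
    # remaining failed rules feed into Findings that carry the controlID
    # as a vulnerability id (see find.unsaved_vulnerability_ids above)
    print(control["controlID"], rule["name"])
```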
3 changes: 2 additions & 1 deletion dojo/tools/trivy_operator/checks_handler.py
@@ -1,4 +1,5 @@
from dojo.models import Finding
from dojo.tools.trivy_operator.uniform_vulnid import UniformTrivyVulnID

TRIVY_SEVERITIES = {
"CRITICAL": "Critical",
@@ -47,6 +48,6 @@ def handle_checks(self, labels, checks, test):
tags=[resource_namespace],
)
if check_id:
finding.unsaved_vulnerability_ids = [check_id]
finding.unsaved_vulnerability_ids = [UniformTrivyVulnID().return_uniformed_vulnid(check_id)]
findings.append(finding)
return findings
3 changes: 2 additions & 1 deletion dojo/tools/trivy_operator/compliance_handler.py
@@ -1,4 +1,5 @@
from dojo.models import Finding
from dojo.tools.trivy_operator.uniform_vulnid import UniformTrivyVulnID

TRIVY_SEVERITIES = {
"CRITICAL": "Critical",
@@ -54,6 +55,6 @@ def handle_compliance(self, benchmarkreport, test):
dynamic_finding=True,
)
if check_checkID:
finding.unsaved_vulnerability_ids = [check_checkID]
finding.unsaved_vulnerability_ids = [UniformTrivyVulnID().return_uniformed_vulnid(check_checkID)]
findings.append(finding)
return findings
6 changes: 5 additions & 1 deletion dojo/tools/trivy_operator/parser.py
@@ -20,7 +20,6 @@ def get_description_for_scan_types(self, scan_type):

def get_findings(self, scan_file, test):
scan_data = scan_file.read()

try:
data = json.loads(str(scan_data, "utf-8"))
except Exception:
@@ -29,6 +28,11 @@ def get_findings(self, scan_file, test):
if type(data) is list:
for listitems in data:
findings += self.output_findings(listitems, test)
elif type(data) is dict and bool(set(data.keys()) & {"clustercompliancereports.aquasecurity.github.io", "clusterconfigauditreports.aquasecurity.github.io", "clusterinfraassessmentreports.aquasecurity.github.io", "clusterrbacassessmentreports.aquasecurity.github.io", "configauditreports.aquasecurity.github.io", "exposedsecretreports.aquasecurity.github.io", "infraassessmentreports.aquasecurity.github.io", "rbacassessmentreports.aquasecurity.github.io", "vulnerabilityreports.aquasecurity.github.io"}):
for datakey in list(data.keys()):
if datakey not in ["clustersbomreports.aquasecurity.github.io", "sbomreports.aquasecurity.github.io"]:
for listitems in (data[datakey]):
findings += self.output_findings(listitems, test)
else:
findings += self.output_findings(data, test)
return findings
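As a rough sketch of the aggregated layout the new branch handles: a dict keyed by Aqua report CRD names, each holding a list of reports, with the SBOM keys skipped (the inner report objects are placeholders):

```python
# Hypothetical aggregated export with one key per Aqua report type.
data = {
    "vulnerabilityreports.aquasecurity.github.io": [{"report": "..."}],
    "configauditreports.aquasecurity.github.io": [{"report": "..."}],
    "sbomreports.aquasecurity.github.io": [{"report": "..."}],  # skipped
}

SKIPPED = ["clustersbomreports.aquasecurity.github.io", "sbomreports.aquasecurity.github.io"]

for datakey in list(data.keys()):
    if datakey not in SKIPPED:
        for listitems in data[datakey]:
            print(datakey)  # each item would be passed on to output_findings(listitems, test)
```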
3 changes: 1 addition & 2 deletions dojo/tools/trivy_operator/secrets_handler.py
@@ -42,6 +42,7 @@ def handle_secrets(self, labels, secrets, test):
secret_description += "\n**resource.kind:** " + resource_kind
secret_description += "\n**resource.name:** " + resource_name
secret_description += "\n**resource.namespace:** " + resource_namespace
secret_description += "\n**ruleID:** " + secret_rule_id
finding = Finding(
test=test,
title=title,
@@ -54,7 +55,5 @@ def handle_secrets(self, labels, secrets, test):
service=service,
tags=[resource_namespace],
)
if secret_rule_id:
finding.unsaved_vulnerability_ids = [secret_rule_id]
findings.append(finding)
return findings
20 changes: 20 additions & 0 deletions dojo/tools/trivy_operator/uniform_vulnid.py
@@ -0,0 +1,20 @@
import re


class UniformTrivyVulnID:
    def return_uniformed_vulnid(self, vulnid):
        if vulnid is None:
            return vulnid
        if "cve" in vulnid.lower():
            return vulnid
        if "khv" in vulnid.lower():
            temp = re.compile("([a-zA-Z-_]+)([0-9]+)")
            number = str(temp.match(vulnid).groups()[1]).zfill(3)
            avd_category = str(temp.match(vulnid.lower()).groups()[0])
            return avd_category.upper() + number
        if "ksv" in vulnid.lower() or "kcv" in vulnid.lower():
            temp = re.compile("([a-zA-Z-_]+)([0-9]+)")
            number = str(temp.match(vulnid).groups()[1]).zfill(4)
            avd_category = str(temp.match(vulnid.lower().replace("_", "").replace("-", "")).groups()[0].replace("avd", ""))
            return "AVD-" + avd_category.upper() + "-" + number
        return vulnid
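
A quick sketch of what the new helper returns for a few representative IDs (outputs traced from the code above; the IDs themselves are illustrative):

```python
normalizer = UniformTrivyVulnID()

print(normalizer.return_uniformed_vulnid("CVE-2024-1234"))  # CVE ids pass through unchanged
print(normalizer.return_uniformed_vulnid("khv045"))         # -> KHV045
print(normalizer.return_uniformed_vulnid("KSV014"))         # -> AVD-KSV-0014
print(normalizer.return_uniformed_vulnid("AVD-KSV-01010"))  # -> AVD-KSV-01010 (already uniform)
```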