From 674dc8d51fe821fea2011530b6c136349ece1067 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Tue, 17 Oct 2023 01:33:44 +0700 Subject: [PATCH] Add translation, fix bug, and some improvements for SARIF parser (#8822) * Add translation, fix bug, and some improvements * Improve unittest: remove region, contextRegion field from some result. * Add markdown for message in codeFlows, fix handler, and fix unittest * Fix unittest * Fix unittest * Fix unittest --------- Co-authored-by: Dmitry Mukovkin --- dojo/tools/sarif/parser.py | 63 ++++++++++++++--------- unittests/scans/sarif/codeQL-output.sarif | 12 ----- unittests/tools/test_sarif_parser.py | 8 +-- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/dojo/tools/sarif/parser.py b/dojo/tools/sarif/parser.py index d604279218..14d8184957 100644 --- a/dojo/tools/sarif/parser.py +++ b/dojo/tools/sarif/parser.py @@ -3,6 +3,8 @@ import re import textwrap import dateutil.parser +from django.utils.translation import gettext as _ + from dojo.tools.parser_test import ParserTest from dojo.models import Finding @@ -214,29 +216,43 @@ def get_snippet(result): def get_codeFlowsDescription(codeFlows): description = "" for codeFlow in codeFlows: - if "threadFlows" not in codeFlow: - continue - for threadFlow in codeFlow["threadFlows"]: + for threadFlow in codeFlow.get('threadFlows', []): if "locations" not in threadFlow: continue - description = "**Code flow:**\n" - for location in threadFlow["locations"]: - physicalLocation = location["location"]["physicalLocation"] - region = physicalLocation["region"] - description += ( - "\t" + physicalLocation["artifactLocation"]["uri"] - if "byteOffset" in region - else "\t" - + physicalLocation["artifactLocation"]["uri"] - + ":" - + str(region["startLine"]) - ) + description = f"**{_('Code flow')}:**\n" + line = 1 + + for location in threadFlow.get('locations', []): + physicalLocation = location.get('location', {}).get('physicalLocation', {}) + region = physicalLocation.get("region", {}) + uri = physicalLocation.get("artifactLocation").get("uri") + + start_line = "" + start_column = "" + snippet = "" + + if "startLine" in region: + start_line = f":L{str(region.get('startLine'))}" + if "startColumn" in region: - description += ":" + str(region["startColumn"]) + start_column = f":C{str(region.get('startColumn'))}" + if "snippet" in region: - description += "\t-\t" + region["snippet"]["text"] - description += "\n" + snippet = f"\t-\t{region.get('snippet').get('text')}" + + description += f"{line}. {uri}{start_line}{start_column}{snippet}\n" + + if 'message' in location.get('location', {}): + message_field = location.get('location', {}).get('message', {}) + if 'markdown' in message_field: + message = message_field.get('markdown', '') + else: + message = message_field.get('text', '') + + description += f"\t{message}\n" + + line += 1 return description @@ -253,16 +269,14 @@ def get_description(result, rule): description += "**Snippet:**\n```{}```\n".format(get_snippet(result)) if rule is not None: if "name" in rule: - description += "**Rule name:** {}\n".format(rule.get("name")) + description += f"**{_('Rule name')}:** {rule.get('name')}\n" shortDescription = "" if "shortDescription" in rule: shortDescription = get_message_from_multiformatMessageString( rule["shortDescription"], rule ) if shortDescription != message: - description += "**Rule short description:** {}\n".format( - shortDescription - ) + description += f"**{_('Rule short description')}:** {shortDescription}\n" if "fullDescription" in rule: fullDescription = get_message_from_multiformatMessageString( rule["fullDescription"], rule @@ -271,9 +285,7 @@ def get_description(result, rule): fullDescription != message and fullDescription != shortDescription ): - description += "**Rule full description:** {}\n".format( - fullDescription - ) + description += f"**{_('Rule full description')}:** {fullDescription}\n" if len(result.get("codeFlows", [])) > 0: description += get_codeFlowsDescription(result["codeFlows"]) @@ -420,6 +432,7 @@ def get_item(result, rules, artifacts, run_date): # manage tags provided in the report and rule and remove duplicated tags = list(set(get_properties_tags(rule) + get_properties_tags(result))) + tags = [s.removeprefix('external/cwe/') for s in tags] finding.tags = tags # manage fingerprints diff --git a/unittests/scans/sarif/codeQL-output.sarif b/unittests/scans/sarif/codeQL-output.sarif index a01a8779d9..3da6a9aeb1 100644 --- a/unittests/scans/sarif/codeQL-output.sarif +++ b/unittests/scans/sarif/codeQL-output.sarif @@ -5876,18 +5876,6 @@ "uri" : "bad/libapi.py", "uriBaseId" : "%SRCROOT%", "index" : 31 - }, - "region" : { - "startLine" : 8, - "startColumn" : 12, - "endColumn" : 20 - }, - "contextRegion" : { - "startLine" : 6, - "endLine" : 10, - "snippet" : { - "text" : "\n\ndef keygen(username, password=None):\n\n if password:\n" - } } }, "message" : { diff --git a/unittests/tools/test_sarif_parser.py b/unittests/tools/test_sarif_parser.py index 7bfa4c944c..8902f84630 100644 --- a/unittests/tools/test_sarif_parser.py +++ b/unittests/tools/test_sarif_parser.py @@ -57,9 +57,11 @@ def test_example2_report(self): **Rule short description:** A variable was used without being initialized. **Rule full description:** A variable was used without being initialized. This can result in runtime errors such as null reference exceptions. **Code flow:** -\tcollections/list.h:15\t-\tint *ptr; -\tcollections/list.h:15\t-\toffset = (y + z) * q + 1; -\tcollections/list.h:25\t-\tadd_core(ptr, offset, val)""" +1. collections/list.h:L15\t-\tint *ptr; +\tVariable `ptr` declared. +2. collections/list.h:L15\t-\toffset = (y + z) * q + 1; +3. collections/list.h:L25\t-\tadd_core(ptr, offset, val) +\tUninitialized variable `ptr` passed to method `add_core`.""" self.assertEqual(description, item.description) self.assertEqual(datetime.datetime(2016, 7, 16, 14, 19, 1, tzinfo=datetime.timezone.utc), item.date) for finding in findings: