Skip to content

Commit

Permalink
Add translation, fix bug, and some improvements for SARIF parser (#8822)
Browse files Browse the repository at this point in the history
* Add translation, fix bug, and some improvements

* Improve unittest: remove the region and contextRegion fields from some results.

* Add markdown for message in codeFlows, fix handler, and fix unittest

* Fix unittest

* Fix unittest

* Fix unittest

---------

Co-authored-by: Dmitry Mukovkin <[email protected]>
  • Loading branch information
shipko and Dmitry Mukovkin authored Oct 16, 2023
1 parent cebde8d commit 674dc8d
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 40 deletions.
63 changes: 38 additions & 25 deletions dojo/tools/sarif/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import re
import textwrap
import dateutil.parser
from django.utils.translation import gettext as _

from dojo.tools.parser_test import ParserTest
from dojo.models import Finding

Expand Down Expand Up @@ -214,29 +216,43 @@ def get_snippet(result):
def get_codeFlowsDescription(codeFlows):
description = ""
for codeFlow in codeFlows:
if "threadFlows" not in codeFlow:
continue
for threadFlow in codeFlow["threadFlows"]:
for threadFlow in codeFlow.get('threadFlows', []):
if "locations" not in threadFlow:
continue

description = "**Code flow:**\n"
for location in threadFlow["locations"]:
physicalLocation = location["location"]["physicalLocation"]
region = physicalLocation["region"]
description += (
"\t" + physicalLocation["artifactLocation"]["uri"]
if "byteOffset" in region
else "\t"
+ physicalLocation["artifactLocation"]["uri"]
+ ":"
+ str(region["startLine"])
)
description = f"**{_('Code flow')}:**\n"
line = 1

for location in threadFlow.get('locations', []):
physicalLocation = location.get('location', {}).get('physicalLocation', {})
region = physicalLocation.get("region", {})
uri = physicalLocation.get("artifactLocation").get("uri")

start_line = ""
start_column = ""
snippet = ""

if "startLine" in region:
start_line = f":L{str(region.get('startLine'))}"

if "startColumn" in region:
description += ":" + str(region["startColumn"])
start_column = f":C{str(region.get('startColumn'))}"

if "snippet" in region:
description += "\t-\t" + region["snippet"]["text"]
description += "\n"
snippet = f"\t-\t{region.get('snippet').get('text')}"

description += f"{line}. {uri}{start_line}{start_column}{snippet}\n"

if 'message' in location.get('location', {}):
message_field = location.get('location', {}).get('message', {})
if 'markdown' in message_field:
message = message_field.get('markdown', '')
else:
message = message_field.get('text', '')

description += f"\t{message}\n"

line += 1

return description

Expand All @@ -253,16 +269,14 @@ def get_description(result, rule):
description += "**Snippet:**\n```{}```\n".format(get_snippet(result))
if rule is not None:
if "name" in rule:
description += "**Rule name:** {}\n".format(rule.get("name"))
description += f"**{_('Rule name')}:** {rule.get('name')}\n"
shortDescription = ""
if "shortDescription" in rule:
shortDescription = get_message_from_multiformatMessageString(
rule["shortDescription"], rule
)
if shortDescription != message:
description += "**Rule short description:** {}\n".format(
shortDescription
)
description += f"**{_('Rule short description')}:** {shortDescription}\n"
if "fullDescription" in rule:
fullDescription = get_message_from_multiformatMessageString(
rule["fullDescription"], rule
Expand All @@ -271,9 +285,7 @@ def get_description(result, rule):
fullDescription != message
and fullDescription != shortDescription
):
description += "**Rule full description:** {}\n".format(
fullDescription
)
description += f"**{_('Rule full description')}:** {fullDescription}\n"

if len(result.get("codeFlows", [])) > 0:
description += get_codeFlowsDescription(result["codeFlows"])
Expand Down Expand Up @@ -420,6 +432,7 @@ def get_item(result, rules, artifacts, run_date):

# manage tags provided in the report and rule and remove duplicated
tags = list(set(get_properties_tags(rule) + get_properties_tags(result)))
tags = [s.removeprefix('external/cwe/') for s in tags]
finding.tags = tags

# manage fingerprints
Expand Down
12 changes: 0 additions & 12 deletions unittests/scans/sarif/codeQL-output.sarif
Original file line number Diff line number Diff line change
Expand Up @@ -5876,18 +5876,6 @@
"uri" : "bad/libapi.py",
"uriBaseId" : "%SRCROOT%",
"index" : 31
},
"region" : {
"startLine" : 8,
"startColumn" : 12,
"endColumn" : 20
},
"contextRegion" : {
"startLine" : 6,
"endLine" : 10,
"snippet" : {
"text" : "\n\ndef keygen(username, password=None):\n\n if password:\n"
}
}
},
"message" : {
Expand Down
8 changes: 5 additions & 3 deletions unittests/tools/test_sarif_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,11 @@ def test_example2_report(self):
**Rule short description:** A variable was used without being initialized.
**Rule full description:** A variable was used without being initialized. This can result in runtime errors such as null reference exceptions.
**Code flow:**
\tcollections/list.h:15\t-\tint *ptr;
\tcollections/list.h:15\t-\toffset = (y + z) * q + 1;
\tcollections/list.h:25\t-\tadd_core(ptr, offset, val)"""
1. collections/list.h:L15\t-\tint *ptr;
\tVariable `ptr` declared.
2. collections/list.h:L15\t-\toffset = (y + z) * q + 1;
3. collections/list.h:L25\t-\tadd_core(ptr, offset, val)
\tUninitialized variable `ptr` passed to method `add_core`."""
self.assertEqual(description, item.description)
self.assertEqual(datetime.datetime(2016, 7, 16, 14, 19, 1, tzinfo=datetime.timezone.utc), item.date)
for finding in findings:
Expand Down

0 comments on commit 674dc8d

Please sign in to comment.