Skip to content

Commit

Permalink
Indicate duplicate string (#953)
Browse files Browse the repository at this point in the history
* introduced #duplicate tag

* Count string occurrences and tag strings seen more than once as #duplicate

---------

Co-authored-by: Moritz <[email protected]>
  • Loading branch information
ooprathamm and mr-tz authored Mar 8, 2024
1 parent 88a3485 commit 24bf661
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions floss/qs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import functools
import itertools
import contextlib
from collections import defaultdict
from typing import Set, Dict, List, Union, Tuple, Literal, Callable, Iterable, Optional, Sequence
from dataclasses import field, dataclass

Expand Down Expand Up @@ -651,13 +652,21 @@ def tag_strings(self, taggers: Sequence[Tagger]):
this can be overridden, if a subclass has more ways of tagging strings,
such as a PE file and code/reloc regions.
"""
string_counts = defaultdict(int)

tagged_strings: List[TaggedString] = []

for string in self.strings:
# at this moment, the list of strings contains only ExtractedStrings.
# this routine will transform them into TaggedStrings.
assert isinstance(string, ExtractedString)
tags: Set[Tag] = set()

string_counts[string.string] += 1

if string_counts[string.string] > 1:
tags.add("#duplicate")

for tagger in taggers:
tags.update(tagger(string))

Expand Down Expand Up @@ -1233,6 +1242,7 @@ def main():
tag_rules: TagRules = {
"#capa": "highlight",
"#common": "mute",
"#duplicate": "mute",
"#code": "hide",
"#reloc": "hide",
# lib strings are muted (default)
Expand Down

0 comments on commit 24bf661

Please sign in to comment.