diff --git a/floss/qs/main.py b/floss/qs/main.py index 355c033e8..8cc2a542c 100644 --- a/floss/qs/main.py +++ b/floss/qs/main.py @@ -11,6 +11,7 @@ import functools import itertools import contextlib +from collections import defaultdict from typing import Set, Dict, List, Union, Tuple, Literal, Callable, Iterable, Optional, Sequence from dataclasses import field, dataclass @@ -651,13 +652,21 @@ def tag_strings(self, taggers: Sequence[Tagger]): this can be overridden, if a subclass has more ways of tagging strings, such as a PE file and code/reloc regions. """ + string_counts = defaultdict(int) + tagged_strings: List[TaggedString] = [] + for string in self.strings: # at this moment, the list of strings contains only ExtractedStrings. # this routine will transform them into TaggedStrings. assert isinstance(string, ExtractedString) tags: Set[Tag] = set() + string_counts[string.string] += 1 + + if string_counts[string.string] > 1: + tags.add("#duplicate") + for tagger in taggers: tags.update(tagger(string)) @@ -1233,6 +1242,7 @@ def main(): tag_rules: TagRules = { "#capa": "highlight", "#common": "mute", + "#duplicate": "mute", "#code": "hide", "#reloc": "hide", # lib strings are muted (default)