From 5a4b7b6e44b10cab86b8cf3ebeb6aedd0851feff Mon Sep 17 00:00:00 2001 From: ooprathamm Date: Thu, 29 Feb 2024 20:44:30 +0530 Subject: [PATCH 1/4] Added get_duplicate_strings(), introduced #duplicate tag --- floss/qs/main.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/floss/qs/main.py b/floss/qs/main.py index 02b2409ee..51a4ce155 100644 --- a/floss/qs/main.py +++ b/floss/qs/main.py @@ -355,6 +355,18 @@ def render_string(width: int, s: TaggedString, tag_rules: TagRules) -> Text: return line +def get_duplicate_strings(strings: Sequence[ExtractedString]) -> Set[str]: + seen = set() + duplicates = set() + + for s in strings: + if s.string in seen: + duplicates.add(s.string) + seen.add(s.string) + + return duplicates + + def get_reloc_offsets(slice: Slice, pe: pefile.PE) -> Set[int]: ret: Set[int] = set() @@ -644,13 +656,20 @@ def tag_strings(self, taggers: Sequence[Tagger]): this can be overridden, if a subclass has more ways of tagging strings, such as a PE file and code/reloc regions. """ + duplicate_strings = get_duplicate_strings(self.strings) + tagged_strings: List[TaggedString] = [] + for string in self.strings: # at this moment, the list of strings contains only ExtractedStrings. # this routine will transform them into TaggedStrings. 
assert isinstance(string, ExtractedString) tags: Set[Tag] = set() + # check for duplicates + if string.string in duplicate_strings: + tags.add("#duplicate") + for tagger in taggers: tags.update(tagger(string)) @@ -1193,6 +1212,7 @@ def main(): tag_rules: TagRules = { "#capa": "highlight", "#common": "mute", + "#duplicate": "mute", "#code": "hide", "#reloc": "hide", # lib strings are muted (default) From ff0d47a28715ccb741cffb0c35b72e2100001ccc Mon Sep 17 00:00:00 2001 From: ooprathamm Date: Fri, 1 Mar 2024 11:36:41 +0530 Subject: [PATCH 2/4] Count string occurrences and Tag > 1 --- floss/qs/main.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/floss/qs/main.py b/floss/qs/main.py index 51a4ce155..7ad2e894a 100644 --- a/floss/qs/main.py +++ b/floss/qs/main.py @@ -355,18 +355,6 @@ def render_string(width: int, s: TaggedString, tag_rules: TagRules) -> Text: return line -def get_duplicate_strings(strings: Sequence[ExtractedString]) -> Set[str]: - seen = set() - duplicates = set() - - for s in strings: - if s.string in seen: - duplicates.add(s.string) - seen.add(s.string) - - return duplicates - - def get_reloc_offsets(slice: Slice, pe: pefile.PE) -> Set[int]: ret: Set[int] = set() @@ -656,7 +644,7 @@ def tag_strings(self, taggers: Sequence[Tagger]): this can be overridden, if a subclass has more ways of tagging strings, such as a PE file and code/reloc regions. 
""" - duplicate_strings = get_duplicate_strings(self.strings) + string_counts = {} tagged_strings: List[TaggedString] = [] @@ -666,10 +654,11 @@ def tag_strings(self, taggers: Sequence[Tagger]): assert isinstance(string, ExtractedString) tags: Set[Tag] = set() - # check for duplicates - if string.string in duplicate_strings: - tags.add("#duplicate") + string_counts[string.string] = string_counts.get(string.string, 0) + 1 + if string_counts[string.string] > 1: + tags.add("#duplicate") + for tagger in taggers: tags.update(tagger(string)) From 275f59733e44f75cf739b04678c83ce5c050197b Mon Sep 17 00:00:00 2001 From: ooprathamm Date: Fri, 8 Mar 2024 14:24:51 +0530 Subject: [PATCH 3/4] defaultdict for string_counts --- floss/qs/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/floss/qs/main.py b/floss/qs/main.py index 23a1c7a21..c901ad771 100644 --- a/floss/qs/main.py +++ b/floss/qs/main.py @@ -11,6 +11,7 @@ import functools import itertools import contextlib +from collections import defaultdict from typing import Set, Dict, List, Union, Tuple, Literal, Callable, Iterable, Optional, Sequence from dataclasses import field, dataclass @@ -651,7 +652,7 @@ def tag_strings(self, taggers: Sequence[Tagger]): this can be overridden, if a subclass has more ways of tagging strings, such as a PE file and code/reloc regions. 
""" - string_counts = {} + string_counts = defaultdict(int) tagged_strings: List[TaggedString] = [] From 2732e4b196eb1ad01133ba7b5f281116472ee9eb Mon Sep 17 00:00:00 2001 From: ooprathamm <89736193+ooprathamm@users.noreply.github.com> Date: Fri, 8 Mar 2024 14:59:36 +0530 Subject: [PATCH 4/4] Update floss/qs/main.py Co-authored-by: Moritz --- floss/qs/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/floss/qs/main.py b/floss/qs/main.py index c901ad771..8cc2a542c 100644 --- a/floss/qs/main.py +++ b/floss/qs/main.py @@ -662,7 +662,7 @@ def tag_strings(self, taggers: Sequence[Tagger]): assert isinstance(string, ExtractedString) tags: Set[Tag] = set() - string_counts[string.string] = string_counts.get(string.string, 0) + 1 + string_counts[string.string] += 1 if string_counts[string.string] > 1: tags.add("#duplicate")