added evaluation to sflkit
smythi93 committed Jul 27, 2024
1 parent c4fcc59 commit 03de84d
Showing 4 changed files with 303 additions and 3 deletions.
pyproject.toml (2 changes: 1 addition & 1 deletion)
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "sflkit"
version = "0.2.21"
version = "0.2.22"
authors = [
{ name = "Marius Smytzek", email = "[email protected]" },
]
src/sflkit/analysis/analyzer.py (22 changes: 20 additions & 2 deletions)
@@ -19,6 +19,9 @@ def __init__(
self.irrelevant_event_files = irrelevant_event_files
self.model = Model(factory)
self.paths: Dict[int, os.PathLike] = dict()
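        # summary statistics over the most recently computed suggestions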
self.max_suspiciousness = 0
self.min_suspiciousness = 0
self.avg_suspiciousness = 0

def _analyze(self, event_file):
self.model.prepare(event_file)
@@ -48,20 +51,35 @@ def get_analysis_by_type(self, type_: AnalysisType) -> Set[AnalysisObject]:

def get_sorted_suggestions(
self, base_dir, metric: Callable = None, type_: AnalysisType = None
-    ):
+    ) -> List[Suggestion]:
if type_:
objects = self.get_analysis_by_type(type_)
else:
objects = self.get_analysis()
return self.get_sorted_suggestions_from_analysis(base_dir, objects, metric)

def get_sorted_suggestions_from_analysis(
self, base_dir, analysis: Set[AnalysisObject], metric: Callable = None
) -> List[Suggestion]:
suggestions = dict()
max_suspiciousness = float("-inf")
min_suspiciousness = float("inf")
avg_suspiciousness = 0
for suggestion in map(
-            lambda p: p.get_suggestion(metric=metric, base_dir=base_dir), objects
+            lambda p: p.get_suggestion(metric=metric, base_dir=base_dir), analysis
):
max_suspiciousness = max(max_suspiciousness, suggestion.suspiciousness)
min_suspiciousness = min(min_suspiciousness, suggestion.suspiciousness)
avg_suspiciousness += suggestion.suspiciousness
if suggestion.suspiciousness not in suggestions:
suggestions[suggestion.suspiciousness] = set(suggestion.lines)
else:
suggestions[suggestion.suspiciousness] |= set(suggestion.lines)

self.max_suspiciousness = max_suspiciousness
self.min_suspiciousness = min_suspiciousness
self.avg_suspiciousness = avg_suspiciousness / len(analysis)

return sorted(
[
Suggestion(list(lines), suspiciousness)
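
For orientation, a minimal usage sketch (not part of this commit): it assumes an Analyzer that has already been constructed and run, and report_stats is a hypothetical helper rather than an sflkit API.

def report_stats(analyzer, base_dir):
    suggestions = analyzer.get_sorted_suggestions(base_dir)
    # the three new attributes are populated as a side effect of
    # get_sorted_suggestions_from_analysis()
    print(
        f"suspiciousness: max={analyzer.max_suspiciousness}, "
        f"min={analyzer.min_suspiciousness}, avg={analyzer.avg_suspiciousness}"
    )
    return suggestions
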
src/sflkit/evaluation.py (116 changes: 116 additions & 0 deletions)
@@ -0,0 +1,116 @@
import enum
import random
from typing import List, Dict, Callable, Set, Optional

from sflkit.analysis.suggestion import Suggestion, Location


class Average:
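    """Running mean: folds values one at a time into an incrementally updated average."""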
def __init__(self):
self.number_of_locations = 0

def average(self, suspiciousness: float, current_suspiciousness: float):
current_suspiciousness *= self.number_of_locations
self.number_of_locations += 1
return (current_suspiciousness + suspiciousness) / self.number_of_locations


class Scenario(enum.Enum):
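    """How multiple faults are aggregated: best case (first hit), average, or worst case."""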
BEST_CASE = "best_case"
AVG_CASE = "avg_case"
WORST_CASE = "worst_case"


class Rank:
def __init__(
self,
suggestions: List[Suggestion],
metric: Callable[[float, float], float] = max,
default_suspiciousness: float = float("-inf"),
):
self.suggestions = sorted(suggestions, reverse=True)
self.suspiciousness: Dict[Location, float] = dict()
self.ranks: Dict[float, List[Location]] = dict()
self.locations: Dict[Location, float] = dict()
current_rank = 1
for i, suggestion in enumerate(self.suggestions):
lines = suggestion.lines
if len(lines) == 0:
continue
elif len(lines) == 1:
rank = current_rank
current_rank += 1
else:
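                # tied locations share one rank: the midpoint of the rank block
                # from current_rank - 1 to current_rank - 1 + len(lines)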
                rank = len(lines) / 2 + (current_rank - 1)
current_rank += len(lines)
self.ranks[rank] = lines
for line in lines:
                self.suspiciousness[line] = metric(
                    suggestion.suspiciousness,
                    # read the previous score from self.suspiciousness;
                    # self.locations stores ranks, so reading it here would
                    # mix ranks into suspiciousness scores
                    self.suspiciousness.get(line, default_suspiciousness),
                )
self.locations[line] = rank
self.number_of_locations = len(self.locations)

def top_n(
self,
faulty: Set[Location],
n: int,
scenario: Optional[Scenario] = None,
repeat: int = 1000,
) -> float:
top_n_locations = list()
for suggestion in self.suggestions:
if len(top_n_locations) >= n:
break
for line in suggestion.lines:
if line not in top_n_locations:
top_n_locations.append(line)
if len(top_n_locations) <= n:
return self._top_n(faulty, top_n_locations, scenario)
else:
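            # the n-th slot falls inside a tie, so there is no unique top-n set;
            # estimate by averaging the metric over random n-sized samples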
sum_ = 0
for _ in range(repeat):
sum_ += self._top_n(
faulty, random.sample(top_n_locations, k=n), scenario
)
return sum_ / repeat

@staticmethod
def _top_n(
faulty: Set[Location],
top_n_locations: List[Location],
scenario: Optional[Scenario] = None,
) -> float:
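        # BEST_CASE: 1 if any fault made the top n (hit rate);
        # WORST_CASE: fraction of all faults found (recall);
        # AVG_CASE: recall against half the faults, capped at 1;
        # default: fraction of the inspected locations that are faulty (precision)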
found = len(faulty.intersection(top_n_locations))
if scenario == Scenario.BEST_CASE:
return 1 if found > 0 else 0
elif scenario == Scenario.WORST_CASE:
return found / len(faulty)
elif scenario == Scenario.AVG_CASE:
return min(found / (len(faulty) / 2), 1)
else:
return found / len(top_n_locations)

def get_rank(
self, faulty: Set[Location], scenario: Optional[Scenario] = None
) -> float:
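        # BEST_CASE: best (smallest) faulty rank; WORST_CASE: worst (largest);
        # AVG_CASE: the lower median of the faulty ranks; default: their mean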
if scenario == Scenario.BEST_CASE:
rank = min(self.locations[location] for location in faulty)
elif scenario == Scenario.WORST_CASE:
rank = max(self.locations[location] for location in faulty)
elif scenario == Scenario.AVG_CASE:
rank = sorted([self.locations[location] for location in faulty])[
max(len(faulty) // 2 - 1, 0)
]
else:
rank = sum(self.locations[location] for location in faulty) / len(faulty)
return rank

def exam(self, faulty: Set[Location], scenario: Optional[Scenario] = None) -> float:
return self.get_rank(faulty, scenario) / self.number_of_locations

def wasted_effort(
self, faulty: Set[Location], scenario: Optional[Scenario] = None
) -> float:
return self.get_rank(faulty, scenario)
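
A short worked example (not part of the commit) makes the ranking concrete. It assumes, as the tests below do, that Suggestion sorts by suspiciousness and that Location is hashable:

from sflkit.analysis.suggestion import Suggestion, Location
from sflkit.evaluation import Rank, Scenario

# 0.9 -> rank 1; the 0.5 tie spans ranks 2-3 and shares the midpoint
# rank 2/2 + (2 - 1) = 2.0; 0.1 -> rank 4
rank = Rank([
    Suggestion([Location("a.py", 1)], 0.9),
    Suggestion([Location("a.py", 2), Location("a.py", 3)], 0.5),
    Suggestion([Location("a.py", 4)], 0.1),
])
faulty = {Location("a.py", 2)}

print(rank.top_n(faulty, 1, scenario=Scenario.BEST_CASE))  # 0.0: rank 1 is not faulty
print(rank.get_rank(faulty, scenario=Scenario.BEST_CASE))  # 2.0: the fault's shared tie rank
print(rank.exam(faulty, scenario=Scenario.BEST_CASE))      # 2.0 / 4 locations = 0.5
print(rank.wasted_effort(faulty, Scenario.BEST_CASE))      # 2.0: identical to the rank
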
tests/test_evaluation.py (166 changes: 166 additions & 0 deletions)
@@ -0,0 +1,166 @@
import random
from typing import Optional

from sflkit.analysis.suggestion import Suggestion, Location
from sflkit.evaluation import Rank, Scenario
from utils import BaseTest


class TestEvaluation(BaseTest):
def setUp(self):
random.seed(0)
self.suggestions = [
Suggestion([Location("a.py", 1)], 0.5),
Suggestion([Location("a.py", 2)], 0.3),
Suggestion([Location("a.py", 3)], 0.7),
Suggestion([Location("a.py", 4)], 0.1),
Suggestion([Location("a.py", 5)], 0.9),
Suggestion([Location("a.py", 6)], 0.2),
Suggestion([Location("a.py", 7)], 0.8),
Suggestion([Location("a.py", 8)], 0.4),
Suggestion([Location("a.py", 9)], 0.6),
Suggestion([Location("a.py", 10)], 0.0),
]

def get_rank(self, multi: bool = False):
suggestions = (
self.suggestions
if not multi
else self.suggestions
+ [
Suggestion(
[Location("a.py", 11), Location("a.py", 12), Location("a.py", 13)],
0.55,
)
]
)
return (
Rank(suggestions),
{
Location("a.py", 2),
Location("a.py", 6),
Location("a.py", 7),
Location("a.py", 8),
},
)

def get_top_n(
self, n: int, scenario: Optional[Scenario] = None, multi: bool = False
):
rank, locations = self.get_rank(multi=multi)
return rank.top_n(
locations,
n,
scenario=scenario,
repeat=10000,
)

def get_exam(self, scenario: Optional[Scenario] = None):
rank, locations = self.get_rank(multi=True)
return rank.exam(
locations,
scenario=scenario,
)

def get_wasted_effort(self, scenario: Optional[Scenario] = None):
rank, locations = self.get_rank(multi=True)
return rank.wasted_effort(
locations,
scenario=scenario,
)

def test_top_1(self):
top_1 = self.get_top_n(1)
self.assertAlmostEqual(0, top_1, delta=self.DELTA)

def test_top_5(self):
top_5 = self.get_top_n(5)
self.assertAlmostEqual(0.2, top_5, delta=self.DELTA)

def test_top_10(self):
top_10 = self.get_top_n(10)
self.assertAlmostEqual(0.4, top_10, delta=self.DELTA)

def test_top_5_multi(self):
top_5_multi = self.get_top_n(5, multi=True)
self.assertAlmostEqual(0.14285, top_5_multi, delta=0.05)

def test_top_1_avg(self):
top_1_avg = self.get_top_n(1, scenario=Scenario.AVG_CASE)
self.assertAlmostEqual(0, top_1_avg, delta=self.DELTA)

def test_top_5_avg(self):
top_5_avg = self.get_top_n(5, scenario=Scenario.AVG_CASE)
self.assertAlmostEqual(0.5, top_5_avg, delta=self.DELTA)

def test_top_10_avg(self):
top_10_avg = self.get_top_n(10, scenario=Scenario.AVG_CASE)
self.assertAlmostEqual(1, top_10_avg, delta=self.DELTA)

def test_top_5_avg_multi(self):
top_5_avg_multi = self.get_top_n(5, scenario=Scenario.AVG_CASE, multi=True)
self.assertAlmostEqual(0.35714, top_5_avg_multi, delta=0.005)

def test_top_1_best(self):
top_1_best = self.get_top_n(1, scenario=Scenario.BEST_CASE)
self.assertAlmostEqual(0, top_1_best, delta=self.DELTA)

def test_top_5_best(self):
top_5_best = self.get_top_n(5, scenario=Scenario.BEST_CASE)
self.assertAlmostEqual(1, top_5_best, delta=self.DELTA)

def test_top_10_best(self):
top_10_best = self.get_top_n(10, scenario=Scenario.BEST_CASE)
self.assertAlmostEqual(1, top_10_best, delta=self.DELTA)

def test_top_5_best_multi(self):
top_5_best_multi = self.get_top_n(5, scenario=Scenario.BEST_CASE, multi=True)
self.assertAlmostEqual(0.71429, top_5_best_multi, delta=0.005)

def test_top_1_worst(self):
top_1_worst = self.get_top_n(1, scenario=Scenario.WORST_CASE)
self.assertAlmostEqual(0, top_1_worst, delta=self.DELTA)

def test_top_5_worst(self):
top_5_worst = self.get_top_n(5, scenario=Scenario.WORST_CASE)
self.assertAlmostEqual(0.25, top_5_worst, delta=self.DELTA)

def test_top_10_worst(self):
top_10_worst = self.get_top_n(10, scenario=Scenario.WORST_CASE)
self.assertAlmostEqual(1, top_10_worst, delta=self.DELTA)

def test_top_5_worst_multi(self):
top_5_worst_multi = self.get_top_n(5, scenario=Scenario.WORST_CASE, multi=True)
self.assertAlmostEqual(0.17857, top_5_worst_multi, delta=0.005)

def test_exam_avg(self):
exam_avg = self.get_exam(scenario=Scenario.AVG_CASE)
self.assertAlmostEqual(9 / 13, exam_avg, delta=self.DELTA)

def test_exam_best(self):
exam_best = self.get_exam(scenario=Scenario.BEST_CASE)
self.assertAlmostEqual(2 / 13, exam_best, delta=self.DELTA)

def test_exam_worst(self):
exam_worst = self.get_exam(scenario=Scenario.WORST_CASE)
self.assertAlmostEqual(11 / 13, exam_worst, delta=self.DELTA)

def test_exam(self):
exam = self.get_exam()
self.assertAlmostEqual((2 + 9 + 10 + 11) / (13 * 4), exam, delta=self.DELTA)

def test_wasted_effort_avg(self):
wasted_effort_avg = self.get_wasted_effort(scenario=Scenario.AVG_CASE)
self.assertAlmostEqual(9, wasted_effort_avg, delta=self.DELTA)

def test_wasted_effort_best(self):
wasted_effort_best = self.get_wasted_effort(scenario=Scenario.BEST_CASE)
self.assertAlmostEqual(2, wasted_effort_best, delta=self.DELTA)

def test_wasted_effort_worst(self):
wasted_effort_worst = self.get_wasted_effort(scenario=Scenario.WORST_CASE)
self.assertAlmostEqual(11, wasted_effort_worst, delta=self.DELTA)

def test_wasted_effort(self):
wasted_effort = self.get_wasted_effort()
self.assertAlmostEqual((2 + 9 + 10 + 11) / 4, wasted_effort, delta=self.DELTA)
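
For reference, the exam and wasted-effort expectations follow directly from the 13-location ranking that the multi setup produces:

# Ranking in the multi setup (13 locations):
#   0.9 -> 1, 0.8 -> 2, 0.7 -> 3, 0.6 -> 4,
#   0.55 tie of three lines -> shared rank 3/2 + (5 - 1) = 5.5 (slots 5-7),
#   0.5 -> 8, 0.4 -> 9, 0.3 -> 10, 0.2 -> 11, 0.1 -> 12, 0.0 -> 13
# Faulty ranks: line 7 -> 2, line 8 -> 9, line 2 -> 10, line 6 -> 11, hence
# exam best = 2/13, avg (lower median) = 9/13, worst = 11/13,
# default = (2 + 9 + 10 + 11) / (4 * 13); wasted effort drops the /13.
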
