From 8333c79367634393e806e634a2f4e32936b63202 Mon Sep 17 00:00:00 2001 From: Ondrej Lichtner Date: Wed, 1 Nov 2023 15:40:51 +0100 Subject: [PATCH 1/8] RecipeCommon.Perf.Results: add metrics property This will be helpful for automation and easier implementation of classes that use the results - we can now write generic code that evaluates ANY result class by simply iterating the metrics to evaluate. Signed-off-by: Ondrej Lichtner --- .../Results/BaseMeasurementResults.py | 4 + .../Results/CPUMeasurementResults.py | 4 + .../Results/FlowMeasurementResults.py | 99 +++++++++++++------ .../RDMABandwidthMeasurementResults.py | 4 + .../Results/TcRunMeasurementResults.py | 4 + .../Results/XDPBenchMeasurementResults.py | 4 + 6 files changed, 90 insertions(+), 29 deletions(-) diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/BaseMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/BaseMeasurementResults.py index e5e26eda4..a8840376c 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/BaseMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/BaseMeasurementResults.py @@ -14,6 +14,10 @@ def measurement(self) -> BaseMeasurement: def warmup_duration(self): return self._warmup_duration + @property + def metrics(self) -> list[str]: + return [] + def align_data(self, start, end): return self diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/CPUMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/CPUMeasurementResults.py index 1b4c5a6ea..7f7a4fb21 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/CPUMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/CPUMeasurementResults.py @@ -26,6 +26,10 @@ def utilization(self): def utilization(self, value): self._utilization = value + @property + def metrics(self) -> list[str]: + return ['utilization'] + def describe(self): return "host {host} cpu '{cpu}' utilization: {average:.2f} +-{deviation:.2f} {unit} per second".format( host=self.host.hostid, diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py index eb503e73e..1d8732d4c 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py @@ -1,16 +1,29 @@ from lnst.RecipeCommon.Perf.Results import ParallelPerfResult -from lnst.RecipeCommon.Perf.Measurements.Results.BaseMeasurementResults import BaseMeasurementResults +from lnst.RecipeCommon.Perf.Measurements.Results.BaseMeasurementResults import ( + BaseMeasurementResults, +) class FlowMeasurementResults(BaseMeasurementResults): def __init__(self, measurement, flow, warmup_duration=0): - super(FlowMeasurementResults, self).__init__(measurement, warmup_duration) + super(FlowMeasurementResults, self).__init__( + measurement, warmup_duration + ) self._flow = flow self._generator_results = None self._generator_cpu_stats = None self._receiver_results = None self._receiver_cpu_stats = None + @property + def metrics(self) -> list[str]: + return [ + "generator_results", + "generator_cpu_stats", + "receiver_results", + "receiver_cpu_stats", + ] + @property def flow(self): return self._flow @@ -77,7 +90,10 @@ def warmup_end(self): return max( [ parallel[self.warmup_duration - 1].end_timestamp - for parallel in (*self.generator_results, *self.receiver_results) + for parallel in ( + *self.generator_results, + *self.receiver_results, + ) ] ) @@ -89,18 +105,31 @@ def warmdown_start(self): return 
min( [ parallel[-self.warmup_duration].start_timestamp - for parallel in (*self.generator_results, *self.receiver_results) + for parallel in ( + *self.generator_results, + *self.receiver_results, + ) ] ) def time_slice(self, start, end): - result_copy = FlowMeasurementResults(self.measurement, self.flow, warmup_duration=0) + result_copy = FlowMeasurementResults( + self.measurement, self.flow, warmup_duration=0 + ) - result_copy.generator_cpu_stats = self.generator_cpu_stats.time_slice(start, end) - result_copy.receiver_cpu_stats = self.receiver_cpu_stats.time_slice(start, end) + result_copy.generator_cpu_stats = self.generator_cpu_stats.time_slice( + start, end + ) + result_copy.receiver_cpu_stats = self.receiver_cpu_stats.time_slice( + start, end + ) - result_copy.generator_results = self.generator_results.time_slice(start, end) - result_copy.receiver_results = self.receiver_results.time_slice(start, end) + result_copy.generator_results = self.generator_results.time_slice( + start, end + ) + result_copy.receiver_results = self.receiver_results.time_slice( + start, end + ) return result_copy @@ -110,29 +139,41 @@ def describe(self): receiver = self.receiver_results receiver_cpu = self.receiver_cpu_stats desc = [] - desc.append("Generator measured throughput: {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second." - .format(tput=generator.average, - deviation=generator.std_deviation, - percentage=self._deviation_percentage(generator), - unit=generator.unit)) - desc.append("Generator process CPU data: {cpu:.2f} +-{cpu_deviation:.2f} {cpu_unit} per second." - .format(cpu=generator_cpu.average, - cpu_deviation=generator_cpu.std_deviation, - cpu_unit=generator_cpu.unit)) - desc.append("Receiver measured throughput: {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second." - .format(tput=receiver.average, - deviation=receiver.std_deviation, - percentage=self._deviation_percentage(receiver), - unit=receiver.unit)) - desc.append("Receiver process CPU data: {cpu:.2f} +-{cpu_deviation:.2f} {cpu_unit} per second." 
- .format(cpu=receiver_cpu.average, - cpu_deviation=receiver_cpu.std_deviation, - cpu_unit=receiver_cpu.unit)) + desc.append( + "Generator measured throughput: {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second.".format( + tput=generator.average, + deviation=generator.std_deviation, + percentage=self._deviation_percentage(generator), + unit=generator.unit, + ) + ) + desc.append( + "Generator process CPU data: {cpu:.2f} +-{cpu_deviation:.2f} {cpu_unit} per second.".format( + cpu=generator_cpu.average, + cpu_deviation=generator_cpu.std_deviation, + cpu_unit=generator_cpu.unit, + ) + ) + desc.append( + "Receiver measured throughput: {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second.".format( + tput=receiver.average, + deviation=receiver.std_deviation, + percentage=self._deviation_percentage(receiver), + unit=receiver.unit, + ) + ) + desc.append( + "Receiver process CPU data: {cpu:.2f} +-{cpu_deviation:.2f} {cpu_unit} per second.".format( + cpu=receiver_cpu.average, + cpu_deviation=receiver_cpu.std_deviation, + cpu_unit=receiver_cpu.unit, + ) + ) return "\n".join(desc) @staticmethod def _deviation_percentage(result): try: - return (result.std_deviation/result.average) * 100 + return (result.std_deviation / result.average) * 100 except ZeroDivisionError: - return float('inf') if result.std_deviation >= 0 else float("-inf") + return float("inf") if result.std_deviation >= 0 else float("-inf") diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/RDMABandwidthMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/RDMABandwidthMeasurementResults.py index 84df3215f..a0fa82e09 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/RDMABandwidthMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/RDMABandwidthMeasurementResults.py @@ -9,6 +9,10 @@ def __init__(self, measurement: BaseMeasurement, flow: "Flow"): self._flow = flow + @property + def metrics(self) -> list[str]: + return ['bandwidth'] + @property def flow(self): return self._flow diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/TcRunMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/TcRunMeasurementResults.py index aa26ebeb2..18a42223b 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/TcRunMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/TcRunMeasurementResults.py @@ -18,6 +18,10 @@ def __init__( self._rule_install_rate: ParallelPerfResult = None self._run_success: bool = None + @property + def metrics(self) -> list[str]: + return ['rule_install_rate'] + @property def device(self) -> Device: return self._device diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/XDPBenchMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/XDPBenchMeasurementResults.py index 4ff535051..9965092e2 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/XDPBenchMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/XDPBenchMeasurementResults.py @@ -11,6 +11,10 @@ def __init__(self, *args, **kwargs): self._generator_results = ParallelPerfResult() # multiple instances of pktgen self._receiver_results = ParallelPerfResult() # single instance of xdpbench + @property + def metrics(self) -> list[str]: + return ['generator_results', 'receiver_results'] + def add_results(self, results): if results is None: return From 381462a8fd41714cad2e5fcb46ed45271fd0bbdf Mon Sep 17 00:00:00 2001 From: Ondrej Lichtner Date: Fri, 24 Nov 2023 16:23:23 +0100 Subject: [PATCH 2/8] BaseFlowMeasurement: move flow description into 
 the FlowMeasurementResults class

This is a better place for the generic flow description generation, and
it is more consistent with how the CPU measurement and CPU measurement
results are handled.

Signed-off-by: Ondrej Lichtner
---
 lnst/RecipeCommon/Perf/Measurements/BaseFlowMeasurement.py | 1 -
 .../Perf/Measurements/Results/FlowMeasurementResults.py    | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/lnst/RecipeCommon/Perf/Measurements/BaseFlowMeasurement.py b/lnst/RecipeCommon/Perf/Measurements/BaseFlowMeasurement.py
index e24cfccbe..176a751fd 100644
--- a/lnst/RecipeCommon/Perf/Measurements/BaseFlowMeasurement.py
+++ b/lnst/RecipeCommon/Perf/Measurements/BaseFlowMeasurement.py
@@ -92,7 +92,6 @@ def _report_flow_results(cls, recipe, flow_results):
         receiver_cpu = flow_results.receiver_cpu_stats

         desc = []
-        desc.append(str(flow_results.flow))
         desc.append(flow_results.describe())

         recipe_result = ResultType.PASS
diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py
index 1d8732d4c..7ac2c8729 100644
--- a/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py
+++ b/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py
@@ -139,6 +139,7 @@ def describe(self):
         receiver = self.receiver_results
         receiver_cpu = self.receiver_cpu_stats
         desc = []
+        desc.append(str(self.flow))
         desc.append(
             "Generator measured throughput: {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second.".format(
                 tput=generator.average,

From f014c4308c9d269b08d15c46b3545ae337694727 Mon Sep 17 00:00:00 2001
From: Ondrej Lichtner
Date: Fri, 24 Nov 2023 16:36:27 +0100
Subject: [PATCH 3/8] BaselineEvaluator: refactor to unify baseline evaluators

Now that MeasurementResults export a `metrics` property, baseline
evaluation can be refactored and unified to work with almost any
MeasurementResult in a much simpler manner.

This commit does just that. In addition, there are two related changes:

* the MetricComparison class is extended to contain more information so
  that we can unify it and remove the use of a "comparison dictionary"
* a BaselineEvaluationResult class is added to store all of the grouped
  MetricComparisons and to generate a nice description for them.

This changes the Result description format, which was previously more
customized to the individual MeasurementResults; IMO the new format is
actually more accurate, although maybe a little less nice to look at.

Finally, this gives us much more power to machine post-process the
baseline evaluations, so I've also removed the "good direction ->
improvement warning" override, which IMO should be done as a
post-processing step and not directly as a part of the evaluation
method.

The unification of the BaselineEvaluator means that we can completely
remove:

* BaselineFlowAverageEvaluator
* BaselineRDMABandwidthAverageEvaluator
* BaselineTcRunAverageEvaluator

We need to keep the BaselineCPUAverageEvaluator, as it still overrides
some methods wrt. how filtering and result grouping work for CPU
measurements.
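To illustrate the control flow this unification enables, here is a rough,
self-contained sketch. It does not use the real LNST classes: FakeFlowResult,
the plain percentage comparison and the threshold table are simplified
stand-ins. The point is that a single generic evaluator can now walk
result.metrics with getattr() and compare each metric against a baseline:

from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeFlowResult:
    # Simplified stand-in for a MeasurementResults class exposing `metrics`.
    generator_results: float
    receiver_results: float

    @property
    def metrics(self) -> list[str]:
        return ["generator_results", "receiver_results"]


def compare_metric(result, baseline, metric: str, threshold: Optional[float]):
    # Returns (status, text), mirroring the unified threshold comparison.
    if baseline is None:
        return "FAIL", f"{metric}: no baseline found"
    if threshold is None:
        return "FAIL", f"{metric}: no threshold found"
    current = getattr(result, metric)
    base = getattr(baseline, metric)
    diff = (current - base) / base * 100  # percent difference from baseline
    direction = "higher" if diff >= 0 else "lower"
    status = "PASS" if abs(diff) <= threshold else "FAIL"
    return status, (
        f"New {metric} average is {abs(diff):.2f}% {direction} from the "
        f"baseline. Allowed difference: {threshold}%"
    )


def evaluate(result, baseline, thresholds: dict[str, float]):
    # Generic loop: works for any result class that exposes `metrics`,
    # no per-measurement evaluator subclass needed.
    return [
        compare_metric(result, baseline, metric, thresholds.get(metric))
        for metric in result.metrics
    ]


if __name__ == "__main__":
    current = FakeFlowResult(generator_results=9.1e9, receiver_results=8.9e9)
    baseline = FakeFlowResult(generator_results=9.4e9, receiver_results=9.4e9)
    thresholds = {"generator_results": 5.0, "receiver_results": 5.0}
    for status, text in evaluate(current, baseline, thresholds):
        print(f"{status}: {text}")

In the real BaselineEvaluator below the comparison is driven by
result_averages_difference() and produces MetricComparison and
BaselineEvaluationResult objects, but the shape of the loop is the same,
which is what makes the per-measurement evaluators removable.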
Signed-off-by: Ondrej Lichtner --- .../Evaluators/BaselineCPUAverageEvaluator.py | 90 +--------- .../Perf/Evaluators/BaselineEvaluator.py | 155 ++++++++++++------ .../BaselineFlowAverageEvaluator.py | 111 ------------- .../BaselineRDMABandwidthAverageEvaluator.py | 64 -------- .../BaselineTcRunAverageEvaluator.py | 67 -------- lnst/RecipeCommon/Perf/Evaluators/__init__.py | 6 +- 6 files changed, 116 insertions(+), 377 deletions(-) delete mode 100644 lnst/RecipeCommon/Perf/Evaluators/BaselineFlowAverageEvaluator.py delete mode 100644 lnst/RecipeCommon/Perf/Evaluators/BaselineRDMABandwidthAverageEvaluator.py delete mode 100644 lnst/RecipeCommon/Perf/Evaluators/BaselineTcRunAverageEvaluator.py diff --git a/lnst/RecipeCommon/Perf/Evaluators/BaselineCPUAverageEvaluator.py b/lnst/RecipeCommon/Perf/Evaluators/BaselineCPUAverageEvaluator.py index 520e71c61..8232fca60 100644 --- a/lnst/RecipeCommon/Perf/Evaluators/BaselineCPUAverageEvaluator.py +++ b/lnst/RecipeCommon/Perf/Evaluators/BaselineCPUAverageEvaluator.py @@ -1,25 +1,23 @@ from __future__ import division -from typing import List, Dict +from typing import List, Dict, Optional from lnst.Controller.Recipe import BaseRecipe -from lnst.Controller.RecipeResults import ResultType from lnst.RecipeCommon.Perf.Recipe import RecipeConf as PerfRecipeConf -from lnst.RecipeCommon.Perf.Results import result_averages_difference from lnst.RecipeCommon.Perf.Measurements.Results import ( BaseMeasurementResults as PerfMeasurementResults, ) -from lnst.RecipeCommon.Perf.Evaluators.BaselineEvaluator import ( - BaselineEvaluator, MetricComparison -) +from lnst.RecipeCommon.Perf.Evaluators.BaselineEvaluator import BaselineEvaluator class BaselineCPUAverageEvaluator(BaselineEvaluator): def __init__( - self, thresholds: dict, evaluation_filter: Dict[str, str] = None + self, + metrics_to_evaluate: Optional[List[str]] = None, + evaluation_filter: Optional[Dict[str, str]] = None, ): - self._thresholds = thresholds + super().__init__(metrics_to_evaluate) self._evaluation_filter = evaluation_filter def filter_results( @@ -57,79 +55,3 @@ def _divide_results_by_host(self, results: List[PerfMeasurementResults]): results_by_host[result.host] = [] results_by_host[result.host].append(result) return results_by_host - - def describe_group_results( - self, - recipe: BaseRecipe, - recipe_conf: PerfRecipeConf, - results: List[PerfMeasurementResults], - ) -> List[str]: - return [ - "CPU Baseline average evaluation for Host {hostid}:".format( - hostid=results[0].host.hostid - ) - ] - - def compare_result_with_baseline( - self, - recipe: BaseRecipe, - recipe_conf: PerfRecipeConf, - result: PerfMeasurementResults, - baseline: PerfMeasurementResults, - result_index: int = 0 - ) -> List[MetricComparison]: - comparison = ResultType.FAIL - - metric_name = f"{result_index}_utilization" - - if baseline is None: - return [ - MetricComparison( - metric_name=metric_name, - result=ResultType.FAIL, - text=f"FAIL: CPU {result.cpu}: no baseline found", - ) - ] - elif (threshold := self._thresholds.get(metric_name, None)) is not None: - try: - difference = result_averages_difference( - result.utilization, baseline.utilization - ) - - text = ( - "CPU {cpuid}: {metric_name} {diff:.2f}% {direction} than baseline. 
" - "Allowed difference: {threshold}%".format( - cpuid=result.cpu, - metric_name=metric_name, - diff=abs(difference), - direction="higher" if difference >= 0 else "lower", - threshold=threshold - ) - ) - - if difference < -threshold: - comparison = ResultType.WARNING - text = "IMPROVEMENT: " + text - elif difference <= threshold: - comparison = ResultType.PASS - text = "PASS: " + text - else: - comparison = ResultType.FAIL - text = "FAIL: " + text - except ZeroDivisionError: - text = f"CPU {result.cpu}: {metric_name} zero division by baseline" - return [ - MetricComparison( - metric_name=metric_name, - result=comparison, - text=text, - ) - ] - else: - return [ - MetricComparison( - metric_name=metric_name, - result=ResultType.FAIL, - text=f"FAIL: CPU {result.cpu}: {metric_name} no threshold found", - ) - ] diff --git a/lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py b/lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py index 6c072b55f..4504976a1 100644 --- a/lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py +++ b/lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from functools import reduce from dataclasses import dataclass @@ -6,23 +6,58 @@ from lnst.Controller.RecipeResults import ResultType, Result from lnst.RecipeCommon.BaseResultEvaluator import BaseResultEvaluator from lnst.RecipeCommon.Perf.Recipe import RecipeConf as PerfRecipeConf +from lnst.RecipeCommon.Perf.Results import result_averages_difference from lnst.RecipeCommon.Perf.Measurements.Results import ( BaseMeasurementResults as PerfMeasurementResults, ) -class BaselineEvaluationResult(Result): - pass - - @dataclass class MetricComparison: + measurement_type: str + current_result: PerfMeasurementResults + baseline_result: Optional[PerfMeasurementResults] + threshold: float metric_name: str - result: ResultType + difference: float + comparison_result: ResultType text: str +class BaselineEvaluationResult(Result): + def __init__( + self, comparisons: list[MetricComparison], recipe_conf: PerfRecipeConf + ): + super().__init__(ResultType.PASS) + self.comparisons = comparisons + self.recipe_conf = recipe_conf + + @property + def result(self) -> ResultType: + return reduce( + ResultType.max_severity, + [comparison.comparison_result for comparison in self.comparisons], + ResultType.PASS, + ) + + @property + def description(self) -> str: + res = [] + current_result = None + for comparison in self.comparisons: + if comparison.current_result != current_result: + res.append(comparison.current_result.describe()) + current_result = comparison.current_result + res.append(f"{comparison.comparison_result}: {comparison.text}") + return "\n".join( + ["Baseline evaluation of"] + res + ) + + class BaselineEvaluator(BaseResultEvaluator): + def __init__(self, metrics_to_evaluate: Optional[List[str]] = None): + self._metrics_to_evaluate = metrics_to_evaluate + def evaluate_results( self, recipe: BaseRecipe, @@ -59,53 +94,22 @@ def evaluate_group_results( ): cumulative_result = ResultType.PASS comparisons = [] - result_text = self.describe_group_results(recipe, recipe_conf, results) baselines = self.get_baselines(recipe, recipe_conf, results) - result_index = len(recipe.current_run.results) - for i, (result, baseline) in enumerate(zip(results, baselines)): - metric_comparisons = self.compare_result_with_baseline( - recipe, recipe_conf, result, baseline, result_index - ) - cumulative_result = reduce( - ResultType.max_severity, - [metric.result for metric in 
metric_comparisons], - cumulative_result, - ) - result_text.extend( - [metric.text for metric in metric_comparisons] - ) + for result, baseline in zip(results, baselines): comparisons.extend( - [ - { - "measurement_type": result.measurement.__class__.__name__, - "current_result": result, - "baseline_result": baseline, - "comparison_result": metric.result, - "metric_name": metric.metric_name, - "text": metric.text, - "recipe_conf": recipe_conf, - } - for metric in metric_comparisons - ] + self.compare_result_with_baseline( + recipe, recipe_conf, result, baseline + ) ) recipe.add_custom_result( BaselineEvaluationResult( - cumulative_result, - "\n".join(result_text), - data={"comparisons": comparisons}, + comparisons=comparisons, + recipe_conf=recipe_conf, ) ) - def describe_group_results( - self, - recipe: BaseRecipe, - recipe_conf: PerfRecipeConf, - results: List[PerfMeasurementResults], - ) -> List[str]: - return [] - def get_baselines( self, recipe: BaseRecipe, @@ -121,7 +125,14 @@ def get_baseline( recipe: BaseRecipe, recipe_conf: PerfRecipeConf, result: PerfMeasurementResults, - ) -> PerfMeasurementResults: + ) -> Optional[PerfMeasurementResults]: + return None + + def get_threshold( + self, + baseline: PerfMeasurementResults, + metric_name: str, + ) -> Optional[float]: return None def compare_result_with_baseline( @@ -130,6 +141,58 @@ def compare_result_with_baseline( recipe_conf: PerfRecipeConf, result: PerfMeasurementResults, baseline: PerfMeasurementResults, - result_index: int = 0, ) -> List[MetricComparison]: - raise NotImplementedError("Result to baseline metric comparison not implemented") + comparisons = [] + + if self._metrics_to_evaluate: + metrics_to_evaluate = [ + i for i in result.metrics if i in self._metrics_to_evaluate + ] + else: + metrics_to_evaluate = result.metrics + + for metric in metrics_to_evaluate: + comparisons.append( + self.compare_metrics_with_threshold( + result=result, + baseline=baseline, + metric_name=metric, + ) + ) + return comparisons + + def compare_metrics_with_threshold(self, result, baseline, metric_name): + threshold = None + diff = None + + if not baseline: + comparison_result = ResultType.FAIL + text = "No baseline found" + elif (threshold := self.get_threshold(baseline, metric_name)) is None: + comparison_result = ResultType.FAIL + text = "No threshold found" + else: + diff = result_averages_difference( + getattr(result, metric_name), + getattr(baseline, metric_name), + ) + direction = "higher" if diff >= 0 else "lower" + + comparison_result = ( + ResultType.PASS if abs(diff) <= threshold else ResultType.FAIL + ) + text = ( + f"New {metric_name} average is {abs(diff):.2f}% {direction} from the baseline. 
" + f"Allowed difference: {threshold}%" + ) + + return MetricComparison( + measurement_type=result.measurement.__class__.__name__, + current_result=result, + baseline_result=baseline, + threshold=threshold, + metric_name=metric_name, + difference=diff, + comparison_result=comparison_result, + text=text, + ) diff --git a/lnst/RecipeCommon/Perf/Evaluators/BaselineFlowAverageEvaluator.py b/lnst/RecipeCommon/Perf/Evaluators/BaselineFlowAverageEvaluator.py deleted file mode 100644 index 193ab56c8..000000000 --- a/lnst/RecipeCommon/Perf/Evaluators/BaselineFlowAverageEvaluator.py +++ /dev/null @@ -1,111 +0,0 @@ -from __future__ import division -from typing import List - -from lnst.Controller.Recipe import BaseRecipe -from lnst.Controller.RecipeResults import ResultType - -from lnst.RecipeCommon.Perf.Recipe import RecipeConf as PerfRecipeConf -from lnst.RecipeCommon.Perf.Results import result_averages_difference -from lnst.RecipeCommon.Perf.Results import SequentialPerfResult -from lnst.RecipeCommon.Perf.Measurements.Results import ( - BaseMeasurementResults as PerfMeasurementResults, -) -from lnst.RecipeCommon.Perf.Evaluators.BaselineEvaluator import ( - BaselineEvaluator, MetricComparison -) - - -class BaselineFlowAverageEvaluator(BaselineEvaluator): - def __init__( - self, thresholds: dict, metrics_to_evaluate: List[str] = None - ): - self._thresholds = thresholds - - if metrics_to_evaluate is not None: - self._metrics_to_evaluate = metrics_to_evaluate - else: - self._metrics_to_evaluate = [ - "generator_results", - "generator_cpu_stats", - "receiver_results", - "receiver_cpu_stats", - ] - - def describe_group_results( - self, - recipe: BaseRecipe, - recipe_conf: PerfRecipeConf, - results: List[PerfMeasurementResults], - ) -> List[str]: - result = results[0] - return [ - "Baseline average evaluation of flow:", - "{}".format(result.flow) - ] - - def compare_result_with_baseline( - self, - recipe: BaseRecipe, - recipe_conf: PerfRecipeConf, - result: PerfMeasurementResults, - baseline: PerfMeasurementResults, - result_index: int = 0, - ) -> List[MetricComparison]: - metric_comparisons = [] - for i in self._metrics_to_evaluate: - metric = f"{result_index}_{i}" - if baseline is None: - comparison_result = ResultType.FAIL - text = f"FAIL: Metric {metric} baseline not found for this flow" - elif (threshold := self._thresholds.get(metric, None)) is not None: - comparison_result, text = self._average_diff_comparison( - name=metric, - target=getattr(result, i), - baseline=getattr(baseline, i), - threshold=threshold - ) - else: - comparison_result = ResultType.FAIL - text = f"FAIL: Metric {metric} threshold not found" - - metric_comparisons.append( - MetricComparison( - metric_name=metric, - result=comparison_result, - text=text, - ) - ) - return metric_comparisons - - def _average_diff_comparison( - self, - name: str, - target: SequentialPerfResult, - baseline: SequentialPerfResult, - threshold: int - ): - difference = result_averages_difference(target, baseline) - result_text = "New {name} average is {diff:.2f}% {direction} from the baseline. 
" \ - "Allowed difference: {threshold}%".format( - name=name, - diff=abs(difference), - direction="higher" if difference >= 0 else "lower", - threshold=threshold - ) - - cpu = "_cpu_" in name - - # ( flow metrics ) or ( cpu metrics ) - if (not cpu and difference > threshold) or (cpu and difference < -threshold): - comparison = ResultType.WARNING - elif (not cpu and difference >= -threshold) or (cpu and difference <= threshold): - comparison = ResultType.PASS - else: - comparison = ResultType.FAIL - - if comparison == ResultType.WARNING: - result_text = f"IMPROVEMENT: {result_text}" - else: - result_text = f"{comparison}: {result_text}" - - return comparison, result_text diff --git a/lnst/RecipeCommon/Perf/Evaluators/BaselineRDMABandwidthAverageEvaluator.py b/lnst/RecipeCommon/Perf/Evaluators/BaselineRDMABandwidthAverageEvaluator.py deleted file mode 100644 index 81e2b3b56..000000000 --- a/lnst/RecipeCommon/Perf/Evaluators/BaselineRDMABandwidthAverageEvaluator.py +++ /dev/null @@ -1,64 +0,0 @@ -from lnst.Controller.Recipe import BaseRecipe -from lnst.Controller.RecipeResults import ResultType -from lnst.RecipeCommon.Perf.Evaluators.BaselineEvaluator import BaselineEvaluator, MetricComparison -from lnst.RecipeCommon.Perf.Measurements.Results import RDMABandwidthMeasurementResults - - -class BaselineRDMABandwidthAverageEvaluator(BaselineEvaluator): - - def __init__(self, thresholds: dict): - self._thresholds = thresholds - - def compare_result_with_baseline( - self, - recipe: BaseRecipe, - recipe_conf: "EnrtConfiguration", - result: RDMABandwidthMeasurementResults, - baseline: RDMABandwidthMeasurementResults, - result_index: int = 0, - ) -> list[MetricComparison]: - metric_name = f"{result_index}_bandwidth" - - if baseline is None: - return [ - MetricComparison( - metric_name=metric_name, - result=ResultType.FAIL, - text=f"{self.__class__.__name__} FAIL:\n Metric {metric_name} baseline not found", - ) - ] - elif (threshold := self._thresholds.get(metric_name, None)) is None: - return [ - MetricComparison( - metric_name=metric_name, - result=ResultType.FAIL, - text=f"{self.__class__.__name__}\nFAIL: Metric {metric_name} threshold not found", - ) - ] - - difference = ((result.bandwidth.average / baseline.bandwidth.average) * 100) - 100 - direction = "higher" if difference >= 0 else "lower" - text = [ - f"{self.__class__.__name__} of {metric_name}", - f"Baseline: {baseline.bandwidth.average} MiB/s", - f"Measured: {result.bandwidth.average} MiB/s", - f"{abs(difference):2f}% {direction} than baseline", - f"Allowed difference: {threshold}%", - ] - if difference > threshold: - comparison = ResultType.WARNING - text[0] = f"IMPROVEMENT: {text[0]}" - elif difference >= -threshold: - comparison = ResultType.PASS - text[0] = f"PASS: {text[0]}" - else: - comparison = ResultType.FAIL - text[0] = f"FAIL: {text[0]}" - - return [ - MetricComparison( - metric_name=metric_name, - result=comparison, - text="\n".join(text) - ) - ] diff --git a/lnst/RecipeCommon/Perf/Evaluators/BaselineTcRunAverageEvaluator.py b/lnst/RecipeCommon/Perf/Evaluators/BaselineTcRunAverageEvaluator.py deleted file mode 100644 index 01c30a3fe..000000000 --- a/lnst/RecipeCommon/Perf/Evaluators/BaselineTcRunAverageEvaluator.py +++ /dev/null @@ -1,67 +0,0 @@ -from lnst.Controller.RecipeResults import ResultType -from lnst.RecipeCommon.Perf.Evaluators.BaselineEvaluator import BaselineEvaluator, MetricComparison -from lnst.RecipeCommon.Perf.Measurements.Results import TcRunMeasurementResults -from lnst.Recipes.ENRT.TrafficControlRecipe import 
TrafficControlRecipe, TcRecipeConfiguration - - -class BaselineTcRunAverageEvaluator(BaselineEvaluator): - - def __init__(self, thresholds: dict): - self._thresholds = thresholds - - def compare_result_with_baseline( - self, - recipe: TrafficControlRecipe, - recipe_conf: TcRecipeConfiguration, - result: TcRunMeasurementResults, - baseline: TcRunMeasurementResults, - result_index: int = 0, - ) -> list[MetricComparison]: - - metric_name = f"{result_index}_rule_install_rate" - - if baseline is None: - return [ - MetricComparison( - metric_name=metric_name, - result=ResultType.FAIL, - text=f"{self.__class__.__name__} FAIL:\n {result.device.name} {metric_name} baseline not found", - ) - ] - elif (threshold := self._thresholds.get(metric_name, None)) is not None: - difference = ((result.rule_install_rate.average / baseline.rule_install_rate.average) * 100) - 100 - direction = "higher" if difference >= 0 else "lower" - text = [ - f"{self.__class__.__name__} of tc run with {metric_name}", - f"{result.description}", - f"Baseline: {baseline.rule_install_rate.average} rules/sec", - f"Measured: {result.rule_install_rate.average} rules/sec", - f"{abs(difference):2f}% {direction} than baseline ", - f"Allowed difference: {threshold}% ", - ] - if difference > threshold: - comparison = ResultType.WARNING - text[0] = f"IMPROVEMENT: {text[0]}" - elif difference >= -threshold: - comparison = ResultType.PASS - text[0] = f"PASS: {text[0]}" - else: - comparison = ResultType.FAIL - text[0] = f"FAIL: {text[0]}" - - return [ - MetricComparison( - metric_name=metric_name, - result=comparison, - text="\n".join(text) - ) - ] - else: - return [ - MetricComparison( - metric_name=metric_name, - result=ResultType.FAIL, - text=f"{self.__class__.__name__}\nFAIL: {result.device.name} {metric_name} no threshold found", - ) - ] - diff --git a/lnst/RecipeCommon/Perf/Evaluators/__init__.py b/lnst/RecipeCommon/Perf/Evaluators/__init__.py index 15a4fd7f4..1bac79501 100644 --- a/lnst/RecipeCommon/Perf/Evaluators/__init__.py +++ b/lnst/RecipeCommon/Perf/Evaluators/__init__.py @@ -1,8 +1,4 @@ from lnst.RecipeCommon.Perf.Evaluators.NonzeroFlowEvaluator import NonzeroFlowEvaluator -from lnst.RecipeCommon.Perf.Evaluators.BaselineFlowAverageEvaluator import BaselineFlowAverageEvaluator - +from lnst.RecipeCommon.Perf.Evaluators.BaselineEvaluator import BaselineEvaluator from lnst.RecipeCommon.Perf.Evaluators.BaselineCPUAverageEvaluator import BaselineCPUAverageEvaluator - from lnst.RecipeCommon.Perf.Evaluators.MaxTimeTakenEvaluator import MaxTimeTakenEvaluator -from lnst.RecipeCommon.Perf.Evaluators.BaselineTcRunAverageEvaluator import BaselineTcRunAverageEvaluator -from lnst.RecipeCommon.Perf.Evaluators.BaselineRDMABandwidthAverageEvaluator import BaselineRDMABandwidthAverageEvaluator From 6c67306f724c06c5e8d31d3ecb79a9d54a0948ba Mon Sep 17 00:00:00 2001 From: Ondrej Lichtner Date: Fri, 24 Nov 2023 16:47:20 +0100 Subject: [PATCH 4/8] BaseEnrtRecipe: remove perf_evaluation_strategy parameter This parameter logically doesn't fit into the generic upstream BaseEnrtRecipe class. The reason for this is that from the upstream point of view it doesn't actually do anything - it only recognized the "none" and "nonzero" values which... are special use cases and the implementation was a bit clunky. We could refactor this to make it make a bit more sense in the upstream and for it to be extensible - I think this should be an extensible Mixin class that handles evaluator registration for enrt perf measurements... 
But I think it doesn't have a valid use case in upstream yet so I'm removing it for now. We may want to revisit this later. Signed-off-by: Ondrej Lichtner --- lnst/Recipes/ENRT/BaseEnrtRecipe.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/lnst/Recipes/ENRT/BaseEnrtRecipe.py b/lnst/Recipes/ENRT/BaseEnrtRecipe.py index 4d20083df..68979680b 100644 --- a/lnst/Recipes/ENRT/BaseEnrtRecipe.py +++ b/lnst/Recipes/ENRT/BaseEnrtRecipe.py @@ -165,12 +165,6 @@ class BaseEnrtRecipe( specify how many times should each performance measurement be repeated to generate cumulative results which can be statistically analyzed. :type perf_iterations: :any:`IntParam` (default 5) - - :param perf_evaluation_strategy: - Parameter used by the :any:`evaluator_by_measurement` selector to - pick correct performance measurement evaluators based on the strategy - specified. - :type perf_evaluation_strategy: :any:`StrParam` (default "all") """ driver = StrParam() @@ -187,7 +181,6 @@ class BaseEnrtRecipe( # generic perf test params perf_iterations = IntParam(default=5) - perf_evaluation_strategy = StrParam(default="all") def test(self): """Main test loop shared by all the Enrt recipes @@ -472,27 +465,14 @@ def evaluator_by_measurement(self, measurement): The selector looks at the input measurement to pick appropriate evaluator. - If :any: `perf_evaluation_strategy` property is set - to either "none" or "nonzero", selector returns - given evaluators based on their strategy. - :return: list of Result evaluators :rtype: List[:any:`BaseResultEvaluator`] """ - if self.params.perf_evaluation_strategy == "none": - return [] - if isinstance(measurement, BaseCPUMeasurement): - if self.params.perf_evaluation_strategy in ["nonzero", "none"]: - evaluators = [] - else: - evaluators = self.cpu_perf_evaluators + evaluators = self.cpu_perf_evaluators elif isinstance(measurement, BaseFlowMeasurement): - if self.params.perf_evaluation_strategy == "nonzero": - evaluators = [NonzeroFlowEvaluator()] - else: - evaluators = self.net_perf_evaluators + evaluators = self.net_perf_evaluators else: evaluators = [] From dfb460b1f9c0765212310339577d2b62f0eef93c Mon Sep 17 00:00:00 2001 From: Ondrej Lichtner Date: Fri, 1 Dec 2023 14:46:14 +0100 Subject: [PATCH 5/8] Perf/Measurement/Results: add metric names to result descriptions Signed-off-by: Ondrej Lichtner --- .../Perf/Measurements/Results/FlowMeasurementResults.py | 8 ++++---- .../Perf/Measurements/Results/TcRunMeasurementResults.py | 2 +- .../Measurements/Results/XDPBenchMeasurementResults.py | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py index 7ac2c8729..5d45b9c60 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/FlowMeasurementResults.py @@ -141,7 +141,7 @@ def describe(self): desc = [] desc.append(str(self.flow)) desc.append( - "Generator measured throughput: {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second.".format( + "Generator measured throughput (generator_results): {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second.".format( tput=generator.average, deviation=generator.std_deviation, percentage=self._deviation_percentage(generator), @@ -149,14 +149,14 @@ def describe(self): ) ) desc.append( - "Generator process CPU data: {cpu:.2f} +-{cpu_deviation:.2f} {cpu_unit} per 
second.".format( + "Generator process CPU data (generator_cpu_stats): {cpu:.2f} +-{cpu_deviation:.2f} {cpu_unit} per second.".format( cpu=generator_cpu.average, cpu_deviation=generator_cpu.std_deviation, cpu_unit=generator_cpu.unit, ) ) desc.append( - "Receiver measured throughput: {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second.".format( + "Receiver measured throughput (receiver_results): {tput:.2f} +-{deviation:.2f}({percentage:.2f}%) {unit} per second.".format( tput=receiver.average, deviation=receiver.std_deviation, percentage=self._deviation_percentage(receiver), @@ -164,7 +164,7 @@ def describe(self): ) ) desc.append( - "Receiver process CPU data: {cpu:.2f} +-{cpu_deviation:.2f} {cpu_unit} per second.".format( + "Receiver process CPU data (receiver_cpu_stats): {cpu:.2f} +-{cpu_deviation:.2f} {cpu_unit} per second.".format( cpu=receiver_cpu.average, cpu_deviation=receiver_cpu.std_deviation, cpu_unit=receiver_cpu.unit, diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/TcRunMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/TcRunMeasurementResults.py index 18a42223b..98aac5ef8 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/TcRunMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/TcRunMeasurementResults.py @@ -52,7 +52,7 @@ def description(self): f" tc run with {self.rule_install_rate.value} rules" \ f" num_instances={self.measurement.num_instances}" \ f" took {self.rule_install_rate.duration} seconds " \ - f"({self.rule_install_rate.average} rules/sec)" + f"(rule_install_rate={self.rule_install_rate.average} rules/sec)" @property def time_taken(self): diff --git a/lnst/RecipeCommon/Perf/Measurements/Results/XDPBenchMeasurementResults.py b/lnst/RecipeCommon/Perf/Measurements/Results/XDPBenchMeasurementResults.py index 9965092e2..244592ac4 100644 --- a/lnst/RecipeCommon/Perf/Measurements/Results/XDPBenchMeasurementResults.py +++ b/lnst/RecipeCommon/Perf/Measurements/Results/XDPBenchMeasurementResults.py @@ -41,12 +41,12 @@ def describe(self) -> str: desc = [] desc.append(str(self.flow)) desc.append( - "Generator generated: {tput:,f} {unit} per second.".format( + "Generator generated (generator_results): {tput:,f} {unit} per second.".format( tput=generator.average, unit=generator.unit ) ) desc.append( - "Receiver processed: {tput:,f} {unit} per second.".format( + "Receiver processed (receiver_results): {tput:,f} {unit} per second.".format( tput=receiver.average, unit=receiver.unit ) ) From caa0c0121a5ca272b8ede506efeda49bcd61e71b Mon Sep 17 00:00:00 2001 From: Ondrej Lichtner Date: Tue, 23 Jan 2024 12:27:26 +0100 Subject: [PATCH 6/8] BaselineEvaluator: type fixes for MetricComparison Signed-off-by: Ondrej Lichtner --- lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py b/lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py index 4504976a1..07e401725 100644 --- a/lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py +++ b/lnst/RecipeCommon/Perf/Evaluators/BaselineEvaluator.py @@ -17,9 +17,9 @@ class MetricComparison: measurement_type: str current_result: PerfMeasurementResults baseline_result: Optional[PerfMeasurementResults] - threshold: float + threshold: Optional[float] metric_name: str - difference: float + difference: Optional[float] comparison_result: ResultType text: str From 01c80b3a9312234b4b554e55934e85f893fc72ee Mon Sep 17 00:00:00 2001 From: Ondrej Lichtner Date: Tue, 6 Feb 2024 
09:46:26 +0100 Subject: [PATCH 7/8] Recipes.ENRT.PvPRecipes: HostReq consistency in hostids These two tests are using hostids that are different than every other ENRT recipe for no reason. Making them consistent should make the data migration that we need to do for our measurement database easier. Signed-off-by: Ondrej Lichtner --- lnst/Recipes/ENRT/OvS_DPDK_PvP.py | 50 ++++++++++----------- lnst/Recipes/ENRT/VhostNetPvPRecipe.py | 60 +++++++++++++------------- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/lnst/Recipes/ENRT/OvS_DPDK_PvP.py b/lnst/Recipes/ENRT/OvS_DPDK_PvP.py index 50d9e58c1..a388e170e 100644 --- a/lnst/Recipes/ENRT/OvS_DPDK_PvP.py +++ b/lnst/Recipes/ENRT/OvS_DPDK_PvP.py @@ -37,13 +37,13 @@ def __init__(self): class OvSDPDKPvPRecipe(BasePvPRecipe): - m1 = HostReq() - m1.eth0 = DeviceReq(label="net1", driver=RecipeParam("driver")) - m1.eth1 = DeviceReq(label="net1", driver=RecipeParam("driver")) + host1 = HostReq() + host1.eth0 = DeviceReq(label="net1", driver=RecipeParam("driver")) + host1.eth1 = DeviceReq(label="net1", driver=RecipeParam("driver")) - m2 = HostReq(with_guest="yes") - m2.eth0 = DeviceReq(label="net1", driver=RecipeParam("driver")) - m2.eth1 = DeviceReq(label="net1", driver=RecipeParam("driver")) + host2 = HostReq(with_guest="yes") + host2.eth0 = DeviceReq(label="net1", driver=RecipeParam("driver")) + host2.eth1 = DeviceReq(label="net1", driver=RecipeParam("driver")) net_ipv4 = IPv4NetworkParam(default="192.168.1.0/24") @@ -76,33 +76,33 @@ def check_dependencies(self): def gen_ping_config(self): return [ - (self.matched.m1, self.matched.m1.eth0, self.matched.m2.eth0), - (self.matched.m1, self.matched.m1.eth1, self.matched.m2.eth1), - (self.matched.m2, self.matched.m2.eth0, self.matched.m1.eth0), - (self.matched.m2, self.matched.m2.eth1, self.matched.m2.eth1) + (self.matched.host1, self.matched.host1.eth0, self.matched.host2.eth0), + (self.matched.host1, self.matched.host1.eth1, self.matched.host2.eth1), + (self.matched.host2, self.matched.host2.eth0, self.matched.host1.eth0), + (self.matched.host2, self.matched.host2.eth1, self.matched.host2.eth1) ] def test_wide_configuration(self, config): - config.generator.host = self.matched.m1 - config.generator.nics.append(self.matched.m1.eth0) - config.generator.nics.append(self.matched.m1.eth1) + config.generator.host = self.matched.host1 + config.generator.nics.append(self.matched.host1.eth0) + config.generator.nics.append(self.matched.host1.eth1) ipv4_addr = interface_addresses(self.params.net_ipv4) nic_addrs = { - self.matched.m1.eth0: next(ipv4_addr), - self.matched.m2.eth0: next(ipv4_addr), - self.matched.m1.eth1: next(ipv4_addr), - self.matched.m2.eth1: next(ipv4_addr), + self.matched.host1.eth0: next(ipv4_addr), + self.matched.host2.eth0: next(ipv4_addr), + self.matched.host1.eth1: next(ipv4_addr), + self.matched.host2.eth1: next(ipv4_addr), } - self.matched.m1.eth0.ip_add(nic_addrs[self.matched.m1.eth0]) - self.matched.m1.eth1.ip_add(nic_addrs[self.matched.m1.eth1]) + self.matched.host1.eth0.ip_add(nic_addrs[self.matched.host1.eth0]) + self.matched.host1.eth1.ip_add(nic_addrs[self.matched.host1.eth1]) self.base_dpdk_configuration(config.generator) - config.dut.host = self.matched.m2 - config.dut.nics.append(self.matched.m2.eth0) - config.dut.nics.append(self.matched.m2.eth1) - self.matched.m2.eth0.ip_add(nic_addrs[self.matched.m2.eth0]) - self.matched.m2.eth1.ip_add(nic_addrs[self.matched.m2.eth1]) + config.dut.host = self.matched.host2 + config.dut.nics.append(self.matched.host2.eth0) + 
config.dut.nics.append(self.matched.host2.eth1) + self.matched.host2.eth0.ip_add(nic_addrs[self.matched.host2.eth0]) + self.matched.host2.eth1.ip_add(nic_addrs[self.matched.host2.eth1]) self.base_dpdk_configuration(config.dut) self.ovs_dpdk_bridge_configuration(config.dut) @@ -228,7 +228,7 @@ def ovs_dpdk_bridge_configuration(self, host_conf): host.run("systemctl restart openvswitch") # TODO use an actual OvS Device object - # TODO config.dut.nics.append(CachedRemoteDevice(m2.ovs)) + # TODO config.dut.nics.append(CachedRemoteDevice(host2.ovs)) host.run("ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev") host_conf.dpdk_ports = [] diff --git a/lnst/Recipes/ENRT/VhostNetPvPRecipe.py b/lnst/Recipes/ENRT/VhostNetPvPRecipe.py index be0630bb8..fe2f5bcad 100644 --- a/lnst/Recipes/ENRT/VhostNetPvPRecipe.py +++ b/lnst/Recipes/ENRT/VhostNetPvPRecipe.py @@ -23,13 +23,13 @@ def __init__(self): class VhostNetPvPRecipe(BasePvPRecipe): - generator_req = HostReq() - generator_req.eth0 = DeviceReq(label="net1", driver=RecipeParam("driver")) - generator_req.eth1 = DeviceReq(label="net1", driver=RecipeParam("driver")) + host1 = HostReq() + host1.eth0 = DeviceReq(label="net1", driver=RecipeParam("driver")) + host1.eth1 = DeviceReq(label="net1", driver=RecipeParam("driver")) - host_req = HostReq(with_guest="yes") - host_req.eth0 = DeviceReq(label="net1", driver=RecipeParam("driver")) - host_req.eth1 = DeviceReq(label="net1", driver=RecipeParam("driver")) + host2 = HostReq(with_guest="yes") + host2.eth0 = DeviceReq(label="net1", driver=RecipeParam("driver")) + host2.eth1 = DeviceReq(label="net1", driver=RecipeParam("driver")) net_ipv4 = IPv4NetworkParam(default="192.168.101.0/24") @@ -64,39 +64,39 @@ def check_params(self): def gen_ping_config(self): return [ - (self.matched.generator_req, - self.matched.generator_req.eth0, - self.matched.host_req.eth0), - (self.matched.generator_req, - self.matched.generator_req.eth1, - self.matched.host_req.eth1), - (self.matched.host_req, - self.matched.host_req.eth0, - self.matched.generator_req.eth0), - (self.matched.host_req, - self.matched.host_req.eth1, - self.matched.host_req.eth1) + (self.matched.host1, + self.matched.host1.eth0, + self.matched.host2.eth0), + (self.matched.host1, + self.matched.host1.eth1, + self.matched.host2.eth1), + (self.matched.host2, + self.matched.host2.eth0, + self.matched.host1.eth0), + (self.matched.host2, + self.matched.host2.eth1, + self.matched.host2.eth1) ] def test_wide_configuration(self, config): - config.generator.host = self.matched.generator_req - config.generator.nics.append(self.matched.generator_req.eth0) - config.generator.nics.append(self.matched.generator_req.eth1) + config.generator.host = self.matched.host1 + config.generator.nics.append(self.matched.host1.eth0) + config.generator.nics.append(self.matched.host1.eth1) ipv4_addr = interface_addresses(self.params.net_ipv4) - self.matched.generator_req.eth0.ip_add(next(ipv4_addr)) - self.matched.generator_req.eth1.ip_add(next(ipv4_addr)) - self.matched.generator_req.eth0.up() - self.matched.generator_req.eth1.up() + self.matched.host1.eth0.ip_add(next(ipv4_addr)) + self.matched.host1.eth1.ip_add(next(ipv4_addr)) + self.matched.host1.eth0.up() + self.matched.host1.eth1.up() self.base_dpdk_configuration(config.generator) - config.dut.host = self.matched.host_req - config.dut.nics.append(self.matched.host_req.eth0) - config.dut.nics.append(self.matched.host_req.eth1) - self.matched.host_req.eth0.up() - self.matched.host_req.eth1.up() + config.dut.host = 
self.matched.host2
+        config.dut.nics.append(self.matched.host2.eth0)
+        config.dut.nics.append(self.matched.host2.eth1)
+        self.matched.host2.eth0.up()
+        self.matched.host2.eth1.up()

         self.host_forwarding_configuration(config.dut)

From 3d07e4ec0b8d63264f8d23bdbb3734ec732fe344 Mon Sep 17 00:00:00 2001
From: Ondrej Lichtner
Date: Thu, 8 Feb 2024 10:46:26 +0100
Subject: [PATCH 8/8] lnst.Common.Utils: use statistics in std_deviation

This function was written back in 2015, when we couldn't use python3
yet; the statistics module was added in 3.4.

At the same time, the formula used is "incorrect": it calculates the
POPULATION standard deviation, whereas we really should be using a
SAMPLE standard deviation formula.

Signed-off-by: Ondrej Lichtner
---
 lnst/Common/Utils.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lnst/Common/Utils.py b/lnst/Common/Utils.py
index 5c3a80ab6..044330187 100644
--- a/lnst/Common/Utils.py
+++ b/lnst/Common/Utils.py
@@ -21,7 +21,7 @@
 import errno
 import ast
 import collections
-import math
+import statistics
 import itertools
 from collections.abc import Iterable, Callable
 from contextlib import AbstractContextManager
@@ -287,10 +287,9 @@ def dict_to_dot(original_dict, prefix=""):
     return return_list

 def std_deviation(values):
-    if len(values) <= 0:
+    if len(values) <= 1:
         return 0.0
-    avg = sum(values) / float(len(values))
-    return math.sqrt(sum([(float(i) - avg)**2 for i in values])/len(values))
+    return statistics.stdev(values)

 def deprecated(func):
     """
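A quick illustration of the population-vs-sample distinction described in the
last patch above. This snippet is not part of the patch; the sample values are
arbitrary:

import math
import statistics

values = [10.0, 12.0, 23.0, 23.0, 16.0, 23.0, 21.0, 16.0]

# Old lnst.Common.Utils formula: population standard deviation (divides by N).
avg = sum(values) / len(values)
old_result = math.sqrt(sum((v - avg) ** 2 for v in values) / len(values))

print(old_result)                 # 4.898979485566356
print(statistics.pstdev(values))  # 4.898979485566356 (population, divides by N)
print(statistics.stdev(values))   # 5.237229365663817 (sample, divides by N - 1)

Note that the guard also changes to len(values) <= 1, since
statistics.stdev() requires at least two data points.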