Skip to content

Commit

Permalink
fix(eap): Handle nans in other timeseries for EAP (#82131)
Browse files Browse the repository at this point in the history
This fixes the NaNs in the "Other" timeseries for EAP: when some numeric
attribute does not exist for a time bucket, the avg aggregate will
produce a NaN. It does not fix this for the spans-indexed dataset, because
the query there is actually wrong and treats entries without the
attribute as 0. It also does not fix it for the RPC calls, because the RPC
calls are failing with a division-by-zero error.
  • Loading branch information
Zylphrex authored and evanh committed Dec 17, 2024
1 parent f7d1caf commit dd203b7
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 1 deletion.
10 changes: 9 additions & 1 deletion src/sentry/snuba/spans_eap.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from collections.abc import Mapping, Sequence
from datetime import timedelta
from typing import Any, TypedDict

import sentry_sdk
from snuba_sdk import Column, Condition
Expand Down Expand Up @@ -244,14 +245,16 @@ def top_events_timeseries(
snuba_params.end_date,
rollup,
)

with sentry_sdk.start_span(op="spans_indexed", name="top_events.transform_results") as span:
span.set_data("result_count", len(result.get("data", [])))
result = top_events_builder.process_results(result)
other_result = top_events_builder.process_results(other_result)

issues: Mapping[int, str | None] = {}
translated_groupby = top_events_builder.translated_groupby

results = (
results: dict[str, TimeseriesResult] = (
{discover.OTHER_KEY: {"order": limit, "data": other_result["data"]}}
if len(other_result.get("data", []))
else {}
Expand Down Expand Up @@ -292,3 +295,8 @@ def top_events_timeseries(
)

return top_events_results


# Shape of one entry in the top-events timeseries result mapping:
# the series' rank among the top events plus its time-bucketed rows.
TimeseriesResult = TypedDict(
    "TimeseriesResult",
    {
        # Position of this series in the ordered top-events list.
        "order": int,
        # One dict per time bucket, as returned by process_results.
        "data": list[dict[str, Any]],
    },
)
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,61 @@ def test_handle_nans_from_snuba(self):
)
assert response.status_code == 200, response.content

    def test_handle_nans_from_snuba_top_n(self) -> None:
        """A top-N timeseries over a sparse numeric attribute must not 500.

        Some spans carry `measurements.lcp` and some do not, so for time
        buckets where the attribute is absent the avg aggregate can yield
        NaN upstream. This asserts only that the endpoint returns 200; see
        the note below for why exact values are not asserted.
        """
        # Two groups ("foo" / "bar"): in each, one span has an lcp value
        # and two spans do not, spread across two days so at least one
        # time bucket per group lacks the attribute entirely.
        self.store_spans(
            [
                self.create_span(
                    {
                        "description": "foo",
                        "measurements": {"lcp": {"value": 1}},
                    },
                    start_ts=self.day_ago,
                ),
                self.create_span({"description": "foo"}, start_ts=self.day_ago),
                self.create_span({"description": "foo"}, start_ts=self.two_days_ago),
                self.create_span(
                    {
                        "description": "bar",
                        "measurements": {"lcp": {"value": 2}},
                    },
                    start_ts=self.day_ago,
                ),
                self.create_span({"description": "bar"}, start_ts=self.day_ago),
                self.create_span({"description": "bar"}, start_ts=self.two_days_ago),
            ],
            is_eap=self.is_eap,
        )

        # topEvents=1 with excludeOther=0 forces the "Other" series, which
        # is where the NaN aggregation bug manifested.
        response = self._do_request(
            data={
                "field": ["span.description", "p50(measurements.lcp)", "avg(measurements.lcp)"],
                "yAxis": ["p50(measurements.lcp)", "avg(measurements.lcp)"],
                "project": self.project.id,
                "dataset": self.dataset,
                "excludeOther": 0,
                "topEvents": 1,
                "partial": 1,
                "per_page": 50,
                "interval": "1d",
                "statsPeriod": "7d",
                "orderby": "-avg_measurements_lcp",
                "sort": "-avg_measurements_lcp",
            },
        )
        assert response.status_code == 200, response.content

        # We cannot actually assert the value because the `spans_indexed` is
        # producing the wrong result and treating missing values as 0s which
        # skews the final aggregation.
        # This is also the reason it never errored because snuba never returns
        # nans in this situation.
        #
        # for k in response.data:
        #     for agg in ["p50(measurements.lcp)", "avg(measurements.lcp)"]:
        #         for row in response.data[k][agg]["data"]:
        #             assert row[1][0]["count"] in {0, 1, 2}
        # assert response.data["Other"]

def test_count_unique(self):
event_counts = [6, 0, 6, 3, 0, 3]
for hour, count in enumerate(event_counts):
Expand Down Expand Up @@ -857,3 +912,7 @@ def test_invalid_intervals(self):
},
)
assert response.status_code == 400, response.content

    # The RPC-backed subclass cannot run this test yet: the backend fails
    # with a division-by-zero error (see commit message), so the inherited
    # test is expected to fail until that is fixed server-side.
    @pytest.mark.xfail(reason="division by 0 error in snuba")
    def test_handle_nans_from_snuba_top_n(self) -> None:
        super().test_handle_nans_from_snuba_top_n()

0 comments on commit dd203b7

Please sign in to comment.