Skip to content

Commit

Permalink
fix(eap): Handle nans in other timeseries for EAP (#82131)
Browse files Browse the repository at this point in the history
This fixes the NaNs in the "Other" timeseries for EAP: when some numeric
attribute does not exist for a time bucket, the avg aggregate will
produce a NaN. It does not fix this for the spans-indexed dataset, because
the query there is actually wrong and treats entries without the
attribute as 0. It also does not fix it for the RPC calls, because the RPC
calls are failing with a division-by-zero error.
  • Loading branch information
Zylphrex authored and evanh committed Dec 17, 2024
1 parent f7d1caf commit dd203b7
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 1 deletion.
10 changes: 9 additions & 1 deletion src/sentry/snuba/spans_eap.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from collections.abc import Mapping, Sequence
from datetime import timedelta
from typing import Any, TypedDict

import sentry_sdk
from snuba_sdk import Column, Condition
Expand Down Expand Up @@ -244,14 +245,16 @@ def top_events_timeseries(
snuba_params.end_date,
rollup,
)

with sentry_sdk.start_span(op="spans_indexed", name="top_events.transform_results") as span:
span.set_data("result_count", len(result.get("data", [])))
result = top_events_builder.process_results(result)
other_result = top_events_builder.process_results(other_result)

issues: Mapping[int, str | None] = {}
translated_groupby = top_events_builder.translated_groupby

results = (
results: dict[str, TimeseriesResult] = (
{discover.OTHER_KEY: {"order": limit, "data": other_result["data"]}}
if len(other_result.get("data", []))
else {}
Expand Down Expand Up @@ -292,3 +295,8 @@ def top_events_timeseries(
)

return top_events_results


# Shape of one entry in the top-events timeseries result mapping:
# the series' rank among the top events plus its time-bucketed rows.
TimeseriesResult = TypedDict(
    "TimeseriesResult",
    {
        # Position of this series in the ordered top-events list.
        "order": int,
        # One dict per time bucket, as returned by process_results.
        "data": list[dict[str, Any]],
    },
)
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,61 @@ def test_handle_nans_from_snuba(self):
)
assert response.status_code == 200, response.content

    def test_handle_nans_from_snuba_top_n(self) -> None:
        """A top-N timeseries over a sparse numeric attribute must not 500.

        Some spans carry `measurements.lcp` and some do not, so for time
        buckets where the attribute is absent the avg aggregate can yield
        NaN upstream. This asserts only that the endpoint returns 200; see
        the note below for why exact values are not asserted.
        """
        # Two groups ("foo" / "bar"): in each, one span has an lcp value
        # and two spans do not, spread across two days so at least one
        # time bucket per group lacks the attribute entirely.
        self.store_spans(
            [
                self.create_span(
                    {
                        "description": "foo",
                        "measurements": {"lcp": {"value": 1}},
                    },
                    start_ts=self.day_ago,
                ),
                self.create_span({"description": "foo"}, start_ts=self.day_ago),
                self.create_span({"description": "foo"}, start_ts=self.two_days_ago),
                self.create_span(
                    {
                        "description": "bar",
                        "measurements": {"lcp": {"value": 2}},
                    },
                    start_ts=self.day_ago,
                ),
                self.create_span({"description": "bar"}, start_ts=self.day_ago),
                self.create_span({"description": "bar"}, start_ts=self.two_days_ago),
            ],
            is_eap=self.is_eap,
        )

        # topEvents=1 with excludeOther=0 forces the "Other" series, which
        # is where the NaN aggregation bug manifested.
        response = self._do_request(
            data={
                "field": ["span.description", "p50(measurements.lcp)", "avg(measurements.lcp)"],
                "yAxis": ["p50(measurements.lcp)", "avg(measurements.lcp)"],
                "project": self.project.id,
                "dataset": self.dataset,
                "excludeOther": 0,
                "topEvents": 1,
                "partial": 1,
                "per_page": 50,
                "interval": "1d",
                "statsPeriod": "7d",
                "orderby": "-avg_measurements_lcp",
                "sort": "-avg_measurements_lcp",
            },
        )
        assert response.status_code == 200, response.content

        # We cannot actually assert the value because the `spans_indexed` is
        # producing the wrong result and treating missing values as 0s which
        # skews the final aggregation.
        # This is also the reason it never errored because snuba never returns
        # nans in this situation.
        #
        # for k in response.data:
        #     for agg in ["p50(measurements.lcp)", "avg(measurements.lcp)"]:
        #         for row in response.data[k][agg]["data"]:
        #             assert row[1][0]["count"] in {0, 1, 2}
        # assert response.data["Other"]

def test_count_unique(self):
event_counts = [6, 0, 6, 3, 0, 3]
for hour, count in enumerate(event_counts):
Expand Down Expand Up @@ -857,3 +912,7 @@ def test_invalid_intervals(self):
},
)
assert response.status_code == 400, response.content

    # The RPC-backed subclass cannot run this test yet: the backend fails
    # with a division-by-zero error (see commit message), so the inherited
    # test is expected to fail until that is fixed server-side.
    @pytest.mark.xfail(reason="division by 0 error in snuba")
    def test_handle_nans_from_snuba_top_n(self) -> None:
        super().test_handle_nans_from_snuba_top_n()

0 comments on commit dd203b7

Please sign in to comment.