Skip to content

Commit

Permalink
chore(similarity): Do not send over 30 system-only frames to seer (#8…
Browse files Browse the repository at this point in the history
…0946)

If an event has no in-app frames and over 30 system frames, return an
null formatted stacktrace string
  • Loading branch information
jangjodi authored and andrewshie-sentry committed Dec 2, 2024
1 parent 9ca2efd commit ad33741
Show file tree
Hide file tree
Showing 7 changed files with 215 additions and 16 deletions.
10 changes: 7 additions & 3 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
from sentry.seer.similarity.types import SimilarIssuesEmbeddingsRequest
from sentry.seer.similarity.utils import (
ReferrerOptions,
event_content_is_seer_eligible,
filter_null_from_string,
get_stacktrace_string,
get_stacktrace_string_with_metrics,
killswitch_enabled,
)
from sentry.utils import metrics
Expand Down Expand Up @@ -187,14 +188,17 @@ def get_seer_similar_issues(
should go in (if any), or None if no neighbor was near enough.
"""
event_hash = event.get_primary_hash()
stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants))
stacktrace_string = get_stacktrace_string_with_metrics(
get_grouping_info_from_variants(variants), event.platform, ReferrerOptions.INGEST
)
exception_type = get_path(event.data, "exception", "values", -1, "type")

request_data: SimilarIssuesEmbeddingsRequest = {
"event_id": event.event_id,
"hash": event_hash,
"project_id": event.project.id,
"stacktrace": stacktrace_string,
# TODO: remove this once we do the stacktrace null check
"stacktrace": stacktrace_string if stacktrace_string else "",
"exception_type": filter_null_from_string(exception_type) if exception_type else None,
"k": num_neighbors,
"referrer": "ingest",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
from sentry.seer.similarity.types import SeerSimilarIssueData, SimilarIssuesEmbeddingsRequest
from sentry.seer.similarity.utils import (
TooManyOnlySystemFramesException,
event_content_has_stacktrace,
get_stacktrace_string,
killswitch_enabled,
Expand Down Expand Up @@ -82,9 +83,12 @@ def get(self, request: Request, group) -> Response:
stacktrace_string = ""
if latest_event and event_content_has_stacktrace(latest_event):
grouping_info = get_grouping_info(None, project=group.project, event=latest_event)
stacktrace_string = get_stacktrace_string(grouping_info)
try:
stacktrace_string = get_stacktrace_string(grouping_info)
except TooManyOnlySystemFramesException:
stacktrace_string = ""

if stacktrace_string == "" or not latest_event:
if not stacktrace_string or not latest_event:
return Response([]) # No exception, stacktrace or in-app frames, or event

similar_issues_params: SimilarIssuesEmbeddingsRequest = {
Expand Down
41 changes: 41 additions & 0 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from enum import StrEnum
from typing import Any, TypeVar

from sentry import options
Expand Down Expand Up @@ -151,6 +152,15 @@
]


class ReferrerOptions(StrEnum):
INGEST = "ingest"
BACKFILL = "backfill"


class TooManyOnlySystemFramesException(Exception):
pass


def _get_value_if_exists(exception_value: dict[str, Any]) -> str:
return exception_value["values"][0] if exception_value.get("values") else ""

Expand All @@ -177,6 +187,7 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:

frame_count = 0
html_frame_count = 0 # for a temporary metric
is_frames_truncated = False
stacktrace_str = ""
found_non_snipped_context_line = False

Expand All @@ -185,12 +196,15 @@ def get_stacktrace_string(data: dict[str, Any]) -> str:
def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
nonlocal frame_count
nonlocal html_frame_count
nonlocal is_frames_truncated
nonlocal found_non_snipped_context_line
frame_strings = []

contributing_frames = [
frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes")
]
if len(contributing_frames) + frame_count > MAX_FRAME_COUNT:
is_frames_truncated = True
contributing_frames = _discard_excess_frames(
contributing_frames, MAX_FRAME_COUNT, frame_count
)
Expand Down Expand Up @@ -255,6 +269,8 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
exc_value = _get_value_if_exists(exception_value)
elif exception_value.get("id") == "stacktrace" and frame_count < MAX_FRAME_COUNT:
frame_strings = _process_frames(exception_value["values"])
if is_frames_truncated and not app_hash:
raise TooManyOnlySystemFramesException
# Only exceptions have the type and value properties, so we don't need to handle the threads
# case here
header = f"{exc_type}: {exc_value}\n" if exception["id"] == "exception" else ""
Expand Down Expand Up @@ -290,6 +306,31 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]:
return stacktrace_str.strip()


def get_stacktrace_string_with_metrics(
data: dict[str, Any], platform: str | None, referrer: ReferrerOptions
) -> str | None:
try:
stacktrace_string = get_stacktrace_string(data)
except TooManyOnlySystemFramesException:
platform = platform if platform else "unknown"
metrics.incr(
"grouping.similarity.over_threshold_only_system_frames",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={"platform": platform, "referrer": referrer},
)
if referrer == ReferrerOptions.INGEST:
metrics.incr(
"grouping.similarity.did_call_seer",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={
"call_made": False,
"blocker": "over-threshold-only-system-frames",
},
)
stacktrace_string = None
return stacktrace_string


def event_content_has_stacktrace(event: Event) -> bool:
# If an event has no stacktrace, there's no data for Seer to analyze, so no point in making the
# API call. If we ever start analyzing message-only events, we'll need to add `event.title in
Expand Down
9 changes: 6 additions & 3 deletions src/sentry/tasks/embeddings_grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,10 @@
SimilarHashNotFoundError,
)
from sentry.seer.similarity.utils import (
ReferrerOptions,
event_content_has_stacktrace,
filter_null_from_string,
get_stacktrace_string,
get_stacktrace_string_with_metrics,
)
from sentry.snuba.dataset import Dataset
from sentry.snuba.referrer import Referrer
Expand Down Expand Up @@ -355,8 +356,10 @@ def get_events_from_nodestore(
event._project_cache = project
if event and event.data and event_content_has_stacktrace(event):
grouping_info = get_grouping_info(None, project=project, event=event)
stacktrace_string = get_stacktrace_string(grouping_info)
if stacktrace_string == "":
stacktrace_string = get_stacktrace_string_with_metrics(
grouping_info, event.platform, ReferrerOptions.BACKFILL
)
if not stacktrace_string:
invalid_event_group_ids.append(group_id)
continue
primary_hash = event.get_primary_hash()
Expand Down
55 changes: 54 additions & 1 deletion tests/sentry/grouping/ingest/test_seer.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from dataclasses import asdict
from time import time
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock, Mock, call, patch

from sentry import options
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
from sentry.eventstore.models import Event
from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping
from sentry.models.grouphash import GroupHash
from sentry.seer.similarity.types import SeerSimilarIssueData
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
from sentry.testutils.cases import TestCase
from sentry.testutils.helpers.eventprocessing import save_new_event
from sentry.testutils.helpers.options import override_options
Expand Down Expand Up @@ -261,3 +263,54 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self)
expected_metadata,
None,
)

@patch("sentry.seer.similarity.utils.metrics")
def test_too_many_only_system_frames(self, mock_metrics: Mock) -> None:
type = "FailedToFetchError"
value = "Charlie didn't bring the ball back"
context_line = f"raise {type}('{value}')"
new_event = Event(
project_id=self.project.id,
event_id="22312012112120120908201304152013",
data={
"title": f"{type}('{value}')",
"exception": {
"values": [
{
"type": type,
"value": value,
"stacktrace": {
"frames": [
{
"function": f"play_fetch_{i}",
"filename": f"dogpark{i}.py",
"context_line": context_line,
}
for i in range(MAX_FRAME_COUNT + 1)
]
},
}
]
},
"platform": "python",
},
)
get_seer_similar_issues(new_event, new_event.get_grouping_variants())

sample_rate = options.get("seer.similarity.metrics_sample_rate")
expected_metrics_calls = [
call(
"grouping.similarity.over_threshold_only_system_frames",
sample_rate=sample_rate,
tags={"platform": "python", "referrer": "ingest"},
),
call(
"grouping.similarity.did_call_seer",
sample_rate=1.0,
tags={
"call_made": False,
"blocker": "over-threshold-only-system-frames",
},
),
]
assert mock_metrics.incr.call_args_list == expected_metrics_calls
42 changes: 36 additions & 6 deletions tests/sentry/seer/similarity/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
from typing import Any, Literal, cast
from uuid import uuid1

import pytest

from sentry.eventstore.models import Event
from sentry.seer.similarity.utils import (
BASE64_ENCODED_PREFIXES,
MAX_FRAME_COUNT,
SEER_ELIGIBLE_PLATFORMS,
TooManyOnlySystemFramesException,
_is_snipped_context_line,
event_content_is_seer_eligible,
filter_null_from_string,
Expand Down Expand Up @@ -670,18 +674,18 @@ def test_chained_too_many_frames_minified_js_frame_limit(self):
)

def test_chained_too_many_exceptions(self):
"""Test that we restrict number of chained exceptions to 30."""
"""Test that we restrict number of chained exceptions to MAX_FRAME_COUNT."""
data_chained_exception = copy.deepcopy(self.CHAINED_APP_DATA)
data_chained_exception["app"]["component"]["values"][0]["values"] = [
self.create_exception(
exception_type_str="Exception",
exception_value=f"exception {i} message!",
frames=self.create_frames(num_frames=1, context_line_factory=lambda i: f"line {i}"),
)
for i in range(1, 32)
for i in range(1, MAX_FRAME_COUNT + 2)
]
stacktrace_str = get_stacktrace_string(data_chained_exception)
for i in range(2, 32):
for i in range(2, MAX_FRAME_COUNT + 2):
assert f"exception {i} message!" in stacktrace_str
assert "exception 1 message!" not in stacktrace_str

Expand Down Expand Up @@ -710,9 +714,35 @@ def test_no_app_no_system(self):
stacktrace_str = get_stacktrace_string(data)
assert stacktrace_str == ""

def test_over_30_contributing_frames(self):
"""Check that when there are over 30 contributing frames, the last 30 are included."""
def test_too_many_system_frames_single_exception(self):
data_system = copy.deepcopy(self.BASE_APP_DATA)
data_system["system"] = data_system.pop("app")
data_system["system"]["component"]["values"][0]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT + 1, True)

with pytest.raises(TooManyOnlySystemFramesException):
get_stacktrace_string(data_system)

def test_too_many_system_frames_chained_exception(self):
data_system = copy.deepcopy(self.CHAINED_APP_DATA)
data_system["system"] = data_system.pop("app")
# Split MAX_FRAME_COUNT across the two exceptions
data_system["system"]["component"]["values"][0]["values"][0]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT // 2, True)
data_system["system"]["component"]["values"][0]["values"][1]["values"][0][
"values"
] += self.create_frames(MAX_FRAME_COUNT // 2, True)

with pytest.raises(TooManyOnlySystemFramesException):
get_stacktrace_string(data_system)

def test_too_many_in_app_contributing_frames(self):
"""
Check that when there are over MAX_FRAME_COUNT contributing frames, the last MAX_FRAME_COUNT
are included.
"""
data_frames = copy.deepcopy(self.BASE_APP_DATA)
# Create 30 contributing frames, 1-20 -> last 10 should be included
data_frames["app"]["component"]["values"][0]["values"][0]["values"] = self.create_frames(
Expand All @@ -739,7 +769,7 @@ def test_over_30_contributing_frames(self):
for i in range(41, 61):
num_frames += 1
assert ("test = " + str(i) + "!") in stacktrace_str
assert num_frames == 30
assert num_frames == MAX_FRAME_COUNT

def test_too_many_frames_minified_js_frame_limit(self):
"""Test that we restrict fully-minified stacktraces to 20 frames, and all other stacktraces to 30 frames."""
Expand Down
Loading

0 comments on commit ad33741

Please sign in to comment.