From bd3ab5f599fffd7c87d74918bc323b575e31460e Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Mon, 25 Nov 2024 18:42:01 +0000 Subject: [PATCH] Revert "chore(similarity): Add logging for over 30 system frames (#81130)" This reverts commit a3dba875a0772519c7f272fe923e04b1affe0a0a. Co-authored-by: jangjodi <116035587+jangjodi@users.noreply.github.com> --- src/sentry/grouping/ingest/seer.py | 7 +- src/sentry/seer/similarity/utils.py | 12 ---- src/sentry/tasks/embeddings_grouping/utils.py | 4 -- tests/sentry/grouping/ingest/test_seer.py | 46 ------------- tests/sentry/seer/similarity/test_utils.py | 50 +-------------- .../test_backfill_seer_grouping_records.py | 64 ------------------- 6 files changed, 2 insertions(+), 181 deletions(-) diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index 369848f5cc4712..52c6eaadca5e6d 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -182,12 +182,7 @@ def _circuit_breaker_broken(event: Event, project: Project) -> bool: def _has_empty_stacktrace_string(event: Event, variants: Mapping[str, BaseVariant]) -> bool: - # Temporarily add project and event id to this for logging purposes - # TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed - grouping_info = get_grouping_info_from_variants(variants) - grouping_info["project_id"] = event.project.id # type: ignore[assignment] - grouping_info["event_id"] = event.event_id # type: ignore[assignment] - stacktrace_string = get_stacktrace_string(grouping_info) + stacktrace_string = get_stacktrace_string(get_grouping_info_from_variants(variants)) if stacktrace_string == "": metrics.incr( "grouping.similarity.did_call_seer", diff --git a/src/sentry/seer/similarity/utils.py b/src/sentry/seer/similarity/utils.py index faf54791795428..29c8b3810c28dd 100644 --- a/src/sentry/seer/similarity/utils.py +++ b/src/sentry/seer/similarity/utils.py @@ -179,7 +179,6 @@ def get_stacktrace_string(data: dict[str, Any]) -> str: html_frame_count = 0 # for a temporary metric stacktrace_str = "" found_non_snipped_context_line = False - is_frames_truncated = False metrics.distribution("seer.grouping.exceptions.length", len(exceptions)) @@ -187,14 +186,11 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]: nonlocal frame_count nonlocal html_frame_count nonlocal found_non_snipped_context_line - nonlocal is_frames_truncated frame_strings = [] contributing_frames = [ frame for frame in frames if frame.get("id") == "frame" and frame.get("contributes") ] - if len(contributing_frames) + frame_count > MAX_FRAME_COUNT: - is_frames_truncated = True contributing_frames = _discard_excess_frames( contributing_frames, MAX_FRAME_COUNT, frame_count ) @@ -291,14 +287,6 @@ def _process_frames(frames: list[dict[str, Any]]) -> list[str]: }, ) - if is_frames_truncated and not app_hash: - logger_extra = { - "project_id": data.get("project_id", ""), - "event_id": data.get("event_id", ""), - "hash": system_hash, - } - logger.info("grouping.similarity.over_threshold_system_only_frames", extra=logger_extra) - return stacktrace_str.strip() diff --git a/src/sentry/tasks/embeddings_grouping/utils.py b/src/sentry/tasks/embeddings_grouping/utils.py index 7c30b4bd44551e..7dd7b95c129981 100644 --- a/src/sentry/tasks/embeddings_grouping/utils.py +++ b/src/sentry/tasks/embeddings_grouping/utils.py @@ -355,10 +355,6 @@ def get_events_from_nodestore( event._project_cache = project if event and event.data and event_content_has_stacktrace(event): grouping_info = get_grouping_info(None, project=project, event=event) - # Temporarily add project and event id to this for logging purposes - # TODO: Remove when grouping.similarity.over_threshold_system_only_frames is removed - grouping_info["project_id"] = project.id - grouping_info["event_id"] = event.event_id # type: ignore[assignment] stacktrace_string = get_stacktrace_string(grouping_info) if stacktrace_string == "": invalid_event_group_ids.append(group_id) diff --git a/tests/sentry/grouping/ingest/test_seer.py b/tests/sentry/grouping/ingest/test_seer.py index 0a044449582416..33c872f07325a3 100644 --- a/tests/sentry/grouping/ingest/test_seer.py +++ b/tests/sentry/grouping/ingest/test_seer.py @@ -5,11 +5,9 @@ from sentry import options from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION from sentry.eventstore.models import Event -from sentry.grouping.grouping_info import get_grouping_info_from_variants from sentry.grouping.ingest.seer import get_seer_similar_issues, should_call_seer_for_grouping from sentry.models.grouphash import GroupHash from sentry.seer.similarity.types import SeerSimilarIssueData -from sentry.seer.similarity.utils import MAX_FRAME_COUNT from sentry.testutils.cases import TestCase from sentry.testutils.helpers.eventprocessing import save_new_event from sentry.testutils.helpers.options import override_options @@ -308,47 +306,3 @@ def test_returns_no_grouphash_and_empty_metadata_if_no_similar_group_found(self) expected_metadata, None, ) - - @patch("sentry.seer.similarity.utils.logger") - def test_too_many_only_system_frames(self, mock_logger: Mock) -> None: - type = "FailedToFetchError" - value = "Charlie didn't bring the ball back" - context_line = f"raise {type}('{value}')" - new_event = Event( - project_id=self.project.id, - event_id="22312012112120120908201304152013", - data={ - "title": f"{type}('{value}')", - "exception": { - "values": [ - { - "type": type, - "value": value, - "stacktrace": { - "frames": [ - { - "function": f"play_fetch_{i}", - "filename": f"dogpark{i}.py", - "context_line": context_line, - } - for i in range(MAX_FRAME_COUNT + 1) - ] - }, - } - ] - }, - "platform": "python", - }, - ) - variants = new_event.get_grouping_variants() - get_seer_similar_issues(new_event, variants) - - grouping_info = get_grouping_info_from_variants(variants) - mock_logger.info.assert_called_with( - "grouping.similarity.over_threshold_system_only_frames", - extra={ - "project_id": "", - "event_id": "", - "hash": grouping_info["system"]["hash"], - }, - ) diff --git a/tests/sentry/seer/similarity/test_utils.py b/tests/sentry/seer/similarity/test_utils.py index 54d94c736be474..2bbe76601e432f 100644 --- a/tests/sentry/seer/similarity/test_utils.py +++ b/tests/sentry/seer/similarity/test_utils.py @@ -1,13 +1,11 @@ import copy from collections.abc import Callable from typing import Any, Literal, cast -from unittest.mock import patch from uuid import uuid1 from sentry.eventstore.models import Event from sentry.seer.similarity.utils import ( BASE64_ENCODED_PREFIXES, - MAX_FRAME_COUNT, SEER_ELIGIBLE_PLATFORMS, _is_snipped_context_line, event_content_is_seer_eligible, @@ -712,53 +710,7 @@ def test_no_app_no_system(self): stacktrace_str = get_stacktrace_string(data) assert stacktrace_str == "" - @patch("sentry.seer.similarity.utils.logger") - def test_too_many_system_frames_single_exception(self, mock_logger): - data_system = copy.deepcopy(self.BASE_APP_DATA) - data_system["system"] = data_system.pop("app") - data_system["system"]["component"]["values"][0]["values"][0][ - "values" - ] += self.create_frames(MAX_FRAME_COUNT + 1, True) - data_system["project_id"] = self.project.id - data_system["event_id"] = "39485673049520" - - get_stacktrace_string(data_system) - - mock_logger.info.assert_called_with( - "grouping.similarity.over_threshold_system_only_frames", - extra={ - "project_id": self.project.id, - "event_id": data_system["event_id"], - "hash": data_system["system"]["hash"], - }, - ) - - @patch("sentry.seer.similarity.utils.logger") - def test_too_many_system_frames_chained_exception(self, mock_logger): - data_system = copy.deepcopy(self.CHAINED_APP_DATA) - data_system["system"] = data_system.pop("app") - data_system["project_id"] = self.project.id - data_system["event_id"] = "39485673049520" - # Split MAX_FRAME_COUNT across the two exceptions - data_system["system"]["component"]["values"][0]["values"][0]["values"][0][ - "values" - ] += self.create_frames(MAX_FRAME_COUNT // 2, True) - data_system["system"]["component"]["values"][0]["values"][1]["values"][0][ - "values" - ] += self.create_frames(MAX_FRAME_COUNT // 2, True) - - get_stacktrace_string(data_system) - - mock_logger.info.assert_called_with( - "grouping.similarity.over_threshold_system_only_frames", - extra={ - "project_id": self.project.id, - "event_id": data_system["event_id"], - "hash": data_system["system"]["hash"], - }, - ) - - def test_too_many_in_app_contributing_frames(self): + def test_over_30_contributing_frames(self): """Check that when there are over 30 contributing frames, the last 30 are included.""" data_frames = copy.deepcopy(self.BASE_APP_DATA) diff --git a/tests/sentry/tasks/test_backfill_seer_grouping_records.py b/tests/sentry/tasks/test_backfill_seer_grouping_records.py index 3cdf4c36d232a1..efad864ac4809d 100644 --- a/tests/sentry/tasks/test_backfill_seer_grouping_records.py +++ b/tests/sentry/tasks/test_backfill_seer_grouping_records.py @@ -25,7 +25,6 @@ from sentry.models.grouphash import GroupHash from sentry.seer.similarity.grouping_records import CreateGroupingRecordData from sentry.seer.similarity.types import RawSeerSimilarIssueData -from sentry.seer.similarity.utils import MAX_FRAME_COUNT from sentry.snuba.dataset import Dataset from sentry.snuba.referrer import Referrer from sentry.tasks.embeddings_grouping.backfill_seer_grouping_records_for_project import ( @@ -366,69 +365,6 @@ def test_lookup_group_data_stacktrace_bulk_no_stacktrace_exception(self): assert bulk_group_data_stacktraces["data"] == expected_group_data assert bulk_group_data_stacktraces["stacktrace_list"] == expected_stacktraces - @patch("sentry.seer.similarity.utils.logger") - def test_lookup_group_data_stacktrace_bulk_too_many_system_frames(self, mock_logger): - """ - Test that if a group has over MAX_FRAME_COUNT only system frames, it is logged - and included in results - """ - # Use 2 events - rows, events, hashes = self.bulk_rows[:2], self.bulk_events[:2], {} - group_ids = [row["group_id"] for row in rows] - for group_id in group_ids: - hashes.update({group_id: self.group_hashes[group_id]}) - # Create one event where the stacktrace has over MAX_FRAME_COUNT system only frames - exception: Any = copy.deepcopy(EXCEPTION) - exception["values"][0]["stacktrace"]["frames"] += [ - { - "function": f"divide_by_zero_{i}", - "module": "__main__", - "filename": "python_onboarding_{i}.py", - "abs_path": "/Users/user/python_onboarding/python_onboarding_{i}.py", - "lineno": i, - "in_app": True, - } - for i in range(MAX_FRAME_COUNT + 1) - ] - event = self.store_event( - data={ - "platform": "python", - "exception": exception, - "title": "title", - "timestamp": before_now(seconds=10).isoformat(), - }, - project_id=self.project.id, - assert_no_errors=False, - ) - rows.append({"event_id": event.event_id, "group_id": event.group_id}) - group_hash = GroupHash.objects.filter(group_id=event.group.id).first() - assert group_hash - hashes.update({event.group_id: group_hash.hash}) - events.append(event) - - bulk_group_data_stacktraces, _ = get_events_from_nodestore(self.project, rows, group_ids) - expected_group_data = [ - CreateGroupingRecordData( - group_id=event.group.id, - hash=hashes[event.group.id], - project_id=self.project.id, - exception_type=get_path(event.data, "exception", "values", -1, "type"), - ) - for event in events - ] - - assert bulk_group_data_stacktraces["data"] == expected_group_data - assert len(bulk_group_data_stacktraces["stacktrace_list"]) == len(events) - - mock_logger.info.assert_called_with( - "grouping.similarity.over_threshold_system_only_frames", - extra={ - "project_id": self.project.id, - "event_id": event.event_id, - "hash": group_hash.hash, - }, - ) - def test_lookup_group_data_stacktrace_bulk_with_fallback_success(self): """Test successful bulk lookup with fallback, where the fallback isn't used""" rows, events, hashes = (