Skip to content

Commit

Permalink
feat(priority-alerts): Add a ratelimit to severity service calls (#67713
Browse files Browse the repository at this point in the history
)

Adding a ratelimit of 5 requests per second to the Seer severity
service. We can adjust this if we see the ratelimit kick in as we're
rolling out the service.

Refactored a bit to only call the service for python/js projects and
check the conditions before checking the ratelimit.
  • Loading branch information
snigdhas authored Mar 28, 2024
1 parent d6b8362 commit 31ae2c7
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 25 deletions.
62 changes: 52 additions & 10 deletions src/sentry/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -1829,13 +1829,10 @@ def _create_group(project: Project, event: Event, **group_creation_kwargs: Any)
# add sdk tag to metadata
group_data.setdefault("metadata", {}).update(sdk_metadata_from_event(event))

# Add severity to metadata for alert filtering for errors events.
# We can skip this if the group type is explicitly NOT an error.
# add severity to metadata for alert filtering
group_type = group_creation_kwargs.get("type", None)
severity: Mapping[str, Any] = {}
if not group_type or group_type == ErrorGroupType.type_id:
severity = _get_severity_metadata_for_group(event)
group_data["metadata"].update(severity)
severity = _get_severity_metadata_for_group(event, project.id, group_type)
group_data["metadata"].update(severity)

if features.has("projects:issue-priority", project, actor=None):
# the kwargs only include priority for non-error issue platform events, which takes precedence.
Expand Down Expand Up @@ -2163,12 +2160,57 @@ def _process_existing_aggregate(
)


def _get_severity_metadata_for_group(event: Event) -> Mapping[str, Any]:
def _get_severity_metadata_for_group(
event: Event, project_id: int, group_type: int | None
) -> Mapping[str, Any]:
"""
Returns severity metadata for an event if the feature flag is enabled.
Returns {} on feature flag not enabled or exception.
Returns severity metadata for an event if all of the following are true
- the feature flag is enabled
- the event platform supports severity
- the event group type is an error
Returns {} if conditions aren't met or on exception.
"""
if features.has("projects:first-event-severity-calculation", event.project):
from sentry.receivers.rules import PLATFORMS_WITH_PRIORITY_ALERTS

is_supported_platform = (
any(event.platform.startswith(platform) for platform in PLATFORMS_WITH_PRIORITY_ALERTS)
if event.platform
else False
)
is_error_group = group_type == ErrorGroupType.type_id if group_type else True
feature_enabled = features.has("projects:first-event-severity-calculation", event.project)

should_calculate_severity = feature_enabled and is_supported_platform and is_error_group
if should_calculate_severity:
from sentry import ratelimits as ratelimiter

limit = options.get("issues.severity.seer-global-rate-limit", 25)
if ratelimiter.backend.is_limited(
"seer:severity-calculation:global-limit",
limit=limit,
window=1, # starting this out 25 requests per second
):
logger.warning(
"get_severity_metadata_for_group.rate_limited_globally",
extra={"event_id": event.event_id, "project_id": project_id},
)
metrics.incr("issues.severity.rate_limited_globally")

limit = options.get("issues.severity.seer-project-rate-limit", 5)
if ratelimiter.backend.is_limited(
f"seer:severity-calculation:{project_id}",
limit=limit,
window=1, # starting this out 5 requests per second
):
logger.warning(
"get_severity_metadata_for_group.rate_limited_for_project",
extra={"event_id": event.event_id, "project_id": project_id},
)
metrics.incr(
"issues.severity.rate_limited_for_project", tags={"project_id": project_id}
)

try:
severity, reason = _get_severity_score(event)

Expand Down
14 changes: 14 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,20 @@
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

register(
"issues.severity.seer-project-rate-limit",
type=Int,
default=5,
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

register(
"issues.severity.seer-global-rate-limit",
type=Int,
default=25,
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

register(
"issues.severity.default-high-priority-alerts-orgs-allowlist",
type=Sequence,
Expand Down
4 changes: 2 additions & 2 deletions src/sentry/receivers/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
}


PLATFORMS_WITH_NEW_DEFAULT = ["python", "javascript"]
PLATFORMS_WITH_PRIORITY_ALERTS = ["python", "javascript"]


def has_high_priority_issue_alerts(project: Project) -> bool:
Expand All @@ -48,7 +48,7 @@ def has_high_priority_issue_alerts(project: Project) -> bool:
and project.platform is not None
and any(
project.platform.startswith(base_platform)
for base_platform in PLATFORMS_WITH_NEW_DEFAULT
for base_platform in PLATFORMS_WITH_PRIORITY_ALERTS
)
)

Expand Down
16 changes: 9 additions & 7 deletions tests/sentry/event_manager/test_priority.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class TestEventManagerPriority(TestCase):
@with_feature("projects:issue-priority")
@with_feature("projects:first-event-severity-calculation")
def test_flag_on(self, mock_get_severity_score: MagicMock):
manager = EventManager(make_event(level=logging.FATAL))
manager = EventManager(make_event(level=logging.FATAL, platform="python"))
event = manager.save(self.project.id)

mock_get_severity_score.assert_called()
Expand All @@ -30,7 +30,7 @@ def test_flag_on(self, mock_get_severity_score: MagicMock):
@patch("sentry.event_manager._get_severity_score", return_value=(0.1121, "ml"))
@with_feature("projects:first-event-severity-calculation")
def test_flag_off(self, mock_get_severity_score: MagicMock):
manager = EventManager(make_event(level=logging.FATAL))
manager = EventManager(make_event(level=logging.FATAL, platform="python"))
event = manager.save(self.project.id)

mock_get_severity_score.assert_called()
Expand All @@ -46,10 +46,12 @@ def test_flag_off(self, mock_get_severity_score: MagicMock):
def test_get_priority_for_group_not_called_on_second_event(
self, mock_get_priority_for_group: MagicMock, mock_get_severity_score: MagicMock
):
event = EventManager(make_event(level=logging.FATAL)).save(self.project.id)
event = EventManager(make_event(level=logging.FATAL, platform="python")).save(
self.project.id
)
assert mock_get_priority_for_group.call_count == 1

event2 = EventManager(make_event()).save(self.project.id)
event2 = EventManager(make_event(platform="python")).save(self.project.id)

# Same group, but no extra `_get_priority_for_group` call
assert event2.group_id == event.group_id
Expand All @@ -64,7 +66,7 @@ def test_priority_scores_without_severity(self):
(logging.ERROR, PriorityLevel.HIGH),
(logging.FATAL, PriorityLevel.HIGH),
):
manager = EventManager(make_event(level=level, fingerprint=[level]))
manager = EventManager(make_event(level=level, fingerprint=[level], platform="python"))
event = manager.save(self.project.id)

assert event.group
Expand All @@ -83,7 +85,7 @@ def test_priority_scores_with_low_severity(self, mock_get_severity_score: MagicM
(logging.ERROR, PriorityLevel.MEDIUM),
(logging.FATAL, PriorityLevel.HIGH),
):
manager = EventManager(make_event(level=level, fingerprint=[level]))
manager = EventManager(make_event(level=level, fingerprint=[level], platform="python"))
event = manager.save(self.project.id)

assert event.group
Expand All @@ -102,7 +104,7 @@ def test_priority_scores_with_high_severity(self, mock_get_severity_score: Magic
(logging.ERROR, PriorityLevel.HIGH),
(logging.FATAL, PriorityLevel.HIGH),
):
manager = EventManager(make_event(level=level, fingerprint=[level]))
manager = EventManager(make_event(level=level, fingerprint=[level], platform="python"))
event = manager.save(self.project.id)

assert event.group
Expand Down
21 changes: 16 additions & 5 deletions tests/sentry/event_manager/test_severity.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def test_error_event_simple(
"mechanism": {"type": "generic", "handled": True},
}
]
}
},
platform="python",
)
)
event = manager.save(self.project.id)
Expand Down Expand Up @@ -140,6 +141,7 @@ def test_uses_exception(
manager = EventManager(
make_event(
exception={"values": [{"type": "NopeError", "value": "Nopey McNopeface"}]},
platform="python",
)
)
event = manager.save(self.project.id)
Expand Down Expand Up @@ -174,6 +176,7 @@ def test_short_circuit_level(
make_event(
exception={"values": [{"type": "NopeError", "value": "Nopey McNopeface"}]},
level=level,
platform="python",
)
)
event = manager.save(self.project.id)
Expand All @@ -193,7 +196,7 @@ def test_unusable_event_title(
mock_urlopen: MagicMock,
) -> None:
for title in NON_TITLE_EVENT_TITLES:
manager = EventManager(make_event(exception={"values": []}))
manager = EventManager(make_event(exception={"values": []}, platform="python"))
event = manager.save(self.project.id)
# `title` is a property with no setter, but it pulls from `metadata`, so it's equivalent
# to set it there. (We have to ignore mypy because `metadata` isn't supposed to be mutable.)
Expand Down Expand Up @@ -238,7 +241,8 @@ def test_max_retry_exception(
"mechanism": {"type": "generic", "handled": True},
}
]
}
},
platform="python",
)
)
event = manager.save(self.project.id)
Expand Down Expand Up @@ -284,6 +288,7 @@ def test_other_exception(
}
],
},
platform="python",
)
)
event = manager.save(self.project.id)
Expand Down Expand Up @@ -313,7 +318,8 @@ def test_flag_on(self, mock_get_severity_score: MagicMock):
with self.feature({"projects:first-event-severity-calculation": True}):
manager = EventManager(
make_event(
exception={"values": [{"type": "NopeError", "value": "Nopey McNopeface"}]}
exception={"values": [{"type": "NopeError", "value": "Nopey McNopeface"}]},
platform="python",
)
)
event = manager.save(self.project.id)
Expand All @@ -330,7 +336,8 @@ def test_flag_off(self, mock_get_severity_score: MagicMock):
with self.feature({"projects:first-event-severity-calculation": False}):
manager = EventManager(
make_event(
exception={"values": [{"type": "NopeError", "value": "Nopey McNopeface"}]}
exception={"values": [{"type": "NopeError", "value": "Nopey McNopeface"}]},
platform="python",
)
)
event = manager.save(self.project.id)
Expand All @@ -351,6 +358,7 @@ def test_get_severity_score_not_called_on_second_event(
make_event(
exception={"values": [{"type": "NopeError", "value": "Nopey McNopeface"}]},
fingerprint=["dogs_are_great"],
platform="python",
)
).save(self.project.id)

Expand All @@ -360,6 +368,7 @@ def test_get_severity_score_not_called_on_second_event(
make_event(
exception={"values": [{"type": "BrokenStuffError", "value": "It broke"}]},
fingerprint=["dogs_are_great"],
platform="python",
)
).save(self.project.id)

Expand All @@ -375,6 +384,7 @@ def test_score_not_clobbered_by_second_event(self, mock_get_severity_score: Magi
make_event(
exception={"values": [{"type": "NopeError", "value": "Nopey McNopeface"}]},
fingerprint=["dogs_are_great"],
platform="python",
)
).save(self.project.id)

Expand All @@ -389,6 +399,7 @@ def test_score_not_clobbered_by_second_event(self, mock_get_severity_score: Magi
make_event(
exception={"values": [{"type": "BrokenStuffError", "value": "It broke"}]},
fingerprint=["dogs_are_great"],
platform="python",
)
).save(self.project.id)

Expand Down
3 changes: 2 additions & 1 deletion tests/sentry/issues/test_occurrence_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,8 @@ def test_issue_platform_default_priority(self):
assert group.priority == PriorityLevel.LOW

@with_feature("projects:issue-priority")
@mock.patch("sentry.event_manager._get_severity_metadata_for_group")
@with_feature("projects:first-event-severity-calculation")
@mock.patch("sentry.event_manager._get_severity_score")
def test_issue_platform_override_priority(self, mock_get_severity_score):
# test explicitly set priority of HIGH
message = get_test_message(self.project.id)
Expand Down

0 comments on commit 31ae2c7

Please sign in to comment.