Skip to content

Commit

Permalink
feat(similarity): Do not block on project platform (#82952)
Browse files Browse the repository at this point in the history
This allows a backfill to be attempted for every project, regardless of the
project's platform. Only an event's platform can now prevent a backfill attempt.
  • Loading branch information
armenzg authored Jan 7, 2025
1 parent 9ff9ae1 commit 2cbc63b
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 113 deletions.
16 changes: 2 additions & 14 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,6 @@
"ruby",
]
)
# Existing projects with these platforms shouldn't be backfilled and new projects with these
# platforms shouldn't have Seer enabled.
SEER_INELIGIBLE_PROJECT_PLATFORMS = frozenset(
[
# We have no clue what's in these projects
"other",
"",
None,
]
)
BASE64_ENCODED_PREFIXES = [
"data:text/html;base64",
"data:text/javascript;base64",
Expand Down Expand Up @@ -455,11 +445,9 @@ def _is_snipped_context_line(context_line: str) -> bool:

def project_is_seer_eligible(project: Project) -> bool:
"""
Return True if the project hasn't already been backfilled, is a Seer-eligible platform, and
the feature is enabled in the region.
Return True if the project hasn't already been backfilled and the feature is enabled in the region.
"""
is_backfill_completed = project.get_option("sentry:similarity_backfill_completed")
is_seer_eligible_platform = project.platform not in SEER_INELIGIBLE_PROJECT_PLATFORMS
is_region_enabled = options.get("similarity.new_project_seer_grouping.enabled")

return not is_backfill_completed and is_seer_eligible_platform and is_region_enabled
return not is_backfill_completed and is_region_enabled
99 changes: 0 additions & 99 deletions tests/sentry/tasks/test_backfill_seer_grouping_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from sentry.grouping.enhancer.exceptions import InvalidEnhancerConfig
from sentry.models.group import Group, GroupStatus
from sentry.models.grouphash import GroupHash
from sentry.models.project import Project
from sentry.seer.similarity.grouping_records import CreateGroupingRecordData
from sentry.seer.similarity.types import RawSeerSimilarIssueData
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
Expand Down Expand Up @@ -114,10 +113,6 @@

@django_db_all
class TestBackfillSeerGroupingRecords(SnubaTestCase, TestCase):
def create_project(self, **kwargs) -> Project:
"""We overwrite the default create_project method to make sure that all created projects are seer eligible."""
return super().create_project(**kwargs, platform=kwargs.get("platform", "python"))

def create_exception_values(self, function_name: str, type: str, value: str):
return {
"values": [
Expand Down Expand Up @@ -220,9 +215,6 @@ def assert_group_metadata_not_updated(self, group: Group) -> None:

def setUp(self):
super().setUp()
# Make the project seer eligible
self.project.platform = "python"
self.project.save()
bulk_data = self.create_group_event_rows(5)
self.event = bulk_data["events"][0]
self.bulk_rows, self.bulk_events = (bulk_data["rows"], bulk_data["events"])
Expand Down Expand Up @@ -1716,8 +1708,6 @@ def test_backfill_seer_grouping_records_cohort_creation(
project_same_cohort = self.create_project(
organization=self.organization, id=self.project.id + thread_number
)
project_same_cohort.platform = "javascript"
project_same_cohort.save()
event_same_cohort = self.store_event(
data={
"exception": EXCEPTION,
Expand Down Expand Up @@ -1806,93 +1796,6 @@ def test_backfill_seer_grouping_records_cohort_creation(

assert mock_logger.info.call_args_list == expected_call_args_list

@override_options({"similarity.new_project_seer_grouping.enabled": True})
@patch("sentry.tasks.embeddings_grouping.backfill_seer_grouping_records_for_project.logger")
@patch("sentry.tasks.embeddings_grouping.utils.post_bulk_grouping_records")
def test_backfill_seer_grouping_records_cohort_creation_not_seer_eligible(
self, mock_post_bulk_grouping_records, mock_logger
):
"""
Test that non Seer eligible projects are not processed when worker_number is provided.
"""
# Create 1 seer eligible project that project_id % thread_number == worker_number
thread_number = options.get("similarity.backfill_total_worker_count")
worker_number = self.project.id % thread_number

# Create 1 non seer eligible project in the same cohort, i.e. where project_id % thread_number == worker_number
project_same_cohort_not_eligible = self.create_project(
organization=self.organization, id=self.project.id + thread_number
)
project_same_cohort_not_eligible.platform = "other" # Not currently eligible
project_same_cohort_not_eligible.save()
self.create_event(project_same_cohort_not_eligible.id, times_seen=5)

# Create one project where project_id % thread_number != worker_number
self.create_project(organization=self.organization, id=self.project.id + 1)

mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}
with TaskRunner():
backfill_seer_grouping_records_for_project(
current_project_id=None,
worker_number=worker_number,
)

project_last_group_id = sorted(
[group.id for group in Group.objects.filter(project_id=self.project.id)]
)[0]

expected_cohort = [self.project.id, project_same_cohort_not_eligible.id]
expected_call_args_list = [
call(
"backfill_seer_grouping_records",
extra={
"current_project_id": self.project.id,
"last_processed_group_id": None,
"cohort": expected_cohort,
"last_processed_project_index": None,
"only_delete": False,
"skip_processed_projects": True,
"skip_project_ids": None,
"worker_number": worker_number,
},
),
call(
"backfill_seer_grouping_records",
extra={
"current_project_id": self.project.id,
"last_processed_group_id": project_last_group_id,
"cohort": expected_cohort,
"last_processed_project_index": 0,
"only_delete": False,
"skip_processed_projects": True,
"skip_project_ids": None,
"worker_number": worker_number,
},
),
call(
"backfill_seer_grouping_records",
extra={
"current_project_id": project_same_cohort_not_eligible.id,
"last_processed_group_id": None,
"cohort": expected_cohort,
"last_processed_project_index": 1,
"only_delete": False,
"skip_processed_projects": True,
"skip_project_ids": None,
"worker_number": worker_number,
},
),
call(
"backfill_seer_grouping_records.project_is_not_seer_eligible",
extra={"project_id": project_same_cohort_not_eligible.id},
),
call(
"reached the end of the projects in cohort", extra={"worker_number": worker_number}
),
]

assert mock_logger.info.call_args_list == expected_call_args_list

@override_options({"similarity.new_project_seer_grouping.enabled": True})
@override_options({"similarity.backfill_project_cohort_size": 1})
@patch("sentry.tasks.embeddings_grouping.backfill_seer_grouping_records_for_project.logger")
Expand All @@ -1910,8 +1813,6 @@ def test_backfill_seer_grouping_records_cohort_creation_multiple_batches(
project_same_worker = self.create_project(
organization=self.organization, id=self.project.id + thread_number
)
project_same_worker.platform = "javascript"
project_same_worker.save()
event_same_worker = self.store_event(
data={
"exception": EXCEPTION,
Expand Down

0 comments on commit 2cbc63b

Please sign in to comment.