From f68258636528fce052645138a08f9d373fdc0a08 Mon Sep 17 00:00:00 2001 From: Tony Xiao Date: Wed, 18 Oct 2023 14:12:56 -0400 Subject: [PATCH] =?UTF-8?q?chore(statistical-detectors):=20Add=20feature?= =?UTF-8?q?=20flags=20for=20function=20regressi=E2=80=A6=20(#58197)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …on detection This adds feature flags to allow the incremental roll out for function regression detection and some metrics for better monitoring. --- src/sentry/tasks/statistical_detectors.py | 136 +++++++++++------- .../tasks/test_statistical_detectors.py | 43 ++++-- 2 files changed, 120 insertions(+), 59 deletions(-) diff --git a/src/sentry/tasks/statistical_detectors.py b/src/sentry/tasks/statistical_detectors.py index ffa1c399e05647..9c4e68ba0fa629 100644 --- a/src/sentry/tasks/statistical_detectors.py +++ b/src/sentry/tasks/statistical_detectors.py @@ -2,7 +2,7 @@ import logging from collections import defaultdict -from datetime import datetime, timedelta, timezone +from datetime import datetime, timedelta from typing import Any, Dict, Generator, List, Optional, Set, Tuple, Union, cast import sentry_sdk @@ -25,7 +25,7 @@ Request, ) -from sentry import options +from sentry import features, options from sentry.api.serializers.snuba import SnubaTSResultSerializer from sentry.constants import ObjectStatus from sentry.models.project import Project @@ -88,13 +88,21 @@ def run_detection() -> None: performance_projects = [] profiling_projects = [] - # TODO: make the amount of projects per batch configurable + performance_projects_count = 0 + profiling_projects_count = 0 + for project in RangeQuerySetWrapper( - Project.objects.filter(status=ObjectStatus.ACTIVE), + Project.objects.filter(status=ObjectStatus.ACTIVE).select_related("organization"), step=100, ): - if project.flags.has_transactions and project.id in enabled_performance_projects: + if project.flags.has_transactions and ( + features.has( + "organizations:performance-statistical-detectors-ema", project.organization + ) + or project.id in enabled_performance_projects + ): performance_projects.append(project) + performance_projects_count += 1 if len(performance_projects) >= PROJECTS_PER_BATCH: detect_transaction_trends.delay( @@ -104,8 +112,12 @@ def run_detection() -> None: ) performance_projects = [] - if project.flags.has_profiles and project.id in enabled_profiling_projects: + if project.flags.has_profiles and ( + features.has("organizations:profiling-statistical-detectors-ema", project.organization) + or project.id in enabled_profiling_projects + ): profiling_projects.append(project.id) + profiling_projects_count += 1 if len(profiling_projects) >= PROJECTS_PER_BATCH: detect_function_trends.delay(profiling_projects, now) @@ -121,6 +133,18 @@ def run_detection() -> None: if profiling_projects: detect_function_trends.delay(profiling_projects, now) + metrics.incr( + "statistical_detectors.performance.projects.total", + amount=performance_projects_count, + sample_rate=1.0, + ) + + metrics.incr( + "statistical_detectors.profiling.projects.total", + amount=profiling_projects_count, + sample_rate=1.0, + ) + @instrumented_task( name="sentry.tasks.statistical_detectors.detect_transaction_trends", @@ -141,7 +165,12 @@ def detect_transaction_trends( delay = 12 # hours delayed_start = start + timedelta(hours=delay) + unique_project_ids: Set[int] = set() + for trends in chunked(regressions, TRANSACTIONS_PER_BATCH): + for _, payload in trends: + unique_project_ids.add(payload.project_id) + detect_transaction_change_points.apply_async( args=[ [(payload.project_id, payload.group) for _, payload in trends], @@ -153,6 +182,12 @@ def detect_transaction_trends( countdown=delay * 60 * 60, ) + metrics.incr( + "statistical_detectors.performance.projects.active", + amount=len(unique_project_ids), + sample_rate=1.0, + ) + @instrumented_task( name="sentry.tasks.statistical_detectors.detect_transaction_change_points", @@ -165,18 +200,6 @@ def detect_transaction_change_points( if not options.get("statistical_detectors.enable"): return - for project_id, transaction_name in transactions: - with sentry_sdk.push_scope() as scope: - scope.set_tag("regressed_project_id", project_id) - scope.set_tag("regressed_transaction", transaction_name) - scope.set_tag("breakpoint", "no") - - scope.set_context( - "statistical_detectors", - {"timestamp": start.isoformat()}, - ) - sentry_sdk.capture_message("Potential Transaction Regression") - breakpoint_count = 0 for breakpoints in chunked(_detect_transaction_change_points(transactions, start), 10): @@ -396,7 +419,12 @@ def detect_function_trends(project_ids: List[int], start: datetime, *args, **kwa delay = 12 # hours delayed_start = start + timedelta(hours=delay) + unique_project_ids: Set[int] = set() + for regression_chunk in chunked(regressions, FUNCTIONS_PER_BATCH): + for _, payload in regression_chunk: + unique_project_ids.add(payload.project_id) + detect_function_change_points.apply_async( args=[ [(payload.project_id, payload.group) for _, payload in regression_chunk], @@ -408,6 +436,12 @@ def detect_function_trends(project_ids: List[int], start: datetime, *args, **kwa countdown=delay * 60 * 60, ) + metrics.incr( + "statistical_detectors.profiling.projects.active", + amount=len(unique_project_ids), + sample_rate=1.0, + ) + @instrumented_task( name="sentry.tasks.statistical_detectors.detect_function_change_points", @@ -420,18 +454,6 @@ def detect_function_change_points( if not options.get("statistical_detectors.enable"): return - for project_id, fingerprint in functions_list: - with sentry_sdk.push_scope() as scope: - scope.set_tag("regressed_project_id", project_id) - scope.set_tag("regressed_function_id", fingerprint) - scope.set_tag("breakpoint", "no") - - scope.set_context( - "statistical_detectors", - {"timestamp": start.isoformat()}, - ) - sentry_sdk.capture_message("Potential Function Regression") - breakpoint_count = 0 emitted_count = 0 @@ -612,24 +634,6 @@ def emit_function_regression_issue( payloads = [] for entry in breakpoints: - with sentry_sdk.push_scope() as scope: - scope.set_tag("breakpoint", "yes") - scope.set_tag("regressed_project_id", entry["project"]) - # the service was originally meant for transactions so this - # naming is a result of this - scope.set_tag("regressed_function_id", entry["transaction"]) - - breakpoint_ts = datetime.fromtimestamp(entry["breakpoint"], tz=timezone.utc) - scope.set_context( - "statistical_detectors", - { - **entry, - "timestamp": start.isoformat(), - "breakpoint_timestamp": breakpoint_ts.isoformat(), - }, - ) - sentry_sdk.capture_message("Potential Function Regression") - project_id = int(entry["project"]) fingerprint = int(entry["transaction"]) example = examples.get((project_id, fingerprint)) @@ -669,10 +673,24 @@ def all_function_payloads( project_ids: List[int], start: datetime, ) -> Generator[DetectorPayload, None, None]: + enabled_profiling_projects: Set[int] = set( + options.get("statistical_detectors.enable.projects.profiling") + ) + projects_per_query = options.get("statistical_detectors.query.batch_size") assert projects_per_query > 0 - for projects in chunked(Project.objects.filter(id__in=project_ids), projects_per_query): + selected_projects = Project.objects.filter(id__in=project_ids).select_related("organization") + selected_projects = filter( + lambda project: ( + features.has( + "organizations:profiling-statistical-detectors-breakpoint", project.organization + ) + or project.id in enabled_profiling_projects + ), + selected_projects, + ) + for projects in chunked(selected_projects, projects_per_query): try: yield from query_functions(projects, start) except Exception as e: @@ -726,6 +744,26 @@ def query_transactions( end: datetime, transactions_per_project: int, ) -> List[DetectorPayload]: + enabled_performance_projects: Set[int] = set( + options.get("statistical_detectors.enable.projects.performance") + ) + + projects = [ + project + for project in Project.objects.filter(id__in=project_ids).select_related("organization") + if ( + project.id in enabled_performance_projects + or features.has( + "organizations:performance-statistical-detectors-breakpoint", project.organization + ) + ) + ] + project_ids = [project.id for project in projects] + org_ids = list({project.organization_id for project in projects}) + + if not project_ids or not org_ids: + return [] + use_case_id = UseCaseID.TRANSACTIONS # both the metric and tag that we are using are hardcoded values in sentry_metrics.indexer.strings diff --git a/tests/sentry/tasks/test_statistical_detectors.py b/tests/sentry/tasks/test_statistical_detectors.py index accf7b4257d72e..6d63bc37cfd915 100644 --- a/tests/sentry/tasks/test_statistical_detectors.py +++ b/tests/sentry/tasks/test_statistical_detectors.py @@ -216,7 +216,12 @@ def test_detect_function_trends_options( timestamp, project, ): - with override_options({"statistical_detectors.enable": enabled}): + with override_options( + { + "statistical_detectors.enable": enabled, + "statistical_detectors.enable.projects.profiling": [project.id], + } + ): detect_function_trends([project.id], timestamp) assert query_functions.called == enabled @@ -224,7 +229,12 @@ def test_detect_function_trends_options( @mock.patch("sentry.snuba.functions.query") @django_db_all def test_detect_function_trends_query_timerange(functions_query, timestamp, project): - with override_options({"statistical_detectors.enable": True}): + with override_options( + { + "statistical_detectors.enable": True, + "statistical_detectors.enable.projects.profiling": [project.id], + } + ): detect_function_trends([project.id], timestamp) assert functions_query.called @@ -290,7 +300,12 @@ def test_detect_function_trends( for i, ts in enumerate(timestamps) ] - with override_options({"statistical_detectors.enable": True}), TaskRunner(): + with override_options( + { + "statistical_detectors.enable": True, + "statistical_detectors.enable.projects.profiling": [project.id], + } + ), TaskRunner(): for ts in timestamps: detect_function_trends([project.id], ts) assert detect_function_change_points.apply_async.called @@ -511,13 +526,21 @@ def now(self): return MetricsAPIBaseTestCase.MOCK_DATETIME def test_transactions_query(self) -> None: - res = query_transactions( - [self.org.id], - [p.id for p in self.projects], - self.hour_ago, - self.now, - self.num_transactions + 1, # detect if any extra transactions are returned - ) + with override_options( + { + "statistical_detectors.enable.projects.performance": [ + project.id for project in self.projects + ], + } + ): + res = query_transactions( + [self.org.id], + [p.id for p in self.projects], + self.hour_ago, + self.now, + self.num_transactions + 1, # detect if any extra transactions are returned + ) + assert len(res) == len(self.projects) * self.num_transactions for trend_payload in res: assert trend_payload.count == 2