diff --git a/src/sentry/tsdb/base.py b/src/sentry/tsdb/base.py index e3623b967933a7..3eb30556916fae 100644 --- a/src/sentry/tsdb/base.py +++ b/src/sentry/tsdb/base.py @@ -117,11 +117,7 @@ class BaseTSDB(Service): "get_sums", "get_distinct_counts_series", "get_distinct_counts_totals", - "get_distinct_counts_union", - "get_most_frequent", - "get_most_frequent_series", "get_frequency_series", - "get_frequency_totals", "get_distinct_counts_totals_with_conditions", ] ) @@ -574,22 +570,6 @@ def get_distinct_counts_totals_with_conditions( """ raise NotImplementedError - def get_distinct_counts_union( - self, - model: TSDBModel, - keys: list[int] | None, - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> int: - """ - Count the total number of distinct items across multiple counters - during a time range. - """ - raise NotImplementedError - def merge_distinct_counts( self, model: TSDBModel, @@ -632,52 +612,6 @@ def record_frequency_multi( """ raise NotImplementedError - def get_most_frequent( - self, - model: TSDBModel, - keys: Sequence[TSDBKey], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - limit: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> dict[TSDBKey, list[tuple[str, float]]]: - """ - Retrieve the most frequently seen items in a frequency table. - - Results are returned as a mapping, where the key is the key requested - and the value is a list of ``(member, score)`` tuples, ordered by the - highest (most frequent) to lowest (least frequent) score. The maximum - number of items returned is ``index capacity * rollup intervals`` if no - ``limit`` is provided. - """ - raise NotImplementedError - - def get_most_frequent_series( - self, - model: TSDBModel, - keys: Iterable[str], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - limit: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> dict[str, list[tuple[int, dict[str, float]]]]: - """ - Retrieve the most frequently seen items in a frequency table for each - interval in a series. (This is in contrast with ``get_most_frequent``, - which returns the most frequent items seen over the entire requested - range.) - - Results are returned as a mapping, where the key is the key requested - and the value is a list of ``(timestamp, {item: score, ...})`` pairs - over the series. The maximum number of items returned for each interval - is the index capacity if no ``limit`` is provided. - """ - raise NotImplementedError - def get_frequency_series( self, model: TSDBModel, @@ -701,29 +635,6 @@ def get_frequency_series( """ raise NotImplementedError - def get_frequency_totals( - self, - model: TSDBModel, - items: Mapping[TSDBKey, Sequence[TSDBItem]], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> dict[TSDBKey, dict[TSDBItem, float]]: - """ - Retrieve the total frequency of known items in a table over time. - - The items requested should be passed as a mapping, where the key is the - metric key, and the value is a sequence of members to retrieve scores - for. 
- - Results are returned as a mapping, where the key is the key requested - and the value is a mapping of ``{item: score, ...}`` containing the - total score of items over the interval. - """ - raise NotImplementedError - def merge_frequencies( self, model: TSDBModel, diff --git a/src/sentry/tsdb/dummy.py b/src/sentry/tsdb/dummy.py index 3370876e9a83e3..671078aa7c4f88 100644 --- a/src/sentry/tsdb/dummy.py +++ b/src/sentry/tsdb/dummy.py @@ -93,19 +93,6 @@ def get_distinct_counts_totals_with_conditions( self.validate_arguments([model], [environment_id]) return 0 - def get_distinct_counts_union( - self, - model: TSDBModel, - keys: list[int] | None, - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> int: - self.validate_arguments([model], [environment_id]) - return 0 - def merge_distinct_counts( self, model, destination, sources, timestamp=None, environment_ids=None ): @@ -124,35 +111,6 @@ def record_frequency_multi( ): self.validate_arguments([model for model, request in requests], [environment_id]) - def get_most_frequent( - self, - model: TSDBModel, - keys: Sequence[TSDBKey], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - limit: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> dict[TSDBKey, list[tuple[str, float]]]: - self.validate_arguments([model], [environment_id]) - return {key: [] for key in keys} - - def get_most_frequent_series( - self, - model: TSDBModel, - keys: Iterable[str], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - limit: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> dict[str, list[tuple[int, dict[str, float]]]]: - self.validate_arguments([model], [environment_id]) - rollup, series = self.get_optimal_rollup_series(start, end, rollup) - return {key: [(timestamp, {}) for timestamp in series] for key in keys} - def get_frequency_series( self, model: TSDBModel, @@ -171,22 +129,6 @@ def get_frequency_series( for key, members in items.items() } - def get_frequency_totals( - self, - model: TSDBModel, - items: Mapping[TSDBKey, Sequence[TSDBItem]], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> dict[TSDBKey, dict[TSDBItem, float]]: - self.validate_arguments([model], [environment_id]) - results = {} - for key, members in items.items(): - results[key] = {member: 0.0 for member in members} - return results - def merge_frequencies( self, model: TSDBModel, diff --git a/src/sentry/tsdb/redis.py b/src/sentry/tsdb/redis.py index e2c886189e74b7..b40e40e644c6a3 100644 --- a/src/sentry/tsdb/redis.py +++ b/src/sentry/tsdb/redis.py @@ -1,12 +1,10 @@ import binascii import itertools import logging -import random import uuid from collections import defaultdict, namedtuple from collections.abc import Callable, Iterable, Mapping, Sequence from datetime import datetime -from functools import reduce from hashlib import md5 from typing import Any, ContextManager, Generic, TypeVar @@ -17,12 +15,7 @@ from sentry.tsdb.base import BaseTSDB, IncrMultiOptions, TSDBItem, TSDBKey, TSDBModel from sentry.utils.dates import to_datetime -from sentry.utils.redis import ( - check_cluster_versions, - get_cluster_from_options, - is_instance_rb_cluster, - load_redis_script, -) 
+from sentry.utils.redis import check_cluster_versions, get_cluster_from_options, load_redis_script from sentry.utils.versioning import Version logger = logging.getLogger(__name__) @@ -563,95 +556,6 @@ def get_distinct_counts_totals( return {key: value.value for key, value in responses.items()} - def get_distinct_counts_union( - self, - model: TSDBModel, - keys: list[int] | None, - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> int: - self.validate_arguments([model], [environment_id]) - - if not keys: - return 0 - - rollup, series = self.get_optimal_rollup_series(start, end, rollup) - - temporary_id = uuid.uuid1().hex - - def make_temporary_key(key: str | int) -> str: - return f"{self.prefix}{temporary_id}:{key}" - - def expand_key(key: int) -> list[int | str]: - """ - Return a list containing all keys for each interval in the series for a key. - """ - return [ - self.make_key(model, rollup, timestamp, key, environment_id) for timestamp in series - ] - - cluster, _ = self.get_cluster(environment_id) - if is_instance_rb_cluster(cluster, False): - router = cluster.get_router() - else: - raise AssertionError("unreachable") - - def map_key_to_host(hosts: dict[int, set[int]], key: int) -> dict[int, set[int]]: - """ - Identify the host where a key is located and add it to the host map. - """ - hosts[router.get_host_for_key(key)].add(key) - return hosts - - def get_partition_aggregate(value: tuple[int, set[int]]) -> tuple[int, int]: - """ - Fetch the HyperLogLog value (in its raw byte representation) that - results from merging all HyperLogLogs at the provided keys. - """ - (host, _keys) = value - destination = make_temporary_key(f"p:{host}") - client = cluster.get_local_client(host) - with client.pipeline(transaction=False) as pipeline: - pipeline.execute_command( - "PFMERGE", - destination, - *itertools.chain.from_iterable(expand_key(key) for key in _keys), - ) - pipeline.get(destination) - pipeline.delete(destination) - return host, pipeline.execute()[1] - - def merge_aggregates(values: list[tuple[int, int]]) -> int: - """ - Calculate the cardinality of the provided HyperLogLog values. - """ - destination = make_temporary_key("a") # all values will be merged into this key - aggregates = {make_temporary_key(f"a:{host}"): value for host, value in values} - - # Choose a random host to execute the reduction on. (We use a host - # here that we've already accessed as part of this process -- this - # way, we constrain the choices to only hosts that we know are - # running.) - client = cluster.get_local_client(random.choice(values)[0]) - with client.pipeline(transaction=False) as pipeline: - pipeline.mset(aggregates) - pipeline.execute_command("PFMERGE", destination, *aggregates.keys()) - pipeline.execute_command("PFCOUNT", destination) - pipeline.delete(destination, *aggregates.keys()) - return pipeline.execute()[2] - - # TODO: This could be optimized to skip the intermediate step for the - # host that has the largest number of keys if the final merge and count - # is performed on that host. If that host contains *all* keys, the - # final reduction could be performed as a single PFCOUNT, skipping the - # MSET and PFMERGE operations entirely. 
- - reduced: dict[int, set[int]] = reduce(map_key_to_host, set(keys), defaultdict(set)) - return merge_aggregates([get_partition_aggregate(x) for x in reduced.items()]) - def merge_distinct_counts( self, model: TSDBModel, @@ -828,90 +732,6 @@ def record_frequency_multi( if durable: raise - def get_most_frequent( - self, - model: TSDBModel, - keys: Sequence[TSDBKey], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - limit: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, int | str] | None = None, - ) -> dict[TSDBKey, list[tuple[str, float]]]: - self.validate_arguments([model], [environment_id]) - - if not self.enable_frequency_sketches: - raise NotImplementedError("Frequency sketches are disabled.") - - rollup, series = self.get_optimal_rollup_series(start, end, rollup) - - arguments = ["RANKED"] + list(self.DEFAULT_SKETCH_PARAMETERS) - if limit is not None: - arguments.append(int(limit)) - - commands = {} - for key in keys: - ks = [] - for timestamp in series: - ks.extend( - self.make_frequency_table_keys(model, rollup, timestamp, key, environment_id) - ) - commands[key] = [(CountMinScript, ks, arguments)] - - results = {} - cluster, _ = self.get_cluster(environment_id) - for _key, responses in cluster.execute_commands(commands).items(): - results[_key] = [ - (member.decode("utf-8"), float(score)) for member, score in responses[0].value - ] - - return results - - def get_most_frequent_series( - self, - model: TSDBModel, - keys: Iterable[str], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - limit: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, int | str] | None = None, - ) -> dict[str, list[tuple[int, dict[str, float]]]]: - self.validate_arguments([model], [environment_id]) - - if not self.enable_frequency_sketches: - raise NotImplementedError("Frequency sketches are disabled.") - - rollup, series = self.get_optimal_rollup_series(start, end, rollup) - - arguments = ["RANKED"] + list(self.DEFAULT_SKETCH_PARAMETERS) - if limit is not None: - arguments.append(int(limit)) - - commands: dict[str, list[tuple[Script, list[str], list[str | int]]]] = {} - for key in keys: - commands[key] = [ - ( - CountMinScript, - self.make_frequency_table_keys(model, rollup, timestamp, key, environment_id), - arguments, - ) - for timestamp in series - ] - - def unpack_response(response: rb.Promise) -> dict[str, float]: - return {item.decode("utf-8"): float(score) for item, score in response.value} - - results: dict[str, list[tuple[int, dict[str, float]]]] = {} - cluster, _ = self.get_cluster(environment_id) - for key, responses in cluster.execute_commands(commands).items(): - zipped_series = zip(series, (unpack_response(response) for response in responses)) - results[key] = list(zipped_series) - - return results - def get_frequency_series( self, model: TSDBModel, @@ -961,33 +781,6 @@ def get_frequency_series( return results - def get_frequency_totals( - self, - model: TSDBModel, - items: Mapping[TSDBKey, Sequence[TSDBItem]], - start: datetime, - end: datetime | None = None, - rollup: int | None = None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> dict[TSDBKey, dict[TSDBItem, float]]: - self.validate_arguments([model], [environment_id]) - - if not self.enable_frequency_sketches: - raise NotImplementedError("Frequency sketches are disabled.") - - responses: dict[TSDBKey, dict[TSDBItem, float]] = {} - frequency_series = 
self.get_frequency_series( - model, items, start, end, rollup, environment_id - ) - for _key, series in frequency_series.items(): - response = responses[_key] = defaultdict(float) - for timestamp, results in series: - for member, value in results.items(): - response[member] = response.get(member, 0) + value - - return responses - def merge_frequencies( self, model: TSDBModel, diff --git a/src/sentry/tsdb/redissnuba.py b/src/sentry/tsdb/redissnuba.py index 266e612fbec13e..891248581fa222 100644 --- a/src/sentry/tsdb/redissnuba.py +++ b/src/sentry/tsdb/redissnuba.py @@ -31,11 +31,7 @@ def dont_do_this(callargs): "get_distinct_counts_series": (READ, single_model_argument), "get_distinct_counts_totals": (READ, single_model_argument), "get_distinct_counts_totals_with_conditions": (READ, single_model_argument), - "get_distinct_counts_union": (READ, single_model_argument), - "get_most_frequent": (READ, single_model_argument), - "get_most_frequent_series": (READ, single_model_argument), "get_frequency_series": (READ, single_model_argument), - "get_frequency_totals": (READ, single_model_argument), "incr": (WRITE, single_model_argument), "incr_multi": (WRITE, lambda callargs: {item[0] for item in callargs["items"]}), "merge": (WRITE, single_model_argument), diff --git a/src/sentry/tsdb/snuba.py b/src/sentry/tsdb/snuba.py index 5e3d2d20051bea..f98c541c79a4e6 100644 --- a/src/sentry/tsdb/snuba.py +++ b/src/sentry/tsdb/snuba.py @@ -832,88 +832,6 @@ def get_distinct_counts_totals_with_conditions( conditions=conditions, ) - def get_distinct_counts_union( - self, model, keys, start, end=None, rollup=None, environment_id=None, tenant_ids=None - ): - return self.get_data( - model, - keys, - start, - end, - rollup, - [environment_id] if environment_id is not None else None, - aggregation="uniq", - group_on_model=False, - tenant_ids=tenant_ids, - ) - - def get_most_frequent( - self, - model, - keys: Sequence[TSDBKey], - start, - end=None, - rollup=None, - limit=10, - environment_id=None, - tenant_ids=None, - ): - aggregation = f"topK({limit})" - result = self.get_data( - model, - keys, - start, - end, - rollup, - [environment_id] if environment_id is not None else None, - aggregation=aggregation, - tenant_ids=tenant_ids, - ) - # convert - # {group:[top1, ...]} - # into - # {group: [(top1, score), ...]} - for k, top in result.items(): - item_scores = [(v, float(i + 1)) for i, v in enumerate(reversed(top or []))] - result[k] = list(reversed(item_scores)) - - return result - - def get_most_frequent_series( - self, - model, - keys, - start, - end=None, - rollup=None, - limit=10, - environment_id=None, - tenant_ids=None, - ): - aggregation = f"topK({limit})" - result = self.get_data( - model, - keys, - start, - end, - rollup, - [environment_id] if environment_id is not None else None, - aggregation=aggregation, - group_on_time=True, - tenant_ids=tenant_ids, - ) - # convert - # {group:{timestamp:[top1, ...]}} - # into - # {group: [(timestamp, {top1: score, ...}), ...]} - return { - k: sorted( - (timestamp, {v: float(i + 1) for i, v in enumerate(reversed(topk or []))}) - for (timestamp, topk) in result[k].items() - ) - for k in result.keys() - } - def get_frequency_series( self, model: TSDBModel, @@ -941,27 +859,6 @@ def get_frequency_series( # {group: [(timestamp, {agg: count, ...}), ...]} return {k: sorted(result[k].items()) for k in result} - def get_frequency_totals( - self, - model: TSDBModel, - items: Mapping[TSDBKey, Sequence[TSDBItem]], - start: datetime, - end: datetime | None = None, - rollup: int | None = 
None, - environment_id: int | None = None, - tenant_ids: dict[str, str | int] | None = None, - ) -> dict[TSDBKey, dict[TSDBItem, float]]: - return self.get_data( - model, - items, - start, - end, - rollup, - [environment_id] if environment_id is not None else None, - aggregation="count()", - tenant_ids=tenant_ids, - ) - def flatten_keys(self, items: Mapping | Sequence | Set) -> tuple[list, Sequence | None]: """ Returns a normalized set of keys based on the various formats accepted diff --git a/tests/sentry/event_manager/test_event_manager.py b/tests/sentry/event_manager/test_event_manager.py index 9a2c80a4a79884..b1e8d07e85acc5 100644 --- a/tests/sentry/event_manager/test_event_manager.py +++ b/tests/sentry/event_manager/test_event_manager.py @@ -1163,16 +1163,6 @@ def query(model: TSDBModel, key: int, **kwargs: Any) -> int: assert query(TSDBModel.project, project.id, environment_id=environment_id) == 1 assert query(TSDBModel.group, event.group.id, environment_id=environment_id) == 1 - @pytest.mark.xfail - def test_record_frequencies(self) -> None: - project = self.project - manager = EventManager(make_event()) - event = manager.save(project.id) - - assert tsdb.backend.get_most_frequent( - TSDBModel.frequent_issues_by_project, (event.project.id,), event.datetime - ) == {event.project.id: [(event.group_id, 1.0)]} - def test_event_user(self) -> None: event_id = uuid.uuid4().hex manager = EventManager( diff --git a/tests/sentry/tsdb/test_redis.py b/tests/sentry/tsdb/test_redis.py index f0fdd90acdffda..f0a7b6fc6a70a1 100644 --- a/tests/sentry/tsdb/test_redis.py +++ b/tests/sentry/tsdb/test_redis.py @@ -253,21 +253,6 @@ def timestamp(d): ) assert results == {1: 0, 2: 0} - assert self.db.get_distinct_counts_union(model, [], dts[0], dts[-1], rollup=3600) == 0 - assert self.db.get_distinct_counts_union(model, [1, 2], dts[0], dts[-1], rollup=3600) == 3 - assert ( - self.db.get_distinct_counts_union( - model, [1, 2], dts[0], dts[-1], rollup=3600, environment_id=1 - ) - == 1 - ) - assert ( - self.db.get_distinct_counts_union( - model, [1, 2], dts[0], dts[-1], rollup=3600, environment_id=0 - ) - == 0 - ) - self.db.merge_distinct_counts(model, 1, [2], dts[0], environment_ids=[0, 1]) assert self.db.get_distinct_counts_series(model, [1], dts[0], dts[-1], rollup=3600) == { @@ -308,11 +293,6 @@ def timestamp(d): results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1], rollup=3600) assert results == {1: 3, 2: 0} - assert self.db.get_distinct_counts_union(model, [], dts[0], dts[-1], rollup=3600) == 0 - assert self.db.get_distinct_counts_union(model, [1], dts[0], dts[-1], rollup=3600) == 3 - assert self.db.get_distinct_counts_union(model, [1, 2], dts[0], dts[-1], rollup=3600) == 3 - assert self.db.get_distinct_counts_union(model, [2], dts[0], dts[-1], rollup=3600) == 0 - self.db.delete_distinct_counts([model], [1, 2], dts[0], dts[-1], environment_ids=[0, 1]) results = self.db.get_distinct_counts_totals(model, [1, 2], dts[0], dts[-1]) @@ -369,74 +349,8 @@ def test_frequency_tables(self): environment_id=1, ) - assert self.db.get_most_frequent( - model, ("organization:1", "organization:2"), now, rollup=rollup - ) == { - "organization:1": [("project:3", 3.0), ("project:2", 2.0), ("project:1", 1.0)], - "organization:2": [], - } - - assert self.db.get_most_frequent( - model, - ("organization:1", "organization:2"), - now - timedelta(hours=1), - now, - rollup=rollup, - environment_id=1, - ) == { - "organization:1": [("project:4", 3.0), ("project:3", 2.0), ("project:2", 1.0)], - "organization:2": 
[("project:5", 0.5)], - } - - assert self.db.get_most_frequent( - model, ("organization:1", "organization:2"), now, limit=1, rollup=rollup - ) == {"organization:1": [("project:3", 3.0)], "organization:2": []} - - assert self.db.get_most_frequent( - model, - ("organization:1", "organization:2"), - now - timedelta(hours=1), - now, - rollup=rollup, - ) == { - "organization:1": [ - ("project:3", 3.0 + 3.0), - ("project:2", 2.0 + 2.0), - ("project:4", 4.0), - ("project:1", 1.0 + 1.0), - ], - "organization:2": [("project:5", 1.5)], - } - - assert self.db.get_most_frequent( - model, - ("organization:1", "organization:2"), - now - timedelta(hours=1), - now, - rollup=rollup, - environment_id=0, - ) == {"organization:1": [], "organization:2": []} - timestamp = int(now.timestamp() // rollup) * rollup - assert self.db.get_most_frequent_series( - model, - ("organization:1", "organization:2", "organization:3"), - now - timedelta(hours=1), - now, - rollup=rollup, - ) == { - "organization:1": [ - ( - timestamp - rollup, - {"project:1": 1.0, "project:2": 2.0, "project:3": 3.0, "project:4": 4.0}, - ), - (timestamp, {"project:1": 1.0, "project:2": 2.0, "project:3": 3.0}), - ], - "organization:2": [(timestamp - rollup, {"project:5": 1.5}), (timestamp, {})], - "organization:3": [(timestamp - rollup, {}), (timestamp, {})], - } - assert self.db.get_frequency_series( model, { @@ -490,89 +404,10 @@ def test_frequency_tables(self): ], } - assert self.db.get_frequency_totals( - model, - { - "organization:1": ("project:1", "project:2", "project:3", "project:4", "project:5"), - "organization:2": ("project:1", "project:2", "project:3", "project:4", "project:5"), - }, - now - timedelta(hours=1), - now, - rollup=rollup, - ) == { - "organization:1": { - "project:1": 1.0 + 1.0, - "project:2": 2.0 + 2.0, - "project:3": 3.0 + 3.0, - "project:4": 4.0, - "project:5": 0.0, - }, - "organization:2": { - "project:1": 0.0, - "project:2": 0.0, - "project:3": 0.0, - "project:4": 0.0, - "project:5": 1.5, - }, - } - self.db.merge_frequencies( model, "organization:1", ["organization:2"], now, environment_ids=[0, 1] ) - assert self.db.get_frequency_totals( - model, - { - "organization:1": ("project:1", "project:2", "project:3", "project:4", "project:5"), - "organization:2": ("project:1", "project:2", "project:3", "project:4", "project:5"), - }, - now - timedelta(hours=1), - now, - rollup=rollup, - ) == { - "organization:1": { - "project:1": 1.0 + 1.0, - "project:2": 2.0 + 2.0, - "project:3": 3.0 + 3.0, - "project:4": 4.0, - "project:5": 1.5, - }, - "organization:2": { - "project:1": 0.0, - "project:2": 0.0, - "project:3": 0.0, - "project:4": 0.0, - "project:5": 0.0, - }, - } - - assert self.db.get_frequency_totals( - model, - { - "organization:1": ("project:1", "project:2", "project:3", "project:4", "project:5"), - "organization:2": ("project:1", "project:2", "project:3", "project:4", "project:5"), - }, - now - timedelta(hours=1), - now, - rollup=rollup, - environment_id=1, - ) == { - "organization:1": { - "project:1": 0.0, - "project:2": 1.0, - "project:3": 2.0, - "project:4": 3.0, - "project:5": 0.5, - }, - "organization:2": { - "project:1": 0.0, - "project:2": 0.0, - "project:3": 0.0, - "project:4": 0.0, - "project:5": 0.0, - }, - } - self.db.delete_frequencies( [model], ["organization:1", "organization:2"], @@ -581,15 +416,6 @@ def test_frequency_tables(self): environment_ids=[0, 1], ) - assert self.db.get_most_frequent(model, ("organization:1", "organization:2"), now) == { - "organization:1": [], - "organization:2": [], - } - - 
assert self.db.get_most_frequent( - model, ("organization:1", "organization:2"), now, environment_id=1 - ) == {"organization:1": [], "organization:2": []} - def test_frequency_table_import_export_no_estimators(self): client = self.db.cluster.get_local_client_for_key("key") diff --git a/tests/snuba/tsdb/test_tsdb_backend.py b/tests/snuba/tsdb/test_tsdb_backend.py index acecfd19e199aa..83c6156bd9a328 100644 --- a/tests/snuba/tsdb/test_tsdb_backend.py +++ b/tests/snuba/tsdb/test_tsdb_backend.py @@ -483,31 +483,6 @@ def test_get_distinct_counts_totals_users_with_conditions(self): == {} ) - def test_most_frequent(self): - assert self.db.get_most_frequent( - TSDBModel.frequent_issues_by_project, - [self.proj1.id], - self.now, - self.now + timedelta(hours=4), - rollup=3600, - tenant_ids={"referrer": "r", "organization_id": 1234}, - ) in [ - {self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]}, - {self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]}, - ] # Both issues equally frequent - - assert ( - self.db.get_most_frequent( - TSDBModel.frequent_issues_by_project, - [], - self.now, - self.now + timedelta(hours=4), - rollup=3600, - tenant_ids={"referrer": "r", "organization_id": 1234}, - ) - == {} - ) - def test_frequency_series(self): dts = [self.now + timedelta(hours=i) for i in range(4)] assert self.db.get_frequency_series( @@ -555,81 +530,45 @@ def test_result_shape(self): project_id = self.proj1.id dts = [self.now + timedelta(hours=i) for i in range(4)] - results = self.db.get_most_frequent( - TSDBModel.frequent_issues_by_project, - [project_id], - dts[0], - dts[0], - tenant_ids={"referrer": "r", "organization_id": 1234}, - ) - assert has_shape(results, {1: [(1, 1.0)]}) - - results = self.db.get_most_frequent_series( - TSDBModel.frequent_issues_by_project, - [project_id], - dts[0], - dts[0], - tenant_ids={"referrer": "r", "organization_id": 1234}, - ) - assert has_shape(results, {1: [(1, {1: 1.0})]}) - items = { # {project_id: (issue_id, issue_id, ...)} project_id: (self.proj1group1.id, self.proj1group2.id) } - results = self.db.get_frequency_series( - TSDBModel.frequent_issues_by_project, - items, - dts[0], - dts[-1], - tenant_ids={"referrer": "r", "organization_id": 1234}, - ) - assert has_shape(results, {1: [(1, {1: 1})]}) - - results = self.db.get_frequency_totals( + results1 = self.db.get_frequency_series( TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1], tenant_ids={"referrer": "r", "organization_id": 1234}, ) - assert has_shape(results, {1: {1: 1}}) + assert has_shape(results1, {1: [(1, {1: 1})]}) - results = self.db.get_range( + results2 = self.db.get_range( TSDBModel.project, [project_id], dts[0], dts[-1], tenant_ids={"referrer": "r", "organization_id": 1234}, ) - assert has_shape(results, {1: [(1, 1)]}) - - results = self.db.get_distinct_counts_series( - TSDBModel.users_affected_by_project, - [project_id], - dts[0], - dts[-1], - tenant_ids={"referrer": "r", "organization_id": 1234}, - ) - assert has_shape(results, {1: [(1, 1)]}) + assert has_shape(results2, {1: [(1, 1)]}) - results = self.db.get_distinct_counts_totals( + results3 = self.db.get_distinct_counts_series( TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1], tenant_ids={"referrer": "r", "organization_id": 1234}, ) - assert has_shape(results, {1: 1}) + assert has_shape(results3, {1: [(1, 1)]}) - results = self.db.get_distinct_counts_union( + results4 = self.db.get_distinct_counts_totals( TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1], 
tenant_ids={"referrer": "r", "organization_id": 1234}, ) - assert has_shape(results, 1) + assert has_shape(results4, {1: 1}) def test_calculated_limit(self):