-
-
Notifications
You must be signed in to change notification settings - Fork 4.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(events tracking): add abstract class and logging implementation (#80117)

[design doc](https://www.notion.so/sentry/Conversion-rate-of-ingest-transactions-to-save-trx-1298b10e4b5d801ab517c8e2218d13d5)

We need to track the completion of each stage in order to 1) compute event conversion rates and 2) enable debugging visibility into where events are being dropped. The usage will be heavily sampled so that it does not blow up traffic. This PR only adds the REDIS_PUT stage; in subsequent PRs I will add all the other stages listed in the EventStageStatus class.

**IMPORTANT: hash-based sampling.** Here's a [blog post](https://www.rsyslog.com/doc/tutorials/hash_sampling.html) explaining hash-based sampling, which would provide "all or nothing" logging for the sampled events across the entire pipeline. That's the idea I want to implement. The hashing algorithm used must be consistent and uniformly distributed in order for all-or-nothing sampling to work. I cannot find references that say that MD5 is consistent and evenly distributed other than various [Stack Overflow pages](https://crypto.stackexchange.com/questions/14967/distribution-for-a-subset-of-md5). All the official sources are too academic and too long for me to follow.

For reviewers: please review with an eye to how this can be generalized to other pipelines as well, such as errors.
- Loading branch information
1 parent
d2c1f14
commit c8bfef0
Showing
4 changed files
with
105 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import logging | ||
from enum import StrEnum | ||
|
||
from sentry import options | ||
|
||
|
||
class EventType(StrEnum): | ||
TRANSACTION = "transaction" | ||
ERROR = "error" | ||
|
||
|
||
class TransactionStageStatus(StrEnum): | ||
# the transaction is stored to rc-transactions | ||
REDIS_PUT = "redis_put" | ||
|
||
# a save_transactions task is kicked off | ||
SAVE_TRX_STARTED = "save_trx_started" | ||
|
||
# a save_transactions task is finished | ||
SAVE_TRX_FINISHED = "save_trx_finished" | ||
|
||
# the transaction is published to the `events` topic for snuba/sbc consumers to consume | ||
SNUBA_TOPIC_PUT = "snuba_topic_put" | ||
|
||
# the transaction is published to the `snuba-commit-log` topic | ||
COMMIT_LOG_TOPIC_PUT = "commit_log_topic_put" | ||
|
||
# a post_process task is kicked off | ||
POST_PROCESS_STARTED = "post_process_started" | ||
|
||
# the transaction is deleted from rc-transactions | ||
REDIS_DELETED = "redis_deleted" | ||
|
||
|
||
# Named logger used for every tracker record emitted by this module.
logger = logging.getLogger("EventTracker")
|
||
|
||
def track_sampled_event(event_id: str, event_type: str, status: TransactionStageStatus) -> None:
    """
    Records how far an event has made it through the ingestion pipeline.
    Each event type will pick up its sampling rate from its registered option.

    The sampling decision is derived from ``event_id`` alone, so for a fixed
    sample rate a given event is either recorded on every call or on none.

    :param event_id: hexadecimal event id; it is parsed as a base-16 integer.
    :param event_type: suffix of the sample-rate option to read, e.g. "transaction".
    :param status: the pipeline stage being recorded.
    """
    sample_rate = options.get(f"performance.event-tracker.sample-rate.{event_type}")
    # Covers 0 / 0.0 as well as a missing (None) value, which would otherwise
    # fail in the comparison below.
    if not sample_rate:
        return

    try:
        # Map the id onto one of 10000 evenly sized buckets.
        bucket = int(event_id, 16) % 10000
    except (TypeError, ValueError):
        # Tracking is diagnostic only: a malformed id must not break the caller.
        logger.warning("EventTracker.invalid_event_id", extra={"event_id": event_id})
        return

    if bucket / 10000 < sample_rate:
        _do_record(
            {
                "event_id": event_id,
                # None when event_type does not name an EventType member.
                "event_type": getattr(EventType, event_type.upper(), None),
                "status": status,
            }
        )
|
||
|
||
def _do_record(extra: dict[str, object]) -> None:
    """Emit one tracker record through the module logger.

    :param extra: structured fields passed as ``extra`` on the log call.
    """
    # All Python logs will be picked up by Google Cloud Logging.
    # TODO: make a Google Cloud Sink to filter for these EventTracker logs and put them into BigQuery and do data analysis downstream
    logger.info("EventTracker.recorded", extra=extra)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import unittest | ||
from unittest.mock import patch | ||
|
||
from sentry.testutils.cases import TestCase | ||
from sentry.testutils.helpers.options import override_options | ||
from sentry.utils.event_tracker import EventType, TransactionStageStatus, track_sampled_event | ||
|
||
# Fixed inputs shared by the tests below.
EVENT_ID = "9cdc4c32dff14fbbb012b0aa9e908126"
EVENT_TYPE_STR = "transaction"
STATUS = TransactionStageStatus.REDIS_PUT

# Enum member that the tracker is expected to resolve EVENT_TYPE_STR to.
EXPECTED_EVENT_TYPE = EventType.TRANSACTION
|
||
|
||
class TestEventTracking(TestCase):
    """Checks the sampling decision made by ``track_sampled_event``."""

    @patch("sentry.utils.event_tracker._do_record")
    def test_track_sampled_event_logs_event(self, mock_do_record):
        """With a sample rate of 1.0 the event is recorded exactly once."""
        with override_options({"performance.event-tracker.sample-rate.transaction": 1.0}):
            track_sampled_event(EVENT_ID, EVENT_TYPE_STR, STATUS)
            mock_do_record.assert_called_once_with(
                {"event_id": EVENT_ID, "event_type": EXPECTED_EVENT_TYPE, "status": STATUS}
            )

    @patch("sentry.utils.event_tracker._do_record")
    def test_track_sampled_event_does_not_log_event(self, mock_do_record):
        """With a sample rate of 0.0 nothing is recorded."""
        with override_options({"performance.event-tracker.sample-rate.transaction": 0.0}):
            track_sampled_event(EVENT_ID, EVENT_TYPE_STR, STATUS)
            mock_do_record.assert_not_called()
|
||
|
||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()