diff --git a/src/meltano/core/tracking/contexts/cli.py b/src/meltano/core/tracking/contexts/cli.py index 86686cdc39..2264bca6ce 100644 --- a/src/meltano/core/tracking/contexts/cli.py +++ b/src/meltano/core/tracking/contexts/cli.py @@ -4,9 +4,9 @@ from enum import Enum, auto import click -from snowplow_tracker import SelfDescribingJson from meltano.core.tracking.schemas import CliContextSchema +from meltano.core.tracking.tracker import SelfDescribingJson from meltano.core.utils import hash_sha256 diff --git a/src/meltano/core/tracking/contexts/environment.py b/src/meltano/core/tracking/contexts/environment.py index f251a79413..de0e5dc116 100644 --- a/src/meltano/core/tracking/contexts/environment.py +++ b/src/meltano/core/tracking/contexts/environment.py @@ -14,11 +14,11 @@ from warnings import warn import psutil -from snowplow_tracker import SelfDescribingJson from structlog.stdlib import get_logger import meltano from meltano.core.tracking.schemas import EnvironmentContextSchema +from meltano.core.tracking.tracker import SelfDescribingJson from meltano.core.utils import get_boolean_env_var, hash_sha256, safe_hasattr, strtobool logger = get_logger(__name__) diff --git a/src/meltano/core/tracking/contexts/exception.py b/src/meltano/core/tracking/contexts/exception.py index ad4422ab0d..c254a93a03 100644 --- a/src/meltano/core/tracking/contexts/exception.py +++ b/src/meltano/core/tracking/contexts/exception.py @@ -9,9 +9,8 @@ from pathlib import Path from types import TracebackType -from snowplow_tracker import SelfDescribingJson - from meltano.core.tracking.schemas import ExceptionContextSchema +from meltano.core.tracking.tracker import SelfDescribingJson from meltano.core.utils import hash_sha256 BASE_PATHS = (sys.prefix, sys.exec_prefix, sys.base_prefix, sys.base_exec_prefix) diff --git a/src/meltano/core/tracking/contexts/plugins.py b/src/meltano/core/tracking/contexts/plugins.py index e9bd698aa6..75db7d4c50 100644 --- 
a/src/meltano/core/tracking/contexts/plugins.py +++ b/src/meltano/core/tracking/contexts/plugins.py @@ -4,7 +4,6 @@ import uuid -from snowplow_tracker import SelfDescribingJson from structlog.stdlib import get_logger from meltano.core.block.blockset import BlockSet @@ -12,6 +11,7 @@ from meltano.core.elt_context import ELTContext from meltano.core.plugin.project_plugin import ProjectPlugin from meltano.core.tracking.schemas import PluginsContextSchema +from meltano.core.tracking.tracker import SelfDescribingJson from meltano.core.utils import hash_sha256, safe_hasattr logger = get_logger(__name__) diff --git a/src/meltano/core/tracking/contexts/project.py b/src/meltano/core/tracking/contexts/project.py index 780b926e26..18d28533a3 100644 --- a/src/meltano/core/tracking/contexts/project.py +++ b/src/meltano/core/tracking/contexts/project.py @@ -6,11 +6,11 @@ from enum import Enum, auto from functools import cached_property -from snowplow_tracker import SelfDescribingJson from structlog.stdlib import get_logger from meltano.core.project import Project from meltano.core.tracking.schemas import ProjectContextSchema +from meltano.core.tracking.tracker import SelfDescribingJson from meltano.core.utils import hash_sha256 logger = get_logger(__name__) diff --git a/src/meltano/core/tracking/tracker.py b/src/meltano/core/tracking/tracker.py index 133aa6154d..1ec576b454 100644 --- a/src/meltano/core/tracking/tracker.py +++ b/src/meltano/core/tracking/tracker.py @@ -20,8 +20,6 @@ import structlog import tzlocal from psutil import Process -from snowplow_tracker import Emitter, SelfDescribing, SelfDescribingJson -from snowplow_tracker import Tracker as SnowplowTracker from meltano.core.project import Project from meltano.core.tracking.schemas import ( @@ -41,14 +39,67 @@ from functools import cached_property +logger = structlog.get_logger(__name__) + +# Suppress these imports from snowplow_tracker if tracking is disabled +# It helps to resolve issues related to import conflicts - 
+# snowplow-tracker vs. minimal-snowplow tracker +SNOWPLOW_TRACKER_AVAILABLE = False +try: + from snowplow_tracker import Emitter, SelfDescribing, SelfDescribingJson + from snowplow_tracker import Tracker as SnowplowTracker + + SNOWPLOW_TRACKER_AVAILABLE = True +except Exception as e: + logger.warning( + "Import of snowplow_tracker failed. " + f"Disable tracking to fix it. Reason: {str(e)}", + ) + + class SelfDescribingJson: + """A self-describing JSON object. + + It is a copy of the class from the snowplow_tracker library. + We must define it if the above imports fail, because + it is used in many places across the whole codebase. + """ + + def __init__(self, schema, data) -> None: + """Init a self-describing JSON object. + + Args: + schema: JSON schema. + data: JSON payload + """ + self.schema = schema + self.data = data + + def to_json(self) -> dict: + """Return a JSON representation of the object. + + Returns: + Dictionary with schema and data + """ + return { + "schema": self.schema, + "data": self.data, + } + + def to_string(self) -> str: + """Return a string representation of the object. 
+ + Returns: + String representation of the JSON + """ + return json.dumps(self.to_json()) + + URL_REGEX = ( r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+" ) MICROSECONDS_PER_SECOND = 1000000 -logger = structlog.get_logger(__name__) - class BlockEvents(Enum): """Events describing a block state.""" @@ -85,7 +136,7 @@ class TelemetrySettings(t.NamedTuple): class Tracker: # noqa: WPS214, WPS230 - too many (public) methods """Meltano tracker backed by Snowplow.""" - def __init__( # noqa: WPS210 - too many local variables + def __init__( # noqa: WPS210, C901, WPS213 - too many local variables self, project: Project, request_timeout: float | tuple[float, float] | None = 3.5, @@ -112,21 +163,29 @@ def __init__( # noqa: WPS210 - too many local variables endpoints = project.settings.get("snowplow.collector_endpoints") - emitters: list[Emitter] = [] - for endpoint in endpoints: - if not check_url(endpoint): - logger.warning("invalid_snowplow_endpoint", endpoint=endpoint) - continue - parsed_url = urlparse(endpoint) - emitters.append( - Emitter( - endpoint=parsed_url.hostname + parsed_url.path, - protocol=parsed_url.scheme or "http", - port=parsed_url.port, - request_timeout=request_timeout, - ), + # Suppress errors when instantiating Emitters + # It helps to resolve issues related to import conflicts - + # snowplow-tracker vs. 
minimal-snowplow tracker + if SNOWPLOW_TRACKER_AVAILABLE: + emitters: list[Emitter] = [] + for endpoint in endpoints: + if not check_url(endpoint): + logger.warning("invalid_snowplow_endpoint", endpoint=endpoint) + continue + parsed_url = urlparse(endpoint) + emitters.append( + Emitter( + endpoint=parsed_url.hostname + parsed_url.path, + protocol=parsed_url.scheme or "http", + port=parsed_url.port, + request_timeout=request_timeout, + ), + ) + else: + logger.warning( + "Snowplow tracker is not available, setting emitters to empty list", ) - + emitters = [] if emitters: self.snowplow_tracker = SnowplowTracker( namespace="meltano-core",