diff --git a/sql_generators/glean_usage/__init__.py b/sql_generators/glean_usage/__init__.py index 3adead7d4bc..e42dfbbd8d6 100644 --- a/sql_generators/glean_usage/__init__.py +++ b/sql_generators/glean_usage/__init__.py @@ -18,6 +18,10 @@ baseline_clients_first_seen, baseline_clients_last_seen, clients_last_seen_joined, + dau_reporting_clients_daily, + dau_reporting_clients_first_seen, + dau_reporting_clients_last_seen, + dau_reporting_active_users_aggregates, event_error_monitoring, event_flow_monitoring, event_monitoring_live, @@ -43,6 +47,10 @@ event_error_monitoring.EventErrorMonitoring(), event_flow_monitoring.EventFlowMonitoring(), events_stream.EventsStreamTable(), + dau_reporting_clients_daily.DauReportingClientsDailyTable(), + dau_reporting_clients_first_seen.DauReportingClientsFirstSeenTable(), + dau_reporting_clients_last_seen.DauReportingClientsLastSeenTable(), + dau_reporting_active_users_aggregates.DauReportingActiveUsersAggregatesTable(), ] @@ -136,7 +144,7 @@ def get_tables(table_name="baseline_v1"): not in ConfigLoader.get("generate", "glean_usage", "skip_apps", fallback=[]) ] - id_token=get_id_token() + id_token = get_id_token() # Prepare parameters so that generation of all Glean datasets can be done in parallel @@ -151,7 +159,7 @@ def get_tables(table_name="baseline_v1"): use_cloud_function=use_cloud_function, app_info=app_info, parallelism=parallelism, - id_token=id_token + id_token=id_token, ), baseline_table, ) @@ -169,7 +177,7 @@ def get_tables(table_name="baseline_v1"): output_dir=output_dir, use_cloud_function=use_cloud_function, parallelism=parallelism, - id_token=id_token + id_token=id_token, ), info, ) diff --git a/sql_generators/glean_usage/common.py b/sql_generators/glean_usage/common.py index 0d1bb444abd..b15f9e37cdd 100644 --- a/sql_generators/glean_usage/common.py +++ b/sql_generators/glean_usage/common.py @@ -151,6 +151,16 @@ def table_names_from_baseline(baseline_table, include_project_id=True): events_view=f"{prefix}.events", 
events_stream_table=f"{prefix}_derived.events_stream_v1", events_stream_view=f"{prefix}.events_stream", + dau_reporting_stable_table=f"{prefix}_stable.dau_reporting_v1", + dau_reporting_clients_daily_table=f"{prefix}_derived.dau_reporting_clients_daily_v1", + dau_reporting_clients_first_seen_table=f"{prefix}_derived.dau_reporting_clients_first_seen_v1", + dau_reporting_clients_last_seen_table=f"{prefix}_derived.dau_reporting_clients_last_seen_v1", + dau_reporting_active_users_aggregates_table=f"{prefix}_derived.dau_reporting_active_users_aggregates_v1", + dau_reporting_clients_daily_view=f"{prefix}.dau_reporting_clients_daily", + dau_reporting_clients_first_seen_view=f"{prefix}.dau_reporting_clients_first_seen", + dau_reporting_clients_last_seen_view=f"{prefix}.dau_reporting_clients_last_seen", + dau_reporting_active_users_view=f"{prefix}.dau_reporting_active_users", + dau_reporting_active_users_aggregates_view=f"{prefix}.dau_reporting_active_users_aggregates", ) @@ -234,7 +244,7 @@ def generate_per_app_id( use_cloud_function=True, app_info=[], parallelism=8, - id_token=None + id_token=None, ): """Generate the baseline table query per app_id.""" if not self.per_app_id_enabled: @@ -268,7 +278,7 @@ def generate_per_app_id( derived_dataset=derived_dataset, app_name=app_name, has_distribution_id=app_name in APPS_WITH_DISTRIBUTION_ID, - has_profile_group_id= app_name in APPS_WITH_PROFILE_GROUP_ID, + has_profile_group_id=app_name in APPS_WITH_PROFILE_GROUP_ID, ) render_kwargs.update(self.custom_render_kwargs) @@ -364,7 +374,7 @@ def generate_per_app( output_dir=None, use_cloud_function=True, parallelism=8, - id_token=None + id_token=None, ): """Generate the baseline table query per app_name.""" if not self.per_app_enabled: diff --git a/sql_generators/glean_usage/dau_reporting_active_users_aggregates.py b/sql_generators/glean_usage/dau_reporting_active_users_aggregates.py new file mode 100644 index 00000000000..77ec99ff937 --- /dev/null +++ 
b/sql_generators/glean_usage/dau_reporting_active_users_aggregates.py @@ -0,0 +1,17 @@ +"""Generating and run dau_reporting_active_users_aggregates queries for Glean apps.""" + +from sql_generators.glean_usage.common import GleanTable + +TARGET_TABLE_ID = "dau_reporting_active_users_aggregates_v1" +PREFIX = "dau_reporting_active_users_aggregates" + + +class DauReportingActiveUsersAggregatesTable(GleanTable): + """Represents generated dau_reporting_active_users_aggregates table.""" + + def __init__(self): + """Initialize dau_reporting_active_users_aggregates table.""" + GleanTable.__init__(self) + self.target_table_id = TARGET_TABLE_ID + self.prefix = PREFIX + self.base_table_name = "dau_reporting_v1" diff --git a/sql_generators/glean_usage/dau_reporting_clients_daily.py b/sql_generators/glean_usage/dau_reporting_clients_daily.py new file mode 100644 index 00000000000..61a4cda2bd6 --- /dev/null +++ b/sql_generators/glean_usage/dau_reporting_clients_daily.py @@ -0,0 +1,17 @@ +"""Generating and run dau_reporting_clients_daily queries for Glean apps.""" + +from sql_generators.glean_usage.common import GleanTable + +TARGET_TABLE_ID = "dau_reporting_clients_daily_v1" +PREFIX = "dau_reporting_clients_daily" + + +class DauReportingClientsDailyTable(GleanTable): + """Represents generated dau_reporting_clients_daily table.""" + + def __init__(self): + """Initialize dau_reporting_clients_daily table.""" + GleanTable.__init__(self) + self.target_table_id = TARGET_TABLE_ID + self.prefix = PREFIX + self.base_table_name = "dau_reporting_v1" diff --git a/sql_generators/glean_usage/dau_reporting_clients_first_seen.py b/sql_generators/glean_usage/dau_reporting_clients_first_seen.py new file mode 100644 index 00000000000..ae69fc64389 --- /dev/null +++ b/sql_generators/glean_usage/dau_reporting_clients_first_seen.py @@ -0,0 +1,17 @@ +"""Generating and run dau_reporting_clients_first_seen queries for Glean apps.""" + +from sql_generators.glean_usage.common import GleanTable + 
+TARGET_TABLE_ID = "dau_reporting_clients_first_seen_v1" +PREFIX = "dau_reporting_clients_first_seen" + + +class DauReportingClientsFirstSeenTable(GleanTable): + """Represents generated dau_reporting_clients_first_seen table.""" + + def __init__(self): + """Initialize dau_reporting_clients_first_seen table.""" + GleanTable.__init__(self) + self.target_table_id = TARGET_TABLE_ID + self.prefix = PREFIX + self.base_table_name = "dau_reporting_v1" diff --git a/sql_generators/glean_usage/dau_reporting_clients_last_seen.py b/sql_generators/glean_usage/dau_reporting_clients_last_seen.py new file mode 100644 index 00000000000..31571ce6e6f --- /dev/null +++ b/sql_generators/glean_usage/dau_reporting_clients_last_seen.py @@ -0,0 +1,17 @@ +"""Generating and run dau_reporting_clients_last_seen queries for Glean apps.""" + +from sql_generators.glean_usage.common import GleanTable + +TARGET_TABLE_ID = "dau_reporting_clients_last_seen_v1" +PREFIX = "dau_reporting_clients_last_seen" + + +class DauReportingClientsLastSeenTable(GleanTable): + """Represents generated dau_reporting_clients_last_seen table.""" + + def __init__(self): + """Initialize dau_reporting_clients_last_seen table.""" + GleanTable.__init__(self) + self.target_table_id = TARGET_TABLE_ID + self.prefix = PREFIX + self.base_table_name = "dau_reporting_v1" diff --git a/sql_generators/glean_usage/templates/cross_channel.view.sql b/sql_generators/glean_usage/templates/cross_channel.view.sql index 108750f1400..3fb991b2f8c 100644 --- a/sql_generators/glean_usage/templates/cross_channel.view.sql +++ b/sql_generators/glean_usage/templates/cross_channel.view.sql @@ -7,11 +7,11 @@ AS UNION ALL {% endif -%} {% if app_name == "fenix" -%} -SELECT +SELECT "{{ dataset }}" AS normalized_app_id, * REPLACE(mozfun.norm.fenix_app_info("{{ dataset }}", app_build).channel AS normalized_channel), {% else -%} -SELECT +SELECT "{{ dataset }}" AS normalized_app_id, * REPLACE("{{ channel }}" AS normalized_channel) {% endif -%} diff --git 
a/sql_generators/glean_usage/templates/dau_reporting_active_users.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_active_users.metadata.yaml new file mode 100644 index 00000000000..f60d0aecee0 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_active_users.metadata.yaml @@ -0,0 +1,15 @@ +{{ header_yaml }} +friendly_name: DAU Reporting Active Users +description: |- + Daily client-level metrics for the dau_reporting ping. Merges the computations for client first seen + and last seen metrics. + +owners: + - gkatre@mozilla.com +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:dataops-managed/taar + - workgroup:mozilla-confidential diff --git a/sql_generators/glean_usage/templates/dau_reporting_active_users.schema.yaml b/sql_generators/glean_usage/templates/dau_reporting_active_users.schema.yaml new file mode 100644 index 00000000000..dbba25c5a6c --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_active_users.schema.yaml @@ -0,0 +1,137 @@ +fields: +- mode: NULLABLE + name: submission_date + type: DATE + description: | + Logical date used for processing and partitioning. + +- mode: NULLABLE + name: usage_profile_id + type: STRING + description: A UUID of the usage_profile. + +- mode: NULLABLE + name: first_run_date + type: DATE + description: | + The date of the first run of the application. + +- mode: NULLABLE + name: normalized_channel + type: STRING + description: | + The channel the application is being distributed on. + +- mode: NULLABLE + name: normalized_os + type: STRING + description: | + The name of the operating system. + +- mode: NULLABLE + name: normalized_os_version + type: STRING + description: | + The user-visible version of the operating system (e.g. "1.2.3"). + If the version detection fails, this metric gets set to Unknown. + +- mode: NULLABLE + name: locale + type: STRING + description: | + The locale of the application during initialization (e.g. "es-ES"). 
+ If the locale can't be determined on the system, the value is "und", to indicate "undetermined". + +- mode: NULLABLE + name: app_build + type: STRING + description: | + The build identifier generated by the CI system (e.g. "1234/A"). + If the value was not provided through configuration, this metric gets set to Unknown. + +- mode: NULLABLE + name: app_display_version + type: STRING + description: | + The user visible version string (e.g. "1.0.3"). + If the value was not provided through configuration, this metric gets set to Unknown. + +- mode: NULLABLE + name: distribution_id + type: STRING + description: | + A string containing the distribution identifier. This was used to identify installs + from Mozilla Online, but now also identifies partnership deal distributions. + +- mode: NULLABLE + name: is_active + type: BOOLEAN + description: | + A flag field indicating whether the specific client was active. + +- mode: NULLABLE + name: first_seen_date + type: DATE + description: | + Logical date of when we observed the client for the first time in our warehouse. + +- mode: NULLABLE + name: days_seen_bits + type: INTEGER + description: | + Bit field shows on which of the last 28 days a client sent us the dau_reporting ping. + +- mode: NULLABLE + name: days_active_bits + type: INTEGER + description: | + Bit field shows on which of the last 28 days a client fulfilled the active criteria. + +- mode: NULLABLE + name: days_created_profile_bits + type: INTEGER + description: | + bit field indicating how many days lapsed since profile creation. + +- mode: NULLABLE + name: activity_segment + type: STRING + description: | + categorizing activity days into segments + +- mode: NULLABLE + name: is_dau + type: BOOLEAN + description: | + A flag field indicating whether the specific client was active on the submission_date. 
+ +- mode: NULLABLE + name: is_wau + type: BOOLEAN + description: | + A flag field indicating whether the specific client was active on any of the 7 days prior to the submission_date. + +- mode: NULLABLE + name: is_mau + type: BOOLEAN + description: | + A flag field indicating whether the specific client was active on any of the 28 days prior to the submission_date. + +- mode: NULLABLE + name: is_daily_user + type: BOOLEAN + description: | + A flag field indicating whether the specific client sent the dau_reporting ping on the submission_date. + +- mode: NULLABLE + name: is_weekly_user + type: BOOLEAN + description: | + A flag field indicating whether the specific client sent the dau_reporting ping on any of the 7 days prior to the submission_date. + +- mode: NULLABLE + name: is_monthly_user + type: BOOLEAN + description: | + A flag field indicating whether the specific client sent the dau_reporting ping on any of the 28 days prior to the + submission_date. diff --git a/sql_generators/glean_usage/templates/dau_reporting_active_users.view.sql b/sql_generators/glean_usage/templates/dau_reporting_active_users.view.sql new file mode 100644 index 00000000000..baac6499b7a --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_active_users.view.sql @@ -0,0 +1,86 @@ +{{ header }} +CREATE OR REPLACE VIEW + `{{ project_id }}.{{ dau_reporting_active_users_view }}` +AS +SELECT + submission_date, + usage_profile_id, + first_run_date, + normalized_channel, + normalized_os, + normalized_os_version, + locale, + app_build, + app_display_version, + distribution_id, + is_active, + first_seen_date, + days_seen_bits, + days_active_bits, + days_created_profile_bits, + CASE + WHEN BIT_COUNT(days_active_bits) + BETWEEN 1 + AND 6 + THEN 'infrequent_user' + WHEN BIT_COUNT(days_active_bits) + BETWEEN 7 + AND 13 + THEN 'casual_user' + WHEN BIT_COUNT(days_active_bits) + BETWEEN 14 + AND 20 + THEN 'regular_user' + WHEN BIT_COUNT(days_active_bits) >= 21 + THEN 'core_user' + ELSE 
'other' + END AS activity_segment, + IFNULL(mozfun.bits28.days_since_seen(days_active_bits) = 0, FALSE) AS is_dau, + IFNULL(mozfun.bits28.days_since_seen(days_active_bits) < 7, FALSE) AS is_wau, + IFNULL(mozfun.bits28.days_since_seen(days_active_bits) < 28, FALSE) AS is_mau, + IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) = 0, FALSE) AS is_daily_user, + IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) < 7, FALSE) AS is_weekly_user, + IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) < 28, FALSE) AS is_monthly_user + +-- +-- TODO: uncomment once duration is added to the dau_reporting ping +-- +-- -- Bit patterns capturing activity dates relative to the submission date. +-- days_seen_session_start_bits, +-- days_seen_session_end_bits, +-- + +-- -- TODO: verify if these fields are needed +-- app_version, +-- country, +-- city, +-- locale, +-- os, +-- windows_build_number, +-- scalar_parent_browser_engagement_total_uri_count_normal_and_private_mode_sum, +-- scalar_parent_browser_engagement_total_uri_count_sum, +-- is_default_browser, +-- isp_name, +-- CASE +-- WHEN isp_name = 'BrowserStack' +-- THEN CONCAT('Firefox Desktop', ' ', isp_name) +-- WHEN distribution_id = 'MozillaOnline' +-- THEN CONCAT('Firefox Desktop', ' ', distribution_id) +-- ELSE 'Firefox Desktop' +-- END AS app_name, +-- IF( +-- LOWER(IFNULL(isp_name, '')) <> "browserstack" +-- AND LOWER(IFNULL(distribution_id, '')) <> "mozillaonline", +-- TRUE, +-- FALSE +-- ) AS is_desktop + + +FROM + `{{ dau_reporting_clients_daily_table }}` +LEFT JOIN + `{{ dau_reporting_clients_first_seen_table }}` + USING (usage_profile_id) +LEFT JOIN + `{{ dau_reporting_clients_last_seen_table }}` + USING (usage_profile_id, submission_date) diff --git a/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates.metadata.yaml new file mode 100644 index 00000000000..db1934b9301 --- /dev/null +++ 
b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates.metadata.yaml @@ -0,0 +1,14 @@ +{{ header_yaml }} +friendly_name: DAU Reporting Active Users Aggregates +description: |- + A daily aggregate of the dau_reporting ping representing user activity. + +owners: + - gkatre@mozilla.com +labels: {} +bigquery: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:dataops-managed/taar + - workgroup:mozilla-confidential diff --git a/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates.view.sql b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates.view.sql new file mode 100644 index 00000000000..f0099fa9fa2 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates.view.sql @@ -0,0 +1,8 @@ +{{ header }} +CREATE OR REPLACE VIEW + `{{ project_id }}.{{ dau_reporting_active_users_aggregates_view }}` +AS +SELECT + * +FROM + `{{ project_id }}.{{ dau_reporting_active_users_aggregates_table }}` diff --git a/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.metadata.yaml new file mode 100644 index 00000000000..a2e76d0ff4a --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.metadata.yaml @@ -0,0 +1,22 @@ +{{ header_yaml }} +friendly_name: DAU Reporting Active Users Aggregates +description: |- + A daily aggregate of the dau_reporting ping representing user activity. 
+ +owners: + - gkatre@mozilla.com +labels: + incremental: true + schedule: daily +scheduling: + dag_name: bqetl_glean_usage + task_group: {{ app_name }} +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: true + clustering: + fields: + - channel + - locale diff --git a/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.query.sql b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.query.sql new file mode 100644 index 00000000000..eb14cf8c2ba --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.query.sql @@ -0,0 +1,71 @@ +{{ header }} + +--- Query generated via sql_generators.active_users. +WITH todays_metrics AS ( + SELECT + submission_date, + + usage_profile_id, + normalized_channel AS channel, + EXTRACT(YEAR FROM first_seen_date) AS first_seen_year, + COALESCE( + `mozfun.norm.windows_version_info`(os, normalized_os_version, windows_build_number), + normalized_os_version + ) AS os_version, + COALESCE( + CAST(NULLIF(SPLIT(normalized_os_version, ".")[SAFE_OFFSET(0)], "") AS INTEGER), + 0 + ) AS os_version_major, + COALESCE( + CAST(NULLIF(SPLIT(normalized_os_version, ".")[SAFE_OFFSET(1)], "") AS INTEGER), + 0 + ) AS os_version_minor, + COALESCE(REGEXP_EXTRACT(locale, r'^(.+?)-'), locale, NULL) AS locale, + distribution_id, + is_active, + activity_segment AS segment, + is_daily_user, + is_weekly_user, + is_monthly_user, + is_dau, + is_wau, + is_mau + +-- -- TODO: verify if these fields are needed +-- app_name, +-- app_version AS app_version, +-- IFNULL(country, '??') country, +-- city, +-- os, +-- COALESCE( +-- scalar_parent_browser_engagement_total_uri_count_normal_and_private_mode_sum, +-- scalar_parent_browser_engagement_total_uri_count_sum +-- ) AS uri_count, +-- is_default_browser, + + FROM + `{{ dau_reporting_active_users_view }}` + WHERE + submission_date = @submission_date +) +SELECT + todays_metrics.* EXCEPT ( + 
usage_profile_id, + is_daily_user, + is_weekly_user, + is_monthly_user, + is_dau, + is_wau, + is_mau, + is_active + ), + COUNTIF(is_daily_user) AS daily_users, + COUNTIF(is_weekly_user) AS weekly_users, + COUNTIF(is_monthly_user) AS monthly_users, + COUNTIF(is_dau) AS dau, + COUNTIF(is_wau) AS wau, + COUNTIF(is_mau) AS mau +FROM + todays_metrics +GROUP BY + ALL diff --git a/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.schema.yaml b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.schema.yaml new file mode 100644 index 00000000000..a51d0ebb6bb --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_active_users_aggregates_v1.schema.yaml @@ -0,0 +1,57 @@ +fields: +- mode: NULLABLE + name: submission_date + type: DATE + description: | + Logical date used for processing and partitioning. + +- mode: NULLABLE + name: channel + type: STRING + description: | + The channel the application is being distributed on. + +- mode: NULLABLE + name: os_version + type: STRING + description: | + The user-visible version of the operating system (e.g. "1.2.3"). + If the version detection fails, this metric gets set to Unknown. + +- mode: NULLABLE + name: os_version_major + type: INTEGER + description: | + The operating system major version. + +- mode: NULLABLE + name: os_version_minor + type: INTEGER + description: | + The operating system minor version. + +- mode: NULLABLE + name: locale + type: STRING + description: | + The locale of the application during initialization (e.g. "es-ES"). + If the locale can't be determined on the system, the value is "und", to indicate "undetermined". + +- mode: NULLABLE + name: distribution_id + type: STRING + description: | + A string containing the distribution identifier. This was used to identify installs + from Mozilla Online, but now also identifies partnership deal distributions. 
+ +- mode: NULLABLE + name: first_seen_year + type: INTEGER + description: | + Year when we observed the client for the first time in our warehouse. + +- mode: NULLABLE + name: segment + type: STRING + description: | + categorizing activity days into segments diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_daily.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_daily.metadata.yaml new file mode 100644 index 00000000000..3670ca5701b --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_daily.metadata.yaml @@ -0,0 +1,12 @@ +{{ header_yaml }} +friendly_name: Clients Daily Based on the DAU Reporting Ping. +description: |- + A daily aggregate of dau_reporting pings per `usage_profile_id`. + + Cluster by: `normalized_channel`, `locale` + +owners: + - kik@mozilla.com +labels: + incremental: true + schedule: daily diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_daily.view.sql b/sql_generators/glean_usage/templates/dau_reporting_clients_daily.view.sql new file mode 100644 index 00000000000..0e090a8cf65 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_daily.view.sql @@ -0,0 +1,8 @@ +{{ header }} +CREATE OR REPLACE VIEW + `{{ project_id }}.{{ dau_reporting_clients_daily_view }}` +AS +SELECT + * +FROM + `{{ project_id }}.{{ dau_reporting_clients_daily_table }}` diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.metadata.yaml new file mode 100644 index 00000000000..fc49f1a54b5 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.metadata.yaml @@ -0,0 +1,24 @@ +{{ header_yaml }} +friendly_name: Clients Daily Based on the DAU Reporting Ping. +description: |- + A daily aggregate of dau_reporting pings per `usage_profile_id`. 
+ + Cluster by: `normalized_channel`, `locale` + +owners: + - kik@mozilla.com +labels: + incremental: true + schedule: daily +scheduling: + dag_name: bqetl_glean_usage + task_group: {{ app_name }} +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: true + clustering: + fields: + - normalized_channel + - locale diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.query.sql b/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.query.sql new file mode 100644 index 00000000000..1a6b7e37ffc --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.query.sql @@ -0,0 +1,80 @@ +{{ header }} + +WITH base AS ( + SELECT + submission_timestamp, + DATE(submission_timestamp) AS submission_date, + metrics.uuid.usage_profile_id, + normalized_channel, + client_info.app_display_version, + client_info.app_build, + normalized_os, + normalized_os_version, + client_info.locale, + {% if has_distribution_id %} + metrics.string.metrics_distribution_id AS distribution_id, + {% else %} + CAST(NULL AS STRING) AS distribution_id, + {% endif %} + {% if "_desktop" in app_name %} + metrics.counter.browser_engagement_uri_count, + metrics.counter.browser_engagement_active_ticks, + {% endif %} + CAST(NULL AS BOOLEAN) AS is_active, + SAFE.PARSE_DATE('%F', SUBSTR(client_info.first_run_date, 1, 10)) AS first_run_date, + FROM + `{{ project_id }}.{{ dau_reporting_stable_table }}` + WHERE + usage_profile_id IS NOT NULL +) +SELECT + submission_date, + usage_profile_id, + -- + -- Take the earliest first_run_date if ambiguous. + MIN(first_run_date) OVER w1 AS first_run_date, + -- For all other dimensions, we use the mode of observed values in the day. 
+ udf.mode_last(ARRAY_AGG(normalized_channel) OVER w1) AS normalized_channel, + udf.mode_last(ARRAY_AGG(normalized_os) OVER w1) AS normalized_os, + udf.mode_last(ARRAY_AGG(normalized_os_version) OVER w1) AS normalized_os_version, + udf.mode_last(ARRAY_AGG(locale) OVER w1) AS locale, + udf.mode_last(ARRAY_AGG(app_build) OVER w1) AS app_build, + udf.mode_last(ARRAY_AGG(app_display_version) OVER w1) AS app_display_version, + udf.mode_last(ARRAY_AGG(distribution_id) OVER w1) AS distribution_id, + {% if "_desktop" in app_name %} + COALESCE(is_active, SUM(browser_engagement_uri_count) OVER w1 > 0 AND SUM(browser_engagement_active_ticks) OVER w1 > 0, False) AS is_active, + {% else %} + -- At the moment we do not have duration, default to True. + -- COALESCE(is_active, SUM(IF(duration BETWEEN 0 AND 100000, duration, 0)) OVER w1 > 0, False) AS is_active, + TRUE AS is_active + {% endif %} +FROM + base +WHERE + {% raw %} + {% if is_init() %} + submission_date >= '2024-10-10' + {% else %} + submission_date = @submission_date + {% endif %} + {% endraw %} +QUALIFY + ROW_NUMBER() OVER ( + PARTITION BY + usage_profile_id, + submission_date + ORDER BY + submission_timestamp + ) = 1 + +WINDOW + w1 AS ( + PARTITION BY + usage_profile_id, + submission_date + ORDER BY + submission_timestamp + ROWS BETWEEN + UNBOUNDED PRECEDING + AND UNBOUNDED FOLLOWING + ) diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.schema.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.schema.yaml new file mode 100644 index 00000000000..4b980075afe --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_daily_v1.schema.yaml @@ -0,0 +1,70 @@ +fields: +- mode: NULLABLE + name: submission_date + type: DATE + description: | + Logical date used for processing and partitioning. 
+ +- mode: NULLABLE + name: usage_profile_id + type: STRING + description: + +- mode: NULLABLE + name: first_run_date + type: DATE + description: | + The date of the first run of the application. + +- mode: NULLABLE + name: normalized_channel + type: STRING + description: | + The channel the application is being distributed on. + +- mode: NULLABLE + name: normalized_os + type: STRING + description: | + The name of the operating system. + +- mode: NULLABLE + name: normalized_os_version + type: STRING + description: | + The user-visible version of the operating system (e.g. "1.2.3"). + If the version detection fails, this metric gets set to Unknown. + +- mode: NULLABLE + name: locale + type: STRING + description: | + The locale of the application during initialization (e.g. "es-ES"). + If the locale can't be determined on the system, the value is "und", to indicate "undetermined". + +- mode: NULLABLE + name: app_build + type: STRING + description: | + The build identifier generated by the CI system (e.g. "1234/A"). + If the value was not provided through configuration, this metric gets set to Unknown. + +- mode: NULLABLE + name: app_display_version + type: STRING + description: | + The user visible version string (e.g. "1.0.3"). + If the value was not provided through configuration, this metric gets set to Unknown. + +- mode: NULLABLE + name: distribution_id + type: STRING + description: | + A string containing the distribution identifier. This was used to identify installs + from Mozilla Online, but now also identifies partnership deal distributions. + +- mode: NULLABLE + name: is_active + type: BOOLEAN + description: | + A flag field indicating whether the specific client was active. 
diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen.metadata.yaml new file mode 100644 index 00000000000..b599bed5ae6 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen.metadata.yaml @@ -0,0 +1,11 @@ +{{ header_yaml }} +friendly_name: Clients First Seen Based on the DAU Reporting Ping. +description: |- + A representation of when we saw each `profile_usage_id` + for the first time based on the dau_reporting ping. + +owners: + - kik@mozilla.com +labels: + incremental: true + schedule: daily diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen.view.sql b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen.view.sql new file mode 100644 index 00000000000..f73465b7cea --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen.view.sql @@ -0,0 +1,8 @@ +{{ header }} +CREATE OR REPLACE VIEW + `{{ project_id }}.{{ dau_reporting_clients_first_seen_view }}` +AS +SELECT + * +FROM + `{{ project_id }}.{{ dau_reporting_clients_first_seen_table }}` diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.metadata.yaml new file mode 100644 index 00000000000..5e9082a2947 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.metadata.yaml @@ -0,0 +1,19 @@ +{{ header_yaml }} +friendly_name: Clients First Seen Based on the DAU Reporting Ping. +description: |- + A representation of when we saw each `profile_usage_id` + for the first time based on the dau_reporting ping. 
+ +owners: + - kik@mozilla.com +labels: + incremental: true + schedule: daily +scheduling: + dag_name: bqetl_glean_usage + task_group: {{ app_name }} +bigquery: + time_partitioning: + type: day + field: first_seen_date + require_partition_filter: false diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.query.sql b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.query.sql new file mode 100644 index 00000000000..a0b8e90d167 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.query.sql @@ -0,0 +1,58 @@ +{{ header }} + +WITH + _current AS ( + SELECT + usage_profile_id, + {% raw %} + {% if is_init() %} + MIN(submission_date) AS first_seen_date, + {% else %} + @submission_date AS first_seen_date, + {% endif %} + {% endraw %} + FROM + `{{ dau_reporting_clients_daily_table }}` + WHERE + usage_profile_id IS NOT NULL + {% raw %} + {% if is_init() %} + AND submission_date >= "2024-10-10" + {% else %} + AND submission_date = @submission_date + {% endif %} + {% endraw %} + GROUP BY + usage_profile_id + ), +_previous AS ( + SELECT + usage_profile_id, + FROM + `{{ dau_reporting_clients_first_seen_table }}` + WHERE + {% raw %} + {% if is_init() %} + False + {% else %} + first_seen_date < @submission_date + {% endif %} + {% endraw %} +) + +SELECT + first_seen_date, + usage_profile_id, +FROM + _current +LEFT JOIN + _previous + USING (usage_profile_id) +WHERE + _previous.usage_profile_id IS NULL +QUALIFY + IF( + COUNT(*) OVER (PARTITION BY usage_profile_id) > 1, + ERROR("Duplicate usage_profile_id combination detected."), + TRUE + ) diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.schema.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.schema.yaml new file mode 100644 index 00000000000..7e8f77c4745 --- /dev/null +++ 
b/sql_generators/glean_usage/templates/dau_reporting_clients_first_seen_v1.schema.yaml @@ -0,0 +1,13 @@ +fields: + +- mode: NULLABLE + name: usage_profile_id + type: STRING + description: | + A UUID of the usage_profile. + +- mode: NULLABLE + name: first_seen_date + type: DATE + description: | + Logical date of when we observed the client for the first time in our warehouse. diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen.metadata.yaml new file mode 100644 index 00000000000..988ae3b8fcf --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen.metadata.yaml @@ -0,0 +1,11 @@ +{{ header_yaml }} +friendly_name: Clients Last Seen Based on the DAU Reporting Ping. +description: |- + A daily aggregate of the dau_reporting ping for each `usage_profile_id` + representing their activity. + +owners: + - kik@mozilla.com +labels: + incremental: true + schedule: daily diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen.view.sql b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen.view.sql new file mode 100644 index 00000000000..a5711b7bd3e --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen.view.sql @@ -0,0 +1,8 @@ +{{ header }} +CREATE OR REPLACE VIEW + `{{ project_id }}.{{ dau_reporting_clients_last_seen_view }}` +AS +SELECT + * +FROM + `{{ project_id }}.{{ dau_reporting_clients_last_seen_table }}` diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.metadata.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.metadata.yaml new file mode 100644 index 00000000000..897893fbe74 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.metadata.yaml @@ -0,0 +1,19 @@ +{{ header_yaml }} +friendly_name: Clients Last Seen Based on the DAU Reporting Ping.
+description: |- + A daily aggregate of the dau_reporting ping for each `usage_profile_id` + representing their activity. + +owners: + - kik@mozilla.com +labels: + incremental: true + schedule: daily +scheduling: + dag_name: bqetl_glean_usage + task_group: {{ app_name }} +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: true diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.query.sql b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.query.sql new file mode 100644 index 00000000000..a21c7e91701 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.query.sql @@ -0,0 +1,54 @@ +{{ header }} + +WITH _current AS ( + SELECT + usage_profile_id, + -- In this raw table, we capture the history of activity over the past + -- 28 days for each usage criterion as a single 64-bit integer. The + -- rightmost bit in 'days_seen_bits' represents whether the user sent a + -- dau_reporting ping in the submission_date and similarly, the rightmost bit in + -- days_active_bits represents whether the user counts as active on that date. + CAST(TRUE AS INT64) AS days_seen_bits, + CAST(TRUE AS INT64) & CAST(is_active AS INT64) AS days_active_bits, + udf.days_since_created_profile_as_28_bits( + DATE_DIFF(submission_date, first_run_date, DAY) + ) AS days_created_profile_bits, + FROM + `{{ dau_reporting_clients_daily_table }}` + WHERE + submission_date = @submission_date +), +_previous AS ( + SELECT + usage_profile_id, + days_seen_bits, + days_active_bits, + days_created_profile_bits, + FROM + `{{ dau_reporting_clients_last_seen_table }}` + WHERE + submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) + -- Filter out rows from yesterday that have now fallen outside the 28-day window.
+ AND udf.shift_28_bits_one_day(days_seen_bits) > 0 +) +SELECT + @submission_date AS submission_date, + IF(_current.usage_profile_id IS NOT NULL, _current, _previous).* REPLACE ( + udf.combine_adjacent_days_28_bits( + _previous.days_seen_bits, + _current.days_seen_bits + ) AS days_seen_bits, + udf.combine_adjacent_days_28_bits( + _previous.days_active_bits, + _current.days_active_bits + ) AS days_active_bits, + udf.combine_adjacent_days_28_bits( + _previous.days_created_profile_bits, + _current.days_created_profile_bits + ) AS days_created_profile_bits + ) +FROM + _current +FULL JOIN + _previous + USING (usage_profile_id) diff --git a/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.schema.yaml b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.schema.yaml new file mode 100644 index 00000000000..fae73f60e19 --- /dev/null +++ b/sql_generators/glean_usage/templates/dau_reporting_clients_last_seen_v1.schema.yaml @@ -0,0 +1,30 @@ +fields: +- mode: NULLABLE + name: submission_date + type: DATE + description: | + Logical date used for processing and partitioning. + +- mode: NULLABLE + name: usage_profile_id + type: STRING + description: | + A UUID of the usage_profile. + +- mode: NULLABLE + name: days_seen_bits + type: INTEGER + description: | + Bit field shows on which of the last 28 days a client sent us the dau_reporting ping. + +- mode: NULLABLE + name: days_active_bits + type: INTEGER + description: | + Bit field shows on which of the last 28 days a client fulfilled the active criteria. + +- mode: NULLABLE + name: days_created_profile_bits + type: INTEGER + description: | + Bit field indicating how many days have elapsed since profile creation.