Skip to content

Commit

Permalink
feature: build models matching dset gaps analysis (et/somenergia-jard…
Browse files Browse the repository at this point in the history
…iner!89)

Merge branch 'feature/build-gaps-models-matching-dset' into 'main'
  • Loading branch information
diegoquintanav committed Feb 7, 2024
2 parents 848ad28 + 857b4dd commit 922774c
Show file tree
Hide file tree
Showing 18 changed files with 172 additions and 192 deletions.
5 changes: 4 additions & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ dbt-build:
DBUSER: $SOM_JARDINER_DB_USER
DBPASSWORD: $SOM_JARDINER_DB_PASSWORD
DBNAME: $SOM_JARDINER_DB_DBNAME
DBT_MODELS_SELECTOR: "--models state:modified+"
DBT_MANIFEST_ARTIFACT_URL: "https://$CI_SERVER_HOST/api/v4/projects/$CI_PROJECT_ID/jobs/artifacts/${CI_DEFAULT_BRANCH}/download?job=pages&job_token=$CI_JOB_TOKEN"
image: ${SOM_HARBOR_DADES_URL}/${SOM_PROJECT_NAME}-dbt-docs:latest
script:
Expand All @@ -145,20 +146,22 @@ dbt-build:
- curl --location --output /tmp/artifacts.zip ${DBT_MANIFEST_ARTIFACT_URL}
- unzip -o /tmp/artifacts.zip -d /tmp/artifacts
- cp /tmp/artifacts/public/dbt_docs/manifest.json ${CI_PROJECT_DIR}/${DBT_PROJECT_DIR_NAME}/state/prod/manifest.json
- dbt build --target ${DBT_TARGET_NAME} --store-failures --threads 4 --models state:modified+ --state state/prod
- dbt build --target ${DBT_TARGET_NAME} --store-failures --threads 4 ${DBT_MODELS_SELECTOR} --state state/prod
tags:
- somenergia-et
rules:
- if: $CI_COMMIT_REF_NAME == $CI_DEFAULT_BRANCH && $CI_PIPELINE_SOURCE == "push"
when: always
variables:
DBT_TARGET_NAME: prod
DBT_MODELS_SELECTOR: "--models state:modified+"
changes:
paths: *dbt-build-changes-paths
allow_failure: false
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
when: always
variables:
DBT_MODELS_SELECTOR: "--models state:modified+ tag:dset_responses_fresh"
DBT_TARGET_NAME: pre
DBT_FAIL_FAST: "True"
changes:
Expand Down
39 changes: 39 additions & 0 deletions dbt_jardiner/analyses/dset_gaps_preview_20240110.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
-- Preview for 2024-01-10: for every signal, puts the sample counts found in
-- analytics.se_forats_hornsby_dades_dia_10 next to the number of non-null samples
-- stored in int_dset_responses__materialized for that same day.
-- Only the final select is sorted: an ORDER BY inside a CTE is not guaranteed to
-- survive the joins that follow it, so the intermediate CTEs carry none.
with
    -- distinct (group_name, signal_code, signal_uuid) triples seen on 2024-01-10
    som_uuids as (
        select distinct
            group_name,
            signal_code,
            signal_uuid
        from {{ ref("int_dset_responses__materialized") }}
        where ts >= '2024-01-10' and ts < '2024-01-11'
    ),

    -- rows of the analytics table, tagged with our signal_uuid whenever
    -- (sen_codi, gru_nom) matches (signal_code, group_name)
    dset_uuids as (
        select
            a.gru_codi,
            a.gru_nom,
            a.sen_codi,
            a.sen_descripcio,
            a.esperats_frequencia,
            a.trobats_senyal,
            b.signal_uuid
        from analytics.se_forats_hornsby_dades_dia_10 as a
        left join som_uuids as b
            on a.sen_codi = b.signal_code
            and a.gru_nom = b.group_name
    ),

    -- number of non-null samples we hold per signal_uuid on 2024-01-10
    som_count as (
        select
            signal_uuid,
            count(*) as cnt
        from {{ ref("int_dset_responses__materialized") }}
        where ts >= '2024-01-10' and ts < '2024-01-11'
            and signal_value is not null
        group by signal_uuid
    ),

    -- NOTE(review): som_count is the left side of this join, so a signal_uuid with
    -- no match in dset_uuids yields a row whose b.* columns (signal_uuid included)
    -- are all null -- confirm this is intended
    summary as (
        select
            b.*,
            a.cnt as som_trobats_senyal
        from som_count as a
        left join dset_uuids as b
            on a.signal_uuid = b.signal_uuid
    ),

    final as (
        select
            *,
            esperats_frequencia - trobats_senyal as n_forats_dset,
            -- NOTE(review): 288 is presumably the samples per day at a 5-minute
            -- frequency (24 * 12) -- confirm
            288 - som_trobats_senyal as n_forats_som
        from summary
    )

select *
from final
order by gru_codi, gru_nom, sen_codi
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ tests:
config:
severity: warning

- name: test_dset_gaps_per_month_and_signal_all_time
- name: test_dset_gaps_per_day_and_signal_last_month
description: >
Taula que recull el número de forats per mes i senyal, de tota la història de senyals que hem rebut
Taula que recull el número de forats per dia i senyal, de l'últim mes de dades en curs
config:
severity: warning

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dset_from_december_2023 as (
{# if we don't limit queried_at, the query planner performs very badly #}
and queried_at > '2023-12-01'
),

spined_dset as (
select
spined.ts,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
{{ config(severity="warn") }}

-- Lists, per day and signal, the gaps between consecutive non-null samples
-- received during the last month, together with an estimate of how many samples
-- each gap represents. Every returned row counts as a failure of this test
-- (reported with severity "warn").
with
    -- non-null samples whose ts falls within the last month
    window_observed as (
        select
            signal_value,
            group_name,
            signal_code,
            signal_id,
            signal_device_type,
            signal_uuid,
            queried_at,
            ts as current_ts,
            signal_frequency::interval as signal_frequency
        from {{ ref("int_dset_responses__materialized") }}
        where current_date - interval '1 month' < ts
            and signal_value is not null
    ),

    -- pair every sample with the timestamp of the previous sample of the same signal
    window_lagged as (
        select
            *,
            lag(current_ts) over (
                partition by signal_uuid
                order by current_ts asc
            ) as previous_ts
        from window_observed
    ),

    -- keep only the pairs separated by more than the signal frequency;
    -- the gap is attributed to the date of the sample that closes it.
    -- The first sample of each signal has a null previous_ts and is dropped here.
    gaps_observed as (
        select
            *,
            current_ts::date as "date",
            current_ts - previous_ts as gap
        from window_lagged
        where current_ts - previous_ts > signal_frequency
    ),

    -- count how many gaps of each length happened per day and signal
    gaps_summarized as (
        select
            "date",
            group_name,
            signal_code,
            signal_id,
            signal_device_type,
            signal_uuid,
            gap,
            signal_frequency,
            count(signal_uuid) as n_gaps
        from gaps_observed
        group by
            "date",
            group_name,
            signal_uuid,
            signal_code,
            signal_id,
            signal_device_type,
            gap,
            signal_frequency
    ),

    gaps_converted_to_n_missing_samples as (
        select
            *,
            -- n_gaps * ceiling(gap/frequency - 1) as n_missing_samples.
            -- The -1 is because the starting point in the gap can't be counted as missing
            n_gaps * ceil(extract(epoch from gap) / extract(epoch from signal_frequency) - 1) as n_missing_samples,
            -- 24*60 are the minutes in a day
            (24 * 60) / (extract(epoch from signal_frequency) / 60) as n_samples_per_day
        from gaps_summarized
    ),

    gaps_ratio as (
        select
            *,
            n_missing_samples / n_samples_per_day as ratio_missing_samples
        from gaps_converted_to_n_missing_samples
    )

-- The sort lives on the outermost select: an ORDER BY placed inside a CTE is not
-- guaranteed to be preserved by the queries that read from it.
select *
from gaps_ratio
order by
    "date" desc,
    n_gaps desc,
    gap desc,
    group_name asc,
    signal_code asc,
    signal_id asc,
    signal_device_type asc
38 changes: 38 additions & 0 deletions dbt_jardiner/tests/dset/test_dset_signals_receiver_last_hour.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{{ config(error_if=">500") }}
{# error limit is set on half the number of signal uuids available #}

-- Returns every signal listed in raw_gestio_actius__signal_denormalized for which
-- int_dset_responses__materialized holds no row with ts in the last two hours.
-- Each returned row is one failure of this test.
with
    -- one row per signal_uuid that has at least one response in the last two hours
    uuids_received_recently as (
        select
            signal_uuid,
            true as is_received_recently
        from {{ ref("int_dset_responses__materialized") }}
        {# interval used of two hours is depending on the natural delay of dset data + materialization cycle -#}
        where ts >= (now() - interval '2 hours')
        group by signal_uuid
    ),

    -- every known signal, flagged with whether it was received recently;
    -- signals without a match in the left join are flagged false
    uuids_expected as (
        select
            s.plant_uuid,
            s.plant_name,
            s.signal_name,
            s.signal_uuid,
            s.device_name,
            s.device_type,
            s.device_uuid,
            coalesce(r.is_received_recently, false) as received_from_dset
        from {{ ref("raw_gestio_actius__signal_denormalized") }} as s
        left join uuids_received_recently as r
            on s.signal_uuid = r.signal_uuid
    ),

    uuids_not_received as (
        select *
        from uuids_expected
        -- received_from_dset is never null thanks to the coalesce above
        where not received_from_dset
    )

-- The sort lives on the outermost select: an ORDER BY placed inside a CTE is not
-- guaranteed to be preserved by the queries that read from it.
select *
from uuids_not_received
order by plant_name
File renamed without changes.

This file was deleted.

This file was deleted.

Loading

0 comments on commit 922774c

Please sign in to comment.