From 032a1b77d1fe67a75317d44bef14fc1bc619764b Mon Sep 17 00:00:00 2001 From: Michael Silberling <4006780+MSilb7@users.noreply.github.com> Date: Tue, 29 Aug 2023 16:30:09 -0400 Subject: [PATCH] Add Base to HFT Bots Logic (#4057) * migrate * comma * op_chains bots * lbl * rm spells * plus * dunesql * func * jinja * chain name * lbl * lower transfer * mod alias * legacy * comma * rerun * label * add chain * group * group --------- Co-authored-by: jeff <102681548+jeff-dude@users.noreply.github.com> --- .../likely_bots/labels_likely_bot_labels.sql | 7 +- .../labels_likely_bot_labels_legacy.sql | 3 +- .../labels_op_chains_likely_bot_addresses.sql | 164 ++++++++++++++++++ ..._op_chains_likely_bot_addresses_legacy.sql | 9 + .../labels_op_chains_likely_bot_contracts.sql | 123 +++++++++++++ ..._op_chains_likely_bot_contracts_legacy.sql | 10 ++ .../labels_op_chains_likely_bots_schema.yml | 80 +++++++++ 7 files changed, 392 insertions(+), 4 deletions(-) create mode 100644 models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses.sql create mode 100644 models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses_legacy.sql create mode 100644 models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts.sql create mode 100644 models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts_legacy.sql create mode 100644 models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bots_schema.yml diff --git a/models/labels/addresses/__single_category_labels__/likely_bots/labels_likely_bot_labels.sql b/models/labels/addresses/__single_category_labels__/likely_bots/labels_likely_bot_labels.sql index 873b06e38c9..50b70c3a444 100644 --- a/models/labels/addresses/__single_category_labels__/likely_bots/labels_likely_bot_labels.sql +++ b/models/labels/addresses/__single_category_labels__/likely_bots/labels_likely_bot_labels.sql @@ -2,13 +2,14 @@ config( alias = alias('likely_bot_labels'), tags = ['dunesql'], - post_hook='{{ expose_spells(\'["optimism"]\', + post_hook='{{ expose_spells(\'["optimism","base"]\', "sector", "labels", \'["msilb7"]\') }}' ) }} -SELECT * FROM {{ ref('labels_optimism_likely_bot_addresses') }} +-- op_chains includes optimism, so we don't union the optimism labels here +SELECT * FROM {{ ref('labels_op_chains_likely_bot_addresses') }} UNION ALL -SELECT * FROM {{ ref('labels_optimism_likely_bot_contracts') }} \ No newline at end of file +SELECT * FROM {{ ref('labels_op_chains_likely_bot_contracts') }} \ No newline at end of file diff --git a/models/labels/addresses/__single_category_labels__/likely_bots/labels_likely_bot_labels_legacy.sql b/models/labels/addresses/__single_category_labels__/likely_bots/labels_likely_bot_labels_legacy.sql index 0ba00ba9972..c508f0cbd5a 100644 --- a/models/labels/addresses/__single_category_labels__/likely_bots/labels_likely_bot_labels_legacy.sql +++ b/models/labels/addresses/__single_category_labels__/likely_bots/labels_likely_bot_labels_legacy.sql @@ -9,7 +9,8 @@ \'["msilb7"]\') }}' ) }} ---in legacy keep old + +-- in legacy, keep just op, since we didn't re create the labels SELECT * FROM {{ ref('labels_optimism_likely_bot_addresses_legacy') }} UNION ALL SELECT * FROM {{ ref('labels_optimism_likely_bot_contracts_legacy') }} \ No newline at end of file diff --git a/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses.sql b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses.sql new file mode 100644 index 00000000000..8e3254c188f --- /dev/null +++ b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses.sql @@ -0,0 +1,164 @@ +{{ + config( + alias = alias('op_chains_likely_bot_addresses'), + tags = ['dunesql'], + post_hook='{{ expose_spells(\'["optimism","base"]\', + "sector", + "labels", + \'["msilb7"]\') }}' + ) +}} + +{% set op_chains = all_op_chains() %} --macro: all_op_chains.sql + +-- This could/should become a spell with some kind of modular logic approach so that we can plug in new detection logic over time (i.e. many of X method, or Y project's contracts) +-- the core of this logic is on transaction frequency and sender concentration The "sender concentration" piece will get tested by mutlisigs / smart contract wallets. + +WITH sender_transfer_rates AS ( + {% for chain in op_chains %} + -- For each transaction sender, get their hourly transaction data + SELECT '{{chain}}' as blockchain + ,"from" AS sender, DATE_TRUNC('hour',block_time) AS hr + , MIN(block_time) AS min_block_time + , MAX(block_time) AS max_block_time + , COUNT(*) AS hr_txs + , SUM(CASE WHEN to IN (SELECT address FROM {{ ref('labels_op_chains_likely_bot_contracts') }} WHERE name != 'chain ops bot' AND blockchain = '{{chain}}') THEN 1 ELSE 0 END) AS bot_concentration_txs + + , SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ source('erc20_' + chain,'evt_transfer') }} r WHERE t.hash = r.evt_tx_hash AND t.block_number = r.evt_block_number) THEN 1 ELSE 0 END) AS num_erc20_tfer_txs + , SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ ref('nft_transfers') }} r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND blockchain = '{{chain}}') THEN 1 ELSE 0 END) AS num_nft_tfer_txs + + , SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ source('erc20_' + chain,'evt_transfer') }} r WHERE t.hash = r.evt_tx_hash AND t.block_number = r.evt_block_number) THEN 1 + WHEN EXISTS (SELECT 1 FROM {{ ref('nft_transfers') }} r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND blockchain = '{{chain}}') THEN 1 + ELSE 0 END) AS num_token_tfer_txs + + , 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('dex_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_time = r.block_time AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_dex_trade_txs + , 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('perpetual_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_time = r.block_time AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_perp_trade_txs + , 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('nft_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_nft_trade_txs + + FROM {{ source( chain ,'transactions') }} t + + + GROUP BY 1,2,3 + + {% if not loop.last %} + UNION ALL + {% endif %} + {% endfor %} + +) + +, first_pass_throughput_filter AS +( + -- Filter down this list a bit to help with later mappings + SELECT + blockchain + ,sender, DATE_TRUNC('week',hr) AS wk, SUM(hr_txs) AS wk_txs, MAX(hr_txs) AS max_hr_txs, SUM(bot_concentration_txs) AS bot_concentration_txs, + cast(COUNT(*) as double) /cast(7.0*24.0 as double) AS pct_weekly_hours_active, + MIN(min_block_time) AS min_block_time, + MAX(max_block_time) AS max_block_time, + + SUM(num_erc20_tfer_txs) AS num_erc20_tfer_txs, + SUM(num_nft_tfer_txs) AS num_nft_tfer_txs, + SUM(num_token_tfer_txs) AS num_token_tfer_txs, + SUM(num_dex_trade_txs) AS num_dex_trade_txs, + SUM(num_perp_trade_txs) AS num_perp_trade_txs, + SUM(num_nft_trade_txs) AS num_nft_trade_txs + FROM sender_transfer_rates e + GROUP BY 1,2,3 + HAVING MAX(hr_txs) >= 20 --had some high-ish frequency - gte 20 txs per hour at least once + OR + SUM(bot_concentration_txs) >= 0 --had a tx to a bot address + +) + + +, bot_addresses AS ( +SELECT *, + cast(num_erc20_tfer_txs as double) / cast( num_txs as double) AS pct_erc20_tfer_txs, + cast(num_nft_tfer_txs as double) / cast( num_txs as double) AS pct_nft_tfer_txs, + cast(num_token_tfer_txs as double) / cast( num_txs as double) AS pct_token_tfer_txs, + cast(num_dex_trade_txs as double) / cast( num_txs as double) AS pct_dex_trade_txs, + cast(num_perp_trade_txs as double) / cast( num_txs as double) AS pct_perp_trade_txs, -- perpetual.trades has some dunesql incompatability + cast(num_nft_trade_txs as double) / cast( num_txs as double) AS pct_nft_trade_txs + +FROM ( + SELECT + '{{chain}}' AS blockchain + , sender, MAX(wk_txs) AS max_wk_txs, MAX(max_hr_txs) AS max_hr_txs, AVG(wk_txs) AS avg_wk_txs + ,MIN(min_block_time) AS min_block_time + ,MAX(max_block_time) AS max_block_time + ,MAX(pct_weekly_hours_active) AS max_pct_weekly_hours_active + ,AVG(pct_weekly_hours_active) AS avg_pct_weekly_hours_active + ,SUM(wk_txs) AS num_txs + ,SUM(bot_concentration_txs) AS bot_concentration_txs + + ,cast(SUM(bot_concentration_txs) as double) / cast(SUM(wk_txs) as double) AS pct_bot_concentration_txs + ,( cast( date_DIFF('second', MIN(min_block_time), MAX(max_block_time)) as double) / (60.0*60.0) ) AS txs_per_hour + + ,SUM(num_erc20_tfer_txs) AS num_erc20_tfer_txs + ,SUM(num_nft_tfer_txs) AS num_nft_tfer_txs + ,SUM(num_token_tfer_txs) AS num_token_tfer_txs + ,SUM(num_dex_trade_txs) AS num_dex_trade_txs + ,SUM(num_perp_trade_txs) AS num_perp_trade_txs + ,SUM(num_nft_trade_txs) AS num_nft_trade_txs + + FROM first_pass_throughput_filter f + GROUP BY 1,2 + -- various cases to detect bots + HAVING (MAX(wk_txs) >= 2000 AND MAX(max_hr_txs) >= 100) --frequency (gt 2k txs in one week and gt 100 txs in one hour) + OR (MAX(wk_txs) >= 4000 AND MAX(max_hr_txs) >= 50) --frequency (gt 4k txs in one week and gt 50 txs in one hour) + OR AVG(wk_txs) >= 1000 --frequency (avg 1k txs per week) + OR + ( + cast(COUNT(*) as double) / + ( cast( date_DIFF('second', MIN(min_block_time), MAX(max_block_time)) as double) / (60.0*60.0) ) >= 25 + AND SUM(wk_txs) >= 100 + ) --frequency gt 25 txs per hour + OR AVG(pct_weekly_hours_active) > 0.5 -- aliveness: transacting at least 50% of hours per week + OR MAX(pct_weekly_hours_active) > 0.95 -- aliveness: at peack, transacted at least 95% of hours in a week + OR (cast(SUM(bot_concentration_txs) as double) / cast(SUM(wk_txs) as double) > 0.5) --at least half txs go to bots + ) ff + +) + +select + blockchain, + address, + name, + category, + 'msilb7' AS contributor, + 'query' AS source, + timestamp '2023-03-11' as created_at, + now() as updated_at, + 'likely_bot_addresses' as model_name, + 'persona' as label_type + + FROM ( + -- show ther entire list as one label + SELECT + blockchain, + sender AS address, + 'likely bots' AS category, + 'likely bot addresses' AS name + + from bot_addresses + + UNION ALL + -- create another set of labels, cohorted by "type" + SELECT + blockchain, + sender AS address, + 'likely bot types' AS category, + CASE + WHEN pct_dex_trade_txs >= 0.5 THEN 'dex trade bot address' + WHEN pct_nft_trade_txs >= 0.5 THEN 'nft trade bot address' + WHEN pct_perp_trade_txs >= 0.5 THEN 'perp trade bot address' + WHEN pct_erc20_tfer_txs >= 0.5 THEN 'erc20 transfer bot address' + WHEN pct_nft_tfer_txs >= 0.5 THEN 'nft transfer bot address' + WHEN pct_token_tfer_txs >= 0.5 THEN 'other token transfer bot address' + ELSE 'non-token bot address' + END AS name + + from bot_addresses + + ) a \ No newline at end of file diff --git a/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses_legacy.sql b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses_legacy.sql new file mode 100644 index 00000000000..83fb42129a0 --- /dev/null +++ b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses_legacy.sql @@ -0,0 +1,9 @@ +{{ + config( + tags=['legacy'], + + alias = alias('op_chains_likely_bot_addresses', legacy_model=True) + ) +}} + +SELECT 1 \ No newline at end of file diff --git a/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts.sql b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts.sql new file mode 100644 index 00000000000..1d32717359c --- /dev/null +++ b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts.sql @@ -0,0 +1,123 @@ +{{ + config( + alias = alias('op_chains_likely_bot_contracts'), + tags = ['dunesql'], + post_hook='{{ expose_spells(\'["optimism","base"]\', + "sector", + "labels", + \'["msilb7"]\') }}' + ) +}} + +{% set op_chains = all_op_chains() %} --macro: all_op_chains.sql + +-- This could/should become a spell with some kind of modular logic approach so that we can plug in new detection logic over time (i.e. many of X method, or Y project's contracts) +-- the core of this logic is on transaction frequency and sender concentration The "sender concentration" piece will get tested by mutlisigs / smart contract wallets. + +WITH first_contracts AS ( +SELECT *, + cast(num_erc20_tfer_txs as double) / cast( num_txs as double) AS pct_erc20_tfer_txs, + cast(num_nft_tfer_txs as double) / cast( num_txs as double) AS pct_nft_tfer_txs, + cast(num_token_tfer_txs as double) / cast( num_txs as double) AS pct_token_tfer_txs, + cast(num_dex_trade_txs as double) / cast( num_txs as double) AS pct_dex_trade_txs, + cast(num_perp_trade_txs as double) / cast( num_txs as double) AS pct_perp_trade_txs, + cast(num_nft_trade_txs as double) / cast( num_txs as double) AS pct_nft_trade_txs + +FROM ( + {% for chain in op_chains %} + SELECT + '{{chain}}' as blockchain, + to AS contract, + SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ source('erc20_' + chain,'evt_transfer') }} r WHERE t.hash = r.evt_tx_hash AND t.block_number = r.evt_block_number) THEN 1 ELSE 0 END) AS num_erc20_tfer_txs, + SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ ref('nft_transfers') }} r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND r.blockchain = '{{chain}}') THEN 1 ELSE 0 END) AS num_nft_tfer_txs, + + SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ source('erc20_' + chain,'evt_transfer') }} r WHERE t.hash = r.evt_tx_hash AND t.block_number = r.evt_block_number) THEN 1 + WHEN EXISTS (SELECT 1 FROM {{ ref('nft_transfers') }} r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND r.blockchain = '{{chain}}') THEN 1 + ELSE 0 END) AS num_token_tfer_txs, + + 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('dex_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_time = r.block_time AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_dex_trade_txs, + 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('perpetual_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_time = r.block_time AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_perp_trade_txs, + 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('nft_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_nft_trade_txs, + COUNT(*) AS num_txs, COUNT(DISTINCT "from") AS num_senders, COUNT(*)/COUNT(DISTINCT "from") AS txs_per_sender, + + cast(cast(COUNT(*) as double)/cast(COUNT(DISTINCT "from") as double) as double) / + ( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) + AS txs_per_addr_per_hour, + + cast(COUNT(*) as double) / + ( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) + AS txs_per_hour + + -- SUM( CASE WHEN substring(data from 1 for 10) = mode(substring(data from 1 for 10) THEN 1 ELSE 0 END) ) AS method_dupe + FROM {{ source(chain ,'transactions') }} t + GROUP BY 1,2 + + -- search for various potential bot indicators + HAVING + -- early bots: > 25 txs / hour per address + (COUNT(*) >= 100 AND + cast(cast(COUNT(*) as double)/cast(COUNT(DISTINCT "from") as double) as double) / + ( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) >= 25 + ) + OR + -- established bots: less than 30 senders & > 2.5k txs & > 0.5 txs / hr (to make sure we don't accidently catch active multisigs) + (COUNT(*) >= 2500 AND COUNT(DISTINCT "from") <=30 + AND cast(COUNT(*) as double) / + ( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) >= 0.5 + ) + OR + -- wider distribution bots: > 2.5k txs and > 1k txs per sender & > 0.5 txs / hr (to make sure we don't accidently catch active multisigs) + ( + COUNT(*) >= 2500 AND cast(COUNT(*) as double)/cast(COUNT(DISTINCT "from") as double) >= 1000 + AND cast(COUNT(*) as double) / + ( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) >= 0.5 + ) + + {% if not loop.last %} + UNION ALL + {% endif %} + {% endfor %} + ) a +) + +select + blockchain, + address, + name, + category, + 'msilb7' AS contributor, + 'query' AS source, + timestamp '2023-03-11' as created_at, + now() as updated_at, + 'likely_bot_contracts' as model_name, + 'persona' as label_type + + FROM ( + + SELECT + blockchain, + contract AS address, + 'likely bots' AS category, + 'likely bot contracts' AS name + + from first_contracts + + UNION ALL + + SELECT + blockchain, + contract AS address, + 'likely bot types' AS category, + CASE + WHEN pct_dex_trade_txs >= 0.5 THEN 'dex trade bot contract' + WHEN pct_nft_trade_txs >= 0.5 THEN 'nft trade bot contract' + WHEN pct_perp_trade_txs >= 0.5 THEN 'perp trade bot contract' + WHEN pct_erc20_tfer_txs >= 0.5 THEN 'erc20 transfer bot contract' + WHEN pct_nft_tfer_txs >= 0.5 THEN 'nft transfer bot contract' + WHEN pct_token_tfer_txs >= 0.5 THEN 'other token transfer bot contract' + ELSE 'non-token bot contract' + END AS name + + from first_contracts + + ) a \ No newline at end of file diff --git a/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts_legacy.sql b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts_legacy.sql new file mode 100644 index 00000000000..2633c8faff5 --- /dev/null +++ b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts_legacy.sql @@ -0,0 +1,10 @@ +{{ + config( + tags=['legacy'], + + alias = alias('op_chains_likely_bot_contracts', legacy_model=True) + ) +}} + + +SELECT 1 \ No newline at end of file diff --git a/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bots_schema.yml b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bots_schema.yml new file mode 100644 index 00000000000..242d63668ce --- /dev/null +++ b/models/labels/addresses/__single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bots_schema.yml @@ -0,0 +1,80 @@ +version: 2 + +models: + - name: labels_op_chains_likely_bot_contracts + meta: + blockchain: optimism, base + sector: labels + category: bot_contracts + contributors: msilb7 + config: + tags: ['labels', 'optimism','base','op chain','bots'] + description: "Likely bot contract labels" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - address + - name + - category + - blockchain + + columns: + - &blockchain + name: blockchain + description: "Blockchain" + - &address + name: address + description: "Address" + - &name + name: name + description: "Label name" + - &category + name: category + description: "Label category" + - &contributor + name: contributor + description: "Wizard(s) contributing to labels" + - &source + name: source + description: "How were labels generated (query)" + - &created_at + name: created_at + description: "When were labels created" + - &updated_at + name: updated_at + description: "When were labels updated for the last time" + - &model_name + name: model_name + description: "Name of the label model sourced from" + - &label_type + name: label_type + description: "Type of label (see labels overall readme)" + + - name: labels_op_chains_likely_bot_addresses + meta: + blockchain: optimism, base + sector: labels + category: bot_addresses + contributors: msilb7 + config: + tags: ['labels', 'optimism','base','op chain','bots'] + description: "Likely bot address labels" + tests: + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - address + - name + - category + - blockchain + + columns: + - *blockchain + - *address + - *name + - *category + - *contributor + - *source + - *created_at + - *updated_at + - *model_name + - *label_type \ No newline at end of file