forked from duneanalytics/spellbook
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Base to HFT Bots Logic (duneanalytics#4057)
* migrate * comma * op_chains bots * lbl * rm spells * plus * dunesql * func * jinja * chain name * lbl * lower transfer * mod alias * legacy * comma * rerun * label * add chain * group * group --------- Co-authored-by: jeff <[email protected]>
- Loading branch information
Showing
7 changed files
with
392 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
164 changes: 164 additions & 0 deletions
164
..._single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
{{ | ||
config( | ||
alias = alias('op_chains_likely_bot_addresses'), | ||
tags = ['dunesql'], | ||
post_hook='{{ expose_spells(\'["optimism","base"]\', | ||
"sector", | ||
"labels", | ||
\'["msilb7"]\') }}' | ||
) | ||
}} | ||
|
||
{% set op_chains = all_op_chains() %} --macro: all_op_chains.sql | ||
|
||
-- This could/should become a spell with some kind of modular logic approach so that we can plug in new detection logic over time (i.e. many of X method, or Y project's contracts) | ||
-- the core of this logic is on transaction frequency and sender concentration The "sender concentration" piece will get tested by mutlisigs / smart contract wallets. | ||
|
||
WITH sender_transfer_rates AS ( | ||
{% for chain in op_chains %} | ||
-- For each transaction sender, get their hourly transaction data | ||
SELECT '{{chain}}' as blockchain | ||
,"from" AS sender, DATE_TRUNC('hour',block_time) AS hr | ||
, MIN(block_time) AS min_block_time | ||
, MAX(block_time) AS max_block_time | ||
, COUNT(*) AS hr_txs | ||
, SUM(CASE WHEN to IN (SELECT address FROM {{ ref('labels_op_chains_likely_bot_contracts') }} WHERE name != 'chain ops bot' AND blockchain = '{{chain}}') THEN 1 ELSE 0 END) AS bot_concentration_txs | ||
|
||
, SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ source('erc20_' + chain,'evt_transfer') }} r WHERE t.hash = r.evt_tx_hash AND t.block_number = r.evt_block_number) THEN 1 ELSE 0 END) AS num_erc20_tfer_txs | ||
, SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ ref('nft_transfers') }} r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND blockchain = '{{chain}}') THEN 1 ELSE 0 END) AS num_nft_tfer_txs | ||
|
||
, SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ source('erc20_' + chain,'evt_transfer') }} r WHERE t.hash = r.evt_tx_hash AND t.block_number = r.evt_block_number) THEN 1 | ||
WHEN EXISTS (SELECT 1 FROM {{ ref('nft_transfers') }} r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND blockchain = '{{chain}}') THEN 1 | ||
ELSE 0 END) AS num_token_tfer_txs | ||
|
||
, 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('dex_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_time = r.block_time AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_dex_trade_txs | ||
, 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('perpetual_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_time = r.block_time AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_perp_trade_txs | ||
, 0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('nft_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_nft_trade_txs | ||
|
||
FROM {{ source( chain ,'transactions') }} t | ||
|
||
|
||
GROUP BY 1,2,3 | ||
|
||
{% if not loop.last %} | ||
UNION ALL | ||
{% endif %} | ||
{% endfor %} | ||
|
||
) | ||
|
||
, first_pass_throughput_filter AS | ||
( | ||
-- Filter down this list a bit to help with later mappings | ||
SELECT | ||
blockchain | ||
,sender, DATE_TRUNC('week',hr) AS wk, SUM(hr_txs) AS wk_txs, MAX(hr_txs) AS max_hr_txs, SUM(bot_concentration_txs) AS bot_concentration_txs, | ||
cast(COUNT(*) as double) /cast(7.0*24.0 as double) AS pct_weekly_hours_active, | ||
MIN(min_block_time) AS min_block_time, | ||
MAX(max_block_time) AS max_block_time, | ||
|
||
SUM(num_erc20_tfer_txs) AS num_erc20_tfer_txs, | ||
SUM(num_nft_tfer_txs) AS num_nft_tfer_txs, | ||
SUM(num_token_tfer_txs) AS num_token_tfer_txs, | ||
SUM(num_dex_trade_txs) AS num_dex_trade_txs, | ||
SUM(num_perp_trade_txs) AS num_perp_trade_txs, | ||
SUM(num_nft_trade_txs) AS num_nft_trade_txs | ||
FROM sender_transfer_rates e | ||
GROUP BY 1,2,3 | ||
HAVING MAX(hr_txs) >= 20 --had some high-ish frequency - gte 20 txs per hour at least once | ||
OR | ||
SUM(bot_concentration_txs) >= 0 --had a tx to a bot address | ||
|
||
) | ||
|
||
|
||
, bot_addresses AS ( | ||
SELECT *, | ||
cast(num_erc20_tfer_txs as double) / cast( num_txs as double) AS pct_erc20_tfer_txs, | ||
cast(num_nft_tfer_txs as double) / cast( num_txs as double) AS pct_nft_tfer_txs, | ||
cast(num_token_tfer_txs as double) / cast( num_txs as double) AS pct_token_tfer_txs, | ||
cast(num_dex_trade_txs as double) / cast( num_txs as double) AS pct_dex_trade_txs, | ||
cast(num_perp_trade_txs as double) / cast( num_txs as double) AS pct_perp_trade_txs, -- perpetual.trades has some dunesql incompatability | ||
cast(num_nft_trade_txs as double) / cast( num_txs as double) AS pct_nft_trade_txs | ||
|
||
FROM ( | ||
SELECT | ||
'{{chain}}' AS blockchain | ||
, sender, MAX(wk_txs) AS max_wk_txs, MAX(max_hr_txs) AS max_hr_txs, AVG(wk_txs) AS avg_wk_txs | ||
,MIN(min_block_time) AS min_block_time | ||
,MAX(max_block_time) AS max_block_time | ||
,MAX(pct_weekly_hours_active) AS max_pct_weekly_hours_active | ||
,AVG(pct_weekly_hours_active) AS avg_pct_weekly_hours_active | ||
,SUM(wk_txs) AS num_txs | ||
,SUM(bot_concentration_txs) AS bot_concentration_txs | ||
|
||
,cast(SUM(bot_concentration_txs) as double) / cast(SUM(wk_txs) as double) AS pct_bot_concentration_txs | ||
,( cast( date_DIFF('second', MIN(min_block_time), MAX(max_block_time)) as double) / (60.0*60.0) ) AS txs_per_hour | ||
|
||
,SUM(num_erc20_tfer_txs) AS num_erc20_tfer_txs | ||
,SUM(num_nft_tfer_txs) AS num_nft_tfer_txs | ||
,SUM(num_token_tfer_txs) AS num_token_tfer_txs | ||
,SUM(num_dex_trade_txs) AS num_dex_trade_txs | ||
,SUM(num_perp_trade_txs) AS num_perp_trade_txs | ||
,SUM(num_nft_trade_txs) AS num_nft_trade_txs | ||
|
||
FROM first_pass_throughput_filter f | ||
GROUP BY 1,2 | ||
-- various cases to detect bots | ||
HAVING (MAX(wk_txs) >= 2000 AND MAX(max_hr_txs) >= 100) --frequency (gt 2k txs in one week and gt 100 txs in one hour) | ||
OR (MAX(wk_txs) >= 4000 AND MAX(max_hr_txs) >= 50) --frequency (gt 4k txs in one week and gt 50 txs in one hour) | ||
OR AVG(wk_txs) >= 1000 --frequency (avg 1k txs per week) | ||
OR | ||
( | ||
cast(COUNT(*) as double) / | ||
( cast( date_DIFF('second', MIN(min_block_time), MAX(max_block_time)) as double) / (60.0*60.0) ) >= 25 | ||
AND SUM(wk_txs) >= 100 | ||
) --frequency gt 25 txs per hour | ||
OR AVG(pct_weekly_hours_active) > 0.5 -- aliveness: transacting at least 50% of hours per week | ||
OR MAX(pct_weekly_hours_active) > 0.95 -- aliveness: at peack, transacted at least 95% of hours in a week | ||
OR (cast(SUM(bot_concentration_txs) as double) / cast(SUM(wk_txs) as double) > 0.5) --at least half txs go to bots | ||
) ff | ||
|
||
) | ||
|
||
select | ||
blockchain, | ||
address, | ||
name, | ||
category, | ||
'msilb7' AS contributor, | ||
'query' AS source, | ||
timestamp '2023-03-11' as created_at, | ||
now() as updated_at, | ||
'likely_bot_addresses' as model_name, | ||
'persona' as label_type | ||
|
||
FROM ( | ||
-- show ther entire list as one label | ||
SELECT | ||
blockchain, | ||
sender AS address, | ||
'likely bots' AS category, | ||
'likely bot addresses' AS name | ||
|
||
from bot_addresses | ||
|
||
UNION ALL | ||
-- create another set of labels, cohorted by "type" | ||
SELECT | ||
blockchain, | ||
sender AS address, | ||
'likely bot types' AS category, | ||
CASE | ||
WHEN pct_dex_trade_txs >= 0.5 THEN 'dex trade bot address' | ||
WHEN pct_nft_trade_txs >= 0.5 THEN 'nft trade bot address' | ||
WHEN pct_perp_trade_txs >= 0.5 THEN 'perp trade bot address' | ||
WHEN pct_erc20_tfer_txs >= 0.5 THEN 'erc20 transfer bot address' | ||
WHEN pct_nft_tfer_txs >= 0.5 THEN 'nft transfer bot address' | ||
WHEN pct_token_tfer_txs >= 0.5 THEN 'other token transfer bot address' | ||
ELSE 'non-token bot address' | ||
END AS name | ||
|
||
from bot_addresses | ||
|
||
) a |
9 changes: 9 additions & 0 deletions
9
..._category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_addresses_legacy.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{{ | ||
config( | ||
tags=['legacy'], | ||
|
||
alias = alias('op_chains_likely_bot_addresses', legacy_model=True) | ||
) | ||
}} | ||
|
||
SELECT 1 |
123 changes: 123 additions & 0 deletions
123
..._single_category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
{{ | ||
config( | ||
alias = alias('op_chains_likely_bot_contracts'), | ||
tags = ['dunesql'], | ||
post_hook='{{ expose_spells(\'["optimism","base"]\', | ||
"sector", | ||
"labels", | ||
\'["msilb7"]\') }}' | ||
) | ||
}} | ||
|
||
{% set op_chains = all_op_chains() %} --macro: all_op_chains.sql | ||
|
||
-- This could/should become a spell with some kind of modular logic approach so that we can plug in new detection logic over time (i.e. many of X method, or Y project's contracts) | ||
-- the core of this logic is on transaction frequency and sender concentration The "sender concentration" piece will get tested by mutlisigs / smart contract wallets. | ||
|
||
WITH first_contracts AS ( | ||
SELECT *, | ||
cast(num_erc20_tfer_txs as double) / cast( num_txs as double) AS pct_erc20_tfer_txs, | ||
cast(num_nft_tfer_txs as double) / cast( num_txs as double) AS pct_nft_tfer_txs, | ||
cast(num_token_tfer_txs as double) / cast( num_txs as double) AS pct_token_tfer_txs, | ||
cast(num_dex_trade_txs as double) / cast( num_txs as double) AS pct_dex_trade_txs, | ||
cast(num_perp_trade_txs as double) / cast( num_txs as double) AS pct_perp_trade_txs, | ||
cast(num_nft_trade_txs as double) / cast( num_txs as double) AS pct_nft_trade_txs | ||
|
||
FROM ( | ||
{% for chain in op_chains %} | ||
SELECT | ||
'{{chain}}' as blockchain, | ||
to AS contract, | ||
SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ source('erc20_' + chain,'evt_transfer') }} r WHERE t.hash = r.evt_tx_hash AND t.block_number = r.evt_block_number) THEN 1 ELSE 0 END) AS num_erc20_tfer_txs, | ||
SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ ref('nft_transfers') }} r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND r.blockchain = '{{chain}}') THEN 1 ELSE 0 END) AS num_nft_tfer_txs, | ||
|
||
SUM(CASE WHEN EXISTS (SELECT 1 FROM {{ source('erc20_' + chain,'evt_transfer') }} r WHERE t.hash = r.evt_tx_hash AND t.block_number = r.evt_block_number) THEN 1 | ||
WHEN EXISTS (SELECT 1 FROM {{ ref('nft_transfers') }} r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND r.blockchain = '{{chain}}') THEN 1 | ||
ELSE 0 END) AS num_token_tfer_txs, | ||
|
||
0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('dex_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_time = r.block_time AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_dex_trade_txs, | ||
0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('perpetual_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_time = r.block_time AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_perp_trade_txs, | ||
0 /*SUM(CASE WHEN EXISTS (SELECT 1 FROM [[ ref('nft_trades') ]] r WHERE t.hash = r.tx_hash AND t.block_number = r.block_number AND blockchain = '{{chain}}') THEN 1 ELSE 0 END)*/ AS num_nft_trade_txs, | ||
COUNT(*) AS num_txs, COUNT(DISTINCT "from") AS num_senders, COUNT(*)/COUNT(DISTINCT "from") AS txs_per_sender, | ||
|
||
cast(cast(COUNT(*) as double)/cast(COUNT(DISTINCT "from") as double) as double) / | ||
( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) | ||
AS txs_per_addr_per_hour, | ||
|
||
cast(COUNT(*) as double) / | ||
( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) | ||
AS txs_per_hour | ||
|
||
-- SUM( CASE WHEN substring(data from 1 for 10) = mode(substring(data from 1 for 10) THEN 1 ELSE 0 END) ) AS method_dupe | ||
FROM {{ source(chain ,'transactions') }} t | ||
GROUP BY 1,2 | ||
|
||
-- search for various potential bot indicators | ||
HAVING | ||
-- early bots: > 25 txs / hour per address | ||
(COUNT(*) >= 100 AND | ||
cast(cast(COUNT(*) as double)/cast(COUNT(DISTINCT "from") as double) as double) / | ||
( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) >= 25 | ||
) | ||
OR | ||
-- established bots: less than 30 senders & > 2.5k txs & > 0.5 txs / hr (to make sure we don't accidently catch active multisigs) | ||
(COUNT(*) >= 2500 AND COUNT(DISTINCT "from") <=30 | ||
AND cast(COUNT(*) as double) / | ||
( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) >= 0.5 | ||
) | ||
OR | ||
-- wider distribution bots: > 2.5k txs and > 1k txs per sender & > 0.5 txs / hr (to make sure we don't accidently catch active multisigs) | ||
( | ||
COUNT(*) >= 2500 AND cast(COUNT(*) as double)/cast(COUNT(DISTINCT "from") as double) >= 1000 | ||
AND cast(COUNT(*) as double) / | ||
( cast( date_DIFF('second', MIN(block_time), MAX(block_time)) as double) / (60.0*60.0) ) >= 0.5 | ||
) | ||
|
||
{% if not loop.last %} | ||
UNION ALL | ||
{% endif %} | ||
{% endfor %} | ||
) a | ||
) | ||
|
||
select | ||
blockchain, | ||
address, | ||
name, | ||
category, | ||
'msilb7' AS contributor, | ||
'query' AS source, | ||
timestamp '2023-03-11' as created_at, | ||
now() as updated_at, | ||
'likely_bot_contracts' as model_name, | ||
'persona' as label_type | ||
|
||
FROM ( | ||
|
||
SELECT | ||
blockchain, | ||
contract AS address, | ||
'likely bots' AS category, | ||
'likely bot contracts' AS name | ||
|
||
from first_contracts | ||
|
||
UNION ALL | ||
|
||
SELECT | ||
blockchain, | ||
contract AS address, | ||
'likely bot types' AS category, | ||
CASE | ||
WHEN pct_dex_trade_txs >= 0.5 THEN 'dex trade bot contract' | ||
WHEN pct_nft_trade_txs >= 0.5 THEN 'nft trade bot contract' | ||
WHEN pct_perp_trade_txs >= 0.5 THEN 'perp trade bot contract' | ||
WHEN pct_erc20_tfer_txs >= 0.5 THEN 'erc20 transfer bot contract' | ||
WHEN pct_nft_tfer_txs >= 0.5 THEN 'nft transfer bot contract' | ||
WHEN pct_token_tfer_txs >= 0.5 THEN 'other token transfer bot contract' | ||
ELSE 'non-token bot contract' | ||
END AS name | ||
|
||
from first_contracts | ||
|
||
) a |
10 changes: 10 additions & 0 deletions
10
..._category_labels__/likely_bots/op_chains/labels_op_chains_likely_bot_contracts_legacy.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
{{ | ||
config( | ||
tags=['legacy'], | ||
|
||
alias = alias('op_chains_likely_bot_contracts', legacy_model=True) | ||
) | ||
}} | ||
|
||
|
||
SELECT 1 |
Oops, something went wrong.