forta-network · RCantu92 · Jun 3, 2024 · Jun 4, 2024 · Jun 4, 2024 · Jun 4, 2024
diff --git a/early-attack-detector-py/src/agent.py b/early-attack-detector-py/src/agent.py
@@ -21,6 +21,8 @@
     FUNDING_TIME,
     EXTRA_TIME_BOTS,
     EXTRA_TIME_DAYS,
+    ETH_BLOCKS_IN_ONE_DAY,
+    THREE_SECOND_BLOCKS_IN_ONE_DAY
 )
 from src.findings import ContractFindings
 from src.logger import logger
@@ -30,6 +32,8 @@
     get_function_signatures,
     get_storage_addresses,
     is_contract,
+    get_tp_attacker_list,
+    update_tp_attacker_list
 )
 from src.storage import get_secrets
 
@@ -41,7 +45,7 @@
 
 def initialize():
     """
-    this function loads the ml model.
+    this function loads the ml model and fetches the list of true positive attackers
     """
 
     global CHAIN_ID
@@ -80,6 +84,9 @@ def initialize():
     BETA = 'beta' in package['name']
     logger.info(f"Beta: {BETA}")
 
+    global TP_ATTACKER_LIST
+    TP_ATTACKER_LIST = get_tp_attacker_list()
+
 
 def exec_model(w3, opcodes: str, contract_creator: str) -> tuple:
     """
@@ -262,7 +269,10 @@ def detect_malicious_contract(
     findings = []
 
     if created_contract_address is not None and code is not None:
-        if len(code) > BYTE_CODE_LENGTH_THRESHOLD:
+        from_label_type = "contract" if is_contract(w3, from_) else "eoa"
+        from_eoa_in_tp_list = from_label_type == "eoa" and from_ in TP_ATTACKER_LIST
+
+        if (len(code) > BYTE_CODE_LENGTH_THRESHOLD) or from_eoa_in_tp_list:
             try:
                 opcodes = EvmBytecode(code).disassemble()
             except Exception as e:
@@ -275,12 +285,12 @@ def detect_malicious_contract(
             function_signatures = get_function_signatures(w3, opcodes)
             logger.info(f"{created_contract_address}: score={model_score}")
 
-            if model_score is None or model_score < MODEL_INFO_THRESHOLD:
+            if model_score is None or model_score < MODEL_INFO_THRESHOLD and not from_eoa_in_tp_list:
                 if ENV == 'dev':
                     logger.info(f"Score is less than threshold: {model_score} < {MODEL_INFO_THRESHOLD}. Not creating alert.")
                 return []
             # If we are not in beta, we only create alerts if the score is above the threshold
-            if model_score < MODEL_THRESHOLD and not BETA:
+            if model_score < MODEL_THRESHOLD and not BETA and not from_eoa_in_tp_list:
                 if ENV == 'dev':
                     logger.info(f"Score is less than threshold: {model_score} < {MODEL_THRESHOLD} and we are not in beta. Not checking for labels.")
                 return []
@@ -299,10 +309,9 @@ def detect_malicious_contract(
                 MODEL_THRESHOLD,
                 error=error,
             )
-            if model_score is not None and model_score >= MODEL_INFO_THRESHOLD:
+            if (model_score is not None and model_score >= MODEL_INFO_THRESHOLD) or from_eoa_in_tp_list:
                 # If it's a potential alert, we create labels. Otherwise, we don't
-                if model_score >= MODEL_THRESHOLD:
-                    from_label_type = "contract" if is_contract(w3, from_) else "eoa"
+                if (model_score >= MODEL_THRESHOLD) or from_eoa_in_tp_list:
                     labels = [
                         {
                             "entity": created_contract_address,
@@ -326,7 +335,7 @@ def detect_malicious_contract(
                             "entity": from_,
                             "entity_type": EntityType.Address,
                             "label": "attacker",
-                            "confidence": model_score,
+                            "confidence": 1.0 if from_eoa_in_tp_list else model_score,
                         },
                         ]
                     severity = FindingSeverity.Critical
@@ -359,6 +368,24 @@ def handle_transaction(
 ):
     return real_handle_transaction(transaction_event)
 
+def provide_handle_block(w3):
+    """Return a block handler that periodically refreshes TP_ATTACKER_LIST; `w3` is accepted but unused."""
+    def handle_block(block_event: forta_agent.block_event.BlockEvent) -> list:
+        global TP_ATTACKER_LIST
+        # Chain id 1 uses ETH_BLOCKS_IN_ONE_DAY; every other chain is treated as a three-second-block chain.
+        refresh_interval = THREE_SECOND_BLOCKS_IN_ONE_DAY if CHAIN_ID != 1 else ETH_BLOCKS_IN_ONE_DAY
+        if block_event.block_number % refresh_interval != 0:
+            return []
+        TP_ATTACKER_LIST = update_tp_attacker_list(TP_ATTACKER_LIST)
+        return []
+    return handle_block
+
+
+real_handle_block = provide_handle_block(web3)  # handler instance that handle_block() delegates to
+
+
+def handle_block(block_event: forta_agent.block_event.BlockEvent):
+    return real_handle_block(block_event)  # thin entry point; the logic lives in provide_handle_block
 
 def check_funding_labels(address: str, tx_timestamp: int, n_days: int=365, extra_time_bots: str=None, extra_time: int=180):
     t = time.time()

diff --git a/early-attack-detector-py/src/constants.py b/early-attack-detector-py/src/constants.py
@@ -33,4 +33,7 @@
 EXTRA_TIME_BOTS = [
     '0xa91a31df513afff32b9d85a2c2b7e786fdd681b3cdd8d93d6074943ba31ae400'  # funding tornado cash
 ]
-EXTRA_TIME_DAYS = 180
+EXTRA_TIME_DAYS = 180
+
+ETH_BLOCKS_IN_ONE_DAY = 7200  # 86,400 s per day / 7,200 blocks = one block every 12 s
+THREE_SECOND_BLOCKS_IN_ONE_DAY = 28800  # 86,400 s per day / 3 s per block
diff --git a/early-attack-detector-py/src/utils.py b/early-attack-detector-py/src/utils.py
@@ -6,6 +6,10 @@
 from concurrent.futures import ThreadPoolExecutor
 import functools
 import operator
+import pandas as pd
+import logging
+import io
+import re
 
 from src.constants import CONTRACT_SLOT_ANALYSIS_DEPTH, MASK, BOT_ID
 from src.logger import logger
@@ -116,4 +120,41 @@ def alert_count(chain_id: int, alert_id: str) -> int:
     except Exception as err:
         logger.error(f"Error obtaining alert counts: {err}")
 
-    return alert_id_counts, alert_counts
+    return alert_id_counts, alert_counts
+
+def get_tp_attacker_list() -> list:
+    """Fetch the true-positive attacker CSV and return its unique Ethereum addresses.
+
+    Reads the 'Attacker' column (cells may hold several comma-separated values) and
+    keeps only strings shaped like 0x + 40 hex chars. Falls back to the local
+    'tp_list.csv' when the download fails or does not return HTTP 200.
+    """
+    url = 'https://raw.githubusercontent.com/forta-network/starter-kits/main/early-attack-detector-py/tp_list.csv'
+    content = None
+    try:
+        res = requests.get(url, timeout=10)
+        logger.info(f"Made request to fetch tp list: {res.status_code}")
+        if res.status_code == 200:
+            content = res.content.decode('utf-8')
+    except requests.RequestException as err:
+        logger.error(f"Error fetching tp list: {err}")
+    if content is None:
+        # Close the fallback file deterministically instead of leaking the handle.
+        with open('tp_list.csv', 'r') as fallback:
+            content = fallback.read()
+
+    eth_address_pattern = re.compile(r'^0x[a-fA-F0-9]{40}$')
+    cells = pd.read_csv(io.StringIO(content), sep=',')['Attacker']
+    # Non-string cells (e.g. NaN floats from empty rows) are skipped.
+    parts = (part.strip() for cell in cells if isinstance(cell, str) for part in cell.split(','))
+    # NOTE(review): addresses keep their original casing; confirm callers compare with matching case.
+    return list({part for part in parts if eth_address_pattern.match(part)})
+
+def update_tp_attacker_list(current_tp_list) -> list:
+    """Append freshly fetched attackers missing from `current_tp_list` (in place) and return it."""
+    known = set(current_tp_list)  # set lookup avoids rescanning the list for every fetched entry
+    for entry in get_tp_attacker_list():
+        if entry not in known:
+            known.add(entry)
+            current_tp_list.append(entry)
+    return current_tp_list