# momentum v9
import pandas as pd
import numpy as np
from heapq import nsmallest
import matplotlib.pyplot as plt
NUMBER_OF_TOKENS = 10
NUM_BUCKETS = 5
# Daily close prices: one 'timestamp' column plus one column per coin, with BTC as the first coin column
close_prices = pd.read_csv("usdt_price_data.csv")
close_prices['timestamp'] = pd.to_datetime(close_prices['timestamp'])
close_prices.set_index('timestamp', inplace=True)

# Drop stablecoins and fiat/pegged assets so the momentum ranking only covers volatile tokens
deleted_coins = ['PAX', 'BUSD', 'USDP', 'FDUSD', 'USDC', 'UST', 'TUSD', 'SUSD', 'EUR', 'EURI', 'GBP', 'AUD', 'PAXG']
close_prices = close_prices.drop(columns=deleted_coins, errors='ignore')

# PARAMETERS
# lookback window (days) used to measure momentum
lookback = 35
# number of most recent days to skip when measuring momentum (0 = use data up to the rebalance date)
last_days = 0
# holding period (days) before the portfolio is rebalanced
holding_days = 7
# if the BTC return over the lookback window is below this threshold, we skip altcoins for that
# period (recorded as a 0% return); otherwise we buy the ranked altcoins
threshold = 0.05

# BTC is assumed to be the first column of the price file
btc_price = close_prices.iloc[:, 0]
def information_discreteness(prices):
    """Information discreteness: sign(cumulative return) * (% negative days - % positive days)."""
    returns = prices.pct_change()
    positive_returns = returns[returns > 0]
    negative_returns = returns[returns < 0]
    negative_positive_diff = negative_returns.count() / len(prices) - positive_returns.count() / len(prices)
    cumul_return = prices.iloc[-1] / prices.iloc[0] - 1
    sign = lambda x: 1 if x > 0 else -1 if x < 0 else 0
    return sign(cumul_return) * negative_positive_diff
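
# Illustrative only (not part of the strategy run below): a quick sanity check of the sign
# convention, assuming a small synthetic price series. A smooth uptrend has many small positive
# days, so the score is negative; negative scores mark "continuous" momentum, which the bucket
# selection below prefers.
# example_prices = pd.Series([100, 101, 102, 103, 104, 105, 104, 106, 107, 108])
# print(information_discreteness(example_prices))  # expected to be negative (smooth uptrend)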
def calculate_beta(df, coin_column):
    """Beta of a coin's daily returns against Bitcoin's (BTC is assumed to be the first column)."""
    btc_returns = df.iloc[:, 0].pct_change()
    coin_returns = df[coin_column].pct_change()
    # Drop rows where either return series is NaN
    valid_data = pd.concat([btc_returns, coin_returns], axis=1).dropna()
    # beta = cov(coin, BTC) / var(BTC)
    covariance = valid_data.iloc[:, 0].cov(valid_data.iloc[:, 1])
    variance = valid_data.iloc[:, 0].var()
    return covariance / variance
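
# Illustrative only: beta of a single coin versus BTC over the most recent lookback window,
# assuming the price file contains an 'ETH' column (column names depend on the CSV).
# eth_beta = calculate_beta(close_prices.iloc[-lookback:], 'ETH')
# print(f"ETH beta vs BTC over the last {lookback} days: {eth_beta:.2f}")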
def double_sort_momentum(df, lookback, last_days, holding_days, threshold, num_buckets=NUM_BUCKETS, coins_per_bucket=NUMBER_OF_TOKENS, commission=0.001):
    weekly_returns = {i: [] for i in range(num_buckets)}  # returns per rebalancing period, keyed by bucket
    buckets = {}  # most recent bucket composition (returned for inspection)
    for i in range(lookback, len(df), holding_days):
        try:
            # Market filter: only buy altcoins when BTC momentum over the lookback window exceeds the threshold
            if btc_price.iloc[i] / btc_price.iloc[i - lookback] - 1 > threshold:
                # Per-token metrics for this rebalancing date
                returns_dict = {}
                discreteness_dict = {}
                beta_dict = {}
                # Calculate metrics for every token with a valid price today
                for col in df.columns[1:]:
                    if not np.isnan(df[col].iloc[i]):
                        try:
                            # Return over the lookback window (optionally skipping the most recent last_days days)
                            token_return = df[col].iloc[i - last_days] / df[col].iloc[i - lookback] - 1
                            # Beta versus BTC over the same window
                            token_beta = calculate_beta(df.iloc[i - lookback:i - last_days], col)
                            # Information discreteness over the same window
                            token_discreteness = information_discreteness(df[col].iloc[i - lookback:i - last_days])
                            returns_dict[col] = token_return
                            beta_dict[col] = token_beta
                            discreteness_dict[col] = token_discreteness
                        except Exception:
                            # Skip tokens with insufficient history in this window
                            pass
                # Skip the period if there are not enough tokens to fill every bucket
                if len(returns_dict) < num_buckets * coins_per_bucket:
                    continue
                # Sort tokens by lookback return (descending: bucket 0 holds the strongest momentum)
                sorted_by_returns = sorted(returns_dict.items(), key=lambda x: x[1], reverse=True)
                # Create equal-sized buckets; the last bucket absorbs any remainder
                bucket_size = len(sorted_by_returns) // num_buckets
                buckets = {}
                for bucket in range(num_buckets):
                    start_idx = bucket * bucket_size
                    end_idx = start_idx + bucket_size if bucket < num_buckets - 1 else len(sorted_by_returns)
                    bucket_tokens = sorted_by_returns[start_idx:end_idx]
                    # Filter out the highest-beta tokens (top 10% by beta) within this bucket
                    bucket_betas = {token: beta_dict[token] for token, _ in bucket_tokens}
                    beta_threshold = np.percentile(list(bucket_betas.values()), 90)
                    filtered_tokens = [(token, ret) for token, ret in bucket_tokens
                                       if bucket_betas[token] <= beta_threshold]
                    # Among the remaining tokens, pick those with the lowest information discreteness
                    # (smooth, "continuous" momentum rather than a few large jumps)
                    filtered_discreteness = {token: discreteness_dict[token] for token, _ in filtered_tokens}
                    selected_tokens = nsmallest(coins_per_bucket, filtered_discreteness.items(), key=lambda x: x[1])
                    buckets[bucket] = [token for token, _ in selected_tokens]
                # Equal-weighted return of each bucket over the holding period, net of commissions
                for bucket, tokens in buckets.items():
                    total_return = 0
                    valid_positions = 0
                    for token in tokens:
                        try:
                            weekly_return = (df[token].iloc[i + holding_days - 1] * (1 - commission)) / (df[token].iloc[i] * (1 + commission)) - 1
                            total_return += weekly_return
                            valid_positions += 1
                        except Exception:
                            try:
                                # Near the end of the sample: close the position at the last available price
                                weekly_return = (df[token].iloc[-1] * (1 - commission)) / (df[token].iloc[i] * (1 + commission)) - 1
                                total_return += weekly_return
                                valid_positions += 1
                            except Exception:
                                continue
                    if valid_positions > 0:
                        weekly_returns[bucket].append(total_return / valid_positions)
                    else:
                        weekly_returns[bucket].append(0)
            else:
                # BTC filter not passed: stay out of altcoins and record a 0% return for every bucket
                for bucket in range(num_buckets):
                    weekly_returns[bucket].append(0)
        except Exception:
            break
    return weekly_returns, buckets
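
# Illustrative only: with the defaults (NUM_BUCKETS = 5, NUMBER_OF_TOKENS = 10) a rebalance needs
# at least 5 * 10 = 50 rankable tokens. With, say, 120 tokens, each return-sorted bucket holds
# 120 // 5 = 24 names, roughly the top 10% of those by beta are dropped, and the 10 smoothest
# (lowest information discreteness) of the rest are held for the next 7 days.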
def analyze_bucket_performance(weekly_returns_dict):
    bucket_performance = {}
    for bucket, returns in weekly_returns_dict.items():
        returns = [x for x in returns if not np.isnan(x)]
        if returns:
            # Cumulative return over the whole backtest
            cumulative_return = np.prod([1 + r for r in returns]) - 1
            # Annualized return (each observation covers one 7-day holding period)
            ann_return = (1 + cumulative_return) ** (365 / (len(returns) * 7)) - 1
            # Maximum drawdown of the bucket's equity curve
            cumulative_returns = np.cumprod([1 + r for r in returns])
            peak = np.maximum.accumulate(cumulative_returns)
            drawdown = (cumulative_returns - peak) / peak
            max_drawdown = np.min(drawdown)
            bucket_performance[bucket] = {
                'annualized_return': ann_return,
                'max_drawdown': max_drawdown,
                # per-period (weekly) Sharpe ratio, not annualized
                'sharpe_ratio': np.mean(returns) / np.std(returns) if np.std(returns) != 0 else 0
            }
    return bucket_performance
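
# Illustrative only: the function also works on hand-made return series, e.g. two fictitious
# buckets of weekly returns (values below are made up for demonstration).
# example_returns = {0: [0.05, -0.02, 0.03], 1: [0.00, 0.00, 0.01]}
# print(analyze_bucket_performance(example_returns))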
def calculate_btc_drawdown(df):
    """Maximum drawdown of BTC over the full sample, used as a benchmark for the buckets."""
    btc_prices = df['BTC'].values
    peak = np.maximum.accumulate(btc_prices)
    drawdown = (btc_prices - peak) / peak
    return np.min(drawdown)
# Run the strategy
weekly_returns_by_bucket, final_buckets = double_sort_momentum(close_prices, lookback, last_days, holding_days, threshold)
performance = analyze_bucket_performance(weekly_returns_by_bucket)
btc_drawdown = calculate_btc_drawdown(close_prices)

# Print results
print("\nBitcoin Maximum Drawdown:", "{:.2%}".format(btc_drawdown))
for bucket, metrics in performance.items():
    print(f"\nBucket {bucket} Performance:")
    print(f"Annualized Return: {metrics['annualized_return']:.2%}")
    print(f"Maximum Drawdown: {metrics['max_drawdown']:.2%}")
    print(f"Sharpe Ratio: {metrics['sharpe_ratio']:.2f}")

# Plot cumulative returns for each bucket
plt.figure(figsize=(12, 6))
for bucket, returns in weekly_returns_by_bucket.items():
    returns = [x for x in returns if not np.isnan(x)]
    cumulative_returns = np.cumprod([1 + r for r in returns])
    plt.plot(cumulative_returns, label=f'Bucket {bucket}')
plt.legend()
plt.title('Cumulative Returns by Bucket')
plt.xlabel('Weeks')
plt.ylabel('Cumulative Return')
plt.show()