Make TCNPlus consistent, create the right config format, delete assertion #1

Open · wants to merge 13 commits into base: main
2 changes: 1 addition & 1 deletion src/GettingStarted.ipynb
@@ -1131,7 +1131,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.8.10"
},
"orig_nbformat": 4
},
37 changes: 37 additions & 0 deletions src/ResultsReview.ipynb
@@ -254,6 +254,43 @@
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_clf = pd.read_csv(\"/data/bchen158/ML4GW/ML4GWsearch/src/results/train_20231202/224821/plots/test_metrics/testset_preds.csv\")\n",
"count_pred_1 = run_clf[run_clf['prediction'] == 1].shape[0]\n",
"count_pred_0 = run_clf[run_clf['prediction'] == 0].shape[0]\n",
"\n",
"print(f\"Number of samples with label 1: {count_pred_1}\")\n",
"print(f\"Number of samples with label 0: {count_pred_0}\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of samples with label 1: 0\n",
"Number of samples with label 0: 56000\n"
]
}
],
"source": [
"run_clf = pd.read_csv(\"/data/bchen158/ML4GW/ML4GWsearch/src/results/train_20231202/224821/plots/test_metrics/testset_preds.csv\")\n",
"count_label_1 = run_clf['label'].sum()\n",
"count_label_0 = len(run_clf) - count_label_1\n",
"\n",
"print(f\"Number of samples with label 1: {count_label_1}\")\n",
"print(f\"Number of samples with label 0: {count_label_0}\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
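Review note: since testset_preds.csv carries both a label and a prediction column (the two cells above count them separately), a single cross-tabulation answers both questions at once and makes the all-negative test split visible directly. A minimal sketch, assuming the same CSV as above (path shortened here for illustration):

```python
import pandas as pd

# Same CSV the two cells above read; shortened path is illustrative only.
preds = pd.read_csv("testset_preds.csv")

# Confusion matrix of true labels vs. model predictions in one call.
print(pd.crosstab(preds["label"], preds["prediction"]))
print("accuracy:", (preds["label"] == preds["prediction"]).mean())
```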
166 changes: 166 additions & 0 deletions src/TSInterpret_test.py
@@ -0,0 +1,166 @@
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from PIL import Image
import torchvision.transforms as transforms
import utils
import os, json, wandb, argparse
from models.get_model import get_model
from models.tsai.tsai.models.TCN import TCN
from train_pl import GWDetectionLightningModule
import pytorch_lightning as pl
import itertools

PROJECT_DIR = os.getcwd()
with open(PROJECT_DIR + "/configs/train/base.json") as fp:
    base_config_dict = json.load(fp)
with open(PROJECT_DIR + "/configs/train/models_config/fcn_plus.json") as fp:
    model_config = json.load(fp)
config_dict = {**base_config_dict, **model_config}
config_dict['model_name'] = 'FCNPlus'
# Cap sample_size at the full dataset; otherwise draw batch_size * num_batches samples.
if config_dict['batch_size'] * config_dict['num_batches'] < config_dict['total_datapoints']:
    config_dict['sample_size'] = config_dict['batch_size'] * config_dict['num_batches']
else:
    config_dict['sample_size'] = config_dict['total_datapoints']
    config_dict['num_batches'] = config_dict['total_datapoints'] // config_dict['batch_size']

#################################################################
# Restoring the model to the training state from the checkpoint #
#################################################################
RESULTS_DIR = "/data/bchen158/ML4GW/ML4GWsearch/src/results/train_20231023/222238"
checkpoint_path = RESULTS_DIR + "/checkpoints/epoch=29-step=32820.ckpt"

# GWDetectionLightningModule was already imported from train_pl above.
# load_from_checkpoint is a classmethod, so call it on the class directly
# instead of on a freshly constructed instance.
model = GWDetectionLightningModule.load_from_checkpoint(checkpoint_path, config=config_dict)
model.eval()
from pprint import pprint
print("# the model configuration: ")
pprint(dict(model.hparams))
# load the Time-Series dataset
DATA_DIR = "/data/rgura001/ML4GWsearch/g2net-gravitational-wave-detection"
# Set up data loaders
from dataloaders.dataloader import get_dataloaders
train_dataloader, \
val_dataloader, \
test_dataloader,\
[train_df, val_df, test_df] \
= get_dataloaders(DATA_DIR=DATA_DIR,
batch_size=config_dict['batch_size'],
sample_size=config_dict['sample_size'],
ifos=config_dict['ifos'],
z_norm=config_dict['z_norm'],
highpass=config_dict['highpass'],
whiten=config_dict['whiten'],
scale=config_dict['scale'],
bandpass=config_dict['bandpass'],
# rng_seed=42 ## Only change this parameter if you want to use a different train/val/test split
)
# trainer = pl.Trainer(
# max_epochs=config_dict['epochs'],
# # log_every_n_steps=1,
# accelerator='gpu',
# ## devices=[config.use_gpu], ## just set using os.environ['CUDA_VISIBLE_DEVICES'] instead
# accumulate_grad_batches=config_dict['accumulate_grad_batches']
# )
# outputs = list(itertools.chain(test_preds))
# test_ids = []
# test_labels = []
# test_preds = []
# test_preds_proba = []
# idx = 0
# true_pred_idx = []
# for output in outputs:
# test_ids.extend(output['ids'])
# test_labels.extend(output['labels'])
# test_preds.extend(output['predictions'])
# test_preds_proba.append(output['prediction_probs'])
# if test_labels[idx]==0 and test_labels[idx] == test_preds[idx]:
# true_pred_idx.append(idx)
# idx += 1
# print("# idx for TN:")
# print(true_pred_idx)
# Function to map channel indices to the corresponding detector names
def map_channels_to_detectors(indices):
    detectors = ["LIGO Hanford", "LIGO Livingston", "Virgo"]
    return [detectors[i] for i in indices]
# Function to shuffle the channel order of the time-series data for each sample
def shuffle_time_series_data(dataloader, shuffled_num_batches):
    shuffled_data = []
    iters = 0
    # Default to None so the return does not fail if no sample matches below.
    list_idx, item_idx = None, None
    for x, y, id_num in dataloader:
        iters += 1
        shuffled_x = np.zeros_like(x)
        for i in range(x.shape[0]):
            indices = np.random.permutation(3)
            shuffled_x[i] = x[i, indices]
            mapped_detectors = map_channels_to_detectors(indices)
            # Track samples in the first batch whose third channel is
            # no longer Virgo after the shuffle.
            if iters == 1 and mapped_detectors[2] != "Virgo":
                list_idx = iters - 1
                item_idx = i
                print("list idx = ", list_idx)
                print("item_idx = ", item_idx)
                print(f"Sample {i} - Shuffled order: {mapped_detectors}")
        id_num = np.asarray(id_num)
        shuffled_data.append([shuffled_x, y.numpy(), id_num])
        if iters > shuffled_num_batches:
            break
    return shuffled_data, list_idx, item_idx

# shuffle_train_data, list_idx, item_idx = shuffle_time_series_data(train_dataloader, 10)
# shuffle_test_data, list_idx, item_idx = shuffle_time_series_data(test_dataloader, 10)

# for info in shuffle_train_data:
# train_x = info[0][item_idx]
# train_y = info[1][item_idx]
# train_id = info[2][item_idx]
# break

# for info in shuffle_test_data:
# test_x = info[0][item_idx]
# test_y = info[1][item_idx]
# test_batch = torch.from_numpy(info[0])
# test_id = info[2][item_idx]
# break

for x, y, id_num in train_dataloader:
    train_x = x[1]
    train_y = y[1]
    train_id = id_num[1]
    break
# print(x.shape)
# break

for x, y, id_num in test_dataloader:
    test_x = x[1]
    test_y = y[1]
    test_batch = x
    test_id = id_num
    break
# print(x.shape)
# break

# print("test_batch = ", test_batch.shape)
# print("Type of test batch = ", type(test_batch))
device = torch.device("cpu")
model.to(device)
# Inference only here, so skip gradient tracking for this forward pass.
with torch.no_grad():
    logits = model(test_batch)
test_preds = torch.argmax(logits, dim=1)
print("test_batch: \n", test_batch.numpy().shape)
print("test prediction is: ")
print(test_preds)
print(test_id)

train_x = train_x.numpy()
train_y = train_y.numpy()
test_x = test_x.numpy()
test_y = test_y.numpy()

from TSInterpret.InterpretabilityModels.Saliency.TSR import TSR
int_mod = TSR(model, train_x.shape[-2], train_x.shape[-1], method='IG', mode='time')
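The script stops after constructing the explainer. A minimal sketch of the step that would produce saliency plots like the TSinter-*.png files below, assuming TSInterpret's TSR exposes the explain/plot interface documented for its saliency methods (signatures should be checked against the pinned version):

```python
# Sketch only: explain() and plot() arguments are assumptions based on
# TSInterpret's saliency API, not verified against this installed version.
item = test_x.reshape(1, test_x.shape[-2], test_x.shape[-1])
exp = int_mod.explain(item, labels=int(test_y), TSR=True)

# Save the saliency heatmap alongside the committed TSinter-*.png files.
int_mod.plot(np.array([test_x]), exp, save="TSinter.png")
```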
Binary file added src/TSinter-1.png
Binary file added src/TSinter-10.png
Binary file added src/TSinter-9.png
Binary file added src/TSinter.png
6 changes: 3 additions & 3 deletions src/configs/train/base.json
@@ -10,12 +10,12 @@
"whiten": false,
"scale": true,
"bandpass": true,
"epochs": 10,
"epochs": 100,
"batch_size": 128,
"num_batches": 100,
"num_batches": 4375,
"optimizer": "sgd",
"learning_rate": 1e-1,
"lr_scheduler": "step",
"stop_early": true,
"accumulate_grad_batches": 2
"accumulate_grad_batches": 3
}
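For scale, the new values let each epoch draw 128 × 4375 = 560,000 datapoints before the total_datapoints cap in TSInterpret_test.py applies; a quick arithmetic check:

```python
batch_size, num_batches = 128, 4375
# 560000 datapoints per epoch, before any cap at total_datapoints
print(batch_size * num_batches)
```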
2 changes: 1 addition & 1 deletion src/configs/train/models_config/fcn_plus.json
@@ -1,5 +1,5 @@
{
"layers": [128, 256, 128],
"kernel_sizes":[7,5,3],
"batch_norm": false
"batch_norm": true
}
9 changes: 9 additions & 0 deletions src/configs/train/models_config/mlp.json
@@ -0,0 +1,9 @@
{
"num_hidden_units": 50,
"levels": 8,
"layers": [128, 500, 128],
"ps": [0.1, 0.2, 0.2],
"kernel_size": 5,
"conv_dropout": 0,
"fc_dropout": 0
}
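Review note: given the tsai imports in TSInterpret_test.py, `layers`, `ps`, and `fc_dropout` match tsai's MLP constructor, while `num_hidden_units`, `levels`, `kernel_size`, and `conv_dropout` read like TCN-style keys carried over. A sketch of how only the MLP-relevant keys would be consumed, with c_in/c_out/seq_len values assumed from the three-detector strain input used elsewhere in the repo:

```python
# Sketch: key names follow tsai's MLP signature; c_in/c_out/seq_len are assumptions.
from tsai.models.MLP import MLP

cfg = {"layers": [128, 500, 128], "ps": [0.1, 0.2, 0.2], "fc_dropout": 0}
model = MLP(c_in=3, c_out=2, seq_len=4096,
            layers=cfg["layers"], ps=cfg["ps"], fc_dropout=cfg["fc_dropout"])
```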
7 changes: 7 additions & 0 deletions src/configs/train/models_config/rnn.json
@@ -0,0 +1,7 @@
{
"hidden_size": 100,
"n_layers": 1,
"rnn_dropout": 0.0,
"bidirectional": true,
"fc_dropout": 0.0
}
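These keys line up one-to-one with tsai's RNN constructor, so the file can be splatted straight into it; a minimal sketch, with c_in/c_out again assumed:

```python
# Sketch: assumes tsai's RNN accepts these keys verbatim; c_in/c_out are assumptions.
import json
from tsai.models.RNN import RNN

with open("src/configs/train/models_config/rnn.json") as fp:
    rnn_cfg = json.load(fp)

model = RNN(c_in=3, c_out=2, **rnn_cfg)
```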
2 changes: 1 addition & 1 deletion src/configs/train/stop_early.json
@@ -1,5 +1,5 @@
{
"stop_early__monitor": "val_loss",
"stop_early__mode": "min",
"stop_early__patience": 10
"stop_early__patience": 5
}
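Assuming the `stop_early__` prefix is stripped before these values reach PyTorch Lightning, the file maps directly onto the standard EarlyStopping callback, so the patience change halves how long training waits on a stalled val_loss:

```python
# Sketch: assumes the training loop strips the "stop_early__" prefix.
from pytorch_lightning.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=5)
# e.g. trainer = pl.Trainer(callbacks=[early_stop], ...)
```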