Make TCNPlus consistent, create the right config format, delete assertion #1

Open · wants to merge 13 commits into base: main
2 changes: 1 addition & 1 deletion src/GettingStarted.ipynb
@@ -1131,7 +1131,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.8.10"
},
"orig_nbformat": 4
},
37 changes: 37 additions & 0 deletions src/ResultsReview.ipynb
@@ -254,6 +254,43 @@
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_clf = pd.read_csv(\"/data/bchen158/ML4GW/ML4GWsearch/src/results/train_20231202/224821/plots/test_metrics/testset_preds.csv\")\n",
"count_pred_1 = run_clf[run_clf['prediction'] == 1].shape[0]\n",
"count_pred_0 = run_clf[run_clf['prediction'] == 0].shape[0]\n",
"\n",
"print(f\"Number of samples with label 1: {count_pred_1}\")\n",
"print(f\"Number of samples with label 0: {count_pred_0}\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of samples with label 1: 0\n",
"Number of samples with label 0: 56000\n"
]
}
],
"source": [
"run_clf = pd.read_csv(\"/data/bchen158/ML4GW/ML4GWsearch/src/results/train_20231202/224821/plots/test_metrics/testset_preds.csv\")\n",
"count_label_1 = run_clf['label'].sum()\n",
"count_label_0 = len(run_clf) - count_label_1\n",
"\n",
"print(f\"Number of samples with label 1: {count_label_1}\")\n",
"print(f\"Number of samples with label 0: {count_label_0}\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
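Review note: since testset_preds.csv carries both a label and a prediction column (the two cells above count them separately), a single cross-tabulation answers both questions at once and makes the all-negative test split visible directly. A minimal sketch, assuming the same CSV as above (path shortened here for illustration):

```python
import pandas as pd

# Same CSV the two cells above read; shortened path is illustrative only.
preds = pd.read_csv("testset_preds.csv")

# Confusion matrix of true labels vs. model predictions in one call.
print(pd.crosstab(preds["label"], preds["prediction"]))
print("accuracy:", (preds["label"] == preds["prediction"]).mean())
```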
166 changes: 166 additions & 0 deletions src/TSInterpret_test.py
@@ -0,0 +1,166 @@
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from PIL import Image
import torchvision.transforms as transforms
import utils
import os, json, wandb, argparse
from models.get_model import get_model
from models.tsai.tsai.models.TCN import TCN
from train_pl import GWDetectionLightningModule
import pytorch_lightning as pl
import itertools

PROJECT_DIR = os.getcwd()
with open(PROJECT_DIR + "/configs/train/base.json") as fp:
    base_config_dict = json.load(fp)
with open(PROJECT_DIR + "/configs/train/models_config/fcn_plus.json") as fp:
    model_config = json.load(fp)
config_dict = {**base_config_dict, **model_config}
config_dict['model_name'] = 'FCNPlus'
# Cap sample_size at the full dataset; otherwise draw batch_size * num_batches samples.
if config_dict['batch_size'] * config_dict['num_batches'] < config_dict['total_datapoints']:
    config_dict['sample_size'] = config_dict['batch_size'] * config_dict['num_batches']
else:
    config_dict['sample_size'] = config_dict['total_datapoints']
    config_dict['num_batches'] = config_dict['total_datapoints'] // config_dict['batch_size']

#################################################################
# Restoring the model to the training state from the checkpoint #
#################################################################
RESULTS_DIR = "/data/bchen158/ML4GW/ML4GWsearch/src/results/train_20231023/222238"
checkpoint_path = RESULTS_DIR + "/checkpoints/epoch=29-step=32820.ckpt"

# GWDetectionLightningModule was already imported from train_pl above.
# load_from_checkpoint is a classmethod, so call it on the class directly
# instead of on a freshly constructed instance.
model = GWDetectionLightningModule.load_from_checkpoint(checkpoint_path, config=config_dict)
model.eval()
from pprint import pprint
print("# the model configuration: ")
pprint(dict(model.hparams))
# load the Time-Series dataset
DATA_DIR = "/data/rgura001/ML4GWsearch/g2net-gravitational-wave-detection"
# Set up data loaders
from dataloaders.dataloader import get_dataloaders
train_dataloader, \
val_dataloader, \
test_dataloader,\
[train_df, val_df, test_df] \
= get_dataloaders(DATA_DIR=DATA_DIR,
batch_size=config_dict['batch_size'],
sample_size=config_dict['sample_size'],
ifos=config_dict['ifos'],
z_norm=config_dict['z_norm'],
highpass=config_dict['highpass'],
whiten=config_dict['whiten'],
scale=config_dict['scale'],
bandpass=config_dict['bandpass'],
# rng_seed=42 ## Only change this parameter if you want to use a different train/val/test split
)
# trainer = pl.Trainer(
# max_epochs=config_dict['epochs'],
# # log_every_n_steps=1,
# accelerator='gpu',
# ## devices=[config.use_gpu], ## just set using os.environ['CUDA_VISIBLE_DEVICES'] instead
# accumulate_grad_batches=config_dict['accumulate_grad_batches']
# )
# outputs = list(itertools.chain(test_preds))
# test_ids = []
# test_labels = []
# test_preds = []
# test_preds_proba = []
# idx = 0
# true_pred_idx = []
# for output in outputs:
# test_ids.extend(output['ids'])
# test_labels.extend(output['labels'])
# test_preds.extend(output['predictions'])
# test_preds_proba.append(output['prediction_probs'])
# if test_labels[idx]==0 and test_labels[idx] == test_preds[idx]:
# true_pred_idx.append(idx)
# idx += 1
# print("# idx for TN:")
# print(true_pred_idx)
# Function to map channel indices to the corresponding detector names
def map_channels_to_detectors(indices):
    detectors = ["LIGO Hanford", "LIGO Livingston", "Virgo"]
    return [detectors[i] for i in indices]
# Function to shuffle the channel order of the time-series data for each sample
def shuffle_time_series_data(dataloader, shuffled_num_batches):
    shuffled_data = []
    iters = 0
    # Default to None so the return does not fail if no sample matches below.
    list_idx, item_idx = None, None
    for x, y, id_num in dataloader:
        iters += 1
        shuffled_x = np.zeros_like(x)
        for i in range(x.shape[0]):
            indices = np.random.permutation(3)
            shuffled_x[i] = x[i, indices]
            mapped_detectors = map_channels_to_detectors(indices)
            # Track samples in the first batch whose third channel is
            # no longer Virgo after the shuffle.
            if iters == 1 and mapped_detectors[2] != "Virgo":
                list_idx = iters - 1
                item_idx = i
                print("list idx = ", list_idx)
                print("item_idx = ", item_idx)
                print(f"Sample {i} - Shuffled order: {mapped_detectors}")
        id_num = np.asarray(id_num)
        shuffled_data.append([shuffled_x, y.numpy(), id_num])
        if iters > shuffled_num_batches:
            break
    return shuffled_data, list_idx, item_idx

# shuffle_train_data, list_idx, item_idx = shuffle_time_series_data(train_dataloader, 10)
# shuffle_test_data, list_idx, item_idx = shuffle_time_series_data(test_dataloader, 10)

# for info in shuffle_train_data:
# train_x = info[0][item_idx]
# train_y = info[1][item_idx]
# train_id = info[2][item_idx]
# break

# for info in shuffle_test_data:
# test_x = info[0][item_idx]
# test_y = info[1][item_idx]
# test_batch = torch.from_numpy(info[0])
# test_id = info[2][item_idx]
# break

for x, y, id_num in train_dataloader:
    train_x = x[1]
    train_y = y[1]
    train_id = id_num[1]
    break
# print(x.shape)
# break

for x, y, id_num in test_dataloader:
    test_x = x[1]
    test_y = y[1]
    test_batch = x
    test_id = id_num
    break
# print(x.shape)
# break

# print("test_batch = ", test_batch.shape)
# print("Type of test batch = ", type(test_batch))
device = torch.device("cpu")
model.to(device)
# Inference only here, so skip gradient tracking for this forward pass.
with torch.no_grad():
    logits = model(test_batch)
test_preds = torch.argmax(logits, dim=1)
print("test_batch: \n", test_batch.numpy().shape)
print("test prediction is: ")
print(test_preds)
print(test_id)

train_x = train_x.numpy()
train_y = train_y.numpy()
test_x = test_x.numpy()
test_y = test_y.numpy()

from TSInterpret.InterpretabilityModels.Saliency.TSR import TSR
int_mod = TSR(model, train_x.shape[-2], train_x.shape[-1], method='IG', mode='time')
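The script stops after constructing the explainer. A minimal sketch of the step that would produce saliency plots like the TSinter-*.png files below, assuming TSInterpret's TSR exposes the explain/plot interface documented for its saliency methods (signatures should be checked against the pinned version):

```python
# Sketch only: explain() and plot() arguments are assumptions based on
# TSInterpret's saliency API, not verified against this installed version.
item = test_x.reshape(1, test_x.shape[-2], test_x.shape[-1])
exp = int_mod.explain(item, labels=int(test_y), TSR=True)

# Save the saliency heatmap alongside the committed TSinter-*.png files.
int_mod.plot(np.array([test_x]), exp, save="TSinter.png")
```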
Binary file added src/TSinter-1.png
Binary file added src/TSinter-10.png
Binary file added src/TSinter-9.png
Binary file added src/TSinter.png
6 changes: 3 additions & 3 deletions src/configs/train/base.json
@@ -10,12 +10,12 @@
"whiten": false,
"scale": true,
"bandpass": true,
"epochs": 10,
"epochs": 100,
"batch_size": 128,
"num_batches": 100,
"num_batches": 4375,
"optimizer": "sgd",
"learning_rate": 1e-1,
"lr_scheduler": "step",
"stop_early": true,
"accumulate_grad_batches": 2
"accumulate_grad_batches": 3
}
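For scale, the new values let each epoch draw 128 × 4375 = 560,000 datapoints before the total_datapoints cap in TSInterpret_test.py applies; a quick arithmetic check:

```python
batch_size, num_batches = 128, 4375
# 560000 datapoints per epoch, before any cap at total_datapoints
print(batch_size * num_batches)
```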
2 changes: 1 addition & 1 deletion src/configs/train/models_config/fcn_plus.json
@@ -1,5 +1,5 @@
{
"layers": [128, 256, 128],
"kernel_sizes":[7,5,3],
"batch_norm": false
"batch_norm": true
}
9 changes: 9 additions & 0 deletions src/configs/train/models_config/mlp.json
@@ -0,0 +1,9 @@
{
"num_hidden_units": 50,
"levels": 8,
"layers": [128, 500, 128],
"ps": [0.1, 0.2, 0.2],
"kernel_size": 5,
"conv_dropout": 0,
"fc_dropout": 0
}
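Review note: given the tsai imports in TSInterpret_test.py, `layers`, `ps`, and `fc_dropout` match tsai's MLP constructor, while `num_hidden_units`, `levels`, `kernel_size`, and `conv_dropout` read like TCN-style keys carried over. A sketch of how only the MLP-relevant keys would be consumed, with c_in/c_out/seq_len values assumed from the three-detector strain input used elsewhere in the repo:

```python
# Sketch: key names follow tsai's MLP signature; c_in/c_out/seq_len are assumptions.
from tsai.models.MLP import MLP

cfg = {"layers": [128, 500, 128], "ps": [0.1, 0.2, 0.2], "fc_dropout": 0}
model = MLP(c_in=3, c_out=2, seq_len=4096,
            layers=cfg["layers"], ps=cfg["ps"], fc_dropout=cfg["fc_dropout"])
```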
7 changes: 7 additions & 0 deletions src/configs/train/models_config/rnn.json
@@ -0,0 +1,7 @@
{
"hidden_size": 100,
"n_layers": 1,
"rnn_dropout": 0.0,
"bidirectional": true,
"fc_dropout": 0.0
}
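These keys line up one-to-one with tsai's RNN constructor, so the file can be splatted straight into it; a minimal sketch, with c_in/c_out again assumed:

```python
# Sketch: assumes tsai's RNN accepts these keys verbatim; c_in/c_out are assumptions.
import json
from tsai.models.RNN import RNN

with open("src/configs/train/models_config/rnn.json") as fp:
    rnn_cfg = json.load(fp)

model = RNN(c_in=3, c_out=2, **rnn_cfg)
```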
2 changes: 1 addition & 1 deletion src/configs/train/stop_early.json
@@ -1,5 +1,5 @@
{
"stop_early__monitor": "val_loss",
"stop_early__mode": "min",
"stop_early__patience": 10
"stop_early__patience": 5
}
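Assuming the `stop_early__` prefix is stripped before these values reach PyTorch Lightning, the file maps directly onto the standard EarlyStopping callback, so the patience change halves how long training waits on a stalled val_loss:

```python
# Sketch: assumes the training loop strips the "stop_early__" prefix.
from pytorch_lightning.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=5)
# e.g. trainer = pl.Trainer(callbacks=[early_stop], ...)
```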