diff --git a/recognition/PerceiverTransform/Predicted.png b/recognition/PerceiverTransform/Predicted.png
new file mode 100644
index 0000000000..2030c3d1ec
Binary files /dev/null and b/recognition/PerceiverTransform/Predicted.png differ
diff --git a/recognition/PerceiverTransform/Predicted2.png b/recognition/PerceiverTransform/Predicted2.png
new file mode 100644
index 0000000000..ab453a5523
Binary files /dev/null and b/recognition/PerceiverTransform/Predicted2.png differ
diff --git a/recognition/PerceiverTransform/PredictingAlzheimersBasedonImage.png b/recognition/PerceiverTransform/PredictingAlzheimersBasedonImage.png
new file mode 100644
index 0000000000..63fb79def1
Binary files /dev/null and b/recognition/PerceiverTransform/PredictingAlzheimersBasedonImage.png differ
diff --git a/recognition/PerceiverTransform/Test Set Accuracy .png b/recognition/PerceiverTransform/Test Set Accuracy .png
new file mode 100644
index 0000000000..3cae342d20
Binary files /dev/null and b/recognition/PerceiverTransform/Test Set Accuracy .png differ
diff --git a/recognition/PerceiverTransform/Training accuracy over Epochs.png b/recognition/PerceiverTransform/Training accuracy over Epochs.png
new file mode 100644
index 0000000000..feed7267b2
Binary files /dev/null and b/recognition/PerceiverTransform/Training accuracy over Epochs.png differ
diff --git a/recognition/PerceiverTransform/Training loss over Epochs.png b/recognition/PerceiverTransform/Training loss over Epochs.png
new file mode 100644
index 0000000000..bdba46e531
Binary files /dev/null and b/recognition/PerceiverTransform/Training loss over Epochs.png differ
diff --git a/recognition/PerceiverTransform/__pycache__/dataset.cpython-311.pyc b/recognition/PerceiverTransform/__pycache__/dataset.cpython-311.pyc
new file mode 100644
index 0000000000..2e5db2136e
Binary files /dev/null and b/recognition/PerceiverTransform/__pycache__/dataset.cpython-311.pyc differ
diff --git a/recognition/PerceiverTransform/__pycache__/modules.cpython-311.pyc b/recognition/PerceiverTransform/__pycache__/modules.cpython-311.pyc
new file mode 100644
index 0000000000..21baec40ed
Binary files /dev/null and b/recognition/PerceiverTransform/__pycache__/modules.cpython-311.pyc differ
diff --git a/recognition/PerceiverTransform/dataset.py b/recognition/PerceiverTransform/dataset.py
new file mode 100644
index 0000000000..cd644bf7ed
--- /dev/null
+++ b/recognition/PerceiverTransform/dataset.py
@@ -0,0 +1,62 @@
+import os
+import torch
+from torch.utils.data import Dataset, DataLoader, random_split
+from torchvision import transforms
+from PIL import Image
+
+class AlzheimerDataset(Dataset):
+    def __init__(self, root_dir, mode='train', transform=None):
+        # Initializing the dataset; root_dir is the directory containing the full dataset.
+        self.root_dir = os.path.join(root_dir, mode)  # Appends 'train' or 'test' based on mode, separating the two datasets.
+        self.transform = transform
+        # Extract the file paths for each class.
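+        # Assumed directory layout (inferred from the path handling below):
+        #   root_dir/train/NC/*.jpeg, root_dir/train/AD/*.jpeg
+        #   root_dir/test/NC/*.jpeg,  root_dir/test/AD/*.jpeg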
+        self.nc_images = [os.path.join(self.root_dir, 'NC', img) for img in os.listdir(os.path.join(self.root_dir, 'NC')) if img.endswith('.jpeg')]
+        self.ad_images = [os.path.join(self.root_dir, 'AD', img) for img in os.listdir(os.path.join(self.root_dir, 'AD')) if img.endswith('.jpeg')]
+        self.total_images = self.nc_images + self.ad_images
+
+    def __len__(self):
+        # Returns the total number of images.
+        return len(self.total_images)
+
+    def __getitem__(self, idx):
+        if torch.is_tensor(idx):
+            idx = idx.tolist()
+        # Look up the image path for this index.
+        img_name = self.total_images[idx]
+        # Force single-channel grayscale so the flattened image size matches the model's input_dim.
+        image = Image.open(img_name).convert('L')
+
+        # Assign a label based on the class folder: 0 for NC, 1 for AD.
+        label = 0 if img_name in self.nc_images else 1
+
+        # Apply the transform if one was provided.
+        if self.transform:
+            image = self.transform(image)
+
+        return image, label
+
+def get_dataloaders(root_dir, batch_size):
+    # Define the preprocessing transforms, i.e. resizing and conversion to tensor.
+    transform = transforms.Compose([
+        transforms.Resize((224, 224)),  # Resize to fit typical network input sizes.
+        transforms.ToTensor(),
+    ])
+
+    # Instantiate AlzheimerDataset twice: once for training and once for testing.
+    train_dataset = AlzheimerDataset(root_dir, mode='train', transform=transform)
+    test_dataset = AlzheimerDataset(root_dir, mode='test', transform=transform)
+
+    # Split the training dataset into train and validation subsets using a 65-35 split.
+    train_length = int(0.65 * len(train_dataset))
+    valid_length = len(train_dataset) - train_length
+    train_subset, valid_subset = random_split(train_dataset, [train_length, valid_length])
+
+    # Create dataloaders for the train, validation, and test sets.
+    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
+    valid_loader = DataLoader(valid_subset, batch_size=batch_size, shuffle=False)
+    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
+
+    return train_loader, valid_loader, test_loader
\ No newline at end of file
diff --git a/recognition/PerceiverTransform/modules.py b/recognition/PerceiverTransform/modules.py
new file mode 100644
index 0000000000..0372b75409
--- /dev/null
+++ b/recognition/PerceiverTransform/modules.py
@@ -0,0 +1,77 @@
+import torch
+import torch.nn as nn
+
+class CrossAttention(nn.Module):
+    # Cross-attention mechanism where the latent array attends to the input data.
+
+    def __init__(self, embed_dim, num_heads):
+        super(CrossAttention, self).__init__()
+        self.attn = nn.MultiheadAttention(embed_dim, num_heads)
+        # Multi-head attention where the query comes from the latent space and the key-value pairs come from the input data.
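+    # Shape convention: x and latent arrive batch-first as (batch, seq, embed_dim).
+    # nn.MultiheadAttention is constructed without batch_first=True, so both
+    # tensors are permuted to (seq, batch, embed_dim) before the attention call.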
+
+    def forward(self, x, latent):
+        # Ensure the sequence lengths of x and latent match
+        # by padding or truncating latent if necessary.
+        seq_len_x = x.size(1)
+        seq_len_latent = latent.size(1)
+        if seq_len_latent < seq_len_x:
+            # Pad latent with zeros to match x's sequence length.
+            padding = torch.zeros(latent.size(0), seq_len_x - seq_len_latent, latent.size(2)).to(latent.device)
+            latent = torch.cat([latent, padding], dim=1)
+        elif seq_len_latent > seq_len_x:
+            # Truncate latent to match x's sequence length. Note: when x is a
+            # single token (as in Perceiver.forward below), this collapses the
+            # latent array to length 1.
+            latent = latent[:, :seq_len_x, :]
+
+        # Adjust dimensions to (seq, batch, embed) for the attention module.
+        x = x.permute(1, 0, 2)
+        latent = latent.permute(1, 0, 2)
+
+        # Apply attention, add the residual connection while both tensors are
+        # still in (seq, batch, embed) layout, then restore batch-first layout.
+        output, _ = self.attn(latent, x, x)
+        output = output + latent
+
+        return output.permute(1, 0, 2)
+
+
+class LatentTransformer(nn.Module):
+    # Latent transformer: applies self-attention followed by a feed-forward network to the latent array.
+
+    def __init__(self, embed_dim, num_heads):
+        super(LatentTransformer, self).__init__()
+        self.self_attention = CrossAttention(embed_dim, num_heads)
+        # Feed-forward network applied after self-attention.
+        self.feedforward = nn.Sequential(
+            nn.Linear(embed_dim, embed_dim),
+            nn.ReLU(),
+            nn.Linear(embed_dim, embed_dim)
+        )
+
+    def forward(self, latent):
+        # Self-attention is realised as cross-attention with the latent attending to itself.
+        latent = self.self_attention(latent, latent)
+        return self.feedforward(latent)
+
+class Perceiver(nn.Module):
+    # The Perceiver model integrates all components: the embedding layer, the cross-attention mechanism, and the latent transformer.
+
+    def __init__(self, input_dim, latent_dim, embed_dim, n_classes, num_heads):
+        super(Perceiver, self).__init__()
+        # Embedding layer to project the flattened input pixels to embed_dim.
+        self.embed = nn.Linear(input_dim, embed_dim)
+        self.latent = nn.Parameter(torch.randn(1, latent_dim, embed_dim))  # Latent array with a leading batch dimension.
+        self.cross_attention = CrossAttention(embed_dim, num_heads)
+        self.latent_transformer = LatentTransformer(embed_dim, num_heads)
+        # Final classification head.
+        self.classifier = nn.Linear(embed_dim, n_classes)
+
+    def forward(self, x):
+        # Flatten each image and embed it as a single token of shape (batch, 1, embed_dim).
+        x = x.view(x.size(0), -1)
+        x = self.embed(x)
+        x = x.unsqueeze(1)
+
+        # Repeat the latent array for each item in the batch.
+        latent = self.latent.repeat(x.size(0), 1, 1)
+
+        latent = self.cross_attention(x, latent)
+        latent = self.latent_transformer(latent)
+        # Average over the latent dimension before classification.
+        latent_mean = latent.mean(dim=1)
+
+        return self.classifier(latent_mean)
\ No newline at end of file
diff --git a/recognition/PerceiverTransform/predict.py b/recognition/PerceiverTransform/predict.py
new file mode 100644
index 0000000000..5b36b7fe95
--- /dev/null
+++ b/recognition/PerceiverTransform/predict.py
@@ -0,0 +1,48 @@
+import torch
+import torch.nn.functional as F
+import torchvision.transforms as transforms
+from PIL import Image
+import matplotlib.pyplot as plt
+from modules import Perceiver
+
+# Constants; use the GPU if available.
+DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+MODEL_PATH = './perceiver_model.pth'
+
+# Instantiate the model with the same hyperparameters used in training.
+model = Perceiver(
+    input_dim=224 * 224,
+    latent_dim=256,
+    embed_dim=256,
+    n_classes=2,
+    num_heads=4
+).to(DEVICE)
+
+# map_location lets the checkpoint load on CPU-only machines as well.
+model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
+model.eval()
+
+# Load and preprocess a sample image (resizing and converting to tensor).
+def predict_image(img_path):
+    transform = transforms.Compose([
+        transforms.Resize((224, 224)),
+        transforms.ToTensor(),
+    ])
+
+    # Open the image in grayscale (matching training) and apply the transforms.
+    image = Image.open(img_path).convert('L')
+    image_tensor = transform(image).unsqueeze(0).to(DEVICE)
+
+    # Run the image through the model and convert the logits to class probabilities.
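+    # Note: argmax over the raw logits selects the same class as argmax over
+    # the softmax output; softmax is computed here only to report a
+    # human-readable probability in the plot title.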
+    with torch.no_grad():  # No gradients are needed at inference time.
+        output = model(image_tensor)
+    probabilities = F.softmax(output, dim=1)
+    predicted_class = torch.argmax(output, dim=1).item()
+
+    # Display the image and the prediction.
+    plt.imshow(image, cmap='gray')
+    plt.title(f"Predicted Class: {'NC' if predicted_class == 0 else 'AD'}\nProbability: {probabilities[0][predicted_class]:.4f}")
+    plt.axis('off')
+    plt.show()
+
+if __name__ == "__main__":
+    img_path = input("Enter the path of the image: ")  # Provide the path to a test image.
+    predict_image(img_path)
\ No newline at end of file
diff --git a/recognition/PerceiverTransform/train.py b/recognition/PerceiverTransform/train.py
new file mode 100644
index 0000000000..b5bf3827c2
--- /dev/null
+++ b/recognition/PerceiverTransform/train.py
@@ -0,0 +1,105 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import matplotlib.pyplot as plt
+from modules import Perceiver
+from dataset import get_dataloaders
+
+# Constants and hyperparameters
+BATCH_SIZE = 5
+EPOCHS = 10
+LR = 0.005
+DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+MODEL_PATH = './perceiver_model.pth'
+
+# Model initialization with the chosen dimensions.
+model = Perceiver(
+    input_dim=224 * 224,
+    latent_dim=256,
+    embed_dim=256,
+    n_classes=2,  # NC and AD (2 classes)
+    num_heads=4
+).to(DEVICE)  # Use the GPU rather than the CPU when available.
+
+# Loss function and optimizer
+loss_fn = nn.CrossEntropyLoss()
+optimizer = optim.Adam(model.parameters(), lr=LR)
+
+# Data loaders for the dataset (edit this path to point at your local copy).
+train_loader, valid_loader, test_loader = get_dataloaders("C:\\Users\\AK\\Documents\\COMP3710\\AlzDataset", batch_size=BATCH_SIZE)
+
+# Training loop
+train_losses = []  # Training loss per epoch
+accuracies = []    # Training accuracy per epoch
+
+for epoch in range(EPOCHS):
+    model.train()
+    running_loss = 0.0
+    correct_train = 0
+    total_train = 0
+    for inputs, labels in train_loader:
+        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
+
+        optimizer.zero_grad()
+        outputs = model(inputs)
+        loss = loss_fn(outputs, labels)
+        loss.backward()
+        optimizer.step()
+
+        _, predicted = torch.max(outputs.data, 1)
+        total_train += labels.size(0)
+        correct_train += (predicted == labels).sum().item()
+        running_loss += loss.item()
+
+    train_losses.append(running_loss / len(train_loader))
+    accuracies.append(100 * correct_train / total_train)
+
+    print(f"Epoch {epoch+1}/{EPOCHS}, Train Loss: {train_losses[-1]:.4f}, Train Accuracy: {accuracies[-1]:.2f}%")
+
+    # Validation stage, run at the end of each epoch.
+    model.eval()
+    correct_val = 0
+    total_val = 0
+    with torch.no_grad():  # No gradients are needed during evaluation.
+        for inputs, labels in valid_loader:  # Iterating through the validation data.
+            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
+            outputs = model(inputs)
+            _, predicted = torch.max(outputs.data, 1)
+            total_val += labels.size(0)
+            correct_val += (predicted == labels).sum().item()
+
+    val_accuracy = 100 * correct_val / total_val
+    print(f"Validation Accuracy: {val_accuracy:.2f}%")
+
+# After training, evaluate on the test set.
+model.eval()  # Set the model to evaluation mode.
+correct_test = 0
+total_test = 0
+with torch.no_grad():
+    for inputs, labels in test_loader:  # Iterating through the test data.
+        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
+        outputs = model(inputs)
+        _, predicted = torch.max(outputs.data, 1)
+        total_test += labels.size(0)
+        correct_test += (predicted == labels).sum().item()
+
+test_accuracy = 100 * correct_test / total_test
+print(f"Test Accuracy: {test_accuracy:.2f}%")
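+# NOTE: predict.py rebuilds the model with the same hyperparameters and loads
+# the weights saved at MODEL_PATH below, so train.py must be run first.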
+
+# Plot training loss over epochs.
+plt.figure(figsize=(10, 6))
+plt.plot(train_losses, label='Training Loss')
+plt.xlabel('Epochs')
+plt.ylabel('Loss')
+plt.title('Training Loss over Epochs')
+plt.legend()
+plt.show()
+
+# Plot training accuracy over epochs.
+plt.figure(figsize=(10, 6))
+plt.plot(accuracies, label='Training Accuracy')
+plt.xlabel('Epochs')
+plt.ylabel('Accuracy')
+plt.title('Training Accuracy over Epochs')
+plt.legend()
+plt.show()
+
+# Save the trained model.
+torch.save(model.state_dict(), MODEL_PATH)
\ No newline at end of file
diff --git a/recognition/README.md b/recognition/README.md
new file mode 100644
index 0000000000..ecf7e17c52
--- /dev/null
+++ b/recognition/README.md
@@ -0,0 +1,52 @@
+# Perceiver Transformer
+Classifying Alzheimer's disease (normal cognition vs. AD) on the ADNI brain dataset using a Perceiver transformer, targeting a minimum accuracy of 0.8 on the test set.
+
+Akshath Katyal (47144691)
+
+## Task Information
+The Perceiver is a recent deep learning architecture that has shown strong performance across a diverse range of tasks. Here it is applied to classifying Alzheimer's disease from brain images labelled either AD (Alzheimer's disease) or NC (normal cognition). Accurate classification from non-invasive imaging can support early detection and monitoring of the disease.
+
+## Model/Algorithm Description
+The Perceiver Transformer is a deep learning architecture that removes the usual fixed-input-size constraint of Transformers. It is designed to handle a wide range of input modalities, from images to audio, without relying on domain-specific architectures. Its key advantage is the ability to process inputs of arbitrary size by maintaining a fixed-size set of latent variables that attends to the input data. This implementation was inspired by the tutorial at https://medium.com/@curttigges/the-annotated-perceiver-74752113eefb [1], which describes "Perceiver IO, landmark multimodal neural architectures that solve many of the issues with classic transformers", and was adapted for the ADNI dataset.
+
+## Dependencies
+* NumPy 1.26.1
+* PyTorch 2.0
+* Matplotlib 3.8.0
+* Python 3.11.4
+
+## Reproducibility
+1. In `train.py`, edit the dataset path passed to `get_dataloaders` so it points at your local `AD_NC` dataset.
+2. With the dependencies installed, run `python train.py`.
+3. Per-epoch results are printed during training; once training finishes, two plots are displayed.
+4. Then run `python predict.py`.
+5. Enter the path of any image from the test dataset to check whether it is classified correctly. (A minimal end-to-end sketch is included in the appendix at the end of this README.)
+
+## Results
+At first the model achieved surprisingly good results. This was unexpected, since it was trained for only a small number of epochs, whereas higher accuracies usually require many more. It was later discovered that the dataset was not being read correctly: the image labelling was wrong and every image received the same label, which produced the apparent 100% test accuracy after a quick 10-epoch run. After the model was fixed, with correct paths, data split, and labelling, and with images kept as grayscale rather than converted to RGB, the test accuracy dropped to around 50% (50.44% for 10 epochs and a batch size of 5), which is chance-level performance for a two-class problem.
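+
+One plausible contributor, based on an editor's reading of `modules.py` rather than a verified experiment: `CrossAttention.forward` truncates the latent array to the input's sequence length, and since `Perceiver.forward` embeds the whole flattened image as a single token, the 256 latent vectors collapse to one, leaving the model very little capacity. A quick shape check, assuming `modules.py` is importable:
+
+```python
+import torch
+from modules import Perceiver
+
+# Same hyperparameters as train.py.
+model = Perceiver(input_dim=224 * 224, latent_dim=256, embed_dim=256,
+                  n_classes=2, num_heads=4)
+
+x = torch.randn(2, 1, 224, 224)  # a batch of two grayscale images
+print(model(x).shape)            # torch.Size([2, 2])
+
+# Inside the model the image becomes a single (2, 1, 256) token, so the
+# (2, 256, 256) latent array is truncated to (2, 1, 256) before attention.
+print(model.latent.shape)        # torch.Size([1, 256, 256])
+```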
+
+Here is a screenshot of the training output and the test-set accuracy:
+
+![Test set accuracy](PerceiverTransform/Test%20Set%20Accuracy%20.png)
+
+Below are plots of the training loss and training accuracy over epochs. For reference, before the model was fixed:
+* Training accuracy reached 100% after a single epoch.
+* Training loss dropped to 0.
+
+![Training loss over epochs](PerceiverTransform/Training%20loss%20over%20Epochs.png)
+![Training accuracy over epochs](PerceiverTransform/Training%20accuracy%20over%20Epochs.png)
+
+Based on these loss and accuracy curves, the model is not learning the task properly. Increasing the number of epochs or decreasing the learning rate changed the results only marginally.
+
+## Pre-processing and Justification of Data Splits
+* Pre-processing consists of resizing the images to 224x224, a typical network input size, and converting them to tensors.
+* Justification of the data split:
+
+The data is divided into three parts. The largest portion (65% of the provided training set) is used for training, allowing the model to learn the underlying patterns and features. A smaller subset (35% of the training set) is used for validation, to tune hyperparameters and guard against overfitting; it informs decisions about the training process, such as when to stop early. The separate test set provides an unbiased estimate of the model's generalization.
+
+Changing the split might affect how well the model learns, but only marginally; the issue appears to lie in the training itself.
+
+## References
+* [1]: https://medium.com/@curttigges/the-annotated-perceiver-74752113eefb
+* [2]: https://github.com/clint-kristopher-morris/Perceiver-Transformer/tree/main
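+
+## Appendix: Minimal Usage Sketch
+A minimal end-to-end sketch of the pipeline (the dataset path is a placeholder; this assumes `dataset.py` and `modules.py` are importable and that the data follows the `train/NC`, `train/AD`, `test/NC`, `test/AD` layout expected by `AlzheimerDataset`):
+
+```python
+import torch
+from dataset import get_dataloaders
+from modules import Perceiver
+
+# "path/to/AD_NC" is a placeholder; point it at your local ADNI download.
+train_loader, valid_loader, test_loader = get_dataloaders("path/to/AD_NC", batch_size=5)
+
+images, labels = next(iter(train_loader))
+print(images.shape)  # torch.Size([5, 1, 224, 224]) for grayscale images
+print(labels)        # 0 = NC, 1 = AD
+
+model = Perceiver(input_dim=224 * 224, latent_dim=256, embed_dim=256,
+                  n_classes=2, num_heads=4)
+logits = model(images)
+print(logits.argmax(dim=1))  # predicted class per image (untrained weights here)
+```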