diff --git a/eval10_experiment5.ipynb b/eval10_experiment5.ipynb
new file mode 100644
index 0000000..b2b805a
--- /dev/null
+++ b/eval10_experiment5.ipynb
@@ -0,0 +1,552 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[31mmenpo 0.8.1 has requirement matplotlib<2.0,>=1.4, but you'll have matplotlib 3.0.2 which is incompatible.\u001b[0m\n",
+      "\u001b[31mmenpo 0.8.1 has requirement pillow<5.0,>=3.0, but you'll have pillow 5.4.1 which is incompatible.\u001b[0m\n",
+      "\u001b[31mmenpo 0.8.1 has requirement scipy<1.0,>=0.16, but you'll have scipy 1.2.0 which is incompatible.\u001b[0m\n",
+      "\u001b[33mYou are using pip version 10.0.1, however version 19.2.2 is available.\n",
+      "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install -q -r requirements.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5"
+   },
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "import os\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "\n",
+    "from PIL import Image\n",
+    "\n",
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.utils.data as D\n",
+    "from torch.optim.lr_scheduler import ExponentialLR\n",
+    "import torch.nn.functional as F\n",
+    "from torch.autograd import Variable\n",
+    "\n",
+    "from torchvision import transforms\n",
+    "\n",
+    "from ignite.engine import Events\n",
+    "from scripts.ignite import create_supervised_evaluator, create_supervised_trainer\n",
+    "from ignite.metrics import Loss, Accuracy\n",
+    "from ignite.contrib.handlers.tqdm_logger import ProgressBar\n",
+    "from ignite.handlers import EarlyStopping, ModelCheckpoint\n",
+    "from ignite.contrib.handlers import LinearCyclicalScheduler, CosineAnnealingScheduler\n",
+    "\n",
+    "from tqdm import tqdm_notebook\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "from efficientnet_pytorch import EfficientNet\n",
+    "\n",
+    "from scripts.evaluate import eval_model, eval_model_10\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define dataset and model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
+    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
+   },
+   "outputs": [],
+   "source": [
+    "img_dir = '../input/rxrxairgb'\n",
+    "path_data = '../input/rxrxaicsv'\n",
+    "device = 'cuda'\n",
+    "batch_size = 32\n",
+    "torch.manual_seed(0)\n",
+    "model_name = 'efficientnet-b3'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "jitter = (0.6, 1.4)\n",
+    "class ImagesDS(D.Dataset):\n",
+    "    # augmentation recipe taken straight from https://arxiv.org/pdf/1812.01187.pdf\n",
+    "    transform_train = transforms.Compose([\n",
+    "        transforms.RandomResizedCrop(224),\n",
+    "        transforms.ColorJitter(brightness=jitter, contrast=jitter, saturation=jitter, hue=.1),\n",
+    "        transforms.RandomHorizontalFlip(p=0.5),\n",
+    "        # PCA Noise should go here,\n",
+    "        transforms.ToTensor(),\n",
+    "        transforms.Normalize(mean=(123.68, 116.779, 103.939), std=(58.393, 57.12, 57.375))\n",
+    "    ])\n",
+    "\n",
+    "    transform_validation = transforms.Compose([\n",
+    "        transforms.CenterCrop(224),\n",
+    "        transforms.ToTensor(),\n",
+    "        transforms.Normalize(mean=(123.68, 116.779, 103.939), std=(58.393, 57.12, 57.375))\n",
+    "    ])\n",
+    "\n",
+    "    def __init__(self, df, img_dir=img_dir, mode='train', validation=False, site=1):\n",
+    "        self.records = df.to_records(index=False)\n",
+    "        self.site = site\n",
+    "        self.mode = mode\n",
+    "        self.img_dir = img_dir\n",
+    "        self.len = df.shape[0]\n",
+    "        self.validation = validation\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def _load_img_as_tensor(file_name, validation):\n",
+    "        with Image.open(file_name) as img:\n",
+    "            if not validation:\n",
+    "                return ImagesDS.transform_train(img)\n",
+    "            else:\n",
+    "                return ImagesDS.transform_validation(img)\n",
+    "\n",
+    "    def _get_img_path(self, index, site=1):\n",
+    "        experiment, well, plate = self.records[index].experiment, self.records[index].well, self.records[index].plate\n",
+    "        return f'{self.img_dir}/{self.mode}/{experiment}_{plate}_{well}_s{site}.jpeg'\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        img1, img2 = [self._load_img_as_tensor(self._get_img_path(index, site), self.validation) for site in [1,2]]\n",
+    "        if self.mode == 'train':\n",
+    "            return img1, img2, int(self.records[index].sirna)\n",
+    "        else:\n",
+    "            return img1, img2, self.records[index].id_code\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.len\n",
+    "\n",
+    "\n",
+    "class TestImagesDS(D.Dataset):\n",
+    "    transform = transforms.Compose([\n",
+    "        transforms.RandomCrop(224),\n",
+    "        transforms.ToTensor(),\n",
+    "        transforms.Normalize(mean=(123.68, 116.779, 103.939), std=(58.393, 57.12, 57.375))\n",
+    "    ])\n",
+    "\n",
+    "    def __init__(self, df, img_dir=img_dir, mode='test', validation=False, site=1):\n",
+    "        self.records = df.to_records(index=False)\n",
+    "        self.site = site\n",
+    "        self.mode = mode\n",
+    "        self.img_dir = img_dir\n",
+    "        self.len = df.shape[0]\n",
+    "        self.validation = validation\n",
+    "\n",
+    "    @staticmethod\n",
+    "    def _load_img_as_tensor(file_name):\n",
+    "        with Image.open(file_name) as img:\n",
+    "            return TestImagesDS.transform(img)\n",
+    "\n",
+    "    def _get_img_path(self, index, site=1):\n",
+    "        experiment, well, plate = self.records[index].experiment, self.records[index].well, self.records[index].plate\n",
+    "        return f'{self.img_dir}/{self.mode}/{experiment}_{plate}_{well}_s{site}.jpeg'\n",
+    "\n",
+    "    def get_image_pair(self, index):\n",
+    "        return [self._load_img_as_tensor(self._get_img_path(index, site)) for site in [1,2]]\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        image_pairs = [self.get_image_pair(index) for _ in range(10)]\n",
+    "\n",
+    "        return image_pairs, self.records[index].id_code\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.len"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dataframe for the test set\n",
+    "df_test = pd.read_csv(path_data+'/test.csv')\n",
+    "\n",
+    "# pytorch test dataset & loader\n",
+    "ds_test = TestImagesDS(df_test, mode='test', validation=True)\n",
+    "tloader = D.DataLoader(ds_test, batch_size=1, shuffle=False, num_workers=4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loaded pretrained weights for efficientnet-b3\n"
+     ]
+    }
+   ],
+   "source": [
+    "class EfficientNetTwoInputs(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(EfficientNetTwoInputs, self).__init__()\n",
+    "        self.classes = 1108\n",
+    "\n",
+    "        model = EfficientNet.from_pretrained(model_name, num_classes=1108)\n",
+    "        num_ftrs = model._fc.in_features\n",
+    "        model._fc = nn.Identity()\n",
+    "\n",
+    "        self.resnet = model\n",
+    "        self.fc = nn.Linear(num_ftrs * 2, self.classes)\n",
+    "\n",
+    "    def forward(self, x1, x2):\n",
+    "        x1_out = self.resnet(x1)\n",
+    "        x2_out = self.resnet(x2)\n",
+    "\n",
+    "        N, _, _, _ = x1.size()\n",
+    "        x1_out = x1_out.view(N, -1)\n",
+    "        x2_out = x2_out.view(N, -1)\n",
+    "\n",
+    "        out = torch.cat((x1_out, x2_out), 1)\n",
+    "        out = self.fc(out)\n",
+    "\n",
+    "        return out\n",
+    "\n",
+    "model = EfficientNetTwoInputs()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Evaluate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "68b16e5780184c4dbf02e5aae2777d81",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "HBox(children=(IntProgress(value=0, max=19897), HTML(value='')))"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "model.cuda()\n",
+    "eval_model_10(model, tloader, 'models/Model_efficientnet-b3_31.pth', path_data)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "state": {
+     "2b62ae829edc4d60acf1d9a9e1d598d8": {
+      "model_module": "@jupyter-widgets/controls",
+      "model_module_version": "1.4.0",
+      "model_name": "DescriptionStyleModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/controls",
+       "_model_module_version": "1.4.0",
+       "_model_name": "DescriptionStyleModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.1.0",
+       "_view_name": "StyleView",
+       "description_width": ""
+      }
+     },
+     "7740dfb227e54da8b1510dac2d094406": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.1.0",
+      "model_name": "LayoutModel",
+      "state": {
+       "_model_module": "@jupyter-widgets/base",
+       "_model_module_version": "1.1.0",
+       "_model_name": "LayoutModel",
+       "_view_count": null,
+       "_view_module": "@jupyter-widgets/base",
+       "_view_module_version": "1.1.0",
+       "_view_name": "LayoutView",
+       "align_content": null,
+       "align_items": null,
+       "align_self": null,
+       "border": null,
+       "bottom": null,
+       "display": null,
+       "flex": null,
+       "flex_flow": null,
+       "grid_area": null,
+       "grid_auto_columns": null,
+       "grid_auto_flow": null,
+       "grid_auto_rows": null,
+       "grid_column": null,
+       "grid_gap": null,
+       "grid_row": null,
+       "grid_template_areas": null,
+       "grid_template_columns": null,
+       "grid_template_rows": null,
+       "height": null,
+       "justify_content": null,
+       "left": null,
+       "margin": null,
+       "max_height": null,
+       "max_width": null,
+       "min_height": null,
+       "min_width": null,
+       "order": null,
+       "overflow": null,
+       "overflow_x": null,
+       "overflow_y": null,
+       "padding": null,
+       "right": null,
+       "top": null,
+       "visibility": null,
+       "width": null
+      }
+     },
+     "921a9c670b6e4a2db86c75a7ff5d9ee6": {
+      "model_module": "@jupyter-widgets/base",
+      "model_module_version": "1.1.0",
"model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.1.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.1.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9dfcb7497f8842af817750eec565b8b9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.4.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.4.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.4.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_921a9c670b6e4a2db86c75a7ff5d9ee6", + "placeholder": "​", + "style": "IPY_MODEL_2b62ae829edc4d60acf1d9a9e1d598d8", + "value": " 94% 2151/2283 [22:45<01:23, 1.58it/s]" + } + }, + "d2df0eb5abab4e3895ec792681cfa8d2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.4.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.4.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.1.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "initial" + } + }, + "e3ff3ae302394523bb5b28ee009842d5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.4.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.4.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.4.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_ff74a4321a59419cb24e116db9dd1e3e", + "IPY_MODEL_9dfcb7497f8842af817750eec565b8b9" + ], + "layout": "IPY_MODEL_7740dfb227e54da8b1510dac2d094406" + } + }, + "fad7703039454db7af5d7fb4bce65003": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.1.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.1.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.1.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ff74a4321a59419cb24e116db9dd1e3e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.4.0", + "model_name": "IntProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.4.0", + "_model_name": "IntProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.4.0", + "_view_name": "ProgressView", + "bar_style": "", + "description": "Loss: 128.54232788085938", + "description_tooltip": null, + "layout": "IPY_MODEL_fad7703039454db7af5d7fb4bce65003", + "max": 2283, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d2df0eb5abab4e3895ec792681cfa8d2", + "value": 2151 + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/experiment6.ipynb b/experiment6.ipynb index 4de870b..10f6fda 100644 --- a/experiment6.ipynb +++ b/experiment6.ipynb @@ -372,8 +372,8 @@ "metadata": {}, "outputs": [], "source": [ - "pbar = ProgressBar(bar_format='')\n", - "pbar.attach(trainer, output_transform=lambda x: {'loss': x})" + "# pbar = ProgressBar(bar_format='')\n", + "# pbar.attach(trainer, output_transform=lambda x: {'loss': x})" ] }, { diff --git a/experiment6.py b/experiment6.py index 1528992..cc8a056 100644 --- a/experiment6.py +++ b/experiment6.py @@ -270,8 +270,8 @@ def save_best_epoch_only(engine): # In[13]: -pbar = ProgressBar(bar_format='') -pbar.attach(trainer, output_transform=lambda x: {'loss': x}) +# pbar = ProgressBar(bar_format='') +# pbar.attach(trainer, output_transform=lambda x: {'loss': x}) # #### Train diff --git a/scripts/evaluate.py b/scripts/evaluate.py index b055d72..8ba2de1 100644 --- a/scripts/evaluate.py +++ b/scripts/evaluate.py @@ -4,6 +4,7 @@ from scipy import misc import torch from tqdm import tqdm_notebook +from collections import Counter from sklearn.model_selection import train_test_split @@ -19,6 +20,27 @@ def eval_model(model, loader, file_path, path_data, device='cuda'): idx = output.max(dim=-1)[1].cpu().numpy() preds = np.append(preds, idx, axis=0) + submission = pd.read_csv(path_data + '/test.csv') + submission['sirna'] = preds.astype(int) + submission.to_csv(f'submission.csv', index=False, columns=['id_code','sirna']) + +def eval_model_10(model, loader, file_path, path_data, device='cuda'): + model.load_state_dict(torch.load(file_path)) + model.eval() + with torch.no_grad(): + preds = np.empty(0) + for image_pairs, _ in tqdm_notebook(loader): + idx_counter = Counter() + for image_pair in image_pairs: + x1, x2 = image_pair + x1 = x1.to(device) + x2 = x2.to(device) + output = model(x1,x2) + idx = output.max(dim=-1)[1].cpu().numpy() + idx_counter.update(idx) + + preds = np.append(preds, [idx_counter.most_common(1)[0][0]], axis=0) + submission = pd.read_csv(path_data + '/test.csv') submission['sirna'] = preds.astype(int) submission.to_csv(f'submission.csv', index=False, columns=['id_code','sirna']) \ No newline at end of file