diff --git a/llm_blender/pair_ranker/pairrm.py b/llm_blender/pair_ranker/pairrm.py
index 8c136e6..56b866a 100644
--- a/llm_blender/pair_ranker/pairrm.py
+++ b/llm_blender/pair_ranker/pairrm.py
@@ -1,7 +1,5 @@
-from regex import P
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 
 from transformers.models.deberta_v2.modeling_deberta_v2 import (
     DebertaV2PreTrainedModel,
diff --git a/pairrm_to_hf.ipynb b/pairrm_to_hf.ipynb
index d70db4c..b2009eb 100644
--- a/pairrm_to_hf.ipynb
+++ b/pairrm_to_hf.ipynb
@@ -99,6 +99,13 @@
     "trainer.save_model(\"./hf_PairRM/final_checkpoint\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Verifying Correctness"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -108,77 +115,78 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2024-01-05 15:46:09.979198: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2024-01-05 15:46:10.826106: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64\n",
-      "2024-01-05 15:46:10.826187: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64\n",
-      "2024-01-05 15:46:10.826193: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
+      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+      "You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
      ]
     },
     {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b40fb528bf804a00be8ce15a5747a7c7",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "705223617e6346f2b2b97d730dbc9481",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Downloading model.safetensors:   0%|          | 0.00/1.74G [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1.9003021717071533, -1.2547134160995483]\n",
+      "tensor([ True, False], device='cuda:0')\n"
+     ]
     }
    ],
    "source": [
     "import os\n",
     "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
     "from llm_blender.pair_ranker.pairrm import DebertaV2PairRM\n",
-    "pairrm = DebertaV2PairRM.from_pretrained(\"llm-blender/PairRM-hf\", device_map=\"cuda:0\")"
+    "from transformers import AutoTokenizer\n",
+    "from typing import List\n",
+    "pairrm = DebertaV2PairRM.from_pretrained(\"llm-blender/PairRM-hf\", device_map=\"cuda:0\")\n",
+    "tokenizer = AutoTokenizer.from_pretrained('llm-blender/PairRM-hf')\n",
+    "source_prefix = \"<|source|>\"\n",
+    "cand1_prefix = \"<|candidate1|>\"\n",
+    "cand2_prefix = \"<|candidate2|>\"\n",
+    "inputs = [\"hello!\", \"I love you!\"]\n",
+    "candidates_A = [\"hi!\", \"I hate you!\"]\n",
+    "candidates_B = [\"f**k off!\", \"I love you, too!\"]\n",
+    "def tokenize_pair(sources:List[str], candidate1s:List[str], candidate2s:List[str]):\n",
+    "    ids = []\n",
+    "    assert len(sources) == len(candidate1s) == len(candidate2s)\n",
+    "    for i in range(len(sources)):\n",
+    "        source_ids = tokenizer.encode(source_prefix + sources[i])\n",
+    "        candidate1_ids = tokenizer.encode(cand1_prefix + candidate1s[i])\n",
+    "        candidate2_ids = tokenizer.encode(cand2_prefix + candidate2s[i])\n",
+    "        ids.append(source_ids + candidate1_ids + candidate2_ids)\n",
+    "    encodings = tokenizer.pad({\"input_ids\": ids}, return_tensors=\"pt\")\n",
+    "    return encodings\n",
+    "\n",
+    "encodings = tokenize_pair(inputs, candidates_A, candidates_B)\n",
+    "encodings = {k:v.to(pairrm.device) for k,v in encodings.items()}\n",
+    "outputs = pairrm(**encodings)\n",
+    "logits = outputs.logits.tolist()\n",
+    "comparison_results = outputs.logits > 0\n",
+    "print(logits)\n",
+    "# [1.9003021717071533, -1.2547134160995483]\n",
+    "print(comparison_results)\n",
+    "# tensor([ True, False], device='cuda:0'), i.e. whether candidate A is better than candidate B for each input"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Verifying Correctness"
+    "### Load from the llm-blender wrapper"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2024-01-05 15:11:00.611021: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
-      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
-      "2024-01-05 15:11:01.483983: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64\n",
-      "2024-01-05 15:11:01.484081: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64\n",
-      "2024-01-05 15:11:01.484088: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
       "WARNING:root:No ranker config provided, no ranker loaded, please load ranker first through load_ranker()\n",
       "WARNING:root:No fuser config provided, no fuser loaded, please load fuser first through load_fuser()\n",
       "/home/dongfu/miniconda3/envs/llm-blender/lib/python3.9/site-packages/dataclasses_json/core.py:187: RuntimeWarning: 'NoneType' object value of non-optional type load_checkpoint detected when decoding RankerConfig.\n",
@@ -193,149 +201,47 @@
      "text": [
       "Successfully loaded ranker from  /home/dongfu/data/.cache/huggingface/hub/llm-blender/PairRM\n"
      ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
-    "import llm_blender\n",
-    "blender = llm_blender.Blender()\n",
-    "# Load Ranker\n",
-    "blender.loadranker(\"llm-blender/PairRM\") # load ranker checkpoint"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
+    },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Ranking candidates:   0%|          | 0/1 [00:00<?, ?it/s]"
+      "Ranking candidates: 100%|██████████| 1/1 [00:00<00:00, 18.13it/s]"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "tensor([[     1, 128001,  12018,    300,      2,      1, 128002,   8595,    300,\n",
-      "              2,      1, 128003,   2994,   1225,   1225,   1165,    442,    300,\n",
-      "              2,      0,      0,      0,      0],\n",
-      "        [     1, 128001,    273,    472,    274,    300,      2,      1, 128002,\n",
-      "            273,   3254,    274,    300,      2,      1, 128003,    273,    472,\n",
-      "            274,    261,    461,    300,      2]], device='cuda:0')\n"
+      "[ 1.9   -1.255]\n",
+      "[ True False]\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Ranking candidates: 100%|██████████| 1/1 [00:00<00:00,  1.69it/s]\n"
+      "\n"
      ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "array([ 1.9  , -1.255], dtype=float16)"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
+    "import os\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
+    "import llm_blender\n",
+    "blender = llm_blender.Blender()\n",
+    "# Load Ranker\n",
+    "blender.loadranker(\"llm-blender/PairRM\") # load ranker checkpoint\n",
     "inputs = [\"hello!\", \"I love you!\"]\n",
     "candidates_A = [\"hi!\", \"I hate you!\"]\n",
     "candidates_B = [\"f**k off!\", \"I love you, too!\"]\n",
-    "comparison_results = blender.compare(inputs, candidates_A, candidates_B, return_logits=True, mode=\"[A,B]\")\n",
-    "comparison_results"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'[CLS]<|source|> hello![SEP][CLS]<|candidate1|> hi![SEP][CLS]<|candidate2|> f**k off![SEP][PAD][PAD][PAD][PAD]'"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "blender.ranker.tokenizer.decode([     1, 128001,  12018,    300,      2,      1, 128002,   8595,    300,\n",
-    "              2,      1, 128003,   2994,   1225,   1225,   1165,    442,    300,\n",
-    "              2,      0,      0,      0,      0], skip_special_tokens=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "SequenceClassifierOutput(loss=None, logits=tensor([1.9003], device='cuda:0', grad_fn=<MeanBackward1>), hidden_states=None, attentions=None)"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "input_ids = torch.tensor([     1, 128001,  12018,    300,      2,      1, 128002,   8595,    300,\n",
-    "              2,      1, 128003,   2994,   1225,   1225,   1165,    442,    300,\n",
-    "              2,      0,      0,      0,      0], dtype=torch.long, device='cuda:0')\n",
-    "attention_masks = input_ids.ne(0).float()\n",
-    "input_ids = input_ids.unsqueeze(0)\n",
-    "attention_masks = attention_masks.unsqueeze(0)\n",
-    "pairrm(input_ids=input_ids, attention_mask=attention_masks)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "SequenceClassifierOutput(loss=None, logits=tensor([-1.2547], device='cuda:0', grad_fn=<MeanBackward1>), hidden_states=None, attentions=None)"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "input_ids = torch.tensor([     1, 128001,    273,    472,    274,    300,      2,      1, 128002,\n",
-    "            273,   3254,    274,    300,      2,      1, 128003,    273,    472,\n",
-    "            274,    261,    461,    300,      2], dtype=torch.long, device='cuda:0')\n",
-    "attention_masks = input_ids.ne(0).float()\n",
-    "input_ids = input_ids.unsqueeze(0)\n",
-    "attention_masks = attention_masks.unsqueeze(0)\n",
-    "pairrm(input_ids=input_ids, attention_mask=attention_masks)"
+    "logits = blender.compare(inputs, candidates_A, candidates_B, return_logits=True, mode=\"[A,B]\")\n",
+    "comparison_results = logits > 0\n",
+    "print(logits)\n",
+    "# [ 1.9   -1.255]\n",
+    "print(comparison_results)\n",
+    "# [ True False], i.e. whether candidate A is better than candidate B for each input"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {