diff --git a/llm_blender/pair_ranker/pairrm.py b/llm_blender/pair_ranker/pairrm.py index 8c136e6..56b866a 100644 --- a/llm_blender/pair_ranker/pairrm.py +++ b/llm_blender/pair_ranker/pairrm.py @@ -1,7 +1,5 @@ -from regex import P import torch import torch.nn as nn -import torch.nn.functional as F from transformers.models.deberta_v2.modeling_deberta_v2 import ( DebertaV2PreTrainedModel, diff --git a/pairrm_to_hf.ipynb b/pairrm_to_hf.ipynb index d70db4c..b2009eb 100644 --- a/pairrm_to_hf.ipynb +++ b/pairrm_to_hf.ipynb @@ -99,6 +99,13 @@ "trainer.save_model(\"./hf_PairRM/final_checkpoint\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Verifying Correctness" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -108,77 +115,78 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2024-01-05 15:46:09.979198: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-01-05 15:46:10.826106: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64\n", - "2024-01-05 15:46:10.826187: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64\n", - "2024-01-05 15:46:10.826193: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" ] }, { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b40fb528bf804a00be8ce15a5747a7c7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading config.json: 0%| | 0.00/967 [00:00\"\n", + "cand1_prefix = \"<|candidate1|>\"\n", + "cand2_prefix = \"<|candidate2|>\"\n", + "inputs = [\"hello!\", \"I love you!\"]\n", + "candidates_A = [\"hi!\", \"I hate you!\"]\n", + "candidates_B = [\"f**k off!\", \"I love you, too!\"]\n", + "def tokenize_pair(sources:List[str], candidate1s:List[str], candidate2s:List[str]):\n", + " ids = []\n", + " assert len(sources) == len(candidate1s) == len(candidate2s)\n", + " for i in range(len(sources)):\n", + " source_ids = tokenizer.encode(source_prefix + sources[i])\n", + " candidate1_ids = tokenizer.encode(cand1_prefix + candidate1s[i])\n", + " candidate2_ids = tokenizer.encode(cand2_prefix + candidate2s[i])\n", + " ids.append(source_ids + candidate1_ids + candidate2_ids)\n", + " encodings = tokenizer.pad({\"input_ids\": ids}, return_tensors=\"pt\")\n", + " return encodings\n", + "\n", + "encodings = tokenize_pair(inputs, candidates_A, candidates_B)\n", + "encodings = {k:v.to(pairrm.device) for k,v in encodings.items()}\n", + "outputs = pairrm(**encodings)\n", + "logits = outputs.logits.tolist()\n", + "comparison_results = outputs.logits > 0\n", + "print(logits)\n", + "# [1.9003021717071533, -1.2547134160995483]\n", + "print(comparison_results)\n", + "# tensor([ True, False], device='cuda:0'), which means whether candidate A is better than candidate B for each input" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Verifying Correctness" + "### load from llm-blender wrapper" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2024-01-05 15:11:00.611021: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-01-05 15:11:01.483983: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64\n", - "2024-01-05 15:11:01.484081: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/cuda-11.8//lib64\n", - "2024-01-05 15:11:01.484088: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", "WARNING:root:No ranker config provided, no ranker loaded, please load ranker first through load_ranker()\n", "WARNING:root:No fuser config provided, no fuser loaded, please load fuser first through load_fuser()\n", "/home/dongfu/miniconda3/envs/llm-blender/lib/python3.9/site-packages/dataclasses_json/core.py:187: RuntimeWarning: 'NoneType' object value of non-optional type load_checkpoint detected when decoding RankerConfig.\n", @@ -193,149 +201,47 @@ "text": [ "Successfully loaded ranker from /home/dongfu/data/.cache/huggingface/hub/llm-blender/PairRM\n" ] - } - ], - "source": [ - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", - "import llm_blender\n", - "blender = llm_blender.Blender()\n", - "# Load Ranker\n", - "blender.loadranker(\"llm-blender/PairRM\") # load ranker checkpoint" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ + }, { "name": "stderr", "output_type": "stream", "text": [ - "Ranking candidates: 0%| | 0/1 [00:00 hello![SEP][CLS]<|candidate1|> hi![SEP][CLS]<|candidate2|> f**k off![SEP][PAD][PAD][PAD][PAD]'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "blender.ranker.tokenizer.decode([ 1, 128001, 12018, 300, 2, 1, 128002, 8595, 300,\n", - " 2, 1, 128003, 2994, 1225, 1225, 1165, 442, 300,\n", - " 2, 0, 0, 0, 0], skip_special_tokens=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SequenceClassifierOutput(loss=None, logits=tensor([1.9003], device='cuda:0', grad_fn=), hidden_states=None, attentions=None)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import torch\n", - "input_ids = torch.tensor([ 1, 128001, 12018, 300, 2, 1, 128002, 8595, 300,\n", - " 2, 1, 128003, 2994, 1225, 1225, 1165, 442, 300,\n", - " 2, 0, 0, 0, 0], dtype=torch.long, device='cuda:0')\n", - "attention_masks = input_ids.ne(0).float()\n", - "input_ids = input_ids.unsqueeze(0)\n", - "attention_masks = attention_masks.unsqueeze(0)\n", - "pairrm(input_ids=input_ids, attention_mask=attention_masks)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "SequenceClassifierOutput(loss=None, logits=tensor([-1.2547], device='cuda:0', grad_fn=), hidden_states=None, attentions=None)" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import torch\n", - "input_ids = torch.tensor([ 1, 128001, 273, 472, 274, 300, 2, 1, 128002,\n", - " 273, 3254, 274, 300, 2, 1, 128003, 273, 472,\n", - " 274, 261, 461, 300, 2], dtype=torch.long, device='cuda:0')\n", - "attention_masks = input_ids.ne(0).float()\n", - "input_ids = input_ids.unsqueeze(0)\n", - "attention_masks = attention_masks.unsqueeze(0)\n", - "pairrm(input_ids=input_ids, attention_mask=attention_masks)" + "logits = blender.compare(inputs, candidates_A, candidates_B, return_logits=True, mode=\"[A,B]\")\n", + "comparison_results = logits > 0\n", + "print(logits)\n", + "# [1.9003021717071533, -1.2547134160995483]\n", + "print(comparison_results)\n", + "# tensor([ True, False], device='cuda:0'), which means whether candidate A is better than candidate B for each input" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {