From 774c852df016bb182d29e7991ea19f557c584803 Mon Sep 17 00:00:00 2001 From: Nadav Timor Date: Mon, 16 Sep 2024 23:13:15 +0000 Subject: [PATCH] STABLE. revert custom device_map (seems like HF has a bug) --- poc/accelerate.ipynb | 2430 ++---------------------------------------- poc/dsi.py | 90 +- 2 files changed, 165 insertions(+), 2355 deletions(-) diff --git a/poc/accelerate.ipynb b/poc/accelerate.ipynb index ffbe6da..82888fb 100644 --- a/poc/accelerate.ipynb +++ b/poc/accelerate.ipynb @@ -36,7 +36,8 @@ ], "source": [ "import torch\n", - "\n", + "from transformers import AutoModelForCausalLM\n", + "from dsi import garbage_collect\n", "from dsi import print_gpu_memory\n", "\n", "print_gpu_memory()" @@ -84,10 +85,6 @@ } ], "source": [ - "from transformers import AutoModelForCausalLM\n", - "from dsi import garbage_collect\n", - "\n", - "\n", "def test_memory_leak(model_name=\"gpt2\"):\n", " print_gpu_memory()\n", " print(\"Loading model...\")\n", @@ -139,760 +136,76 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting garbage...\n", - "The current device is 0\n", - "GPU 0: 44.09 GB free, 44.35 GB total\n", - "GPU 1: 44.09 GB free, 44.35 GB total\n", - "GPU 2: 44.09 GB free, 44.35 GB total\n", - "GPU 3: 44.09 GB free, 44.35 GB total\n", - "GPU 4: 44.09 GB free, 44.35 GB total\n", - "Loading model with auto device map...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading checkpoint shards: 100%|██████████| 4/4 [00:08<00:00, 2.23s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "max_memory=OrderedDict([('', 0)])\n", - "model.device_map does not exist\n", - "model.hf_device_map={'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 1, 'model.layers.4': 1, 'model.layers.5': 1, 'model.layers.6': 1, 'model.layers.7': 1, 'model.layers.8': 1, 'model.layers.9': 1, 'model.layers.10': 1, 'model.layers.11': 2, 'model.layers.12': 2, 'model.layers.13': 2, 'model.layers.14': 2, 'model.layers.15': 2, 'model.layers.16': 2, 'model.layers.17': 2, 'model.layers.18': 2, 'model.layers.19': 3, 'model.layers.20': 3, 'model.layers.21': 3, 'model.layers.22': 3, 'model.layers.23': 3, 'model.layers.24': 3, 'model.layers.25': 3, 'model.layers.26': 3, 'model.layers.27': 4, 'model.layers.28': 4, 'model.layers.29': 4, 'model.layers.30': 4, 'model.layers.31': 4, 'model.norm': 4, 'model.rotary_emb': 4, 'lm_head': 4}\n", - "The current device is 0\n", - "GPU 0: 39.69 GB free, 44.35 GB total\n", - "GPU 1: 37.58 GB free, 44.35 GB total\n", - "GPU 2: 37.58 GB free, 44.35 GB total\n", - "GPU 3: 37.58 GB free, 44.35 GB total\n", - "GPU 4: 38.06 GB free, 44.35 GB total\n", - "Collecting garbage...\n", - "The current device is 0\n", - "GPU 0: 44.09 GB free, 44.35 GB total\n", - "GPU 1: 44.09 GB free, 44.35 GB total\n", - "GPU 2: 44.09 GB free, 44.35 GB total\n", - "GPU 3: 44.09 GB free, 44.35 GB total\n", - "GPU 4: 44.09 GB free, 44.35 GB total\n", - "Loading model without specifying device map...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading checkpoint shards: 0%| | 0/4 [00:00 19\u001b[0m \u001b[43mtest_loading_on_all_gpus_except_0\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Meta-Llama-3.1-70B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mload_in_8bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mload_in_8bit\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[6], line 7\u001b[0m, in \u001b[0;36mtest_loading_on_all_gpus_except_0\u001b[0;34m(model_name, dtype, load_in_8bit)\u001b[0m\n\u001b[1;32m 5\u001b[0m garbage_collect()\n\u001b[1;32m 6\u001b[0m print_gpu_memory()\n\u001b[0;32m----> 7\u001b[0m device_map \u001b[38;5;241m=\u001b[39m \u001b[43mget_device_map_without_gpu_0\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mload_in_8bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mload_in_8bit\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdevice_map\u001b[38;5;132;01m=}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 9\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForCausalLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(model_name, cache_dir\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/workspace/hf_cache\u001b[39m\u001b[38;5;124m\"\u001b[39m, device_map\u001b[38;5;241m=\u001b[39mdevice_map)\n", + "File \u001b[0;32m/workspace/distributed-speculative-inference/poc/dsi.py:760\u001b[0m, in \u001b[0;36mget_device_map_without_gpu_0\u001b[0;34m(model_name, dtype, load_in_8bit)\u001b[0m\n\u001b[1;32m 756\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_device_map_without_gpu_0\u001b[39m(model_name, dtype, load_in_8bit):\n\u001b[1;32m 757\u001b[0m \u001b[38;5;66;03m# max_memory = get_max_memory()\u001b[39;00m\n\u001b[1;32m 758\u001b[0m \u001b[38;5;66;03m# max_memory = {k: v if k != 0 else 0 for k, v in max_memory.items()}\u001b[39;00m\n\u001b[1;32m 759\u001b[0m max_memory \u001b[38;5;241m=\u001b[39m {\u001b[38;5;241m0\u001b[39m: \u001b[38;5;241m0\u001b[39m}\n\u001b[0;32m--> 760\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mAutoModelForCausalLM\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 761\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 762\u001b[0m \u001b[43m \u001b[49m\u001b[43mtorch_dtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 763\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_in_8bit\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mload_in_8bit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 764\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 765\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/workspace/hf_cache\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 766\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 767\u001b[0m device_map \u001b[38;5;241m=\u001b[39m accelerate\u001b[38;5;241m.\u001b[39minfer_auto_device_map(model, max_memory\u001b[38;5;241m=\u001b[39mmax_memory)\n\u001b[1;32m 768\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m model\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py:564\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 563\u001b[0m model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 564\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_class\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 565\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhub_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 566\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 568\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 569\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 570\u001b[0m )\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py:3960\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m 3950\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dtype_orig \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 3951\u001b[0m torch\u001b[38;5;241m.\u001b[39mset_default_dtype(dtype_orig)\n\u001b[1;32m 3953\u001b[0m (\n\u001b[1;32m 3954\u001b[0m model,\n\u001b[1;32m 3955\u001b[0m missing_keys,\n\u001b[1;32m 3956\u001b[0m unexpected_keys,\n\u001b[1;32m 3957\u001b[0m mismatched_keys,\n\u001b[1;32m 3958\u001b[0m offload_index,\n\u001b[1;32m 3959\u001b[0m error_msgs,\n\u001b[0;32m-> 3960\u001b[0m ) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_load_pretrained_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3961\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3962\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3963\u001b[0m \u001b[43m \u001b[49m\u001b[43mloaded_state_dict_keys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# XXX: rename?\u001b[39;49;00m\n\u001b[1;32m 3964\u001b[0m \u001b[43m \u001b[49m\u001b[43mresolved_archive_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3965\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3966\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_mismatched_sizes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_mismatched_sizes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3967\u001b[0m \u001b[43m \u001b[49m\u001b[43msharded_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msharded_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3968\u001b[0m \u001b[43m \u001b[49m\u001b[43m_fast_init\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_fast_init\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3969\u001b[0m \u001b[43m \u001b[49m\u001b[43mlow_cpu_mem_usage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlow_cpu_mem_usage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3970\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3971\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3972\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_state_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_state_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3973\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch_dtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3974\u001b[0m \u001b[43m \u001b[49m\u001b[43mhf_quantizer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_quantizer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3975\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_in_fp32_modules\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_in_fp32_modules\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3976\u001b[0m \u001b[43m \u001b[49m\u001b[43mgguf_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgguf_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3977\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3979\u001b[0m \u001b[38;5;66;03m# make sure token embedding weights are still tied if needed\u001b[39;00m\n\u001b[1;32m 3980\u001b[0m model\u001b[38;5;241m.\u001b[39mtie_weights()\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py:4434\u001b[0m, in \u001b[0;36mPreTrainedModel._load_pretrained_model\u001b[0;34m(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, hf_quantizer, keep_in_fp32_modules, gguf_path)\u001b[0m\n\u001b[1;32m 4430\u001b[0m set_module_tensor_to_device(\n\u001b[1;32m 4431\u001b[0m model_to_load, key, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m, torch\u001b[38;5;241m.\u001b[39mempty(\u001b[38;5;241m*\u001b[39mparam\u001b[38;5;241m.\u001b[39msize(), dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[1;32m 4432\u001b[0m )\n\u001b[1;32m 4433\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 4434\u001b[0m new_error_msgs, offload_index, state_dict_index \u001b[38;5;241m=\u001b[39m \u001b[43m_load_state_dict_into_meta_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4435\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_to_load\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4436\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4437\u001b[0m \u001b[43m \u001b[49m\u001b[43mloaded_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4438\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart_prefix\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4439\u001b[0m \u001b[43m \u001b[49m\u001b[43mexpected_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4440\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice_map\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4441\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4442\u001b[0m \u001b[43m \u001b[49m\u001b[43moffload_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moffload_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4443\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_dict_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstate_dict_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4444\u001b[0m \u001b[43m \u001b[49m\u001b[43mstate_dict_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstate_dict_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4445\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4446\u001b[0m \u001b[43m \u001b[49m\u001b[43mhf_quantizer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_quantizer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4447\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_safetensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_safetensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4448\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_in_fp32_modules\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_in_fp32_modules\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4449\u001b[0m \u001b[43m \u001b[49m\u001b[43munexpected_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43munexpected_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4450\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4451\u001b[0m error_msgs \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m new_error_msgs\n\u001b[1;32m 4452\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 4453\u001b[0m \u001b[38;5;66;03m# Sharded checkpoint or whole but low_cpu_mem_usage==True\u001b[39;00m\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/transformers/modeling_utils.py:963\u001b[0m, in \u001b[0;36m_load_state_dict_into_meta_model\u001b[0;34m(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, hf_quantizer, is_safetensors, keep_in_fp32_modules, unexpected_keys, pretrained_model_name_or_path)\u001b[0m\n\u001b[1;32m 961\u001b[0m set_module_tensor_to_device(model, param_name, param_device, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mset_module_kwargs)\n\u001b[1;32m 962\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 963\u001b[0m \u001b[43mhf_quantizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_quantized_param\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparam\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparam_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparam_device\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munexpected_keys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 964\u001b[0m \u001b[38;5;66;03m# For quantized modules with FSDP/DeepSpeed Stage 3, we need to quantize the parameter on the GPU\u001b[39;00m\n\u001b[1;32m 965\u001b[0m \u001b[38;5;66;03m# and then cast it to CPU to avoid excessive memory usage on each GPU\u001b[39;00m\n\u001b[1;32m 966\u001b[0m \u001b[38;5;66;03m# in comparison to the sharded model across GPUs.\u001b[39;00m\n\u001b[1;32m 967\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_fsdp_enabled() \u001b[38;5;129;01mor\u001b[39;00m is_deepspeed_zero3_enabled():\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/transformers/quantizers/quantizer_bnb_8bit.py:207\u001b[0m, in \u001b[0;36mBnb8BitHfQuantizer.create_quantized_param\u001b[0;34m(self, model, param_value, param_name, target_device, state_dict, unexpected_keys)\u001b[0m\n\u001b[1;32m 204\u001b[0m new_value \u001b[38;5;241m=\u001b[39m new_value\u001b[38;5;241m.\u001b[39mT\n\u001b[1;32m 206\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m old_value\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\n\u001b[0;32m--> 207\u001b[0m new_value \u001b[38;5;241m=\u001b[39m \u001b[43mbnb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInt8Params\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnew_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrequires_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtarget_device\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 209\u001b[0m module\u001b[38;5;241m.\u001b[39m_parameters[tensor_name] \u001b[38;5;241m=\u001b[39m new_value\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fp16_statistics \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/bitsandbytes/nn/modules.py:626\u001b[0m, in \u001b[0;36mInt8Params.to\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 623\u001b[0m device, dtype, non_blocking, convert_to_format \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39m_nn\u001b[38;5;241m.\u001b[39m_parse_to(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 625\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m device \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m device\u001b[38;5;241m.\u001b[39mtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mdevice\u001b[38;5;241m.\u001b[39mtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 626\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 627\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 628\u001b[0m new_param \u001b[38;5;241m=\u001b[39m Int8Params(\n\u001b[1;32m 629\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mto(device\u001b[38;5;241m=\u001b[39mdevice, dtype\u001b[38;5;241m=\u001b[39mdtype, non_blocking\u001b[38;5;241m=\u001b[39mnon_blocking),\n\u001b[1;32m 630\u001b[0m requires_grad\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequires_grad,\n\u001b[1;32m 631\u001b[0m has_fp16_weights\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhas_fp16_weights,\n\u001b[1;32m 632\u001b[0m )\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/bitsandbytes/nn/modules.py:587\u001b[0m, in \u001b[0;36mInt8Params.cuda\u001b[0;34m(self, device)\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 584\u001b[0m \u001b[38;5;66;03m# we store the 8-bit rows-major weight\u001b[39;00m\n\u001b[1;32m 585\u001b[0m \u001b[38;5;66;03m# we convert this weight to the turning/ampere weight during the first inference pass\u001b[39;00m\n\u001b[1;32m 586\u001b[0m B \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata\u001b[38;5;241m.\u001b[39mcontiguous()\u001b[38;5;241m.\u001b[39mhalf()\u001b[38;5;241m.\u001b[39mcuda(device)\n\u001b[0;32m--> 587\u001b[0m CB, CBt, SCB, SCBt, coo_tensorB \u001b[38;5;241m=\u001b[39m \u001b[43mbnb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunctional\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdouble_quant\u001b[49m\u001b[43m(\u001b[49m\u001b[43mB\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 588\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m CBt\n\u001b[1;32m 589\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m SCBt\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/bitsandbytes/functional.py:2526\u001b[0m, in \u001b[0;36mdouble_quant\u001b[0;34m(A, col_stats, row_stats, out_col, out_row, threshold)\u001b[0m\n\u001b[1;32m 2523\u001b[0m row_stats, col_stats, nnz_row_ptr \u001b[38;5;241m=\u001b[39m get_colrow_absmax(A, threshold\u001b[38;5;241m=\u001b[39mthreshold)\n\u001b[1;32m 2525\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m out_col \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 2526\u001b[0m out_col \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mzeros\u001b[49m\u001b[43m(\u001b[49m\u001b[43mA\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mint8\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2527\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m out_row \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 2528\u001b[0m out_row \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mzeros(A\u001b[38;5;241m.\u001b[39mshape, device\u001b[38;5;241m=\u001b[39mdevice, dtype\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mint8)\n", + "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 224.00 MiB. GPU 0 has a total capacity of 44.35 GiB of which 143.38 MiB is free. Process 2046325 has 44.20 GiB memory in use. Of the allocated memory 43.85 GiB is allocated by PyTorch, and 53.56 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" ] } ], "source": [ - "def test_loading_on_gpu_0_only(model_name):\n", + "from dsi import get_device_map_without_gpu_0\n", + "\n", + "\n", + "def test_loading_on_all_gpus_except_0(model_name, dtype, load_in_8bit):\n", " garbage_collect()\n", " print_gpu_memory()\n", - " device_map = get_device_map_with_only_gpu_0(model_name)\n", + " device_map = get_device_map_without_gpu_0(model_name, dtype=dtype, load_in_8bit=load_in_8bit)\n", " print(f\"{device_map=}\")\n", " model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=\"/workspace/hf_cache\", device_map=device_map)\n", " print_gpu_memory()\n", @@ -1544,941 +268,25 @@ " print_gpu_memory()\n", "\n", "\n", - "test_loading_on_gpu_0_only(model_name=\"meta-llama/Meta-Llama-3.1-8B-Instruct\")" + "dtype = torch.float16\n", + "load_in_8bit = True\n", + "\n", + "test_loading_on_all_gpus_except_0(model_name=\"meta-llama/Meta-Llama-3.1-70B-Instruct\", dtype=dtype, load_in_8bit=load_in_8bit)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting garbage...\n", - "The current device is 0\n", - "GPU 0: 44.09 GB free, 44.35 GB total\n", - "GPU 1: 44.09 GB free, 44.35 GB total\n", - "GPU 2: 44.09 GB free, 44.35 GB total\n", - "GPU 3: 44.09 GB free, 44.35 GB total\n", - "GPU 4: 44.09 GB free, 44.35 GB total\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading checkpoint shards: 37%|███▋ | 11/30 [00:07<00:14, 1.33it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 40%|████ | 12/30 [00:08<00:13, 1.35it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.32.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.33.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.34.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 43%|████▎ | 13/30 [00:09<00:12, 1.32it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.35.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.36.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.37.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 47%|████▋ | 14/30 [00:10<00:12, 1.29it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.38.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.39.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 50%|█████ | 15/30 [00:10<00:11, 1.30it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.40.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.41.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.42.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 53%|█████▎ | 16/30 [00:11<00:10, 1.30it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.43.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.44.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.45.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 57%|█████▋ | 17/30 [00:12<00:10, 1.29it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.46.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.47.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.48.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 60%|██████ | 18/30 [00:13<00:09, 1.31it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.49.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.50.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.51.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 63%|██████▎ | 19/30 [00:13<00:08, 1.30it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.52.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.53.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 67%|██████▋ | 20/30 [00:14<00:07, 1.31it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.54.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.55.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.56.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 70%|███████ | 21/30 [00:15<00:06, 1.31it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.57.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.58.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.59.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 73%|███████▎ | 22/30 [00:16<00:06, 1.31it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.60.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.61.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.62.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 77%|███████▋ | 23/30 [00:17<00:05, 1.29it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.63.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.64.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.65.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 80%|████████ | 24/30 [00:17<00:04, 1.37it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.66.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.67.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 83%|████████▎ | 25/30 [00:18<00:03, 1.45it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.68.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.69.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.70.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 87%|████████▋ | 26/30 [00:18<00:02, 1.51it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.71.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.72.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.73.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 90%|█████████ | 27/30 [00:19<00:01, 1.56it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.74.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.75.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.76.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 93%|█████████▎| 28/30 [00:20<00:01, 1.57it/s]/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.77.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.78.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.self_attn.q_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.self_attn.k_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.self_attn.v_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.self_attn.o_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.mlp.gate_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.mlp.up_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.mlp.down_proj.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.input_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "/root/.cache/pypoetry/virtualenvs/distributed-speculative-inference-w7-NjH3e-py3.11/lib/python3.11/site-packages/torch/nn/modules/module.py:2068: UserWarning: for model.layers.79.post_attention_layernorm.weight: copying from a non-meta parameter in the checkpoint to a meta parameter in the current model, which is a no-op. (Did you mean to pass `assign=True` to assign items in the state dictionary to their corresponding key in the module instead of copying them in place?)\n", - " warnings.warn(f'for {key}: copying from a non-meta parameter in the checkpoint to a meta '\n", - "Loading checkpoint shards: 100%|██████████| 30/30 [00:21<00:00, 1.43it/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "device_map=OrderedDict([('model.embed_tokens', 1), ('model.layers.0', 1), ('model.layers.1', 1), ('model.layers.2', 1), ('model.layers.3', 1), ('model.layers.4', 1), ('model.layers.5', 1), ('model.layers.6', 1), ('model.layers.7', 1), ('model.layers.8', 1), ('model.layers.9', 1), ('model.layers.10', 1), ('model.layers.11.self_attn', 1), ('model.layers.11.mlp.gate_proj', 1), ('model.layers.11.mlp.up_proj', 2), ('model.layers.11.mlp.down_proj', 2), ('model.layers.11.mlp.act_fn', 2), ('model.layers.11.input_layernorm', 2), ('model.layers.11.post_attention_layernorm', 2), ('model.layers.12', 2), ('model.layers.13', 2), ('model.layers.14', 2), ('model.layers.15', 2), ('model.layers.16', 2), ('model.layers.17', 2), ('model.layers.18', 2), ('model.layers.19', 2), ('model.layers.20', 2), ('model.layers.21', 2), ('model.layers.22', 2), ('model.layers.23', 2), ('model.layers.24.self_attn', 2), ('model.layers.24.input_layernorm', 3), ('model.layers.24.post_attention_layernorm', 3), ('model.layers.25', 3), ('model.layers.26', 3), ('model.layers.27', 3), ('model.layers.28', 3), ('model.layers.29', 3), ('model.layers.30', 3), ('model.layers.31', 3), ('model.layers.32', 3), ('model.layers.33', 3), ('model.layers.34', 3), ('model.layers.35', 3), ('model.layers.36', 3), ('model.layers.38', 4), ('model.layers.39', 4), ('model.layers.40', 4), ('model.layers.41', 4), ('model.layers.42', 4), ('model.layers.43', 4), ('model.layers.44', 4), ('model.layers.45', 4), ('model.layers.46', 4), ('model.layers.47', 4), ('model.layers.48', 4), ('model.layers.49.self_attn', 4), ('model.layers.49.mlp.gate_proj', 4), ('model.layers.49.mlp.up_proj', 4), ('model.layers.49.mlp.down_proj', 'disk'), ('model.layers.49.mlp.act_fn', 'disk'), ('model.layers.49.input_layernorm', 'disk'), ('model.layers.49.post_attention_layernorm', 'disk'), ('model.layers.50', 'disk'), ('model.layers.51', 'disk'), ('model.layers.52', 'disk'), ('model.layers.53', 'disk'), ('model.layers.54', 'disk'), ('model.layers.55', 'disk'), ('model.layers.56', 'disk'), ('model.layers.57', 'disk'), ('model.layers.58', 'disk'), ('model.layers.59', 'disk'), ('model.layers.60', 'disk'), ('model.layers.61', 'disk'), ('model.layers.62', 'disk'), ('model.layers.63', 'disk'), ('model.layers.64', 'disk'), ('model.layers.65', 'disk'), ('model.layers.66', 'disk'), ('model.layers.67', 'disk'), ('model.layers.68', 'disk'), ('model.layers.69', 'disk'), ('model.layers.70', 'disk'), ('model.layers.71', 'disk'), ('model.layers.72', 'disk'), ('model.layers.73', 'disk'), ('model.layers.74', 'disk'), ('model.layers.75', 'disk'), ('model.layers.76', 'disk'), ('model.layers.77', 'disk'), ('model.layers.78', 'disk'), ('model.layers.79', 'disk'), ('model.norm', 'disk'), ('model.rotary_emb', 'disk'), ('lm_head', 'disk'), ('model.layers.37', 4), ('model.layers.24.mlp', 3)])\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading checkpoint shards: 100%|██████████| 30/30 [02:47<00:00, 5.57s/it]\n", - "WARNING:root:Some parameters are on the meta device device because they were offloaded to the disk.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The current device is 0\n", - "GPU 0: 44.09 GB free, 44.35 GB total\n", - "GPU 1: 3.67 GB free, 44.35 GB total\n", - "GPU 2: 3.52 GB free, 44.35 GB total\n", - "GPU 3: 3.21 GB free, 44.35 GB total\n", - "GPU 4: 3.52 GB free, 44.35 GB total\n", - "Collecting garbage...\n", - "The current device is 0\n", - "GPU 0: 44.09 GB free, 44.35 GB total\n", - "GPU 1: 44.09 GB free, 44.35 GB total\n", - "GPU 2: 44.09 GB free, 44.35 GB total\n", - "GPU 3: 44.09 GB free, 44.35 GB total\n", - "GPU 4: 44.09 GB free, 44.35 GB total\n" - ] - } - ], + "outputs": [], "source": [ - "from dsi import get_device_map_without_gpu_0\n", + "from dsi import get_device_map_with_only_gpu_0\n", "\n", "\n", - "def test_loading_on_all_gpus_except_0(model_name):\n", + "def test_loading_on_gpu_0_only(model_name, dtype, load_in_8bit):\n", " garbage_collect()\n", " print_gpu_memory()\n", - " device_map = get_device_map_without_gpu_0(model_name)\n", + " device_map = get_device_map_with_only_gpu_0(model_name, dtype=dtype, load_in_8bit=load_in_8bit)\n", " print(f\"{device_map=}\")\n", " model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=\"/workspace/hf_cache\", device_map=device_map)\n", " print_gpu_memory()\n", @@ -2486,8 +294,7 @@ " garbage_collect()\n", " print_gpu_memory()\n", "\n", - "\n", - "test_loading_on_all_gpus_except_0(model_name=\"meta-llama/Meta-Llama-3.1-70B-Instruct\")" + "test_loading_on_gpu_0_only(model_name=\"meta-llama/Meta-Llama-3.1-8B-Instruct\", dtype=dtype, load_in_8bit=load_in_8bit)" ] }, { @@ -2499,50 +306,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n", - "`low_cpu_mem_usage` was None, now set to True since model is quantized.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting garbage...\n", - "The current device is 0\n", - "GPU 0: 35.48 GB free, 44.35 GB total\n", - "GPU 1: 44.09 GB free, 44.35 GB total\n", - "GPU 2: 44.09 GB free, 44.35 GB total\n", - "GPU 3: 44.09 GB free, 44.35 GB total\n", - "GPU 4: 44.09 GB free, 44.35 GB total\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading checkpoint shards: 100%|██████████| 4/4 [00:05<00:00, 1.33s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The current device is 0\n", - "GPU 0: 27.05 GB free, 44.35 GB total\n", - "GPU 1: 44.09 GB free, 44.35 GB total\n", - "GPU 2: 44.09 GB free, 44.35 GB total\n", - "GPU 3: 44.09 GB free, 44.35 GB total\n", - "GPU 4: 44.09 GB free, 44.35 GB total\n" - ] - } - ], + "outputs": [], "source": [ "garbage_collect()\n", "print_gpu_memory()\n", diff --git a/poc/dsi.py b/poc/dsi.py index afe703f..ba9e930 100644 --- a/poc/dsi.py +++ b/poc/dsi.py @@ -1,5 +1,6 @@ import asyncio import contextlib +from functools import cache import gc import os import threading @@ -394,7 +395,7 @@ async def load_model( name: str, dtype: torch.dtype, load_in_8bit: bool, - device_map: str, + device_map: None | str, cache_dir: None | str = None, ) -> None: """Loads the model from the given name and moves it to the device.""" @@ -410,13 +411,21 @@ async def load_model( if cache_dir is None: cache_dir = os.environ["TRANSFORMERS_CACHE"] print(f"{self.__class__.__name__}: Loading model {name} with {device_map=}") - self.model = AutoModelForCausalLM.from_pretrained( + if device_map is None: + self.model = AutoModelForCausalLM.from_pretrained( name, torch_dtype=dtype, - device_map=device_map, cache_dir=cache_dir, load_in_8bit=load_in_8bit, ) + else: + self.model = AutoModelForCausalLM.from_pretrained( + name, + torch_dtype=dtype, + device_map=device_map, + cache_dir=cache_dir, + load_in_8bit=load_in_8bit, + ) self.model.eval() # if device != cpu: # print(f"{self.__class__.__name__}: Moving model to {device}") @@ -726,24 +735,47 @@ async def broadcast(self) -> None: print(f"PubSub: Broadcast complete. Queue size: {self.queue.qsize()}") -def get_device_map_with_only_gpu_0(model_name): - with accelerate.init_empty_weights(): - model = AutoModelForCausalLM.from_pretrained( - model_name, cache_dir="/workspace/hf_cache" - ) - max_memory = {i: 0 for i in range(1, torch.cuda.device_count())} - max_memory[0] = f"{torch.cuda.mem_get_info(0)[0] / 1024 / 1024 / 1024:.2f} GB" - return accelerate.infer_auto_device_map(model, max_memory=max_memory) +# @cache +# def get_max_memory(): +# max_memory = {i: f"{torch.cuda.mem_get_info(i)[0] / 1024 / 1024 / 1024:.2f} GB" for i in range(torch.cuda.device_count())} +# return max_memory -def get_device_map_without_gpu_0(model_name): - with accelerate.init_empty_weights(): - model = AutoModelForCausalLM.from_pretrained( - model_name, cache_dir="/workspace/hf_cache" - ) - max_memory = {i: f"{torch.cuda.mem_get_info(i)[0] / 1024 / 1024 / 1024:.2f} GB" for i in range(1, torch.cuda.device_count())} - max_memory[0] = 0 - return accelerate.infer_auto_device_map(model, max_memory=max_memory) +def get_device_map_with_only_gpu_0(model_name, dtype, load_in_8bit): + # max_memory = get_max_memory() + # max_memory = {k: v if k == 0 else 0 for k, v in max_memory.items()} + max_memory = { + i: 0 + for i in range(1, torch.cuda.device_count()) + } + model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=dtype, + load_in_8bit=load_in_8bit, + device_map=None, + cache_dir="/workspace/hf_cache", + ) + device_map = accelerate.infer_auto_device_map(model, max_memory=max_memory) + del model + garbage_collect() + return device_map + + +# def get_device_map_without_gpu_0(model_name, dtype, load_in_8bit): +# # max_memory = get_max_memory() +# # max_memory = {k: v if k != 0 else 0 for k, v in max_memory.items()} +# max_memory = {0: 0} +# model = AutoModelForCausalLM.from_pretrained( +# model_name, +# torch_dtype=dtype, +# load_in_8bit=load_in_8bit, +# device_map=None, +# cache_dir="/workspace/hf_cache", +# ) +# device_map = accelerate.infer_auto_device_map(model, max_memory=max_memory) +# del model +# garbage_collect() +# return device_map def setup_hf_cache(): @@ -804,12 +836,18 @@ async def run( for i in range(1, num_verifiers + 1) ] print("Main: Loading all verifiers") + # verifier_device_map = get_device_map_without_gpu_0(verifier_name, verifier_dtype, verifier_load_in_8bit) + # print(f"Main: Verifier device map: {verifier_device_map}") + # visible_devices = ",".join(str(i) for i in range(1, torch.cuda.device_count())) + # os.environ["CUDA_VISIBLE_DEVICES"] = visible_devices + # print(f"Main: CUDA_VISIBLE_DEVICES set to {os.environ['CUDA_VISIBLE_DEVICES']}") await asyncio.gather( *[ verifier.load_model( verifier_name, dtype=verifier_dtype, - device_map=get_device_map_without_gpu_0(verifier_name), + # device_map="auto", + device_map="balanced_low_0", load_in_8bit=verifier_load_in_8bit, cache_dir=os.environ["TRANSFORMERS_CACHE"], ) @@ -818,16 +856,22 @@ async def run( ) print_gpu_memory() print("Main: Loading drafter") + # drafter_device_map = get_device_map_with_only_gpu_0(drafter_name, drafter_dtype, drafter_load_in_8bit) + # print(f"Main: Drafter device map: {drafter_device_map}") + # os.environ["CUDA_VISIBLE_DEVICES"] = "0" + # print(f"Main: CUDA_VISIBLE_DEVICES set to {os.environ['CUDA_VISIBLE_DEVICES']}") await drafter.load_model( drafter_name, dtype=drafter_dtype, - device_map=get_device_map_with_only_gpu_0(drafter_name), + # device_map="auto", + device_map=None, load_in_8bit=drafter_load_in_8bit, cache_dir=os.environ["TRANSFORMERS_CACHE"], ) print_gpu_memory() print("Main: All models loaded") - + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in range(torch.cuda.device_count())) + print(f"Main: CUDA_VISIBLE_DEVICES set to {os.environ['CUDA_VISIBLE_DEVICES']}") print("Main: Starting all tasks. Start measuring time NOW.") time_start = time.time() asyncio.create_task(manager.pubsub.broadcast()) @@ -940,7 +984,7 @@ async def main(): verifier_load_in_8bit: bool = True drafter_load_in_8bit: bool = True vocab_size: int = 128256 - lookahead: int = 6 + lookahead: int = 10 max_new_tokens: int = 100 prompt: str = """Below is an instruction that describes a task, paired with an input that provides