pip install vllm  # version 0.6.1.post2 is recommended
Run pip show vllm to find where vllm is installed, then change into its models directory:
cd ./vllm/model_executor/models/
Place the telechat.py file provided here into the directory above.
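If you prefer to locate that directory from code rather than from the pip show output, here is a minimal sketch (it assumes vllm is importable and that telechat.py sits in the current working directory):
>>> import os, shutil, vllm
>>> models_dir = os.path.join(os.path.dirname(vllm.__file__), "model_executor", "models")
>>> shutil.copy("telechat.py", models_dir)  # copies telechat.py into vllm/model_executor/models/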
Modify __init__.py in the same directory:
"StableLmForCausalLM": ("stablelm", "StablelmForCausalLM"),
"Starcoder2ForCausalLM": ("starcoder2", "Starcoder2ForCausalLM"),
"TeleChatForCausalLM": ("telechat", "TeleChatForCausalLM"), #telechat
"ArcticForCausalLM": ("arctic", "ArcticForCausalLM"),
"XverseForCausalLM": ("xverse", "XverseForCausalLM"),
Add the TeleChatForCausalLM line (marked with the #telechat comment) as shown above.
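As a quick sanity check that the registration took effect (this assumes the vLLM 0.6.x API, which exposes ModelRegistry at the package top level):
>>> from vllm import ModelRegistry
>>> "TeleChatForCausalLM" in ModelRegistry.get_supported_archs()
True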
In the model's config.json, make sure the architectures field reads:
>>> "architectures": [
>>>     "TeleChatForCausalLM"
>>> ]
If you need context-length extrapolation, add the following to the model's config.json:
>>> "rope_scaling": {
>>> "factor": 2.0,
>>> "original_max_position_embeddings": 8192,
>>> "rope_type": "dynamic"
>>> }
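With dynamic NTK scaling and a factor of 2.0, the usable context window should extend to roughly original_max_position_embeddings × factor = 8192 × 2.0 = 16384 tokens. A small sketch to confirm the values were written correctly (the model path below is a placeholder):
>>> import json, os
>>> cfg = json.load(open(os.path.join("path/to/telechat-model", "config.json")))
>>> cfg["rope_scaling"]["original_max_position_embeddings"] * cfg["rope_scaling"]["factor"]
16384.0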
Launch TeleChat inference in the standard vLLM way:
>>> from vllm import LLM, SamplingParams
>>> import torch
>>> llm = LLM(model="path/to/telechat-model", trust_remote_code=True, tensor_parallel_size=4)
>>> prompts = ['你好']
>>> sampling_params = SamplingParams(max_tokens=2048, temperature=0.0, repetition_penalty=1.03)  # a repetition_penalty of 1.03 is recommended
>>> outputs = llm.generate(prompts, sampling_params)
>>> for output in outputs:
...     generated_text = output.outputs[0].text
...     print(generated_text)