From 1ff5a78398db36549bb6e1e1ff27f6d301716998 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 15:27:14 +0100 Subject: [PATCH 01/32] adding input generation config --- src/lighteval/models/model_input.py | 112 ++++++++++++++++++ .../models/transformers/base_model.py | 87 +++++++++----- src/lighteval/models/vllm/vllm_model.py | 21 ++-- 3 files changed, 180 insertions(+), 40 deletions(-) create mode 100644 src/lighteval/models/model_input.py diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py new file mode 100644 index 000000000..fca3d1db1 --- /dev/null +++ b/src/lighteval/models/model_input.py @@ -0,0 +1,112 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from dataclasses import dataclass +from typing import Optional + +from lighteval.utils.imports import NO_VLLM_ERROR_MSG, is_vllm_available + + +@dataclass +class GenerationParameters: + early_stopping: Optional[bool] = None # vllm, transformers + repetition_penalty: Optional[float] = None # vllm, transformers, tgi + frequency_penalty: Optional[float] = None # vllm, tgi + length_penalty: Optional[float] = None # vllm, transformers + presence_penalty: Optional[float] = None # vllm + + max_new_tokens: Optional[int] = None # vllm, transformers, tgi + min_new_tokens: Optional[int] = None # vllm, transformers + + seed: Optional[int] = None # vllm, tgi + stop_tokens: Optional[list[str]] = None # vllm, transformers, tgi + temperature: Optional[float] = None # vllm, transformers, tgi + top_k: Optional[int] = None # vllm, transformers, tgi + min_p: Optional[float] = None # vllm, transformers + top_p: Optional[int] = None # vllm, transformers, tgi + truncate_prompt: Optional[bool] = None # vllm, tgi + + def to_vllm(self): + if not is_vllm_available(): + raise ImportError(NO_VLLM_ERROR_MSG) + from vllm import SamplingParameters + + # Task specific sampling params to set in model: n, best_of, use_beam_search + # Generation specific params to set in model: logprobs, prompt_logprobs + args = { + "presence_penalty": self.presence_penalty, + "frequency_penalty": self.frequency_penalty, + "repetition_penalty": self.repetition_penalty, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + "min_p": self.min_p, + "seed": self.seed, + "length_penalty": self.length_penalty, + "early_stopping": self.early_stopping, + "stop": self.stop_tokens, + "max_tokens": self.max_new_tokens, + "min_tokens": self.min_new_tokens, + "truncate_prompt_tokens": self.truncate_prompt, + } + return SamplingParameters(**{k: v for k, v in args.items() if v is not None}) + + def to_transformers(self): + from transformers import GenerationConfig + + # Task specific sampling params to set in model: do_sample, num_return_sequences, num_beans + args = { + "max_new_tokens": self.max_new_tokens, + "min_new_tokens": self.min_new_tokens, + "early_stopping": self.early_stopping, + "stop_strings": self.stop_tokens, + "temperature": self.temperature, + "top_k": self.top_k, + "top_p": self.top_p, + "min_p": self.min_p, + "repetition_penalty": self.repetition_penalty, + "length_penalty": self.length_penalty, + "output_scores": True, + "return_dict_in_generate": True, + } + # Even though we only use the dict representation of the GenerationConfig + # we still create the object as it uses validation steps + return GenerationConfig(**{k: v for k, v in args.items() if v is not None}) + + def to_tgi(self): + from huggingface_hub import TextGenerationInputGenerateParameters + + # Task specific sampling params to set in model: best_of, do_sample + args = { + "decoder_input_details": True, + "details": True, + "frequency_penalty": self.frequency_penalty, + "max_new_tokens": self.max_new_tokens, + "repetition_penalty": self.repetition_penalty, + "seed": self.seed, + "stop": self.stop_tokens, + "temperature": self.temperature, + "top_k": self.top_k, + "top_p": self.top_p, + "truncate": self.truncate_prompt, + } + return TextGenerationInputGenerateParameters(**{k: v for k, v in args.items() if v is not None}) diff --git a/src/lighteval/models/transformers/base_model.py b/src/lighteval/models/transformers/base_model.py index 9b815d2b0..36687efc1 100644 --- a/src/lighteval/models/transformers/base_model.py +++ 
b/src/lighteval/models/transformers/base_model.py @@ -36,9 +36,11 @@ AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, + GenerationConfig, GPTQConfig, PretrainedConfig, ) +from transformers.generation.utils import GenerateOutput from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset, LoglikelihoodSingleTokenDataset @@ -151,6 +153,7 @@ class BaseModelConfig: trust_remote_code: bool = False use_chat_template: bool = False compile: bool = False + generation_config: GenerationConfig = None def __post_init__(self): # Making sure this parameter is a boolean @@ -256,6 +259,7 @@ def __init__( self.model_sha = config.get_model_sha() self.precision = _get_dtype(config.dtype, config=self._config) + self.generation_config = config.generation_config.to_dict() if is_accelerate_available(): model_size, _ = calculate_maximum_sizes(self.model) @@ -631,25 +635,29 @@ def greedy_until_multi_turn( # noqa: C901 ], ] ) - model_outputs = self.model.generate( - **model_inputs, - max_new_tokens=max_generated_tokens, - stopping_criteria=stopping_criteria, - do_sample=False, - pad_token_id=self.tokenizer.pad_token_id - if self.tokenizer.pad_token_id - else self.tokenizer.eos_token_id, + + generation_config = GenerationConfig.from_dict(self.generation_config or {}) + generation_config.update( + { + "max_new_tokens": max_generated_tokens, + "pad_token_id": self.tokenizer.pad_token_id + if self.tokenizer.pad_token_id + else self.tokenizer.eos_token_id, + "eos_token_id": self.tokenizer.eos_token_id, + "do_sample": False, + } ) - model_outputs = model_outputs[0, model_inputs["input_ids"].size(1) :] + + model_outputs: GenerateOutput = self.model.generate( + **model_inputs, stopping_criteria=stopping_criteria, generation_config=generation_config + ) + model_outputs = model_outputs.sequences[0, model_inputs["input_ids"].size(1) :] model_generations = [model_outputs] - decoded_generation = self.tokenizer.decode(model_outputs) - for term in stop_tokens: - decoded_generation = decoded_generation.split(term)[0] input_tokens = [model_inputs["input_ids"]] for i, multi_turn_context in enumerate(request.context[1:]): - multi_turn_context = multi_turn_context.format(model_response=decoded_generation) + multi_turn_context = multi_turn_context.format(model_response=model_generations[-1]) model_inputs = self.tokenizer( multi_turn_context, @@ -671,17 +679,25 @@ def greedy_until_multi_turn( # noqa: C901 ] ) - model_outputs = self.model.generate( + generation_config = GenerationConfig.from_dict(self.generation_config or {}) + generation_config.update( + { + "max_new_tokens": max_generated_tokens, + "pad_token_id": self.tokenizer.pad_token_id + if self.tokenizer.pad_token_id + else self.tokenizer.eos_token_id, + "eos_token_id": self.tokenizer.eos_token_id, + "do_sample": False, + } + ) + + model_outputs: GenerateOutput = self.model.generate( input_ids=model_inputs["input_ids"], attention_mask=model_inputs["attention_mask"], - max_new_tokens=max_generated_tokens, stopping_criteria=stopping_criteria, - do_sample=False, - pad_token_id=self.tokenizer.pad_token_id - if self.tokenizer.pad_token_id - else self.tokenizer.eos_token_id, + generation_config=generation_config, ) - model_outputs = model_outputs[0, model_inputs["input_ids"].size(1) :] + model_outputs = model_outputs.sequences[0, model_inputs["input_ids"].size(1) :] model_generations.append(model_outputs) decoded_generation = self.tokenizer.decode(model_outputs, 
skip_special_tokens=True) input_tokens.append(model_inputs["input_ids"]) @@ -708,7 +724,7 @@ def greedy_until_multi_turn( # noqa: C901 results.append( GenerativeMultiturnResponse( result=answers, - input_tokens=[], + input_tokens=input_tokens, generated_tokens=[], truncated_tokens_count=0, padded_tokens_count=0, @@ -860,21 +876,28 @@ def _generate( stopping_criteria = stop_sequences_criteria(self.tokenizer, stop_sequences=stop_tokens, batch=batch) batch_size, _ = batch.input_ids.shape + generation_config = GenerationConfig.from_dict(self.generation_config or {}) + generation_config.update( + { + "max_new_tokens": max_new_tokens, + "pad_token_id": self.tokenizer.pad_token_id + if self.tokenizer.pad_token_id + else self.tokenizer.eos_token_id, + "eos_token_id": self.tokenizer.eos_token_id, + "do_sample": do_sample, + "num_return_sequences": num_samples, + "output_logits": returns_logits, + "renormalize_logits": True, + } + ) + # Compute model generation - outputs = self.model.generate( + outputs: GenerateOutput = self.model.generate( input_ids=batch.input_ids, attention_mask=batch.input_mask, - max_new_tokens=max_new_tokens, stopping_criteria=stopping_criteria, - pad_token_id=self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else self.tokenizer.eos_token_id, - return_dict_in_generate=True, - output_scores=True, - eos_token_id=self.tokenizer.eos_token_id, - do_sample=do_sample, - num_return_sequences=num_samples, + generation_config=generation_config, ) - if returns_logits: - logits = self.model.compute_transition_scores(outputs.sequences, outputs.scores, normalize_logits=True) generations = outputs.sequences[:, batch.input_ids.size(1) :] generations = torch.reshape(generations, (batch_size, num_samples, -1)) generations, len_gens = self.pad_and_gather(generations, num_samples=num_samples) @@ -882,7 +905,7 @@ def _generate( logits, len_logits = None, None if returns_logits: - logits, len_logits = self.pad_and_gather(logits) + logits, len_logits = self.pad_and_gather(outputs.logits) logits = logits.cpu().numpy() # We gather remaining info diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 2d413807d..944d7114e 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -85,6 +85,7 @@ class VLLMModelConfig: True # whether to add a space at the start of each continuation in multichoice generation ) pairwise_tokenization: bool = False # whether to tokenize the context and continuation separately or together. + sampling_params: SamplingParams = None # sampling parameters to use for generation subfolder: Optional[str] = None temperature: float = 0.6 # will be used for multi sampling tasks, for tasks requiring no sampling, this will be ignored and set to 0. 
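Note on the API introduced in this patch: the GenerationParameters dataclass added in model_input.py above gathers per-request sampling settings in one place and converts them to each backend's native generation object, dropping any field left at None so backend defaults still apply. A minimal usage sketch for the transformers path (illustrative values, not taken from the patch; to_vllm() and to_tgi() follow the same filter-then-construct pattern for their backends):

    from lighteval.models.model_input import GenerationParameters

    params = GenerationParameters(temperature=0.2, top_k=50, max_new_tokens=64)
    gen_config = params.to_transformers()  # returns a transformers.GenerationConfig
    # Only the non-None fields (temperature, top_k, max_new_tokens) plus the
    # always-set output_scores/return_dict_in_generate flags end up in gen_config.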
@@ -117,6 +118,7 @@ def __init__( self.precision = _get_dtype(config.dtype, config=self._config) self.model_info = ModelInfo(model_name=self.model_name, model_sha=self.model_sha) + self.sampling_params = config.sampling_params self.pairwise_tokenization = config.pairwise_tokenization @property @@ -300,16 +302,19 @@ def _generate( generate: bool = True, ) -> list[GenerativeResponse]: """Contains the actual logic of the generation.""" + sampling_params = self.sampling_params or SamplingParams() if generate: - sampling_params = SamplingParams( - temperature=float(self._config.temperature) if num_samples > 1 else 0.0, - n=num_samples, - max_tokens=max_new_tokens, - stop=stop_tokens, - logprobs=1 if returns_logits else 0, - ) + sampling_params.temperature = float(self._config.temperature) if num_samples > 1 else 0.0 + sampling_params.n = num_samples + sampling_params.max_tokens = max_new_tokens + sampling_params.stop = stop_tokens + sampling_params.logprobs = 1 if returns_logits else 0 + else: - sampling_params = SamplingParams(temperature=0, prompt_logprobs=1, max_tokens=1, detokenize=False) + sampling_params.temperature = 0 + sampling_params.prompt_logprobs = 1 + sampling_params.max_tokens = 1 + sampling_params.detokenize = False if self.data_parallel_size > 1: # vLLM hangs if tensor_parallel > 1 and resources are set in ray.remote From 12c6a909c2b613a1dbbef5f6fb841001883d70b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 17:42:08 +0100 Subject: [PATCH 02/32] added tgi model --- src/lighteval/models/endpoints/tgi_model.py | 23 ++++++++++++--------- src/lighteval/models/model_input.py | 5 +++++ src/lighteval/models/vllm/vllm_model.py | 2 +- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index d95609a50..f7d347e0f 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -21,11 +21,11 @@ # SOFTWARE. 
import asyncio -from dataclasses import dataclass -from typing import Coroutine, Optional +from dataclasses import dataclass, replace +from typing import Coroutine import requests -from huggingface_hub import TextGenerationInputGrammarType, TextGenerationOutput +from huggingface_hub import TextGenerationInputGenerateParameters, TextGenerationOutput from transformers import AutoTokenizer from lighteval.models.endpoints.endpoint_model import InferenceEndpointModel, ModelInfo @@ -57,12 +57,15 @@ class TGIModelConfig: class ModelClient(InferenceEndpointModel): _DEFAULT_MAX_LENGTH: int = 4096 - def __init__(self, address, auth_token=None, model_id=None) -> None: + def __init__( + self, address, auth_token=None, model_id=None, generation_config: TextGenerationInputGenerateParameters = None + ) -> None: if not is_tgi_available(): raise ImportError(NO_TGI_ERROR_MSG) headers = {} if auth_token is None else {"Authorization": f"Bearer {auth_token}"} self.client = AsyncClient(address, headers=headers, timeout=240) + self.generation_config = generation_config self._max_gen_toks = 256 self.model_info = requests.get(f"{address}/info", headers=headers).json() if "model_id" not in self.model_info: @@ -88,17 +91,17 @@ def _async_process_request( context: str, stop_tokens: list[str], max_tokens: int, - grammar: Optional[TextGenerationInputGrammarType] = None, ) -> Coroutine[None, list[TextGenerationOutput], str]: # Todo: add an option to launch with conversational instead for chat prompts - generated_text = self.client.generate( - prompt=context, - decoder_input_details=True, - grammar=grammar, + generation_config: TextGenerationInputGenerateParameters = replace( + self.generation_config, + stop=stop_tokens, max_new_tokens=max_tokens, - stop_sequences=stop_tokens, + decoder_input_details=True, ) + generated_text = self.client.generate(prompt=context, generation_config=generation_config) + return generated_text def _process_request(self, *args, **kwargs) -> TextGenerationOutput: diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index fca3d1db1..8df48ea58 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -23,6 +23,8 @@ from dataclasses import dataclass from typing import Optional +from huggingface_hub import TextGenerationInputGrammarType + from lighteval.utils.imports import NO_VLLM_ERROR_MSG, is_vllm_available @@ -45,6 +47,8 @@ class GenerationParameters: top_p: Optional[int] = None # vllm, transformers, tgi truncate_prompt: Optional[bool] = None # vllm, tgi + grammar: Optional[TextGenerationInputGrammarType] = None # tgi + def to_vllm(self): if not is_vllm_available(): raise ImportError(NO_VLLM_ERROR_MSG) @@ -108,5 +112,6 @@ def to_tgi(self): "top_k": self.top_k, "top_p": self.top_p, "truncate": self.truncate_prompt, + "grammar": self.grammar, } return TextGenerationInputGenerateParameters(**{k: v for k, v in args.items() if v is not None}) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 944d7114e..98345ce65 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -302,7 +302,7 @@ def _generate( generate: bool = True, ) -> list[GenerativeResponse]: """Contains the actual logic of the generation.""" - sampling_params = self.sampling_params or SamplingParams() + sampling_params = self.sampling_params.clone() or SamplingParams() if generate: sampling_params.temperature = float(self._config.temperature) if num_samples > 1 else 0.0 
sampling_params.n = num_samples From c9657d29c4042a61d3efb708cccb85c6d807ca00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 18:35:03 +0100 Subject: [PATCH 03/32] grammar is task dependant, removed from the cofnig --- .../models/endpoints/endpoint_model.py | 34 ++++++++++++------- src/lighteval/models/endpoints/tgi_model.py | 27 +++++++++------ src/lighteval/models/model_input.py | 5 --- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 112338964..6483ba2d7 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -24,7 +24,7 @@ import logging import re import time -from dataclasses import dataclass +from dataclasses import dataclass, replace from typing import Coroutine, Dict, List, Optional, Union import requests @@ -35,6 +35,7 @@ InferenceEndpoint, InferenceEndpointError, InferenceEndpointTimeoutError, + TextGenerationInputGenerateParameters, TextGenerationInputGrammarType, TextGenerationOutput, create_inference_endpoint, @@ -78,6 +79,7 @@ class InferenceModelConfig: model: str add_special_tokens: bool = True + generation_config: TextGenerationInputGenerateParameters @dataclass @@ -98,6 +100,7 @@ class InferenceEndpointModelConfig: namespace: str = None # The namespace under which to launch the endopint. Defaults to the current user's namespace image_url: str = None env_vars: dict = None + generation_config: TextGenerationInputGenerateParameters def __post_init__(self): # xor operator, one is None but not the other @@ -281,6 +284,7 @@ def __init__( # noqa: C901 model_dtype=config.model_dtype or "default", model_size=-1, ) + self.generation_config = config.generation_config or TextGenerationInputGenerateParameters() @staticmethod def get_larger_hardware_suggestion(cur_instance_type: str = None, cur_instance_size: str = None): @@ -364,14 +368,16 @@ def _async_process_request( ) -> Coroutine[None, list[TextGenerationOutput], str]: # Todo: add an option to launch with conversational instead for chat prompts # https://huggingface.co/docs/huggingface_hub/v0.20.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient.conversational - generated_text = self.async_client.text_generation( - prompt=context, + generation_config: TextGenerationInputGenerateParameters = replace( + self.generation_config, + stop=stop_tokens, + max_new_tokens=max_tokens, details=True, decoder_input_details=True, - grammar=grammar, - max_new_tokens=max_tokens, - stop_sequences=stop_tokens, - # truncate=, + ) + + generated_text = self.async_client.text_generation( + prompt=context, generation_config=generation_config, grammar=grammar ) return generated_text @@ -385,14 +391,16 @@ def _process_request( ) -> TextGenerationOutput: # Todo: add an option to launch with conversational instead for chat prompts # https://huggingface.co/docs/huggingface_hub/v0.20.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient.conversational - generated_text = self.client.text_generation( - prompt=context, + generation_config: TextGenerationInputGenerateParameters = replace( + self.generation_config, + stop=stop_tokens, + max_new_tokens=max_tokens, details=True, decoder_input_details=True, - grammar=grammar, - max_new_tokens=max_tokens, - stop_sequences=stop_tokens, - # truncate=, + ) + + generated_text = self.client.text_generation( + prompt=context, generation_config=generation_config, 
grammar=grammar ) return generated_text diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index f7d347e0f..52a9fd427 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -22,10 +22,10 @@ import asyncio from dataclasses import dataclass, replace -from typing import Coroutine +from typing import Coroutine, Optional import requests -from huggingface_hub import TextGenerationInputGenerateParameters, TextGenerationOutput +from huggingface_hub import TextGenerationInputGenerateParameters, TextGenerationInputGrammarType, TextGenerationOutput from transformers import AutoTokenizer from lighteval.models.endpoints.endpoint_model import InferenceEndpointModel, ModelInfo @@ -50,6 +50,7 @@ class TGIModelConfig: inference_server_address: str inference_server_auth: str model_id: str + generation_config: TextGenerationInputGenerateParameters = None # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite @@ -57,21 +58,21 @@ class TGIModelConfig: class ModelClient(InferenceEndpointModel): _DEFAULT_MAX_LENGTH: int = 4096 - def __init__( - self, address, auth_token=None, model_id=None, generation_config: TextGenerationInputGenerateParameters = None - ) -> None: + def __init__(self, config: TGIModelConfig) -> None: if not is_tgi_available(): raise ImportError(NO_TGI_ERROR_MSG) - headers = {} if auth_token is None else {"Authorization": f"Bearer {auth_token}"} + headers = ( + {} if config.inference_server_auth is None else {"Authorization": f"Bearer {config.inference_server_auth}"} + ) - self.client = AsyncClient(address, headers=headers, timeout=240) - self.generation_config = generation_config + self.client = AsyncClient(config.inference_server_address, headers=headers, timeout=240) + self.generation_config = config.generation_config or TextGenerationInputGenerateParameters() self._max_gen_toks = 256 - self.model_info = requests.get(f"{address}/info", headers=headers).json() + self.model_info = requests.get(f"{config.inference_server_address}/info", headers=headers).json() if "model_id" not in self.model_info: raise ValueError("Error occured when fetching info: " + str(self.model_info)) - if model_id: - self.model_info["model_id"] = model_id + if config.model_id: + self.model_info["model_id"] = config.model_id self._tokenizer = AutoTokenizer.from_pretrained(self.model_info["model_id"]) self._add_special_tokens = True self.use_async = True @@ -91,13 +92,17 @@ def _async_process_request( context: str, stop_tokens: list[str], max_tokens: int, + grammar: Optional[TextGenerationInputGrammarType] = None, ) -> Coroutine[None, list[TextGenerationOutput], str]: # Todo: add an option to launch with conversational instead for chat prompts + # We create a copy of the current text generation params generation_config: TextGenerationInputGenerateParameters = replace( self.generation_config, stop=stop_tokens, max_new_tokens=max_tokens, + details=True, decoder_input_details=True, + grammar=grammar, ) generated_text = self.client.generate(prompt=context, generation_config=generation_config) diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index 8df48ea58..fca3d1db1 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -23,8 +23,6 @@ from dataclasses import dataclass from typing import Optional -from huggingface_hub import TextGenerationInputGrammarType - from 
lighteval.utils.imports import NO_VLLM_ERROR_MSG, is_vllm_available @@ -47,8 +45,6 @@ class GenerationParameters: top_p: Optional[int] = None # vllm, transformers, tgi truncate_prompt: Optional[bool] = None # vllm, tgi - grammar: Optional[TextGenerationInputGrammarType] = None # tgi - def to_vllm(self): if not is_vllm_available(): raise ImportError(NO_VLLM_ERROR_MSG) @@ -112,6 +108,5 @@ def to_tgi(self): "top_k": self.top_k, "top_p": self.top_p, "truncate": self.truncate_prompt, - "grammar": self.grammar, } return TextGenerationInputGenerateParameters(**{k: v for k, v in args.items() if v is not None}) From ac6565ad6d38d1104cb07580d81376e8b28c0d37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 18:58:34 +0100 Subject: [PATCH 04/32] added openai config + moved everything to dict --- .../models/endpoints/endpoint_model.py | 13 ++++++----- .../models/endpoints/openai_model.py | 5 ++++- src/lighteval/models/endpoints/tgi_model.py | 4 ++-- src/lighteval/models/model_input.py | 22 +++++-------------- .../models/transformers/base_model.py | 10 ++++----- src/lighteval/models/vllm/vllm_model.py | 4 ++-- 6 files changed, 26 insertions(+), 32 deletions(-) diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 6483ba2d7..9aaff6feb 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -100,7 +100,7 @@ class InferenceEndpointModelConfig: namespace: str = None # The namespace under which to launch the endopint. Defaults to the current user's namespace image_url: str = None env_vars: dict = None - generation_config: TextGenerationInputGenerateParameters + generation_config: dict = {} def __post_init__(self): # xor operator, one is None but not the other @@ -284,7 +284,7 @@ def __init__( # noqa: C901 model_dtype=config.model_dtype or "default", model_size=-1, ) - self.generation_config = config.generation_config or TextGenerationInputGenerateParameters() + self.generation_config = TextGenerationInputGenerateParameters(**config.generation_config) @staticmethod def get_larger_hardware_suggestion(cur_instance_type: str = None, cur_instance_size: str = None): @@ -374,11 +374,10 @@ def _async_process_request( max_new_tokens=max_tokens, details=True, decoder_input_details=True, + grammar=grammar, ) - generated_text = self.async_client.text_generation( - prompt=context, generation_config=generation_config, grammar=grammar - ) + generated_text = self.async_client.text_generation(prompt=context, generation_config=generation_config) return generated_text @@ -397,10 +396,12 @@ def _process_request( max_new_tokens=max_tokens, details=True, decoder_input_details=True, + grammar=grammar, ) generated_text = self.client.text_generation( - prompt=context, generation_config=generation_config, grammar=grammar + prompt=context, + generation_config=generation_config, ) return generated_text diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index b2ca25285..9f3950525 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -62,14 +62,16 @@ @dataclass class OpenAIModelConfig: model: str + sampling_params: dict = {} class OpenAIClient(LightevalModel): _DEFAULT_MAX_LENGTH: int = 4096 - def __init__(self, config, env_config) -> None: + def __init__(self, config: OpenAIModelConfig, env_config) -> None: api_key = os.environ["OPENAI_API_KEY"] self.client = 
OpenAI(api_key=api_key) + self.sampling_params = config.sampling_params self.model_info = ModelInfo( model_name=config.model, @@ -96,6 +98,7 @@ def __call_api(self, prompt, return_logits, max_new_tokens, num_samples, logit_b logprobs=return_logits, logit_bias=logit_bias, n=num_samples, + **self.sampling_params, ) return response except Exception as e: diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 52a9fd427..2ae112a05 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -50,7 +50,7 @@ class TGIModelConfig: inference_server_address: str inference_server_auth: str model_id: str - generation_config: TextGenerationInputGenerateParameters = None + generation_config: dict = {} # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite @@ -66,7 +66,7 @@ def __init__(self, config: TGIModelConfig) -> None: ) self.client = AsyncClient(config.inference_server_address, headers=headers, timeout=240) - self.generation_config = config.generation_config or TextGenerationInputGenerateParameters() + self.generation_config = TextGenerationInputGenerateParameters(**config.generation_config) self._max_gen_toks = 256 self.model_info = requests.get(f"{config.inference_server_address}/info", headers=headers).json() if "model_id" not in self.model_info: diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index fca3d1db1..d2a8c7d58 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -23,8 +23,6 @@ from dataclasses import dataclass from typing import Optional -from lighteval.utils.imports import NO_VLLM_ERROR_MSG, is_vllm_available - @dataclass class GenerationParameters: @@ -45,11 +43,7 @@ class GenerationParameters: top_p: Optional[int] = None # vllm, transformers, tgi truncate_prompt: Optional[bool] = None # vllm, tgi - def to_vllm(self): - if not is_vllm_available(): - raise ImportError(NO_VLLM_ERROR_MSG) - from vllm import SamplingParameters - + def to_vllm_openai_dict(self): # Task specific sampling params to set in model: n, best_of, use_beam_search # Generation specific params to set in model: logprobs, prompt_logprobs args = { @@ -68,11 +62,9 @@ def to_vllm(self): "min_tokens": self.min_new_tokens, "truncate_prompt_tokens": self.truncate_prompt, } - return SamplingParameters(**{k: v for k, v in args.items() if v is not None}) - - def to_transformers(self): - from transformers import GenerationConfig + return {k: v for k, v in args.items() if v is not None} + def to_transformers_dict(self): # Task specific sampling params to set in model: do_sample, num_return_sequences, num_beans args = { "max_new_tokens": self.max_new_tokens, @@ -90,11 +82,9 @@ def to_transformers(self): } # Even though we only use the dict representation of the GenerationConfig # we still create the object as it uses validation steps - return GenerationConfig(**{k: v for k, v in args.items() if v is not None}) - - def to_tgi(self): - from huggingface_hub import TextGenerationInputGenerateParameters + return {k: v for k, v in args.items() if v is not None} + def to_tgi_inferenceendpoint_dict(self): # Task specific sampling params to set in model: best_of, do_sample args = { "decoder_input_details": True, @@ -109,4 +99,4 @@ def to_tgi(self): "top_p": self.top_p, "truncate": self.truncate_prompt, } - return TextGenerationInputGenerateParameters(**{k: v for k, v in args.items() if v is 
not None}) + return {k: v for k, v in args.items() if v is not None} diff --git a/src/lighteval/models/transformers/base_model.py b/src/lighteval/models/transformers/base_model.py index 36687efc1..33294a571 100644 --- a/src/lighteval/models/transformers/base_model.py +++ b/src/lighteval/models/transformers/base_model.py @@ -153,7 +153,7 @@ class BaseModelConfig: trust_remote_code: bool = False use_chat_template: bool = False compile: bool = False - generation_config: GenerationConfig = None + generation_config: dict = {} def __post_init__(self): # Making sure this parameter is a boolean @@ -259,7 +259,7 @@ def __init__( self.model_sha = config.get_model_sha() self.precision = _get_dtype(config.dtype, config=self._config) - self.generation_config = config.generation_config.to_dict() + self.generation_config = config.generation_config if is_accelerate_available(): model_size, _ = calculate_maximum_sizes(self.model) @@ -636,7 +636,7 @@ def greedy_until_multi_turn( # noqa: C901 ] ) - generation_config = GenerationConfig.from_dict(self.generation_config or {}) + generation_config = GenerationConfig.from_dict(self.generation_config) generation_config.update( { "max_new_tokens": max_generated_tokens, @@ -679,7 +679,7 @@ def greedy_until_multi_turn( # noqa: C901 ] ) - generation_config = GenerationConfig.from_dict(self.generation_config or {}) + generation_config = GenerationConfig.from_dict(self.generation_config) generation_config.update( { "max_new_tokens": max_generated_tokens, @@ -876,7 +876,7 @@ def _generate( stopping_criteria = stop_sequences_criteria(self.tokenizer, stop_sequences=stop_tokens, batch=batch) batch_size, _ = batch.input_ids.shape - generation_config = GenerationConfig.from_dict(self.generation_config or {}) + generation_config = GenerationConfig.from_dict(self.generation_config) generation_config.update( { "max_new_tokens": max_new_tokens, diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 98345ce65..64e110e5f 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -85,7 +85,7 @@ class VLLMModelConfig: True # whether to add a space at the start of each continuation in multichoice generation ) pairwise_tokenization: bool = False # whether to tokenize the context and continuation separately or together. - sampling_params: SamplingParams = None # sampling parameters to use for generation + sampling_params: dict = {} # sampling parameters to use for generation subfolder: Optional[str] = None temperature: float = 0.6 # will be used for multi sampling tasks, for tasks requiring no sampling, this will be ignored and set to 0. 
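With this patch the converters return plain dicts (None entries stripped) and each backend rebuilds its own object from them: SamplingParams(**config.sampling_params) for vLLM below, GenerationConfig.from_dict(...) for transformers, and keyword splatting into the OpenAI client call. A quick sketch of the dict output with illustrative values (only None is filtered, so an explicit temperature of 0.0 is kept):

    params = GenerationParameters(temperature=0.0, max_new_tokens=128, stop_tokens=["\n\n"])
    params.to_vllm_openai_dict()
    # {"temperature": 0.0, "stop": ["\n\n"], "max_tokens": 128}
    params.to_transformers_dict()
    # {"max_new_tokens": 128, "stop_strings": ["\n\n"], "temperature": 0.0,
    #  "output_scores": True, "return_dict_in_generate": True}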
@@ -118,7 +118,7 @@ def __init__( self.precision = _get_dtype(config.dtype, config=self._config) self.model_info = ModelInfo(model_name=self.model_name, model_sha=self.model_sha) - self.sampling_params = config.sampling_params + self.sampling_params = SamplingParams(**config.sampling_params) self.pairwise_tokenization = config.pairwise_tokenization @property From 2628571b4b9a20372169eed4a875800c4946ff2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 19:13:16 +0100 Subject: [PATCH 05/32] added generation configs to models --- src/lighteval/main_accelerate.py | 3 +++ src/lighteval/main_endpoint.py | 17 ++++++++++++++--- src/lighteval/models/model_input.py | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index 27e4141f5..28da4dc90 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -107,6 +107,7 @@ def accelerate( # noqa C901 from accelerate import Accelerator, InitProcessGroupKwargs from lighteval.logging.evaluation_tracker import EvaluationTracker + from lighteval.models.model_input import GenerationParameters from lighteval.models.transformers.adapter_model import AdapterModelConfig from lighteval.models.transformers.base_model import BaseModelConfig, BitsAndBytesConfig from lighteval.models.transformers.delta_model import DeltaModelConfig @@ -154,6 +155,8 @@ def accelerate( # noqa C901 # We extract the model args args_dict = {k.split("=")[0]: k.split("=")[1] for k in config["base_params"]["model_args"].split(",")} + args_dict["generation_config"] = GenerationParameters.from_dict(config).to_transformers_dict() + # We store the relevant other args args_dict["base_model"] = config["merged_weights"]["base_model"] args_dict["compile"] = bool(config["base_params"]["compile"]) diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py index d17da4325..1e51c3dd8 100644 --- a/src/lighteval/main_endpoint.py +++ b/src/lighteval/main_endpoint.py @@ -93,7 +93,9 @@ def openai( Evaluate OPENAI models. 
""" from lighteval.logging.evaluation_tracker import EvaluationTracker - from lighteval.models.model_config import OpenAIModelConfig + + # from lighteval.models.model_input import GenerationParameters + from lighteval.models.endpoints.openai_model import OpenAIModelConfig from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) @@ -107,7 +109,8 @@ def openai( ) parallelism_manager = ParallelismManager.OPENAI - model_config = OpenAIModelConfig(model=model_name) + # sampling_params = GenerationParameters.from_dict(config) + model_config = OpenAIModelConfig(model=model_name) # , sampling_params=sampling_params.to_vllm_openai_dict()) pipeline_params = PipelineParameters( launcher_type=parallelism_manager, @@ -204,6 +207,7 @@ def inference_endpoint( from lighteval.models.endpoints.endpoint_model import ( InferenceEndpointModelConfig, ) + from lighteval.models.model_input import GenerationParameters from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) @@ -226,6 +230,7 @@ def inference_endpoint( # Find a way to add this back # if config["base_params"].get("endpoint_name", None): # return InferenceModelConfig(model=config["base_params"]["endpoint_name"]) + generation_config = GenerationParameters.from_dict(config) all_params = { "model_name": config["base_params"].get("model_name", None), "endpoint_name": config["base_params"].get("endpoint_name", None), @@ -240,7 +245,9 @@ def inference_endpoint( "namespace": config.get("instance", {}).get("namespace", None), "image_url": config.get("instance", {}).get("image_url", None), "env_vars": config.get("instance", {}).get("env_vars", None), + "generation_config": generation_config.to_tgi_inferenceendpoint_dict(), } + model_config = InferenceEndpointModelConfig( # We only initialize params which have a non default value **{k: v for k, v in all_params.items() if v is not None}, @@ -338,7 +345,8 @@ def tgi( import yaml from lighteval.logging.evaluation_tracker import EvaluationTracker - from lighteval.models.model_config import TGIModelConfig + from lighteval.models.endpoints.tgi_model import TGIModelConfig + from lighteval.models.model_input import GenerationParameters from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) @@ -356,10 +364,13 @@ def tgi( with open(model_config_path, "r") as f: config = yaml.safe_load(f)["model"] + generation_config = GenerationParameters.from_dict(config) + model_config = TGIModelConfig( inference_server_address=config["instance"]["inference_server_address"], inference_server_auth=config["instance"]["inference_server_auth"], model_id=config["instance"]["model_id"], + generation_config=generation_config.to_tgi_inferenceendpoint_dict(), ) pipeline_params = PipelineParameters( diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index d2a8c7d58..569cd7dc6 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -43,6 +43,26 @@ class GenerationParameters: top_p: Optional[int] = None # vllm, transformers, tgi truncate_prompt: Optional[bool] = None # vllm, tgi + @classmethod + def from_dict(cls, config_dict): + if "generation_parameters" not in config_dict: + return cls + cls.early_stopping = config_dict["generation_parameters"].get("early_stopping", None) + cls.repetition_penalty = 
config_dict["generation_parameters"].get("repetition_penalty", None) + cls.frequency_penalty = config_dict["generation_parameters"].get("frequency_penalty", None) + cls.length_penalty = config_dict["generation_parameters"].get("length_penalty", None) + cls.presence_penalty = config_dict["generation_parameters"].get("presence_penalty", None) + cls.max_new_tokens = config_dict["generation_parameters"].get("max_new_tokens", None) + cls.min_new_tokens = config_dict["generation_parameters"].get("min_new_tokens", None) + cls.seed = config_dict["generation_parameters"].get("seed", None) + cls.stop_tokens = config_dict["generation_parameters"].get("stop_tokens", None) + cls.temperature = config_dict["generation_parameters"].get("temperature", None) + cls.top_k = config_dict["generation_parameters"].get("top_k", None) + cls.min_p = config_dict["generation_parameters"].get("min_p", None) + cls.top_p = config_dict["generation_parameters"].get("top_p", None) + cls.truncate_prompt = config_dict["generation_parameters"].get("truncate_prompt", None) + return cls + def to_vllm_openai_dict(self): # Task specific sampling params to set in model: n, best_of, use_beam_search # Generation specific params to set in model: logprobs, prompt_logprobs From c24bf9b7725715918ec43361dacc28ecc195d90f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 19:17:12 +0100 Subject: [PATCH 06/32] added generation configs to models --- src/lighteval/models/endpoints/endpoint_model.py | 2 +- src/lighteval/models/endpoints/openai_model.py | 2 +- src/lighteval/models/endpoints/tgi_model.py | 2 +- src/lighteval/models/transformers/base_model.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 9aaff6feb..9d4e4afa7 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -100,7 +100,7 @@ class InferenceEndpointModelConfig: namespace: str = None # The namespace under which to launch the endopint. 
Defaults to the current user's namespace image_url: str = None env_vars: dict = None - generation_config: dict = {} + generation_config: dict = dict def __post_init__(self): # xor operator, one is None but not the other diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index 9f3950525..3020ada49 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -62,7 +62,7 @@ @dataclass class OpenAIModelConfig: model: str - sampling_params: dict = {} + sampling_params: dict = dict class OpenAIClient(LightevalModel): diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 2ae112a05..d3488a98b 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -50,7 +50,7 @@ class TGIModelConfig: inference_server_address: str inference_server_auth: str model_id: str - generation_config: dict = {} + generation_config: dict = dict # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite diff --git a/src/lighteval/models/transformers/base_model.py b/src/lighteval/models/transformers/base_model.py index 33294a571..5759b14ca 100644 --- a/src/lighteval/models/transformers/base_model.py +++ b/src/lighteval/models/transformers/base_model.py @@ -153,7 +153,7 @@ class BaseModelConfig: trust_remote_code: bool = False use_chat_template: bool = False compile: bool = False - generation_config: dict = {} + generation_config: dict = dict def __post_init__(self): # Making sure this parameter is a boolean From 0aa2e194e7fdf2b8857cb0d224ce4fcda10fe449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 19:20:27 +0100 Subject: [PATCH 07/32] fix --- src/lighteval/models/endpoints/endpoint_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 9d4e4afa7..b34de9fe2 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -79,7 +79,7 @@ class InferenceModelConfig: model: str add_special_tokens: bool = True - generation_config: TextGenerationInputGenerateParameters + generation_config: dict = dict @dataclass From e3311bdedc4a083815c0d74ea4b75fd1dec4329c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 19:23:34 +0100 Subject: [PATCH 08/32] fix --- src/lighteval/models/vllm/vllm_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 64e110e5f..60fcdbafb 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -85,7 +85,7 @@ class VLLMModelConfig: True # whether to add a space at the start of each continuation in multichoice generation ) pairwise_tokenization: bool = False # whether to tokenize the context and continuation separately or together. - sampling_params: dict = {} # sampling parameters to use for generation + sampling_params: dict = dict # sampling parameters to use for generation subfolder: Optional[str] = None temperature: float = 0.6 # will be used for multi sampling tasks, for tasks requiring no sampling, this will be ignored and set to 0. 
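Aside on the defaults touched in patches 06-08: a dataclass field declared as generation_config: dict = {} is rejected by Python (mutable defaults are not allowed), while = dict is accepted but makes the default the dict type object rather than an empty mapping. The idiomatic empty-dict default would be field(default_factory=dict), as in the illustrative sketch below (ExampleConfig is hypothetical, not from the series); patch 10 further down avoids the question entirely by defaulting to None and building a GenerationParameters object in __post_init__.

    from dataclasses import dataclass, field

    @dataclass
    class ExampleConfig:
        # generation_config: dict = {}    -> ValueError: mutable default not allowed
        # generation_config: dict = dict  -> default is the `dict` type, not {}
        generation_config: dict = field(default_factory=dict)  # empty dict per instance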
From a3f535f0dfe9ca9c2531958c20798859f9d4a054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Mon, 9 Dec 2024 19:31:30 +0100 Subject: [PATCH 09/32] added doc --- src/lighteval/models/model_input.py | 38 ++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index 569cd7dc6..ae74aa558 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -44,7 +44,19 @@ class GenerationParameters: truncate_prompt: Optional[bool] = None # vllm, tgi @classmethod - def from_dict(cls, config_dict): + def from_dict(cls, config_dict: dict): + """Creates a GenerationParameters object from a config dictionary + + Args: + config_dict (dict): Config dictionary. Must obey the following shape: + {"generation_parameters": + { + "early_stopping": value, + ... + "truncate_prompt": value + } + } + """ if "generation_parameters" not in config_dict: return cls cls.early_stopping = config_dict["generation_parameters"].get("early_stopping", None) @@ -63,7 +75,13 @@ def from_dict(cls, config_dict): cls.truncate_prompt = config_dict["generation_parameters"].get("truncate_prompt", None) return cls - def to_vllm_openai_dict(self): + def to_vllm_openai_dict(self) -> dict: + """Selects relevant generation and sampling parameters for vllm and openai models. + Doc: https://docs.vllm.ai/en/v0.5.5/dev/sampling_params.html + + Returns: + dict: The parameters to create a vllm.SamplingParams or just provide OpenAI params as such in the model config. + """ # Task specific sampling params to set in model: n, best_of, use_beam_search # Generation specific params to set in model: logprobs, prompt_logprobs args = { @@ -84,7 +102,13 @@ def to_vllm_openai_dict(self): } return {k: v for k, v in args.items() if v is not None} - def to_transformers_dict(self): + def to_transformers_dict(self) -> dict: + """Selects relevant generation and sampling parameters for transformers models. + Doc: https://huggingface.co/docs/transformers/v4.46.3/en/main_classes/text_generation#transformers.GenerationConfig + + Returns: + dict: The parameters to create a transformers.GenerationConfig in the model config. + """ # Task specific sampling params to set in model: do_sample, num_return_sequences, num_beans args = { "max_new_tokens": self.max_new_tokens, @@ -104,7 +128,13 @@ def to_transformers_dict(self): # we still create the object as it uses validation steps return {k: v for k, v in args.items() if v is not None} - def to_tgi_inferenceendpoint_dict(self): + def to_tgi_inferenceendpoint_dict(self) -> dict: + """Selects relevant generation and sampling parameters for tgi or inference endpoints models. + Doc: https://huggingface.co/docs/huggingface_hub/v0.26.3/en/package_reference/inference_types#huggingface_hub.TextGenerationInputGenerateParameters + + Returns: + dict: The parameters to create a huggingface_hub.TextGenerationInputGenerateParameters in the model config. 
+ """ # Task specific sampling params to set in model: best_of, do_sample args = { "decoder_input_details": True, From 286668ffd2ca6ebe9e4479706b8beee64855e55b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Tue, 10 Dec 2024 10:31:24 +0100 Subject: [PATCH 10/32] Saved GenerationParameter class in model config classes, then saved in the models to use other attributes later --- src/lighteval/main_accelerate.py | 2 +- src/lighteval/main_endpoint.py | 42 +++++++++++++------ src/lighteval/main_vllm.py | 31 ++++++++++++-- .../models/endpoints/endpoint_model.py | 17 ++++++-- .../models/endpoints/openai_model.py | 10 ++++- src/lighteval/models/endpoints/tgi_model.py | 12 +++++- .../models/transformers/base_model.py | 15 ++++--- src/lighteval/models/vllm/vllm_model.py | 7 +++- 8 files changed, 105 insertions(+), 31 deletions(-) diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index 28da4dc90..733285437 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -155,7 +155,7 @@ def accelerate( # noqa C901 # We extract the model args args_dict = {k.split("=")[0]: k.split("=")[1] for k in config["base_params"]["model_args"].split(",")} - args_dict["generation_config"] = GenerationParameters.from_dict(config).to_transformers_dict() + args_dict["generation_parameters"] = GenerationParameters.from_dict(config) # We store the relevant other args args_dict["base_model"] = config["merged_weights"]["base_model"] diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py index 1e51c3dd8..69921512d 100644 --- a/src/lighteval/main_endpoint.py +++ b/src/lighteval/main_endpoint.py @@ -23,6 +23,7 @@ from typing import Optional import typer +import yaml from typer import Argument, Option from typing_extensions import Annotated @@ -42,10 +43,19 @@ @app.command(rich_help_panel="Evaluation Backends") def openai( # === general === - model_name: Annotated[ - str, Argument(help="The model name to evaluate (has to be available through the openai API.") - ], tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + model_name: Annotated[ + str, + Argument( + help="The model name to evaluate (has to be available through the openai API. Mutually exclusive with the config path" + ), + ] = None, + model_config_path: Annotated[ + str, + Argument( + help="Path to model config yaml file. (examples/model_configs/endpoint_model.yaml). 
Mutually exclusive with the model name" + ), + ] = None, # === Common parameters === system_prompt: Annotated[ Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4) @@ -96,8 +106,12 @@ def openai( # from lighteval.models.model_input import GenerationParameters from lighteval.models.endpoints.openai_model import OpenAIModelConfig + from lighteval.models.model_input import GenerationParameters from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters + if not (model_name is None ^ model_config_path is None): + raise typer.Abort("You must define either the model_name or the model_config_path, not both") + env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) evaluation_tracker = EvaluationTracker( output_dir=output_dir, @@ -109,8 +123,14 @@ def openai( ) parallelism_manager = ParallelismManager.OPENAI - # sampling_params = GenerationParameters.from_dict(config) - model_config = OpenAIModelConfig(model=model_name) # , sampling_params=sampling_params.to_vllm_openai_dict()) + + if model_name: + model_config = OpenAIModelConfig(model=model_name) + else: + with open(model_config_path, "r") as f: + config = yaml.safe_load(f)["model"] + generation_parameters = GenerationParameters.from_dict(config) + model_config = OpenAIModelConfig(model=config["model_name"], generation_parameters=generation_parameters) pipeline_params = PipelineParameters( launcher_type=parallelism_manager, @@ -201,8 +221,6 @@ def inference_endpoint( """ Evaluate models using inference-endpoints as backend. """ - import yaml - from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.models.endpoints.endpoint_model import ( InferenceEndpointModelConfig, @@ -230,7 +248,7 @@ def inference_endpoint( # Find a way to add this back # if config["base_params"].get("endpoint_name", None): # return InferenceModelConfig(model=config["base_params"]["endpoint_name"]) - generation_config = GenerationParameters.from_dict(config) + generation_parameters = GenerationParameters.from_dict(config) all_params = { "model_name": config["base_params"].get("model_name", None), "endpoint_name": config["base_params"].get("endpoint_name", None), @@ -245,7 +263,7 @@ def inference_endpoint( "namespace": config.get("instance", {}).get("namespace", None), "image_url": config.get("instance", {}).get("image_url", None), "env_vars": config.get("instance", {}).get("env_vars", None), - "generation_config": generation_config.to_tgi_inferenceendpoint_dict(), + "generation_parameters": generation_parameters, } model_config = InferenceEndpointModelConfig( @@ -342,8 +360,6 @@ def tgi( """ Evaluate models using TGI as backend. 
""" - import yaml - from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.models.endpoints.tgi_model import TGIModelConfig from lighteval.models.model_input import GenerationParameters @@ -364,13 +380,13 @@ def tgi( with open(model_config_path, "r") as f: config = yaml.safe_load(f)["model"] - generation_config = GenerationParameters.from_dict(config) + generation_parameters = GenerationParameters.from_dict(config) model_config = TGIModelConfig( inference_server_address=config["instance"]["inference_server_address"], inference_server_auth=config["instance"]["inference_server_auth"], model_id=config["instance"]["model_id"], - generation_config=generation_config.to_tgi_inferenceendpoint_dict(), + generation_parameters=generation_parameters, ) pipeline_params = PipelineParameters( diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index 078000da5..8ddef4cde 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -22,7 +22,7 @@ import os from typing import Optional -from typer import Argument, Option +from typer import Abort, Argument, Option from typing_extensions import Annotated @@ -37,8 +37,19 @@ def vllm( # === general === - model_args: Annotated[str, Argument(help="Model arguments in the form key1=value1,key2=value2,...")], tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], + model_args: Annotated[ + str, + Argument( + help="Model arguments in the form key1=value1,key2=value2,... Mutually exclusive with the config path" + ), + ] = None, + model_config_path: Annotated[ + str, + Argument( + help="Path to model config yaml file. (examples/model_configs/vllm_model.yaml). Mutually exclusive with the model args" + ), + ] = None, # === Common parameters === use_chat_template: Annotated[ bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4) @@ -88,10 +99,16 @@ def vllm( """ Evaluate models using vllm as backend. 
""" + import yaml + from lighteval.logging.evaluation_tracker import EvaluationTracker + from lighteval.models.model_input import GenerationParameters from lighteval.models.vllm.vllm_model import VLLMModelConfig from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters + if not (model_args is None ^ model_config_path is None): + raise Abort("You must define either the model_args or the model_config_path, not both") + TOKEN = os.getenv("HF_TOKEN") env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) @@ -118,8 +135,14 @@ def vllm( system_prompt=system_prompt, ) - model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} - model_config = VLLMModelConfig(**model_args_dict) + if model_args: + model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} + model_config = VLLMModelConfig(**model_args_dict) + else: + with open(model_config_path, "r") as f: + config = yaml.safe_load(f)["model"] + generation_parameters = GenerationParameters.from_dict(config) + model_config = VLLMModelConfig(**model_args_dict, generation_parameters=generation_parameters) pipeline = Pipeline( tasks=tasks, diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index b34de9fe2..262257c39 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -49,6 +49,7 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset from lighteval.models.abstract_model import LightevalModel, ModelInfo +from lighteval.models.model_input import GenerationParameters from lighteval.models.model_output import GenerativeResponse, LoglikelihoodResponse, LoglikelihoodSingleTokenResponse from lighteval.tasks.requests import ( GreedyUntilRequest, @@ -79,7 +80,11 @@ class InferenceModelConfig: model: str add_special_tokens: bool = True - generation_config: dict = dict + generation_parameters: GenerationParameters = None + + def __post_init__(self): + if not self.generation_parameters: + self.generation_parameters = GenerationParameters() @dataclass @@ -100,7 +105,7 @@ class InferenceEndpointModelConfig: namespace: str = None # The namespace under which to launch the endopint. 
Defaults to the current user's namespace image_url: str = None env_vars: dict = None - generation_config: dict = dict + generation_parameters: GenerationParameters = None def __post_init__(self): # xor operator, one is None but not the other @@ -112,6 +117,9 @@ def __post_init__(self): if not (self.endpoint_name is None) ^ int(self.model_name is None): raise ValueError("You need to set either endpoint_name or model_name (but not both).") + if not self.generation_parameters: + self.generation_parameters = GenerationParameters() + def get_dtype_args(self) -> Dict[str, str]: if self.model_dtype is None: return {} @@ -284,7 +292,10 @@ def __init__( # noqa: C901 model_dtype=config.model_dtype or "default", model_size=-1, ) - self.generation_config = TextGenerationInputGenerateParameters(**config.generation_config) + self.generation_parameters = config.generation_parameters + self.generation_config = TextGenerationInputGenerateParameters( + **self.generation_parameters.to_tgi_inferenceendpoint_dict() + ) @staticmethod def get_larger_hardware_suggestion(cur_instance_type: str = None, cur_instance_size: str = None): diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index 3020ada49..d3707cba1 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -32,6 +32,7 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset from lighteval.models.abstract_model import LightevalModel from lighteval.models.endpoints.endpoint_model import ModelInfo +from lighteval.models.model_input import GenerationParameters from lighteval.models.model_output import ( GenerativeResponse, LoglikelihoodResponse, @@ -62,7 +63,11 @@ @dataclass class OpenAIModelConfig: model: str - sampling_params: dict = dict + generation_parameters: GenerationParameters = None + + def __post_init__(self): + if not self.generation_parameters: + self.generation_parameters = GenerationParameters() class OpenAIClient(LightevalModel): @@ -71,7 +76,8 @@ class OpenAIClient(LightevalModel): def __init__(self, config: OpenAIModelConfig, env_config) -> None: api_key = os.environ["OPENAI_API_KEY"] self.client = OpenAI(api_key=api_key) - self.sampling_params = config.sampling_params + self.generation_parameters = config.generation_parameters + self.sampling_params = self.generation_parameters.to_vllm_openai_dict() self.model_info = ModelInfo( model_name=config.model, diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index d3488a98b..58742e2f7 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -29,6 +29,7 @@ from transformers import AutoTokenizer from lighteval.models.endpoints.endpoint_model import InferenceEndpointModel, ModelInfo +from lighteval.models.model_input import GenerationParameters from lighteval.utils.imports import NO_TGI_ERROR_MSG, is_tgi_available @@ -50,7 +51,11 @@ class TGIModelConfig: inference_server_address: str inference_server_auth: str model_id: str - generation_config: dict = dict + generation_parameters: GenerationParameters = None + + def __post_init__(self): + if not self.generation_parameters: + self.generation_parameters = GenerationParameters() # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite @@ -66,7 +71,10 @@ def __init__(self, config: TGIModelConfig) -> None: ) self.client = 
AsyncClient(config.inference_server_address, headers=headers, timeout=240) - self.generation_config = TextGenerationInputGenerateParameters(**config.generation_config) + self.generation_parameters = config.generation_parameters + self.generation_config = TextGenerationInputGenerateParameters( + **self.generation_parameters.to_tgi_inferenceendpoint_dict() + ) self._max_gen_toks = 256 self.model_info = requests.get(f"{config.inference_server_address}/info", headers=headers).json() if "model_id" not in self.model_info: diff --git a/src/lighteval/models/transformers/base_model.py b/src/lighteval/models/transformers/base_model.py index 5759b14ca..b7ac7b1c6 100644 --- a/src/lighteval/models/transformers/base_model.py +++ b/src/lighteval/models/transformers/base_model.py @@ -45,6 +45,7 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset, LoglikelihoodSingleTokenDataset from lighteval.models.abstract_model import LightevalModel, ModelInfo +from lighteval.models.model_input import GenerationParameters from lighteval.models.model_output import ( Batch, GenerativeMultiturnResponse, @@ -153,7 +154,7 @@ class BaseModelConfig: trust_remote_code: bool = False use_chat_template: bool = False compile: bool = False - generation_config: dict = dict + generation_parameters: GenerationParameters = None def __post_init__(self): # Making sure this parameter is a boolean @@ -180,6 +181,9 @@ def __post_init__(self): if not isinstance(self.device, str): raise ValueError("Current device must be passed as string.") + if not self.generation_parameters: + self.generation_parameters = GenerationParameters() + def _init_configs(self, model_name: str, env_config: EnvConfig) -> PretrainedConfig: revision = self.revision if self.subfolder: @@ -259,7 +263,8 @@ def __init__( self.model_sha = config.get_model_sha() self.precision = _get_dtype(config.dtype, config=self._config) - self.generation_config = config.generation_config + self.generation_parameters = config.generation_parameters + self.generation_config_dict = self.generation_parameters.to_transformers_dict() if is_accelerate_available(): model_size, _ = calculate_maximum_sizes(self.model) @@ -636,7 +641,7 @@ def greedy_until_multi_turn( # noqa: C901 ] ) - generation_config = GenerationConfig.from_dict(self.generation_config) + generation_config = GenerationConfig.from_dict(self.generation_config_dict) generation_config.update( { "max_new_tokens": max_generated_tokens, @@ -679,7 +684,7 @@ def greedy_until_multi_turn( # noqa: C901 ] ) - generation_config = GenerationConfig.from_dict(self.generation_config) + generation_config = GenerationConfig.from_dict(self.generation_config_dict) generation_config.update( { "max_new_tokens": max_generated_tokens, @@ -876,7 +881,7 @@ def _generate( stopping_criteria = stop_sequences_criteria(self.tokenizer, stop_sequences=stop_tokens, batch=batch) batch_size, _ = batch.input_ids.shape - generation_config = GenerationConfig.from_dict(self.generation_config) + generation_config = GenerationConfig.from_dict(self.generation_config_dict) generation_config.update( { "max_new_tokens": max_new_tokens, diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 60fcdbafb..c78fbd2ff 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -32,6 +32,7 @@ from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset from lighteval.models.abstract_model import LightevalModel, ModelInfo +from lighteval.models.model_input import 
GenerationParameters from lighteval.models.model_output import ( GenerativeResponse, LoglikelihoodResponse, @@ -85,11 +86,15 @@ class VLLMModelConfig: True # whether to add a space at the start of each continuation in multichoice generation ) pairwise_tokenization: bool = False # whether to tokenize the context and continuation separately or together. - sampling_params: dict = dict # sampling parameters to use for generation + generation_parameters: GenerationParameters = None # sampling parameters to use for generation subfolder: Optional[str] = None temperature: float = 0.6 # will be used for multi sampling tasks, for tasks requiring no sampling, this will be ignored and set to 0. + def __post_init__(self): + if not self.generation_parameters: + self.generation_parameters = GenerationParameters() + class VLLMModel(LightevalModel): def __init__( From 0b2475a125cd4675a0144fdaec11d73685b087af Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Tue, 10 Dec 2024 14:25:09 +0000 Subject: [PATCH 11/32] changed model args --- examples/model_configs/base_model.yaml | 1 + src/lighteval/main_endpoint.py | 31 +++++++++---------------- src/lighteval/main_vllm.py | 30 +++++++++--------------- src/lighteval/models/model_input.py | 32 +++++++++++++------------- 4 files changed, 39 insertions(+), 55 deletions(-) diff --git a/examples/model_configs/base_model.yaml b/examples/model_configs/base_model.yaml index d6563e616..8fe1d161d 100644 --- a/examples/model_configs/base_model.yaml +++ b/examples/model_configs/base_model.yaml @@ -9,3 +9,4 @@ model: base_model: null # path to the base_model generation: multichoice_continuations_start_space: null # If true/false, will force multiple choice continuations to start/not start with a space. If none, will do nothing + temperature: 0.5 diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py index 69921512d..2bbe9b75a 100644 --- a/src/lighteval/main_endpoint.py +++ b/src/lighteval/main_endpoint.py @@ -43,19 +43,13 @@ @app.command(rich_help_panel="Evaluation Backends") def openai( # === general === - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], - model_name: Annotated[ + model_args: Annotated[ str, Argument( - help="The model name to evaluate (has to be available through the openai API. Mutually exclusive with the config path" + help="Model name as a string (has to be available through the openai API) or path to yaml config file (see examples/model_configs/base_model.yaml)" ), - ] = None, - model_config_path: Annotated[ - str, - Argument( - help="Path to model config yaml file. (examples/model_configs/endpoint_model.yaml). 
Mutually exclusive with the model name" - ), - ] = None, + ], + tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], # === Common parameters === system_prompt: Annotated[ Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4) @@ -109,8 +103,13 @@ def openai( from lighteval.models.model_input import GenerationParameters from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters - if not (model_name is None ^ model_config_path is None): - raise typer.Abort("You must define either the model_name or the model_config_path, not both") + if model_args.endswith(".yaml"): + with open(model_args, "r") as f: + config = yaml.safe_load(f)["model"] + generation_parameters = GenerationParameters.from_dict(config) + model_config = OpenAIModelConfig(model=config["model_name"], generation_parameters=generation_parameters) + else: + model_config = OpenAIModelConfig(model=model_args) env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) evaluation_tracker = EvaluationTracker( @@ -124,14 +123,6 @@ def openai( parallelism_manager = ParallelismManager.OPENAI - if model_name: - model_config = OpenAIModelConfig(model=model_name) - else: - with open(model_config_path, "r") as f: - config = yaml.safe_load(f)["model"] - generation_parameters = GenerationParameters.from_dict(config) - model_config = OpenAIModelConfig(model=config["model_name"], generation_parameters=generation_parameters) - pipeline_params = PipelineParameters( launcher_type=parallelism_manager, env_config=env_config, diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index 8ddef4cde..850343e6d 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -22,7 +22,7 @@ import os from typing import Optional -from typer import Abort, Argument, Option +from typer import Argument, Option from typing_extensions import Annotated @@ -37,19 +37,13 @@ def vllm( # === general === - tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], model_args: Annotated[ str, Argument( - help="Model arguments in the form key1=value1,key2=value2,... Mutually exclusive with the config path" - ), - ] = None, - model_config_path: Annotated[ - str, - Argument( - help="Path to model config yaml file. (examples/model_configs/vllm_model.yaml). Mutually exclusive with the model args" + help="Model arguments in the form key1=value1,key2=value2,... 
or path to yaml config file (see examples/model_configs/base_model.yaml)" ), - ] = None, + ], + tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], # === Common parameters === use_chat_template: Annotated[ bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANNEL_NAME_4) @@ -106,9 +100,6 @@ def vllm( from lighteval.models.vllm.vllm_model import VLLMModelConfig from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters - if not (model_args is None ^ model_config_path is None): - raise Abort("You must define either the model_args or the model_config_path, not both") - TOKEN = os.getenv("HF_TOKEN") env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir) @@ -135,14 +126,15 @@ def vllm( system_prompt=system_prompt, ) - if model_args: - model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} - model_config = VLLMModelConfig(**model_args_dict) - else: - with open(model_config_path, "r") as f: + if model_args.endswith(".yaml"): + with open(model_args, "r") as f: config = yaml.safe_load(f)["model"] generation_parameters = GenerationParameters.from_dict(config) - model_config = VLLMModelConfig(**model_args_dict, generation_parameters=generation_parameters) + model_config = VLLMModelConfig(config, generation_parameters=generation_parameters) + + else: + model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} + model_config = VLLMModelConfig(**model_args_dict) pipeline = Pipeline( tasks=tasks, diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index ae74aa558..10edf15cc 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -49,7 +49,7 @@ def from_dict(cls, config_dict: dict): Args: config_dict (dict): Config dictionary. Must obey the following shape: - {"generation_parameters": + {"generation": { "early_stopping": value, ... 
@@ -57,22 +57,22 @@ def from_dict(cls, config_dict: dict): } } """ - if "generation_parameters" not in config_dict: + if "generation" not in config_dict: return cls - cls.early_stopping = config_dict["generation_parameters"].get("early_stopping", None) - cls.repetition_penalty = config_dict["generation_parameters"].get("repetition_penalty", None) - cls.frequency_penalty = config_dict["generation_parameters"].get("frequency_penalty", None) - cls.length_penalty = config_dict["generation_parameters"].get("length_penalty", None) - cls.presence_penalty = config_dict["generation_parameters"].get("presence_penalty", None) - cls.max_new_tokens = config_dict["generation_parameters"].get("max_new_tokens", None) - cls.min_new_tokens = config_dict["generation_parameters"].get("min_new_tokens", None) - cls.seed = config_dict["generation_parameters"].get("seed", None) - cls.stop_tokens = config_dict["generation_parameters"].get("stop_tokens", None) - cls.temperature = config_dict["generation_parameters"].get("temperature", None) - cls.top_k = config_dict["generation_parameters"].get("top_k", None) - cls.min_p = config_dict["generation_parameters"].get("min_p", None) - cls.top_p = config_dict["generation_parameters"].get("top_p", None) - cls.truncate_prompt = config_dict["generation_parameters"].get("truncate_prompt", None) + cls.early_stopping = config_dict["generation"].get("early_stopping", None) + cls.repetition_penalty = config_dict["generation"].get("repetition_penalty", None) + cls.frequency_penalty = config_dict["generation"].get("frequency_penalty", None) + cls.length_penalty = config_dict["generation"].get("length_penalty", None) + cls.presence_penalty = config_dict["generation"].get("presence_penalty", None) + cls.max_new_tokens = config_dict["generation"].get("max_new_tokens", None) + cls.min_new_tokens = config_dict["generation"].get("min_new_tokens", None) + cls.seed = config_dict["generation"].get("seed", None) + cls.stop_tokens = config_dict["generation"].get("stop_tokens", None) + cls.temperature = config_dict["generation"].get("temperature", None) + cls.top_k = config_dict["generation"].get("top_k", None) + cls.min_p = config_dict["generation"].get("min_p", None) + cls.top_p = config_dict["generation"].get("top_p", None) + cls.truncate_prompt = config_dict["generation"].get("truncate_prompt", None) return cls def to_vllm_openai_dict(self) -> dict: From 521559f7c45f00bf930877877af78214a6bd2db9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Tue, 10 Dec 2024 20:10:32 +0100 Subject: [PATCH 12/32] test --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0ee4010c4..2e50dc3f3 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@
-[![Tests](https://github.com/huggingface/lighteval/actions/workflows/tests.yaml/badge.svg?branch=main)](https://github.com/huggingface/lighteval/actions/workflows/tests.yaml?query=branch%3Amain) +[![Test suite](https://github.com/huggingface/lighteval/actions/workflows/tests.yaml/badge.svg?branch=main)](https://github.com/huggingface/lighteval/actions/workflows/tests.yaml?query=branch%3Amain) [![Quality](https://github.com/huggingface/lighteval/actions/workflows/quality.yaml/badge.svg?branch=main)](https://github.com/huggingface/lighteval/actions/workflows/quality.yaml?query=branch%3Amain) [![Python versions](https://img.shields.io/pypi/pyversions/lighteval)](https://www.python.org/downloads/) [![License](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/huggingface/lighteval/blob/main/LICENSE) From c088ab679cab30b58516748c84a5b25a939366a7 Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Wed, 11 Dec 2024 10:16:55 +0000 Subject: [PATCH 13/32] updated launchers --- examples/model_configs/base_model.yaml | 2 +- src/lighteval/models/model_input.py | 33 ++++++++++--------- .../models/transformers/base_model.py | 25 +++++++------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/examples/model_configs/base_model.yaml b/examples/model_configs/base_model.yaml index 8fe1d161d..44e095dd3 100644 --- a/examples/model_configs/base_model.yaml +++ b/examples/model_configs/base_model.yaml @@ -1,6 +1,6 @@ model: base_params: - model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... + model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... 
dtype: "bfloat16" compile: true merged_weights: # Ignore this section if you are not using PEFT models diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index 10edf15cc..1d53887f5 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -58,22 +58,23 @@ def from_dict(cls, config_dict: dict): } """ if "generation" not in config_dict: - return cls - cls.early_stopping = config_dict["generation"].get("early_stopping", None) - cls.repetition_penalty = config_dict["generation"].get("repetition_penalty", None) - cls.frequency_penalty = config_dict["generation"].get("frequency_penalty", None) - cls.length_penalty = config_dict["generation"].get("length_penalty", None) - cls.presence_penalty = config_dict["generation"].get("presence_penalty", None) - cls.max_new_tokens = config_dict["generation"].get("max_new_tokens", None) - cls.min_new_tokens = config_dict["generation"].get("min_new_tokens", None) - cls.seed = config_dict["generation"].get("seed", None) - cls.stop_tokens = config_dict["generation"].get("stop_tokens", None) - cls.temperature = config_dict["generation"].get("temperature", None) - cls.top_k = config_dict["generation"].get("top_k", None) - cls.min_p = config_dict["generation"].get("min_p", None) - cls.top_p = config_dict["generation"].get("top_p", None) - cls.truncate_prompt = config_dict["generation"].get("truncate_prompt", None) - return cls + return GenerationParameters() + return GenerationParameters( + early_stopping=config_dict["generation"].get("early_stopping", None), + repetition_penalty=config_dict["generation"].get("repetition_penalty", None), + frequency_penalty=config_dict["generation"].get("frequency_penalty", None), + length_penalty=config_dict["generation"].get("length_penalty", None), + presence_penalty=config_dict["generation"].get("presence_penalty", None), + max_new_tokens=config_dict["generation"].get("max_new_tokens", None), + min_new_tokens=config_dict["generation"].get("min_new_tokens", None), + seed=config_dict["generation"].get("seed", None), + stop_tokens=config_dict["generation"].get("stop_tokens", None), + temperature=config_dict["generation"].get("temperature", None), + top_k=config_dict["generation"].get("top_k", None), + min_p=config_dict["generation"].get("min_p", None), + top_p=config_dict["generation"].get("top_p", None), + truncate_prompt=config_dict["generation"].get("truncate_prompt", None), + ) def to_vllm_openai_dict(self) -> dict: """Selects relevant generation and sampling parameters for vllm and openai models. 
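For reference, a minimal sketch of how the yaml-driven flow above is meant to be exercised (illustrative only: it assumes lighteval with these changes plus PyYAML installed, the keys shown are just a subset of the GenerationParameters fields, and the values are placeholders):

import yaml

from lighteval.models.model_input import GenerationParameters

# Same "model" -> "generation" layout the launchers read with yaml.safe_load(f)["model"].
raw_config = """
model:
  generation:
    temperature: 0.5
    top_p: 0.9
    max_new_tokens: 256
    seed: 42
"""

model_section = yaml.safe_load(raw_config)["model"]
generation_parameters = GenerationParameters.from_dict(model_section)
assert generation_parameters.temperature == 0.5  # fields absent from the yaml stay None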
diff --git a/src/lighteval/models/transformers/base_model.py b/src/lighteval/models/transformers/base_model.py index b7ac7b1c6..0f59d405a 100644 --- a/src/lighteval/models/transformers/base_model.py +++ b/src/lighteval/models/transformers/base_model.py @@ -256,8 +256,11 @@ def __init__( logger.info(f"Using Data Parallelism, putting model on device {self._device}") self.model = self.model.to(self._device) if config.compile: - logger.info("Compiling the model") - self.model.model.compile() + try: + logger.info("Compiling the model") + self.model.model.compile() + except AttributeError as e: + logger.warn("Could not compile the model because: ", e) self.model_name = _simplify_name(config.pretrained) self.model_sha = config.get_model_sha() @@ -883,17 +886,13 @@ def _generate( generation_config = GenerationConfig.from_dict(self.generation_config_dict) generation_config.update( - { - "max_new_tokens": max_new_tokens, - "pad_token_id": self.tokenizer.pad_token_id - if self.tokenizer.pad_token_id - else self.tokenizer.eos_token_id, - "eos_token_id": self.tokenizer.eos_token_id, - "do_sample": do_sample, - "num_return_sequences": num_samples, - "output_logits": returns_logits, - "renormalize_logits": True, - } + max_new_tokens=max_new_tokens, + pad_token_id=self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else self.tokenizer.eos_token_id, + eos_token_id=self.tokenizer.eos_token_id, + do_sample=do_sample, + num_return_sequences=num_samples, + output_logits=returns_logits, + renormalize_logits=True, ) # Compute model generation From 3eb7d0ffe5eb9505e2ae3b97b9681d35482f2f84 Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Thu, 12 Dec 2024 12:03:51 +0000 Subject: [PATCH 14/32] rename base_model to transformers_model --- docs/source/package_reference/models.mdx | 6 ++-- src/lighteval/main_accelerate.py | 6 ++-- src/lighteval/models/model_loader.py | 14 ++++----- .../models/nanotron/nanotron_model.py | 2 +- .../models/transformers/adapter_model.py | 6 ++-- .../models/transformers/delta_model.py | 6 ++-- .../{base_model.py => transformers_model.py} | 31 ++++++++++++++++--- src/lighteval/pipeline.py | 6 ++-- src/lighteval/tasks/lighteval_task.py | 6 ++-- tests/models/test_base_model.py | 6 ++-- 10 files changed, 55 insertions(+), 34 deletions(-) rename src/lighteval/models/transformers/{base_model.py => transformers_model.py} (98%) diff --git a/docs/source/package_reference/models.mdx b/docs/source/package_reference/models.mdx index 096ce7be3..a0e8d68ac 100644 --- a/docs/source/package_reference/models.mdx +++ b/docs/source/package_reference/models.mdx @@ -6,9 +6,9 @@ ## Accelerate and Transformers Models -### BaseModel -[[autodoc]] models.transformers.base_model.BaseModelConfig -[[autodoc]] models.transformers.base_model.BaseModel +### TransformersModel +[[autodoc]] models.transformers.base_model.TransformersModelConfig +[[autodoc]] models.transformers.base_model.TransformersModel ### AdapterModel [[autodoc]] models.transformers.adapter_model.AdapterModelConfig diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index 51fd9e3da..e785034b6 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -109,8 +109,8 @@ def accelerate( # noqa C901 from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.models.model_input import GenerationParameters from lighteval.models.transformers.adapter_model import AdapterModelConfig - from lighteval.models.transformers.base_model import BaseModelConfig, 
BitsAndBytesConfig from lighteval.models.transformers.delta_model import DeltaModelConfig + from lighteval.models.transformers.transformers_model import BitsAndBytesConfig, TransformersModelConfig from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))]) @@ -183,13 +183,13 @@ def accelerate( # noqa C901 elif config["merged_weights"]["base_model"] not in ["", None]: raise ValueError("You can't specify a base model if you are not using delta/adapter weights") else: - model_config = BaseModelConfig(**args_dict) + model_config = TransformersModelConfig(**args_dict) else: model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")} model_args_dict["accelerator"] = accelerator model_args_dict["use_chat_template"] = use_chat_template model_args_dict["compile"] = bool(model_args_dict["compile"]) if "compile" in model_args_dict else False - model_config = BaseModelConfig(**model_args_dict) + model_config = TransformersModelConfig(**model_args_dict) pipeline = Pipeline( tasks=tasks, diff --git a/src/lighteval/models/model_loader.py b/src/lighteval/models/model_loader.py index b0817be4a..bd256472e 100644 --- a/src/lighteval/models/model_loader.py +++ b/src/lighteval/models/model_loader.py @@ -32,8 +32,8 @@ from lighteval.models.endpoints.openai_model import OpenAIClient, OpenAIModelConfig from lighteval.models.endpoints.tgi_model import ModelClient, TGIModelConfig from lighteval.models.transformers.adapter_model import AdapterModel, AdapterModelConfig -from lighteval.models.transformers.base_model import BaseModel, BaseModelConfig from lighteval.models.transformers.delta_model import DeltaModel, DeltaModelConfig +from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig from lighteval.models.vllm.vllm_model import VLLMModel, VLLMModelConfig from lighteval.utils.imports import ( NO_TGI_ERROR_MSG, @@ -50,7 +50,7 @@ def load_model( # noqa: C901 config: Union[ - BaseModelConfig, + TransformersModelConfig, AdapterModelConfig, DeltaModelConfig, TGIModelConfig, @@ -60,7 +60,7 @@ def load_model( # noqa: C901 OpenAIModelConfig, ], env_config: EnvConfig, -) -> Union[BaseModel, AdapterModel, DeltaModel, ModelClient, DummyModel]: +) -> Union[TransformersModel, AdapterModel, DeltaModel, ModelClient, DummyModel]: """Will load either a model from an inference server or a model from a checkpoint, depending on the config type. 
@@ -74,7 +74,7 @@ def load_model( # noqa: C901 ValueError: If you did not specify a base model when using delta weights or adapter weights Returns: - Union[BaseModel, AdapterModel, DeltaModel, ModelClient]: The model that will be evaluated + Union[TransformersModel, AdapterModel, DeltaModel, ModelClient]: The model that will be evaluated """ # Inference server loading if isinstance(config, TGIModelConfig): @@ -83,7 +83,7 @@ def load_model( # noqa: C901 if isinstance(config, InferenceEndpointModelConfig) or isinstance(config, InferenceModelConfig): return load_model_with_inference_endpoints(config, env_config=env_config) - if isinstance(config, BaseModelConfig): + if isinstance(config, TransformersModelConfig): return load_model_with_accelerate_or_default(config=config, env_config=env_config) if isinstance(config, DummyModelConfig): @@ -123,7 +123,7 @@ def load_model_with_inference_endpoints(config: InferenceEndpointModelConfig, en def load_model_with_accelerate_or_default( - config: Union[AdapterModelConfig, BaseModelConfig, DeltaModelConfig], env_config: EnvConfig + config: Union[AdapterModelConfig, TransformersModelConfig, DeltaModelConfig], env_config: EnvConfig ): if isinstance(config, AdapterModelConfig): model = AdapterModel(config=config, env_config=env_config) @@ -135,7 +135,7 @@ def load_model_with_accelerate_or_default( model = VLLMModel(config=config, env_config=env_config) return model else: - model = BaseModel(config=config, env_config=env_config) + model = TransformersModel(config=config, env_config=env_config) return model diff --git a/src/lighteval/models/nanotron/nanotron_model.py b/src/lighteval/models/nanotron/nanotron_model.py index b7e9b1a5d..5f139174c 100644 --- a/src/lighteval/models/nanotron/nanotron_model.py +++ b/src/lighteval/models/nanotron/nanotron_model.py @@ -48,7 +48,7 @@ LoglikelihoodResponse, LoglikelihoodSingleTokenResponse, ) -from lighteval.models.transformers.base_model import LightevalModel, ModelInfo +from lighteval.models.transformers.transformers_model import LightevalModel, ModelInfo from lighteval.tasks.requests import ( GreedyUntilRequest, LoglikelihoodRequest, diff --git a/src/lighteval/models/transformers/adapter_model.py b/src/lighteval/models/transformers/adapter_model.py index e66a1aa1d..4ce3c7f20 100644 --- a/src/lighteval/models/transformers/adapter_model.py +++ b/src/lighteval/models/transformers/adapter_model.py @@ -27,7 +27,7 @@ import torch from transformers import AutoModelForCausalLM, PreTrainedTokenizer -from lighteval.models.transformers.base_model import BaseModel, BaseModelConfig +from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig from lighteval.models.utils import _get_dtype from lighteval.utils.imports import NO_PEFT_ERROR_MSG, is_peft_available from lighteval.utils.utils import EnvConfig @@ -40,7 +40,7 @@ @dataclass -class AdapterModelConfig(BaseModelConfig): +class AdapterModelConfig(TransformersModelConfig): # Adapter models have the specificity that they look at the base model (= the parent) for the tokenizer and config base_model: str = None @@ -57,7 +57,7 @@ def init_configs(self, env_config: EnvConfig): return self._init_configs(self.base_model, env_config) -class AdapterModel(BaseModel): +class AdapterModel(TransformersModel): def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer: # By default, we look at the model config for the model stored in `base_model` # (= the parent model, not the model of interest) diff 
--git a/src/lighteval/models/transformers/delta_model.py b/src/lighteval/models/transformers/delta_model.py index 20780f1e7..40a91992a 100644 --- a/src/lighteval/models/transformers/delta_model.py +++ b/src/lighteval/models/transformers/delta_model.py @@ -28,7 +28,7 @@ from tqdm import tqdm from transformers import AutoModelForCausalLM -from lighteval.models.transformers.base_model import BaseModel, BaseModelConfig +from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig from lighteval.models.utils import _get_dtype, _get_model_sha from lighteval.utils.utils import EnvConfig @@ -37,7 +37,7 @@ @dataclass -class DeltaModelConfig(BaseModelConfig): +class DeltaModelConfig(TransformersModelConfig): # Delta models look at the pretrained (= the delta weights) for the tokenizer and model config base_model: str = None @@ -53,7 +53,7 @@ def get_model_sha(self): return _get_model_sha(repo_id=self.pretrained, revision="main") -class DeltaModel(BaseModel): +class DeltaModel(TransformersModel): def _create_auto_model( self, config: DeltaModelConfig, diff --git a/src/lighteval/models/transformers/base_model.py b/src/lighteval/models/transformers/transformers_model.py similarity index 98% rename from src/lighteval/models/transformers/base_model.py rename to src/lighteval/models/transformers/transformers_model.py index 0f59d405a..7a8217622 100644 --- a/src/lighteval/models/transformers/base_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -86,7 +86,7 @@ @dataclass -class BaseModelConfig: +class TransformersModelConfig: """ Base configuration class for models. @@ -228,11 +228,21 @@ def get_model_sha(self): return _get_model_sha(repo_id=self.pretrained, revision=self.revision) -class BaseModel(LightevalModel): +@dataclass +class BaseModelConfig(TransformersModelConfig): + def __post_init__(self): + super() + + logger.warning( + "Careful, BaseModelConfig is deprecated and will be removed, you should use TransformersModelConfig instead!" + ) + + +class TransformersModel(LightevalModel): def __init__( self, env_config: EnvConfig, - config: BaseModelConfig, + config: TransformersModelConfig, ): """Initializes a HuggingFace `AutoModel` and `AutoTokenizer` for evaluation.""" self._config = config.init_configs(env_config) @@ -403,7 +413,9 @@ def init_model_parallel(self, model_parallel: bool | None = None) -> Tuple[bool, ) return model_parallel, max_mem_this_process, device_map - def _create_auto_model(self, config: BaseModelConfig, env_config: EnvConfig) -> transformers.PreTrainedModel: + def _create_auto_model( + self, config: TransformersModelConfig, env_config: EnvConfig + ) -> transformers.PreTrainedModel: """ Creates an instance of the pretrained HF model. @@ -440,7 +452,7 @@ def _create_auto_model(self, config: BaseModelConfig, env_config: EnvConfig) -> return model def _create_auto_tokenizer( - self, config: BaseModelConfig, env_config: EnvConfig + self, config: TransformersModelConfig, env_config: EnvConfig ) -> transformers.PreTrainedTokenizer: return self._create_auto_tokenizer_with_name( model_name=config.pretrained, @@ -1324,6 +1336,15 @@ def _loglikelihood_single_token( return dataset.get_original_order(res) +class BaseModel(TransformersModel): + def __post_init__(self): + super() + + logger.warning( + "Careful, the BaseModel name is deprecated and will be removed, you should use TransformersModel instead!" 
+ ) + + class MultiTokenEOSCriteria(transformers.StoppingCriteria): """Criteria to stop on the specified multi-token sequence.""" diff --git a/src/lighteval/pipeline.py b/src/lighteval/pipeline.py index facecd8ec..6a40d2801 100644 --- a/src/lighteval/pipeline.py +++ b/src/lighteval/pipeline.py @@ -33,7 +33,7 @@ from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.metrics.utils.metric_utils import MetricCategory -from lighteval.models.model_loader import BaseModel, load_model +from lighteval.models.model_loader import TransformersModel, load_model from lighteval.models.model_output import ModelResponse from lighteval.tasks.lighteval_task import LightevalTask, create_requests_from_tasks from lighteval.tasks.registry import Registry, taskinfo_selector @@ -180,10 +180,10 @@ def _init_model(self, model_config, model): ) else: return load_model(config=model_config, env_config=self.pipeline_parameters.env_config) - if isinstance(model, BaseModel): + if isinstance(model, TransformersModel): return model else: - return BaseModel.from_model( + return TransformersModel.from_model( model=model, use_chat_template=self.pipeline_parameters.use_chat_template, env_config=self.pipeline_parameters.env_config, diff --git a/src/lighteval/tasks/lighteval_task.py b/src/lighteval/tasks/lighteval_task.py index ea01f81e4..ad6815c1d 100644 --- a/src/lighteval/tasks/lighteval_task.py +++ b/src/lighteval/tasks/lighteval_task.py @@ -41,7 +41,7 @@ apply_target_perplexity_metric, ) from lighteval.metrics.metrics import Metric, MetricCategory, Metrics -from lighteval.models.transformers.base_model import BaseModel +from lighteval.models.transformers.transformers_model import TransformersModel from lighteval.tasks.prompt_manager import PromptManager from lighteval.tasks.requests import ( Doc, @@ -578,7 +578,7 @@ def create_requests_from_tasks( # noqa: C901 task_dict: dict[str, LightevalTask], fewshot_dict: dict[str, list[Tuple[int, bool]]], num_fewshot_seeds: int, - lm: BaseModel, + lm: TransformersModel, max_samples: int | None, evaluation_tracker: "EvaluationTracker", use_chat_template: bool, @@ -594,7 +594,7 @@ def create_requests_from_tasks( # noqa: C901 fewshot_dict (dict[str, list[Tuple[int, bool]]]): A dictionary of few shot examples. num_fewshot_seeds (int): number of few shot seeds. - lm (BaseModel): language model class that will be used to eventually + lm (TransformersModel): language model class that will be used to eventually truncate the few shot examples (we need the maximum input size of the model) max_samples (int): maximum number of samples. diff --git a/tests/models/test_base_model.py b/tests/models/test_base_model.py index 4f26d2924..faf9c5755 100644 --- a/tests/models/test_base_model.py +++ b/tests/models/test_base_model.py @@ -21,13 +21,13 @@ # SOFTWARE. 
from lighteval.models.model_loader import load_model -from lighteval.models.transformers.base_model import BaseModel, BaseModelConfig +from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig from lighteval.utils.utils import EnvConfig def test_empty_requests(): - model_config = BaseModelConfig("hf-internal-testing/tiny-random-LlamaForCausalLM") - model: BaseModel = load_model(config=model_config, env_config=EnvConfig(cache_dir=".")) + model_config = TransformersModelConfig("hf-internal-testing/tiny-random-LlamaForCausalLM") + model: TransformersModel = load_model(config=model_config, env_config=EnvConfig(cache_dir=".")) assert model.loglikelihood([]) == [] assert model.loglikelihood_single_token([]) == [] From a585701c1c55e674081fb8365d94c804d04170c7 Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Thu, 12 Dec 2024 12:46:03 +0000 Subject: [PATCH 15/32] removed the use of a GenerationConfig object, as it's got lots of params set by default which slow down generations --- src/lighteval/models/model_input.py | 7 ++++--- .../models/transformers/transformers_model.py | 13 ++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index 1d53887f5..d2e5f4359 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -107,6 +107,9 @@ def to_transformers_dict(self) -> dict: """Selects relevant generation and sampling parameters for transformers models. Doc: https://huggingface.co/docs/transformers/v4.46.3/en/main_classes/text_generation#transformers.GenerationConfig + Note: We actually don't use the GenerationConfig object itself because it has a huge number of parameters automatically + initialized, to a config which slows down evals insanely. + Returns: dict: The parameters to create a transformers.GenerationConfig in the model config. 
""" @@ -114,7 +117,7 @@ def to_transformers_dict(self) -> dict: args = { "max_new_tokens": self.max_new_tokens, "min_new_tokens": self.min_new_tokens, - "early_stopping": self.early_stopping, + "early_stopping": self.early_stopping or False, "stop_strings": self.stop_tokens, "temperature": self.temperature, "top_k": self.top_k, @@ -125,8 +128,6 @@ def to_transformers_dict(self) -> dict: "output_scores": True, "return_dict_in_generate": True, } - # Even though we only use the dict representation of the GenerationConfig - # we still create the object as it uses validation steps return {k: v for k, v in args.items() if v is not None} def to_tgi_inferenceendpoint_dict(self) -> dict: diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index 7a8217622..aedc769fb 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -36,7 +36,6 @@ AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, - GenerationConfig, GPTQConfig, PretrainedConfig, ) @@ -656,7 +655,7 @@ def greedy_until_multi_turn( # noqa: C901 ] ) - generation_config = GenerationConfig.from_dict(self.generation_config_dict) + generation_config = self.generation_config_dict.copy() generation_config.update( { "max_new_tokens": max_generated_tokens, @@ -669,7 +668,7 @@ def greedy_until_multi_turn( # noqa: C901 ) model_outputs: GenerateOutput = self.model.generate( - **model_inputs, stopping_criteria=stopping_criteria, generation_config=generation_config + **model_inputs, stopping_criteria=stopping_criteria, **generation_config ) model_outputs = model_outputs.sequences[0, model_inputs["input_ids"].size(1) :] model_generations = [model_outputs] @@ -699,7 +698,7 @@ def greedy_until_multi_turn( # noqa: C901 ] ) - generation_config = GenerationConfig.from_dict(self.generation_config_dict) + generation_config = self.generation_config_dict.copy() generation_config.update( { "max_new_tokens": max_generated_tokens, @@ -715,7 +714,7 @@ def greedy_until_multi_turn( # noqa: C901 input_ids=model_inputs["input_ids"], attention_mask=model_inputs["attention_mask"], stopping_criteria=stopping_criteria, - generation_config=generation_config, + **generation_config, ) model_outputs = model_outputs.sequences[0, model_inputs["input_ids"].size(1) :] model_generations.append(model_outputs) @@ -896,7 +895,7 @@ def _generate( stopping_criteria = stop_sequences_criteria(self.tokenizer, stop_sequences=stop_tokens, batch=batch) batch_size, _ = batch.input_ids.shape - generation_config = GenerationConfig.from_dict(self.generation_config_dict) + generation_config = self.generation_config_dict.copy() generation_config.update( max_new_tokens=max_new_tokens, pad_token_id=self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else self.tokenizer.eos_token_id, @@ -912,7 +911,7 @@ def _generate( input_ids=batch.input_ids, attention_mask=batch.input_mask, stopping_criteria=stopping_criteria, - generation_config=generation_config, + **generation_config, ) generations = outputs.sequences[:, batch.input_ids.size(1) :] generations = torch.reshape(generations, (batch_size, num_samples, -1)) From f9ab29b89d890d9b03de80bb241958505b241a3c Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Thu, 12 Dec 2024 12:47:19 +0000 Subject: [PATCH 16/32] revert --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2e50dc3f3..0ee4010c4 100644 --- a/README.md +++ b/README.md @@ 
-11,7 +11,7 @@
-[![Test suite](https://github.com/huggingface/lighteval/actions/workflows/tests.yaml/badge.svg?branch=main)](https://github.com/huggingface/lighteval/actions/workflows/tests.yaml?query=branch%3Amain) +[![Tests](https://github.com/huggingface/lighteval/actions/workflows/tests.yaml/badge.svg?branch=main)](https://github.com/huggingface/lighteval/actions/workflows/tests.yaml?query=branch%3Amain) [![Quality](https://github.com/huggingface/lighteval/actions/workflows/quality.yaml/badge.svg?branch=main)](https://github.com/huggingface/lighteval/actions/workflows/quality.yaml?query=branch%3Amain) [![Python versions](https://img.shields.io/pypi/pyversions/lighteval)](https://www.python.org/downloads/) [![License](https://img.shields.io/badge/License-MIT-green.svg)](https://github.com/huggingface/lighteval/blob/main/LICENSE) From 4833929783aab7057be2363fc8a3e1c981e0987f Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Thu, 12 Dec 2024 12:50:59 +0000 Subject: [PATCH 17/32] fix docs --- docs/source/package_reference/models.mdx | 4 ++-- src/lighteval/main_accelerate.py | 2 +- src/lighteval/main_endpoint.py | 2 +- src/lighteval/main_vllm.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/package_reference/models.mdx b/docs/source/package_reference/models.mdx index a0e8d68ac..01066fb60 100644 --- a/docs/source/package_reference/models.mdx +++ b/docs/source/package_reference/models.mdx @@ -7,8 +7,8 @@ ## Accelerate and Transformers Models ### TransformersModel -[[autodoc]] models.transformers.base_model.TransformersModelConfig -[[autodoc]] models.transformers.base_model.TransformersModel +[[autodoc]] models.transformers.transformers_model.TransformersModelConfig +[[autodoc]] models.transformers.transformers_model.TransformersModel ### AdapterModel [[autodoc]] models.transformers.adapter_model.AdapterModelConfig diff --git a/src/lighteval/main_accelerate.py b/src/lighteval/main_accelerate.py index e785034b6..e962d5384 100644 --- a/src/lighteval/main_accelerate.py +++ b/src/lighteval/main_accelerate.py @@ -44,7 +44,7 @@ def accelerate( # noqa C901 model_args: Annotated[ str, Argument( - help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/base_model.yaml)" + help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/transformers_model.yaml)" ), ], tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py index bb88e015e..470374e09 100644 --- a/src/lighteval/main_endpoint.py +++ b/src/lighteval/main_endpoint.py @@ -46,7 +46,7 @@ def openai( model_args: Annotated[ str, Argument( - help="Model name as a string (has to be available through the openai API) or path to yaml config file (see examples/model_configs/base_model.yaml)" + help="Model name as a string (has to be available through the openai API) or path to yaml config file (see examples/model_configs/transformers_model.yaml)" ), ], tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], diff --git a/src/lighteval/main_vllm.py b/src/lighteval/main_vllm.py index 850343e6d..b1742b117 100644 --- a/src/lighteval/main_vllm.py +++ b/src/lighteval/main_vllm.py @@ -40,7 +40,7 @@ def vllm( model_args: Annotated[ str, Argument( - help="Model arguments in the form key1=value1,key2=value2,... 
or path to yaml config file (see examples/model_configs/base_model.yaml)" + help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/transformers_model.yaml)" ), ], tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")], From 30bed89696322c7304a374a1baa1139b701bbf8e Mon Sep 17 00:00:00 2001 From: "clementine@huggingface.co" Date: Thu, 12 Dec 2024 13:03:45 +0000 Subject: [PATCH 18/32] fix #16 by also allowing a generationconfig object to be passed programmatically if needed --- .../models/transformers/transformers_model.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index aedc769fb..e66a2d70a 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -39,7 +39,7 @@ GPTQConfig, PretrainedConfig, ) -from transformers.generation.utils import GenerateOutput +from transformers.generation.utils import GenerateOutput, GenerationConfig from transformers.models.auto.modeling_auto import MODEL_FOR_CAUSAL_LM_MAPPING_NAMES from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset, LoglikelihoodSingleTokenDataset @@ -126,6 +126,8 @@ class TransformersModelConfig: model at a quantized precision. Needed for 4-bit and 8-bit precision. trust_remote_code (bool): Whether to trust remote code during model loading. + generation_parameters (GenerationParameters): Range of parameters which will affect the generation. + generation_config (GenerationConfig): GenerationConfig object (only passed during manual creation) Methods: __post_init__(): Performs post-initialization checks on the configuration. @@ -154,6 +156,7 @@ class TransformersModelConfig: use_chat_template: bool = False compile: bool = False generation_parameters: GenerationParameters = None + generation_config: GenerationConfig = None def __post_init__(self): # Making sure this parameter is a boolean @@ -180,7 +183,12 @@ def __post_init__(self): if not isinstance(self.device, str): raise ValueError("Current device must be passed as string.") - if not self.generation_parameters: + if self.generation_config and self.generation_parameters: + raise ValueError( + "Can't use both generation_config and generation_parameters argument. 
Pass the generation parameters to your generation config object" + ) + + if not self.generation_parameters and not self.generation_config: self.generation_parameters = GenerationParameters() def _init_configs(self, model_name: str, env_config: EnvConfig) -> PretrainedConfig: @@ -275,8 +283,11 @@ def __init__( self.model_sha = config.get_model_sha() self.precision = _get_dtype(config.dtype, config=self._config) - self.generation_parameters = config.generation_parameters - self.generation_config_dict = self.generation_parameters.to_transformers_dict() + if config.generation_config is None: + self.generation_parameters = config.generation_parameters + self.generation_config_dict = self.generation_parameters.to_transformers_dict() + else: + self.generation_config_dict = config.generation_config.to_dict() if is_accelerate_available(): model_size, _ = calculate_maximum_sizes(self.model) From fb4ecdc6659df8f1a37c93d4c64ff47f51d40f00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:32:02 +0100 Subject: [PATCH 19/32] Apply suggestions from code review Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --- src/lighteval/main_endpoint.py | 1 - src/lighteval/models/transformers/transformers_model.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py index 470374e09..a9c23f481 100644 --- a/src/lighteval/main_endpoint.py +++ b/src/lighteval/main_endpoint.py @@ -98,7 +98,6 @@ def openai( """ from lighteval.logging.evaluation_tracker import EvaluationTracker - # from lighteval.models.model_input import GenerationParameters from lighteval.models.endpoints.openai_model import OpenAIModelConfig from lighteval.models.model_input import GenerationParameters from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index e66a2d70a..69af4733b 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -238,7 +238,7 @@ def get_model_sha(self): @dataclass class BaseModelConfig(TransformersModelConfig): def __post_init__(self): - super() + super().__post_init__() logger.warning( "Careful, BaseModelConfig is deprecated and will be removed, you should use TransformersModelConfig instead!" 
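A short sketch of the two mutually exclusive generation entry points this change leaves on TransformersModelConfig (the model name and sampling values below are placeholders; constructing the config does not load the model):

from transformers import GenerationConfig

from lighteval.models.model_input import GenerationParameters
from lighteval.models.transformers.transformers_model import TransformersModelConfig

# Option 1: backend-agnostic GenerationParameters (what the yaml/CLI path also produces).
config_from_parameters = TransformersModelConfig(
    pretrained="HuggingFaceTB/SmolLM-1.7B",
    generation_parameters=GenerationParameters(temperature=0.5, max_new_tokens=256),
)

# Option 2: a ready-made transformers GenerationConfig, passed programmatically.
config_from_generation_config = TransformersModelConfig(
    pretrained="HuggingFaceTB/SmolLM-1.7B",
    generation_config=GenerationConfig(temperature=0.5, max_new_tokens=256),
)

# Passing both at once raises the ValueError enforced in __post_init__ above.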
From be99c5ed89c5d1a26ebf6d27f9d2262d4aab3149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:43:20 +0100 Subject: [PATCH 20/32] Update src/lighteval/models/transformers/transformers_model.py Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --- src/lighteval/models/transformers/transformers_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index 69af4733b..e63d0840e 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -277,7 +277,7 @@ def __init__( logger.info("Compiling the model") self.model.model.compile() except AttributeError as e: - logger.warn("Could not compile the model because: ", e) + logger.warning("Could not compile the model because: ", e) self.model_name = _simplify_name(config.pretrained) self.model_sha = config.get_model_sha() From dece2f9babd74f60f0d7aa72a7a03a01933e0cd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Tue, 17 Dec 2024 13:20:22 +0100 Subject: [PATCH 21/32] removed temperature from default vllm params as it should be passed via the generationparams instead --- src/lighteval/models/vllm/vllm_model.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index c78fbd2ff..463e9984a 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -89,7 +89,6 @@ class VLLMModelConfig: generation_parameters: GenerationParameters = None # sampling parameters to use for generation subfolder: Optional[str] = None - temperature: float = 0.6 # will be used for multi sampling tasks, for tasks requiring no sampling, this will be ignored and set to 0. def __post_init__(self): if not self.generation_parameters: @@ -309,7 +308,6 @@ def _generate( """Contains the actual logic of the generation.""" sampling_params = self.sampling_params.clone() or SamplingParams() if generate: - sampling_params.temperature = float(self._config.temperature) if num_samples > 1 else 0.0 sampling_params.n = num_samples sampling_params.max_tokens = max_new_tokens sampling_params.stop = stop_tokens From 5c89fe2516a7df745340481e94c4dc363613c3a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:53:11 +0100 Subject: [PATCH 22/32] Update src/lighteval/models/transformers/transformers_model.py Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --- src/lighteval/models/transformers/transformers_model.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index e63d0840e..19ead82cc 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -240,8 +240,9 @@ class BaseModelConfig(TransformersModelConfig): def __post_init__(self): super().__post_init__() - logger.warning( - "Careful, BaseModelConfig is deprecated and will be removed, you should use TransformersModelConfig instead!" + warnings.warn( + "BaseModelConfig is deprecated and will be removed. 
Use TransformersModelConfig instead", + FutureWarning, ) From 6a18b8122fcde19d624e197ef01954e749a9f4db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Wed, 18 Dec 2024 11:58:53 +0100 Subject: [PATCH 23/32] logging fix --- src/lighteval/__main__.py | 5 +++-- src/lighteval/models/transformers/transformers_model.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lighteval/__main__.py b/src/lighteval/__main__.py index 4484f7812..e4053813e 100644 --- a/src/lighteval/__main__.py +++ b/src/lighteval/__main__.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import logging -from logging.config import dictConfig +import logging.config import colorlog import typer @@ -57,7 +57,8 @@ }, ) -dictConfig(logging_config) +logging.config.dictConfig(logging_config) +logging.captureWarnings(capture=True) app.command(rich_help_panel="Evaluation Backends")(lighteval.main_accelerate.accelerate) app.command(rich_help_panel="Evaluation Utils")(lighteval.main_baseline.baseline) diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index 19ead82cc..295ed4b11 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -22,6 +22,7 @@ import logging import os +import warnings from dataclasses import dataclass from typing import Optional, Tuple, Union From 90593a96a9014fa5dcc3bd9849ee334597817d46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Wed, 18 Dec 2024 12:35:11 +0100 Subject: [PATCH 24/32] added default gen params --- tests/models/endpoints/test_tgi_model.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/models/endpoints/test_tgi_model.py b/tests/models/endpoints/test_tgi_model.py index 305034278..40bae2ceb 100644 --- a/tests/models/endpoints/test_tgi_model.py +++ b/tests/models/endpoints/test_tgi_model.py @@ -33,7 +33,27 @@ class TestTGIModelConfig: [ ( "examples/model_configs/tgi_model.yaml", - {"inference_server_address": "", "inference_server_auth": None, "model_id": None}, + { + "inference_server_address": "", + "inference_server_auth": None, + "model_id": None, + "generation_parameters": { + "early_stopping": None, + "frequency_penalty": None, + "length_penalty": None, + "max_new_tokens": None, + "min_new_tokens": None, + "min_p": None, + "presence_penalty": None, + "repetition_penalty": None, + "seed": None, + "stop_tokens": None, + "temperature": None, + "top_k": None, + "top_p": None, + "truncate_prompt": None, + }, + }, ), ], ) From ff5026b10b1723652d912271b4ce714816198d77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com> Date: Thu, 26 Dec 2024 11:16:40 +0100 Subject: [PATCH 25/32] Apply suggestions from code review Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --- src/lighteval/models/model_input.py | 41 ++----------------- .../models/transformers/transformers_model.py | 4 +- 2 files changed, 6 insertions(+), 39 deletions(-) diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index d2e5f4359..6481124bd 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
OTHER DEALINGS IN THE # SOFTWARE. -from dataclasses import dataclass +from dataclasses import dataclass, asdict from typing import Optional @@ -57,24 +57,7 @@ def from_dict(cls, config_dict: dict): } } """ - if "generation" not in config_dict: - return GenerationParameters() - return GenerationParameters( - early_stopping=config_dict["generation"].get("early_stopping", None), - repetition_penalty=config_dict["generation"].get("repetition_penalty", None), - frequency_penalty=config_dict["generation"].get("frequency_penalty", None), - length_penalty=config_dict["generation"].get("length_penalty", None), - presence_penalty=config_dict["generation"].get("presence_penalty", None), - max_new_tokens=config_dict["generation"].get("max_new_tokens", None), - min_new_tokens=config_dict["generation"].get("min_new_tokens", None), - seed=config_dict["generation"].get("seed", None), - stop_tokens=config_dict["generation"].get("stop_tokens", None), - temperature=config_dict["generation"].get("temperature", None), - top_k=config_dict["generation"].get("top_k", None), - min_p=config_dict["generation"].get("min_p", None), - top_p=config_dict["generation"].get("top_p", None), - truncate_prompt=config_dict["generation"].get("truncate_prompt", None), - ) + return GenerationParameters(**config_dict.get("generation", {})) def to_vllm_openai_dict(self) -> dict: """Selects relevant generation and sampling parameters for vllm and openai models. @@ -85,23 +68,7 @@ def to_vllm_openai_dict(self) -> dict: """ # Task specific sampling params to set in model: n, best_of, use_beam_search # Generation specific params to set in model: logprobs, prompt_logprobs - args = { - "presence_penalty": self.presence_penalty, - "frequency_penalty": self.frequency_penalty, - "repetition_penalty": self.repetition_penalty, - "temperature": self.temperature, - "top_p": self.top_p, - "top_k": self.top_k, - "min_p": self.min_p, - "seed": self.seed, - "length_penalty": self.length_penalty, - "early_stopping": self.early_stopping, - "stop": self.stop_tokens, - "max_tokens": self.max_new_tokens, - "min_tokens": self.min_new_tokens, - "truncate_prompt_tokens": self.truncate_prompt, - } - return {k: v for k, v in args.items() if v is not None} + return {k: v for k, v in asdict(self).items() if v is not None} def to_transformers_dict(self) -> dict: """Selects relevant generation and sampling parameters for transformers models. @@ -117,7 +84,7 @@ def to_transformers_dict(self) -> dict: args = { "max_new_tokens": self.max_new_tokens, "min_new_tokens": self.min_new_tokens, - "early_stopping": self.early_stopping or False, + "early_stopping": self.early_stopping, "stop_strings": self.stop_tokens, "temperature": self.temperature, "top_k": self.top_k, diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index 295ed4b11..34db20a6f 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -1350,9 +1350,9 @@ def _loglikelihood_single_token( class BaseModel(TransformersModel): def __post_init__(self): - super() + super().__post_init__() - logger.warning( + warnings.warn( "Careful, the BaseModel name is deprecated and will be removed, you should use TransformersModel instead!" 
) From 87d052cea03fa8203ff0b0fbcd93b7482d35c2e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Thu, 26 Dec 2024 12:29:58 +0100 Subject: [PATCH 26/32] rename file --- .../model_configs/{base_model.yaml => transformers_model.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/model_configs/{base_model.yaml => transformers_model.yaml} (100%) diff --git a/examples/model_configs/base_model.yaml b/examples/model_configs/transformers_model.yaml similarity index 100% rename from examples/model_configs/base_model.yaml rename to examples/model_configs/transformers_model.yaml From 3f96b954291e7140b68ca562bf5564c20f0fa06b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Thu, 26 Dec 2024 12:32:47 +0100 Subject: [PATCH 27/32] added from path to openai model --- src/lighteval/main_endpoint.py | 7 +------ src/lighteval/models/endpoints/openai_model.py | 9 +++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py index 07ce19c23..f9f83d2a1 100644 --- a/src/lighteval/main_endpoint.py +++ b/src/lighteval/main_endpoint.py @@ -23,7 +23,6 @@ from typing import Optional import typer -import yaml from typer import Argument, Option from typing_extensions import Annotated @@ -98,14 +97,10 @@ def openai( """ from lighteval.logging.evaluation_tracker import EvaluationTracker from lighteval.models.endpoints.openai_model import OpenAIModelConfig - from lighteval.models.model_input import GenerationParameters from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters if model_args.endswith(".yaml"): - with open(model_args, "r") as f: - config = yaml.safe_load(f)["model"] - generation_parameters = GenerationParameters.from_dict(config) - model_config = OpenAIModelConfig(model=config["model_name"], generation_parameters=generation_parameters) + model_config = OpenAIModelConfig.from_path(model_args) else: model_config = OpenAIModelConfig(model=model_args) diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index d3707cba1..d659c191e 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -69,6 +69,15 @@ def __post_init__(self): if not self.generation_parameters: self.generation_parameters = GenerationParameters() + @classmethod + def from_path(cls, path: str) -> "OpenAIModelConfig": + import yaml + + with open(path, "r") as f: + config = yaml.safe_load(f)["model"] + generation_parameters = GenerationParameters.from_dict(config) + return cls(model=config["model_name"], generation_parameters=generation_parameters) + class OpenAIClient(LightevalModel): _DEFAULT_MAX_LENGTH: int = 4096 From 843b572f3f9f28d11741854496183dd4d4bf80da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Thu, 26 Dec 2024 12:33:12 +0100 Subject: [PATCH 28/32] style --- src/lighteval/models/model_input.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index 6481124bd..2635245c3 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-from dataclasses import dataclass, asdict +from dataclasses import asdict, dataclass from typing import Optional From e233190e0ac3f6cb7f25a768beba438085961f80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Fourrier?= <22726840+clefourrier@users.noreply.github.com> Date: Thu, 26 Dec 2024 12:33:57 +0100 Subject: [PATCH 29/32] Update src/lighteval/models/transformers/transformers_model.py Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --- src/lighteval/models/transformers/transformers_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index 34db20a6f..081874985 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -1353,7 +1353,8 @@ def __post_init__(self): super().__post_init__() warnings.warn( - "Careful, the BaseModel name is deprecated and will be removed, you should use TransformersModel instead!" + "Careful, the BaseModel name is deprecated and will be removed, you should use TransformersModel instead!", + FutureWarning, ) From 97db62032c4191fb11afae67e58519f90d4a738d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Thu, 26 Dec 2024 12:35:00 +0100 Subject: [PATCH 30/32] inferenceendpoint renamed to ie --- src/lighteval/models/endpoints/endpoint_model.py | 4 +--- src/lighteval/models/endpoints/tgi_model.py | 4 +--- src/lighteval/models/model_input.py | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 47978adff..942ece410 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -316,9 +316,7 @@ def __init__( # noqa: C901 model_size=-1, ) self.generation_parameters = config.generation_parameters - self.generation_config = TextGenerationInputGenerateParameters( - **self.generation_parameters.to_tgi_inferenceendpoint_dict() - ) + self.generation_config = TextGenerationInputGenerateParameters(**self.generation_parameters.to_tgi_ie_dict()) @staticmethod def get_larger_hardware_suggestion(cur_instance_type: str = None, cur_instance_size: str = None): diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 9ca5dc053..f0bb712b6 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -88,9 +88,7 @@ def __init__(self, config: TGIModelConfig) -> None: self.client = AsyncClient(config.inference_server_address, headers=headers, timeout=240) self.generation_parameters = config.generation_parameters - self.generation_config = TextGenerationInputGenerateParameters( - **self.generation_parameters.to_tgi_inferenceendpoint_dict() - ) + self.generation_config = TextGenerationInputGenerateParameters(**self.generation_parameters.to_tgi_ie_dict()) self._max_gen_toks = 256 self.model_info = requests.get(f"{config.inference_server_address}/info", headers=headers).json() if "model_id" not in self.model_info: diff --git a/src/lighteval/models/model_input.py b/src/lighteval/models/model_input.py index 2635245c3..04e35be17 100644 --- a/src/lighteval/models/model_input.py +++ b/src/lighteval/models/model_input.py @@ -97,7 +97,7 @@ def to_transformers_dict(self) -> dict: } return {k: v for k, v in args.items() if v is not None} - def 
to_tgi_inferenceendpoint_dict(self) -> dict: + def to_tgi_ie_dict(self) -> dict: """Selects relevant generation and sampling parameters for tgi or inference endpoints models. Doc: https://huggingface.co/docs/huggingface_hub/v0.26.3/en/package_reference/inference_types#huggingface_hub.TextGenerationInputGenerateParameters From e636f7396f82b2f6ea7b6254c8d24c60ec92f41d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Thu, 26 Dec 2024 12:38:32 +0100 Subject: [PATCH 31/32] style 2 --- src/lighteval/models/transformers/transformers_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/transformers/transformers_model.py b/src/lighteval/models/transformers/transformers_model.py index 081874985..1e087554d 100644 --- a/src/lighteval/models/transformers/transformers_model.py +++ b/src/lighteval/models/transformers/transformers_model.py @@ -1354,7 +1354,7 @@ def __post_init__(self): warnings.warn( "Careful, the BaseModel name is deprecated and will be removed, you should use TransformersModel instead!", - FutureWarning, + FutureWarning, ) From ded4cf0f131378485c8f1990eeb811e76f93d73e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine?= Date: Thu, 26 Dec 2024 12:55:41 +0100 Subject: [PATCH 32/32] fix vllm --- src/lighteval/models/vllm/vllm_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 463e9984a..d85bbf943 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -122,7 +122,7 @@ def __init__( self.precision = _get_dtype(config.dtype, config=self._config) self.model_info = ModelInfo(model_name=self.model_name, model_sha=self.model_sha) - self.sampling_params = SamplingParams(**config.sampling_params) + self.sampling_params = SamplingParams(**config.generation_parameters.to_vllm_openai_dict()) self.pairwise_tokenization = config.pairwise_tokenization @property
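
For reference, below is a minimal usage sketch of the GenerationParameters API as it stands once the full series is applied (from_dict plus the to_transformers_dict / to_vllm_openai_dict / to_tgi_ie_dict converters). The config dict and its values are hypothetical, chosen only for illustration; the exact keys each converter emits should be checked against the final model_input.py.

    # Sketch only: assumes a lighteval checkout that includes this whole patch series.
    from lighteval.models.model_input import GenerationParameters

    # Parsed "model" section of a YAML config; the values here are made up.
    config = {
        "generation": {
            "temperature": 0.7,
            "max_new_tokens": 256,
            "stop_tokens": ["</s>"],
        }
    }

    params = GenerationParameters.from_dict(config)

    # The transformers and TGI converters drop unset (None) fields and rename the
    # rest to the keys their backend expects (e.g. stop_tokens -> stop_strings for
    # transformers); to_vllm_openai_dict simply returns the non-None dataclass fields.
    print(params.to_transformers_dict())
    print(params.to_vllm_openai_dict())
    print(params.to_tgi_ie_dict())   # keys for TextGenerationInputGenerateParameters

Patches 30 and 32 wire these dicts into TextGenerationInputGenerateParameters and SamplingParams respectively, so generation settings declared once in the YAML config reach every backend through the same dataclass.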