diff --git a/examples/model_configs/base_model.yaml b/examples/model_configs/base_model.yaml
index d6563e61..802b2eba 100644
--- a/examples/model_configs/base_model.yaml
+++ b/examples/model_configs/base_model.yaml
@@ -1,6 +1,6 @@
 model:
   base_params:
-    model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
+    model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For the full list of parameters, see: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments
     dtype: "bfloat16"
     compile: true
   merged_weights: # Ignore this section if you are not using PEFT models
diff --git a/examples/model_configs/peft_model.yaml b/examples/model_configs/peft_model.yaml
index 81205818..def06970 100644
--- a/examples/model_configs/peft_model.yaml
+++ b/examples/model_configs/peft_model.yaml
@@ -1,6 +1,6 @@
 model:
   base_params:
-    model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied.
+    model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied. For the full list of parameters, see: https://huggingface.co/docs/lighteval/main/en/package_reference/models#lighteval.models.transformers.adapter_model.AdapterModelConfig
     dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
     compile: true
   merged_weights: # Ignore this section if you are not using PEFT models
diff --git a/examples/model_configs/quantized_model.yaml b/examples/model_configs/quantized_model.yaml
index 3bc6b2c3..51249c8c 100644
--- a/examples/model_configs/quantized_model.yaml
+++ b/examples/model_configs/quantized_model.yaml
@@ -1,6 +1,6 @@
 model:
   base_params:
-    model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
+    model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For the full list of parameters, see: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments
     dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization.
     compile: true
   merged_weights: # Ignore this section if you are not using PEFT models
diff --git a/examples/model_configs/serverless_model.yaml b/examples/model_configs/serverless_model.yaml
index af1652e1..ae261127 100644
--- a/examples/model_configs/serverless_model.yaml
+++ b/examples/model_configs/serverless_model.yaml
@@ -1,3 +1,3 @@
 model:
   base_params:
-    model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B"
+    model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B" # For the full list of parameters, see: https://huggingface.co/docs/lighteval/package_reference/models#endpoints-based-models
diff --git a/examples/model_configs/tgi_model.yaml b/examples/model_configs/tgi_model.yaml
index 8db5654d..059a1d31 100644
--- a/examples/model_configs/tgi_model.yaml
+++ b/examples/model_configs/tgi_model.yaml
@@ -2,4 +2,4 @@ model:
   instance:
     inference_server_address: ""
     inference_server_auth: null
-    model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory
+    model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory. For the full list of parameters, see: https://huggingface.co/docs/lighteval/package_reference/models#lighteval.models.endpoints.tgi_model.TGIModelConfig
diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py
index 80798b61..6743d8e0 100644
--- a/src/lighteval/models/endpoints/endpoint_model.py
+++ b/src/lighteval/models/endpoints/endpoint_model.py
@@ -90,6 +90,27 @@ def from_path(cls, path: str) -> "ServerlessEndpointModelConfig":

 @dataclass
 class InferenceEndpointModelConfig:
+    """
+    Configuration settings for deploying and managing a model on an inference endpoint.
+
+    Attributes:
+        endpoint_name (str, optional): The name of the inference endpoint.
+        model_name (str, optional): The name of the model for inference.
+        reuse_existing (bool, default: False): Indicates whether to reuse an existing endpoint.
+        accelerator (str, default: "gpu"): Specifies the type of hardware accelerator.
+        model_dtype (str, optional): The data type used by the model. Defaults to the framework's choice if None.
+        vendor (str, default: "aws"): Cloud service provider for hosting the endpoint.
+        region (str, default: "us-east-1"): Cloud region, chosen based on hardware availability.
+        instance_size (str, optional): Size of the instance (e.g., large, xlarge).
+        instance_type (str, optional): Type of the instance (e.g., g5.4xlarge).
+        framework (str, default: "pytorch"): Framework used for inference (e.g., pytorch, tensorflow).
+        endpoint_type (str, default: "protected"): Security level of the endpoint (e.g., public, protected).
+        add_special_tokens (bool, default: True): Whether special tokens should be added during processing.
+        revision (str, default: "main"): The Git branch or commit hash of the model.
+        namespace (str, optional): The namespace under which the endpoint is launched.
+        image_url (str, optional): Docker image URL for the endpoint.
+        env_vars (dict, optional): Environment variables for the endpoint.
+ """ endpoint_name: str = None model_name: str = None reuse_existing: bool = False diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index b2ca2528..c38e3d2d 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -61,6 +61,13 @@ @dataclass class OpenAIModelConfig: + """ + A configuration class for OpenAI models. This class is used to specify settings related to OpenAI models, + including the model name or identifier. + + Attributes: + model: It specifies the name or identifier of the OpenAI model to be used. + """ model: str diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 3f20e4a5..4f5118f3 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -47,6 +47,14 @@ def divide_chunks(array, n): @dataclass class TGIModelConfig: + """ + This class provides a streamlined configuration for integrating with Text Generation Inference (TGI) endpoints. + + Attributes: + inference_server_address (str, required): The endpoint address of the inference server hosting the model. + inference_server_auth (str, required): Authentication credentials or tokens required to access the server. + model_id (str, required): Identifier for the model hosted on the inference server. + """ inference_server_address: str inference_server_auth: str model_id: str diff --git a/src/lighteval/models/transformers/adapter_model.py b/src/lighteval/models/transformers/adapter_model.py index e66a1aa1..af69ed72 100644 --- a/src/lighteval/models/transformers/adapter_model.py +++ b/src/lighteval/models/transformers/adapter_model.py @@ -41,6 +41,10 @@ @dataclass class AdapterModelConfig(BaseModelConfig): + """ + This class is used to manage the configuration of adapter models. Adapter models are designed to extend or adapt a + base model's functionality for specific tasks while keeping most of the base model's parameters frozen. + """ # Adapter models have the specificity that they look at the base model (= the parent) for the tokenizer and config base_model: str = None @@ -58,7 +62,19 @@ def init_configs(self, env_config: EnvConfig): class AdapterModel(BaseModel): + """ + This class is designed to integrate adapter models with a pre-trained base model. + """ def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer: + """ + Creates and configures the adapter model by applying adapter weights to the base model. + + Args: + config(AdapterModelConfig): An instance of AdapterModelConfig. + env_config(EnvConfig): An instance of EnvConfig. + + Returns: PreTrainedTokenizer + """ # By default, we look at the model config for the model stored in `base_model` # (= the parent model, not the model of interest) return self._create_auto_tokenizer_with_name( @@ -71,7 +87,15 @@ def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConf ) def _create_auto_model(self, config: AdapterModelConfig, env_config: EnvConfig) -> AutoModelForCausalLM: - """Returns a PeftModel from a base model and a version fined tuned using PEFT.""" + """ + It returns a PeftModel from a base model and a version fined tuned using PEFT. + + Args: + config(AdapterModelConfig): An instance of AdapterModelConfig. + env_config(EnvConfig): An instance of EnvConfig. 
+
+        Returns: AutoModelForCausalLM
+        """
         torch_dtype = _get_dtype(config.dtype, self._config)

         config.model_parallel, max_memory, device_map = self.init_model_parallel(config.model_parallel)
diff --git a/src/lighteval/models/transformers/delta_model.py b/src/lighteval/models/transformers/delta_model.py
index 20780f1e..2ab675cb 100644
--- a/src/lighteval/models/transformers/delta_model.py
+++ b/src/lighteval/models/transformers/delta_model.py
@@ -38,6 +38,9 @@

 @dataclass
 class DeltaModelConfig(BaseModelConfig):
+    """
+    Configuration class for delta models, whose weights are obtained by adding delta weights to a base model.
+    """
     # Delta models look at the pretrained (= the delta weights) for the tokenizer and model config
     base_model: str = None

@@ -59,7 +62,15 @@ def _create_auto_model(
         self,
         config: DeltaModelConfig,
         env_config: EnvConfig,
     ) -> AutoModelForCausalLM:
-        """Returns a model created by adding the weights of a delta model to a base model."""
+        """
+        Returns a model created by adding the weights of a delta model to a base model.
+
+        Args:
+            config (DeltaModelConfig): An instance of DeltaModelConfig.
+            env_config (EnvConfig): An instance of EnvConfig.
+
+        Returns: AutoModelForCausalLM
+        """
         config.model_parallel, max_memory, device_map = self.init_model_parallel(config.model_parallel)
         torch_dtype = _get_dtype(config.dtype, self._config)
diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py
index 2d413807..e95bb380 100644
--- a/src/lighteval/models/vllm/vllm_model.py
+++ b/src/lighteval/models/vllm/vllm_model.py
@@ -68,6 +68,28 @@

 @dataclass
 class VLLMModelConfig:
+    """
+    Configuration parameters for deploying and running models with the vLLM framework.
+
+    Attributes:
+        pretrained (str, required): Identifier of the pretrained model (model name or path).
+        gpu_memory_utilisation (float, default: 0.9): Fraction of GPU memory to allocate for the model. Reduce this value if you encounter memory issues.
+        revision (str, default: "main"): Branch or version of the model repository.
+        dtype (str | None, optional): Data type for computations (e.g., float32, float16, or bfloat16). Defaults to the model's preset if None.
+        tensor_parallel_size (int, default: 1): Number of GPUs used for splitting tensors across devices.
+        pipeline_parallel_size (int, default: 1): Number of GPUs used for pipeline parallelism.
+        data_parallel_size (int, default: 1): Number of GPUs used for data parallelism.
+        max_model_length (int | None, optional): Maximum sequence length for the model. If None, it is inferred automatically. Can be reduced to handle out-of-memory (OOM) issues.
+        swap_space (int, default: 4): Amount of CPU swap space (in GiB) per GPU for offloading.
+        seed (int, default: 1234): Seed for reproducibility in experiments.
+        trust_remote_code (bool, default: False): Whether to trust custom code provided by remote repositories.
+        use_chat_template (bool, default: False): Whether chat-specific templates should be used for input formatting.
+        add_special_tokens (bool, default: True): Whether to add special tokens during tokenization.
+        multichoice_continuations_start_space (bool, default: True): Adds a space at the beginning of each continuation during multi-choice generation.
+        pairwise_tokenization (bool, default: False): Whether the context and continuation are tokenized separately or together.
+        subfolder (str, optional): Path to a specific subfolder in the model repository, if applicable.
+        temperature (float, default: 0.6): Sampling temperature for stochastic tasks. Ignored for deterministic tasks (set internally to 0).
+    """
     pretrained: str
     gpu_memory_utilisation: float = 0.9  # lower this if you are running out of memory
     revision: str = "main"  # revision of the model
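
As a quick illustration of the configuration dataclasses documented above, the snippet below instantiates two of them directly. This is a minimal sketch and not part of the patch itself: it relies only on the fields and module paths shown in the diff, and the literal values (server address, model name, memory fraction) are placeholders.

from lighteval.models.endpoints.tgi_model import TGIModelConfig
from lighteval.models.vllm.vllm_model import VLLMModelConfig

# Point lighteval at an already-running TGI server (placeholder address, no auth token).
tgi_config = TGIModelConfig(
    inference_server_address="http://localhost:8080",
    inference_server_auth=None,
    model_id=None,  # only needed if the TGI container was launched with model_id pointing to a local directory
)

# Run a model locally through vLLM, lowering GPU memory utilisation to avoid OOM.
vllm_config = VLLMModelConfig(
    pretrained="HuggingFaceH4/zephyr-7b-beta",
    gpu_memory_utilisation=0.8,  # lower this if you are running out of memory
    revision="main",
)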