diff --git a/docs/source/package_reference/models.mdx b/docs/source/package_reference/models.mdx index a04c9eef9..34b5b2739 100644 --- a/docs/source/package_reference/models.mdx +++ b/docs/source/package_reference/models.mdx @@ -7,9 +7,8 @@ ## Accelerate and Transformers Models ### BaseModel [[autodoc]] models.base_model.BaseModel -[//]: # (TODO: Fix import error) -[//]: # (### AdapterModel) -[//]: # ([[autodoc]] models.adapter_model.AdapterModel) +### AdapterModel +[[autodoc]] models.adapter_model.AdapterModel ### DeltaModel [[autodoc]] models.delta_model.DeltaModel @@ -19,12 +18,10 @@ ### ModelClient [[autodoc]] models.tgi_model.ModelClient -[//]: # (TODO: Fix import error) -[//]: # (## Nanotron Model) -[//]: # (### NanotronLightevalModel) -[//]: # ([[autodoc]] models.nanotron_model.NanotronLightevalModel) +## Nanotron Model +### NanotronLightevalModel +[[autodoc]] models.nanotron_model.NanotronLightevalModel -[//]: # (TODO: Fix import error) -[//]: # (## VLLM Model) -[//]: # (### VLLMModel) -[//]: # ([[autodoc]] models.vllm_model.VLLMModel) +## VLLM Model +### VLLMModel +[[autodoc]] models.vllm_model.VLLMModel diff --git a/src/lighteval/config/lighteval_config.py b/src/lighteval/config/lighteval_config.py index 1f77b34e0..f24a15184 100644 --- a/src/lighteval/config/lighteval_config.py +++ b/src/lighteval/config/lighteval_config.py @@ -23,20 +23,23 @@ from dataclasses import dataclass from typing import Dict, Optional, Union -from nanotron.config import Config -from nanotron.config.parallelism_config import ParallelismArgs -from nanotron.generation.sampler import SamplerType -from nanotron.logging import get_logger +from lighteval.utils.imports import is_nanotron_available -logger = get_logger(__name__) +if is_nanotron_available(): + from nanotron.config import Config + from nanotron.config.parallelism_config import ParallelismArgs + from nanotron.generation.sampler import SamplerType + from nanotron.logging import get_logger + + logger = get_logger(__name__) DEFAULT_GENERATION_SEED = 42 @dataclass class GenerationArgs: - sampler: Optional[Union[str, SamplerType]] = None + sampler: Optional[Union[str, "SamplerType"]] = None temperature: Optional[float] = None top_k: Optional[int] = None top_p: Optional[float] = None @@ -89,7 +92,7 @@ class LightEvalConfig: logging: LightEvalLoggingArgs tasks: LightEvalTasksArgs - parallelism: ParallelismArgs + parallelism: "ParallelismArgs" batch_size: int = 0 generation: Optional[Union[GenerationArgs, Dict[str, GenerationArgs]]] = None @@ -97,4 +100,4 @@ class LightEvalConfig: @dataclass class FullNanotronConfig: lighteval_config: LightEvalConfig - nanotron_config: Config + nanotron_config: "Config" diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py index 0814acac0..ded1624f7 100644 --- a/src/lighteval/models/nanotron_model.py +++ b/src/lighteval/models/nanotron_model.py @@ -78,7 +78,7 @@ from nanotron.serialize import load_weights from nanotron.trainer import CONFIG_TO_MODEL_CLASS, mark_tied_parameters -logger = logging.get_logger(__name__) + logger = logging.get_logger(__name__) class NanotronLightevalModel(LightevalModel): @@ -90,7 +90,7 @@ def __init__( self, checkpoint_path: str, nanotron_config: FullNanotronConfig, - parallel_context: ParallelContext, + parallel_context: "ParallelContext", max_gen_toks: Optional[int] = 256, max_length: Optional[int] = None, add_special_tokens: Optional[bool] = True, @@ -591,7 +591,7 @@ def prepare_batch( input_ids=input_ids, input_mask=input_mask, input_lengths=input_lengths, truncated=truncated, padded=padded ) - def gather(self, output_tensor: torch.Tensor, process_group: dist.ProcessGroup = None) -> torch.Tensor: + def gather(self, output_tensor: torch.Tensor, process_group: "dist.ProcessGroup" = None) -> torch.Tensor: """Gather together tensors of (possibly) various size spread on separate GPUs (first exchange the lengths and then pad and gather)""" if process_group is None: process_group = self.parallel_context.dp_pg