Add missing models to docs (#419)
* Add AdapterModel to Model docs

* Add NanotronLightevalModel to Model docs

* Fix nanotron imports and type hints

* Add VLLMModel to Model docs

---------

Co-authored-by: Clémentine Fourrier <[email protected]>
albertvillanova and clefourrier authored Dec 4, 2024
1 parent 6e2754e commit 3929825
Showing 3 changed files with 22 additions and 22 deletions.
19 changes: 8 additions & 11 deletions docs/source/package_reference/models.mdx
@@ -7,9 +7,8 @@
 ## Accelerate and Transformers Models
 ### BaseModel
 [[autodoc]] models.base_model.BaseModel
-[//]: # (TODO: Fix import error)
-[//]: # (### AdapterModel)
-[//]: # ([[autodoc]] models.adapter_model.AdapterModel)
+### AdapterModel
+[[autodoc]] models.adapter_model.AdapterModel
 ### DeltaModel
 [[autodoc]] models.delta_model.DeltaModel

@@ -19,12 +18,10 @@
 ### ModelClient
 [[autodoc]] models.tgi_model.ModelClient

-[//]: # (TODO: Fix import error)
-[//]: # (## Nanotron Model)
-[//]: # (### NanotronLightevalModel)
-[//]: # ([[autodoc]] models.nanotron_model.NanotronLightevalModel)
+## Nanotron Model
+### NanotronLightevalModel
+[[autodoc]] models.nanotron_model.NanotronLightevalModel

-[//]: # (TODO: Fix import error)
-[//]: # (## VLLM Model)
-[//]: # (### VLLMModel)
-[//]: # ([[autodoc]] models.vllm_model.VLLMModel)
+## VLLM Model
+### VLLMModel
+[[autodoc]] models.vllm_model.VLLMModel
19 changes: 11 additions & 8 deletions src/lighteval/config/lighteval_config.py
@@ -23,20 +23,23 @@
 from dataclasses import dataclass
 from typing import Dict, Optional, Union

-from nanotron.config import Config
-from nanotron.config.parallelism_config import ParallelismArgs
-from nanotron.generation.sampler import SamplerType
-from nanotron.logging import get_logger
+from lighteval.utils.imports import is_nanotron_available


-logger = get_logger(__name__)
+if is_nanotron_available():
+    from nanotron.config import Config
+    from nanotron.config.parallelism_config import ParallelismArgs
+    from nanotron.generation.sampler import SamplerType
+    from nanotron.logging import get_logger
+
+    logger = get_logger(__name__)

 DEFAULT_GENERATION_SEED = 42


 @dataclass
 class GenerationArgs:
-    sampler: Optional[Union[str, SamplerType]] = None
+    sampler: Optional[Union[str, "SamplerType"]] = None
     temperature: Optional[float] = None
     top_k: Optional[int] = None
     top_p: Optional[float] = None
@@ -89,12 +92,12 @@ class LightEvalConfig:

     logging: LightEvalLoggingArgs
     tasks: LightEvalTasksArgs
-    parallelism: ParallelismArgs
+    parallelism: "ParallelismArgs"
     batch_size: int = 0
     generation: Optional[Union[GenerationArgs, Dict[str, GenerationArgs]]] = None


 @dataclass
 class FullNanotronConfig:
     lighteval_config: LightEvalConfig
-    nanotron_config: Config
+    nanotron_config: "Config"
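
For readers skimming the diff, here is a minimal, self-contained sketch of the pattern this change adopts (assuming only the names visible above, is_nanotron_available and nanotron's SamplerType): the nanotron imports run only when the package is installed, and the type hints become string forward references so the module imports cleanly without nanotron. This is an illustration, not the actual lighteval_config.py.

# Sketch of the optional-dependency pattern used in this diff (assumed names).
from dataclasses import dataclass
from typing import Optional, Union

from lighteval.utils.imports import is_nanotron_available

if is_nanotron_available():
    # Imported only when nanotron is present, so plain `import lighteval`
    # (and the doc builder's [[autodoc]] collection) no longer fails.
    from nanotron.generation.sampler import SamplerType


@dataclass
class GenerationArgs:
    # The quoted "SamplerType" is a forward reference; it is only resolved when
    # the annotation is inspected, so this class is usable without nanotron.
    sampler: Optional[Union[str, "SamplerType"]] = None
    temperature: Optional[float] = None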
6 changes: 3 additions & 3 deletions src/lighteval/models/nanotron_model.py
@@ -78,7 +78,7 @@
     from nanotron.serialize import load_weights
     from nanotron.trainer import CONFIG_TO_MODEL_CLASS, mark_tied_parameters

-logger = logging.get_logger(__name__)
+    logger = logging.get_logger(__name__)


 class NanotronLightevalModel(LightevalModel):
@@ -90,7 +90,7 @@ def __init__(
         self,
         checkpoint_path: str,
         nanotron_config: FullNanotronConfig,
-        parallel_context: ParallelContext,
+        parallel_context: "ParallelContext",
         max_gen_toks: Optional[int] = 256,
         max_length: Optional[int] = None,
         add_special_tokens: Optional[bool] = True,
@@ -591,7 +591,7 @@ def prepare_batch(
             input_ids=input_ids, input_mask=input_mask, input_lengths=input_lengths, truncated=truncated, padded=padded
         )

-    def gather(self, output_tensor: torch.Tensor, process_group: dist.ProcessGroup = None) -> torch.Tensor:
+    def gather(self, output_tensor: torch.Tensor, process_group: "dist.ProcessGroup" = None) -> torch.Tensor:
         """Gather together tensors of (possibly) various size spread on separate GPUs (first exchange the lengths and then pad and gather)"""
         if process_group is None:
             process_group = self.parallel_context.dp_pg
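
The gather docstring touched above describes a pad-then-gather scheme for tensors whose first dimensions differ across ranks. Below is a standalone sketch of that idea using plain torch.distributed; the helper name and the first-dimension convention are assumptions for illustration, not lighteval's actual implementation.

# Hedged sketch of "exchange lengths, then pad and gather" with torch.distributed.
from typing import Optional

import torch
import torch.distributed as dist


def gather_variable_length(t: torch.Tensor, group: Optional[dist.ProcessGroup] = None) -> torch.Tensor:
    world_size = dist.get_world_size(group=group)
    # 1) Exchange lengths so every rank knows the largest first dimension.
    local_len = torch.tensor([t.shape[0]], device=t.device, dtype=torch.long)
    all_lens = [torch.zeros_like(local_len) for _ in range(world_size)]
    dist.all_gather(all_lens, local_len, group=group)
    max_len = int(torch.stack(all_lens).max())
    # 2) Pad the local tensor to that common length.
    padded = torch.zeros((max_len, *t.shape[1:]), dtype=t.dtype, device=t.device)
    padded[: t.shape[0]] = t
    # 3) Gather the equally sized padded tensors, then trim the padding back off.
    gathered = [torch.zeros_like(padded) for _ in range(world_size)]
    dist.all_gather(gathered, padded, group=group)
    return torch.cat([g[: int(n)] for g, n in zip(gathered, all_lens)], dim=0)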