Commit fe2e478

Fix model type
2015aroras committed Nov 4, 2024
1 parent a5f92c2 · commit fe2e478
Showing 2 changed files with 6 additions and 2 deletions.
src/transformers/models/olmo_1124/configuration_olmo_1124.py (4 additions, 2 deletions)
@@ -70,7 +70,9 @@ class Olmo1124Config(PretrainedConfig):
             Whether to use a bias in the query, key, value and output projection layers during self-attention.
         attention_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for the attention probabilities.
-        rms_norm_eps (`<fill_type>`, *optional*, defaults to 1e-05): <fill_docstring>
+        clip_qkv (`float`, *optional*):
+            If not `None`, elements of query, key and value attention states are clipped so that their
+            absolute value does not exceed this value.
 
     ```python
     >>> from transformers import Olmo1124Model, Olmo1124Config
@@ -88,7 +90,7 @@ class Olmo1124Config(PretrainedConfig):
             The epsilon used by the rms normalization layers.
     """
 
-    model_type = "olmo_1124"
+    model_type = "olmo-1124"
     keys_to_ignore_at_inference = ["past_key_values"]
 
     def __init__(
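The `clip_qkv` docstring added above describes clamping the query, key and value attention states. A minimal sketch of that behavior (illustrative only, not the library's actual attention code; the tensor shapes and the example value are assumptions):

```python
import torch

clip_qkv = 8.0  # illustrative value; None disables clipping
query_states = torch.randn(1, 8, 64)
key_states = torch.randn(1, 8, 64)
value_states = torch.randn(1, 8, 64)

if clip_qkv is not None:
    # Clamp each state so its absolute value does not exceed clip_qkv
    query_states = query_states.clamp(min=-clip_qkv, max=clip_qkv)
    key_states = key_states.clamp(min=-clip_qkv, max=clip_qkv)
    value_states = value_states.clamp(min=-clip_qkv, max=clip_qkv)
```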
src/transformers/models/olmo_1124/modular_olmo_1124.py (2 additions, 0 deletions)
@@ -33,6 +33,8 @@ class Olmo1124Config(OlmoConfig):
             The epsilon used by the rms normalization layers.
     """
 
+    model_type = "olmo-1124"
+
     def __init__(
         self,
         vocab_size=50304,
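Both files now set `model_type = "olmo-1124"`, keeping the modular definition and the configuration in sync. A quick way to check which identifier a config reports (a sketch, assuming the classes are importable as in the docstring example above):

```python
from transformers import Olmo1124Config

# model_type is the key the Auto* classes use to map a checkpoint to its
# config/model classes, so the string must match the registered name exactly.
config = Olmo1124Config()
print(config.model_type)  # expected: "olmo-1124"
```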
