From efde34b45dff54d9c229b677ddb7108b69d0a735 Mon Sep 17 00:00:00 2001 From: cryptal-mc Date: Thu, 12 Dec 2024 00:46:32 +0000 Subject: [PATCH 1/2] Aligned model update limit to number of active competitions --- constants/__init__.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/constants/__init__.py b/constants/__init__.py index 131db00..072d10b 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -173,6 +173,20 @@ epsilon_func=LinearDecay(0.005, 0.0002, 36000), max_bytes=29 * 1024 * 1024 * 1024, ), + CompetitionId.B14_MODEL_MULTI_DATASET: ModelConstraints( + max_model_parameter_size=13_900_000_000, + min_model_parameter_size=13_700_000_000, + sequence_length=4096, + allowed_architectures=ALLOWED_MODEL_TYPES_2, + tokenizer="Xenova/gpt-4", + kwargs={ + "torch_dtype": torch.bfloat16, + "attn_implementation": "flash_attention_2", + }, + eval_block_delay=EVAL_BLOCK_DELAY, + epsilon_func=LinearDecay(0.005, 0.0002, 36000), + max_bytes=29 * 1024 * 1024 * 1024, + ), } # Schedule of competitions by block. @@ -256,7 +270,7 @@ sample_min = 5 # Max number of uids that can be either pending eval or currently being evaluated. # We allow the sample_min per competition + 10 additional models to be held at any one time. -updated_models_limit = sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID) + 10 +updated_models_limit = sample_min * len(MODEL_CONSTRAINTS_BY_COMPETITION_ID_2) + 10 # time required between updates to the chain. chain_update_cadence = dt.timedelta(minutes=20) # Number of blocks required between retrying evaluation of a model. 
From 571e4970e350b5de4cc8b6681a9abc81e98b5fe6 Mon Sep 17 00:00:00 2001 From: cryptal-mc Date: Thu, 12 Dec 2024 01:24:46 +0000 Subject: [PATCH 2/2] Added comment to explain unused model constraints entry --- constants/__init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/constants/__init__.py b/constants/__init__.py index 072d10b..1b386a8 100644 --- a/constants/__init__.py +++ b/constants/__init__.py @@ -173,6 +173,13 @@ epsilon_func=LinearDecay(0.005, 0.0002, 36000), max_bytes=29 * 1024 * 1024 * 1024, ), + # This constraint is not actually used; it is added as a copy + # of the 14B-model competition constraint entry. + # This is just to keep the size of the constraint dict equal + # to the number of competitions so `updated_models_limit` is + # set correctly below. + # This hack will be removed once native support for multi datasets + # is implemented in a future release. CompetitionId.B14_MODEL_MULTI_DATASET: ModelConstraints( max_model_parameter_size=13_900_000_000, min_model_parameter_size=13_700_000_000,