diff --git a/README.md b/README.md
index 8812f24e69..1f472cba3d 100644
--- a/README.md
+++ b/README.md
@@ -828,15 +828,8 @@ flash_attn_fuse_mlp: # Whether to fuse part of the MLP into a single operation
 # Whether to use scaled-dot-product attention
 # https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html
 sdp_attention:
-# Landmark attention (only llama)
-landmark_attention:
 # Shifted-sparse attention (only llama)
 s2_attention:
-
-# xpos RoPE see https://github.com/kaiokendev/cutoff-len-is-context-len/blob/main/util/xpos_rope_llama_monkey_patch.py
-# LLaMA only
-xpos_rope:
-
 # Resume from a specific checkpoint dir
 resume_from_checkpoint:
 # If resume_from_checkpoint isn't set and you simply want it to start where it left off.
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 1ca8a31603..b305fc3d8c 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -299,30 +299,11 @@ def load_model(
             LOG.info("patching with sdp attention")
             hijack_llama_sdp_attention()
-        elif cfg.landmark_attention:
-            from axolotl.monkeypatch.llama_landmark_attn import (
-                MEM_TOKEN,
-                patch_llama_with_landmark_attn,
-            )
-
-            LOG.info("patching with landmark attention")
-            patch_llama_with_landmark_attn()
-
-            # Note: This might overwrite previous additional_special_tokens
-            tokenizer.add_special_tokens({"additional_special_tokens": [MEM_TOKEN]})
         elif cfg.s2_attention:
             raise NotImplementedError(
                 "Shifted-sparse attention not currently implemented without flash attention."
             )
 
-    if cfg.xpos_rope:
-        from axolotl.monkeypatch.xpos_rope_llama_monkey_patch import (
-            replace_llama_rope_with_xpos_rope,
-        )
-
-        LOG.info("patching with xpos rope")
-        replace_llama_rope_with_xpos_rope()
-
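
Note: after this change, `landmark_attention` and `xpos_rope` are no longer recognized config options; `sdp_attention` and `s2_attention` remain, with `s2_attention` still raising `NotImplementedError` unless flash attention is used. For readers unfamiliar with what the surviving `sdp_attention` path does, the sketch below is a minimal, self-contained illustration of the general monkey-patching idea: swap an attention module's `forward` for one that calls PyTorch 2.x's `torch.nn.functional.scaled_dot_product_attention`. `ToyAttention`, `sdp_forward`, and `patch_with_sdp_attention` are hypothetical names for illustration only; this is not the actual `hijack_llama_sdp_attention` implementation.

```python
# Minimal, hypothetical sketch of SDPA monkey-patching; not axolotl code.
import math

import torch
import torch.nn.functional as F
from torch import nn


class ToyAttention(nn.Module):
    """Naive multi-head self-attention block (a stand-in for a model's attention layer)."""

    def __init__(self, hidden_size: int = 64, num_heads: int = 4):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads
        self.qkv = nn.Linear(hidden_size, 3 * hidden_size)
        self.out = nn.Linear(hidden_size, hidden_size)

    def _split_heads(self, x: torch.Tensor):
        bsz, seq_len, _ = x.shape
        q, k, v = self.qkv(x).chunk(3, dim=-1)
        # Reshape each projection to (bsz, num_heads, seq_len, head_dim)
        return (
            t.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
            for t in (q, k, v)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        bsz, seq_len, _ = x.shape
        q, k, v = self._split_heads(x)
        # Naive attention: materializes the full (seq_len x seq_len) score matrix.
        scores = q @ k.transpose(-2, -1) / math.sqrt(self.head_dim)
        attn = scores.softmax(dim=-1) @ v
        return self.out(attn.transpose(1, 2).reshape(bsz, seq_len, -1))


def sdp_forward(self, x: torch.Tensor) -> torch.Tensor:
    """Replacement forward that routes through the fused SDPA kernel."""
    bsz, seq_len, _ = x.shape
    q, k, v = self._split_heads(x)
    # Fused kernel; dispatches to flash / memory-efficient / math backends.
    attn = F.scaled_dot_product_attention(q, k, v)
    return self.out(attn.transpose(1, 2).reshape(bsz, seq_len, -1))


def patch_with_sdp_attention() -> None:
    """Monkey-patch the class so every existing and future instance uses SDPA."""
    ToyAttention.forward = sdp_forward


if __name__ == "__main__":
    x = torch.randn(2, 16, 64)
    module = ToyAttention()
    reference = module(x)   # naive path
    patch_with_sdp_attention()
    patched = module(x)     # SDPA path, same math (no mask, no dropout)
    print(torch.allclose(reference, patched, atol=1e-5))  # True
```

The real patch presumably targets the `transformers` LLaMA attention classes rather than a toy module, but the overall mechanism visible in this diff is the same: `load_model` inspects cfg flags (`sdp_attention`, `s2_attention`, ...) and applies the corresponding attention patch before training.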