From bb3f563325376c69baf7a31f8518d3e0fcd90589 Mon Sep 17 00:00:00 2001 From: Tony Salomone Date: Thu, 12 Dec 2024 13:48:32 -0500 Subject: [PATCH] Add 4-bit MLX of Llama 3.3 model. --- transformerlab/galleries/model-gallery.json | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/transformerlab/galleries/model-gallery.json b/transformerlab/galleries/model-gallery.json index ba40ba4..c60bc92 100644 --- a/transformerlab/galleries/model-gallery.json +++ b/transformerlab/galleries/model-gallery.json @@ -1264,6 +1264,33 @@ "paperUrl": "?" } }, + { + "uniqueID": "mlx-community/Llama-3.3-70B-Instruct-4bit", + "name": "Llama-3.3-70B-Instruct-4bit", + "description": "MLX export of Llama 3.3 70B Instruct model quantized to 4-bit. Loses some quality but useful for systems with lower RAM.", + "parameters": "70B", + "context": "131072", + "architecture": "LlamaForCausalLM", + "formats": [ + "Safetensors" + ], + "huggingface_repo": "mlx-community/Llama-3.3-70B-Instruct-4bit", + "transformers_version": "4.47.0.dev0", + "gated": false, + "license": "llama3.3", + "logo": "https://upload.wikimedia.org/wikipedia/commons/a/ab/Meta-Logo.png", + "size_of_model_in_mb": 37866.6, + "author": { + "name": "mlx-community", + "url": "https://huggingface.co/mlx-community/Llama-3.3-70B-Instruct-4bit", + "blurb": "" + }, + "resources": { + "canonicalUrl": "https://huggingface.co/mlx-community/Llama-3.3-70B-Instruct-4bit", + "downloadUrl": "https://huggingface.co/mlx-community/Llama-3.3-70B-Instruct-4bit", + "paperUrl": "?" + } + }, { "uniqueID": "Qwen/Qwen2-7B-Instruct", "name": "Qwen2 7B Instruct",