Add 4-bit MLX export of the Llama 3.3 model.

transformerlab · Dec 12, 2024 · bb3f563 · bb3f563
1 parent 9f64e3e
commit bb3f563
Showing 1 changed file with 27 additions and 0 deletions.
diff --git a/transformerlab/galleries/model-gallery.json b/transformerlab/galleries/model-gallery.json
@@ -1264,6 +1264,33 @@
             "paperUrl": "?"
         }
     },
+    {
+        "uniqueID": "mlx-community/Llama-3.3-70B-Instruct-4bit",
+        "name": "Llama-3.3-70B-Instruct-4bit",
+        "description": "MLX export of the Llama 3.3 70B Instruct model, quantized to 4-bit. Loses some quality but is useful for systems with less RAM.",
+        "parameters": "70B",
+        "context": "131072",
+        "architecture": "LlamaForCausalLM",
+        "formats": [
+            "Safetensors"
+        ],
+        "huggingface_repo": "mlx-community/Llama-3.3-70B-Instruct-4bit",
+        "transformers_version": "4.47.0.dev0",
+        "gated": false,
+        "license": "llama3.3",
+        "logo": "https://upload.wikimedia.org/wikipedia/commons/a/ab/Meta-Logo.png",
+        "size_of_model_in_mb": 37866.6,
+        "author": {
+            "name": "mlx-community",
+            "url": "https://huggingface.co/mlx-community/Llama-3.3-70B-Instruct-4bit",
+            "blurb": ""
+        },
+        "resources": {
+            "canonicalUrl": "https://huggingface.co/mlx-community/Llama-3.3-70B-Instruct-4bit",
+            "downloadUrl": "https://huggingface.co/mlx-community/Llama-3.3-70B-Instruct-4bit",
+            "paperUrl": "?"
+        }
+    },
     {
         "uniqueID": "Qwen/Qwen2-7B-Instruct",
         "name": "Qwen2 7B Instruct",