
Commit

Merge branch 'main' into shashank/alibi_flash_attn
ShashankMosaicML authored Jan 1, 2024
2 parents c1dcdce + 2b1fa79 commit ae67c0c
Showing 2 changed files with 6 additions and 6 deletions.
8 changes: 4 additions & 4 deletions llmfoundry/optim/lion8b.py
@@ -235,9 +235,9 @@ def __init__(self, data: Optional[torch.Tensor], try_quantize: bool = True):
         self._f_encode = None
         self._f_decode = None
         if self._try_quantize:
-            from turbo import dequantize8b, quantize8b
-            self._f_encode = quantize8b
-            self._f_decode = dequantize8b
+            from turbo import dequantize_signed, quantize_signed
+            self._f_encode = quantize_signed
+            self._f_decode = dequantize_signed
 
         if data is not None:
             self.set_data(data)
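
For readers without a CUDA build of turbo, here is a minimal pure-PyTorch sketch of the interface shape the renamed kernels appear to have. Only the names quantize_signed and dequantize_signed and the widened return tuple come from this diff; the per-tensor scaling, the meaning of the third return value, and the _ref helper names are illustrative assumptions, not mosaicml-turbo's actual fused CUDA implementation.

    import torch


    def quantize_signed_ref(data: torch.Tensor):
        # Hypothetical stand-in for turbo.quantize_signed; a single
        # per-tensor scale is assumed here for simplicity.
        scale = data.detach().abs().max().clamp(min=1e-12) / 127.0
        quantized = torch.round(data / scale).clamp(-128, 127).to(torch.int8)
        # set_data() in the next hunk discards a third return value; its
        # meaning is not visible in this diff, so the stand-in returns None.
        return quantized, scale, None


    def dequantize_signed_ref(quantized: torch.Tensor,
                              scale: torch.Tensor) -> torch.Tensor:
        # Hypothetical stand-in for turbo.dequantize_signed.
        return quantized.to(torch.float32) * scale
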
@@ -277,7 +277,7 @@ def set_data(self, data: torch.Tensor) -> None:
                     f'on device {data.device} with shape {data.shape}.')
             self.data = None
             assert self._f_encode is not None  # pyright
-            self.quantized, self.scales = self._f_encode(data)
+            self.quantized, self.scales, _ = self._f_encode(data)
         else:
             self.data = data.to(dtype=torch.float32)
             self.quantized = None
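
A round trip with the stand-ins sketched above, mirroring the three-element unpack that set_data() now performs; the tolerance follows from the assumed per-tensor scale.

    data = torch.randn(4, 8)
    quantized, scales, _ = quantize_signed_ref(data)  # same unpack shape as set_data()
    restored = dequantize_signed_ref(quantized, scales)
    # Rounding to the nearest int8 step costs at most half a step per element.
    assert torch.allclose(restored, data, atol=float(scales) / 2 + 1e-6)
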
4 changes: 2 additions & 2 deletions setup.py
@@ -93,13 +93,13 @@
 
 extra_deps['gpu'] = [
     'flash-attn==1.0.9',
-    'mosaicml-turbo==0.0.4',
+    'mosaicml-turbo==0.0.7',
     # PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
     'xentropy-cuda-lib@git+https://github.com/HazyResearch/[email protected]#subdirectory=csrc/xentropy',
 ]
 extra_deps['gpu-flash2'] = [
     'flash-attn==2.4.2',
-    'mosaicml-turbo==0.0.4',
+    'mosaicml-turbo==0.0.7',
 ]
 
 extra_deps['peft'] = [
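
Both GPU extras pin the same turbo release, so either flash-attn major version picks up the renamed quantize_signed API. Below is a condensed sketch of how setup.py presumably surfaces these pins as pip extras via setuptools; the distribution name and everything outside the two lists in the diff are assumptions.

    from setuptools import setup

    extra_deps = {}
    extra_deps['gpu'] = ['flash-attn==1.0.9', 'mosaicml-turbo==0.0.7']
    extra_deps['gpu-flash2'] = ['flash-attn==2.4.2', 'mosaicml-turbo==0.0.7']

    setup(
        name='llm-foundry',  # assumed distribution name
        extras_require=extra_deps,  # e.g. pip install 'llm-foundry[gpu-flash2]'
    )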
