diff --git a/llmfoundry/optim/lion8b.py b/llmfoundry/optim/lion8b.py
index 9d1d1dda71..f76d29b1c7 100644
--- a/llmfoundry/optim/lion8b.py
+++ b/llmfoundry/optim/lion8b.py
@@ -235,9 +235,9 @@ def __init__(self, data: Optional[torch.Tensor], try_quantize: bool = True):
         self._f_encode = None
         self._f_decode = None
         if self._try_quantize:
-            from turbo import dequantize8b, quantize8b
-            self._f_encode = quantize8b
-            self._f_decode = dequantize8b
+            from turbo import dequantize_signed, quantize_signed
+            self._f_encode = quantize_signed
+            self._f_decode = dequantize_signed
 
         if data is not None:
             self.set_data(data)
@@ -277,7 +277,7 @@ def set_data(self, data: torch.Tensor) -> None:
                     f'on device {data.device} with shape {data.shape}.')
             self.data = None
             assert self._f_encode is not None  # pyright
-            self.quantized, self.scales = self._f_encode(data)
+            self.quantized, self.scales, _ = self._f_encode(data)
         else:
             self.data = data.to(dtype=torch.float32)
             self.quantized = None
diff --git a/setup.py b/setup.py
index c030fe3268..dd5b184357 100644
--- a/setup.py
+++ b/setup.py
@@ -93,13 +93,13 @@
 extra_deps['gpu'] = [
     'flash-attn==1.0.9',
-    'mosaicml-turbo==0.0.4',
+    'mosaicml-turbo==0.0.7',
     # PyPI does not support direct dependencies, so we remove this line before uploading from PyPI
     'xentropy-cuda-lib@git+https://github.com/HazyResearch/flash-attention.git@v1.0.9#subdirectory=csrc/xentropy',
 ]
 
 extra_deps['gpu-flash2'] = [
     'flash-attn==2.3.6',
-    'mosaicml-turbo==0.0.4',
+    'mosaicml-turbo==0.0.7',
 ]
 
 extra_deps['peft'] = [
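
For context, below is a minimal sketch (not part of the diff) of the turbo API difference this change adapts to: the renamed `quantize_signed` returns a 3-tuple, whereas the old `quantize8b` returned `(quantized, scales)`, so `set_data` now discards the extra value. The sketch only exercises what the diff itself shows; the call signature of `dequantize_signed` is not shown here, so it is imported but not invoked. Requires a CUDA device and mosaicml-turbo>=0.0.7.

```python
import torch
from turbo import dequantize_signed, quantize_signed  # new names as of this change

data = torch.randn(1024, device='cuda')

# New API: unpack three values and ignore the last, mirroring the updated
# set_data() in lion8b.py. The old quantize8b returned only two values.
quantized, scales, _ = quantize_signed(data)

# The decode path is wired up the same way (self._f_decode = dequantize_signed);
# its exact argument list is not visible in this diff, so it is not called here.
```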