diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 039139b95..feb6c766e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.15
+    rev: v0.2.0
     hooks:
       - id: ruff
         args:
diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
index 4cd2da153..3b729b8da 100644
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -226,11 +226,11 @@ def to(self, *args, **kwargs):
 
 class Linear4bit(nn.Linear):
     """
-    This class is the base module for the 4-bit quantization algorithm presented in [QLoRA](https://arxiv.org/abs/2305.14314). 
+    This class is the base module for the 4-bit quantization algorithm presented in [QLoRA](https://arxiv.org/abs/2305.14314).
     QLoRA 4-bit linear layers uses blockwise k-bit quantization under the hood, with the possibility of selecting various
     compute datatypes such as FP4 and NF4.
 
-    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into 
+    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
     the Linear8bitLt module, then call `quantized_module.to("cuda")` to quantize the fp16 / bf16 weights.
 
     Example:
@@ -442,10 +442,10 @@ def maybe_rearrange_weight(state_dict, prefix, local_metadata, strict, missing_k
 
 class Linear8bitLt(nn.Linear):
     """
-    This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm. 
+    This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm.
     To read more about it, have a look at the paper.
 
-    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into 
+    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
     the Linear8bitLt module, then call `int8_module.to("cuda")` to quantize the fp16 weights.
 
     Example:
diff --git a/docs/source/quantization.mdx b/docs/source/quantization.mdx
index 8fbff809f..e106c4401 100644
--- a/docs/source/quantization.mdx
+++ b/docs/source/quantization.mdx
@@ -14,4 +14,4 @@ Below you will find the docstring of the quantization primitives exposed in bits
 
 ## StableEmbedding
 
-[[autodoc]] bitsandbytes.nn.StableEmbedding
\ No newline at end of file
+[[autodoc]] bitsandbytes.nn.StableEmbedding
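
The docstrings above describe the load-then-`.to("cuda")` quantization pattern for both `Linear8bitLt` and `Linear4bit`. A minimal sketch of that pattern, assuming `torch` and `bitsandbytes` are installed and a CUDA device is available; the 64x64 layer sizes, `has_fp16_weights=False`, and `quant_type="nf4"` choices are illustrative, not taken from the diff:

```python
import torch
import torch.nn as nn
import bitsandbytes as bnb

# Start from an ordinary fp16 linear layer whose weights we want to quantize.
fp16_linear = nn.Linear(64, 64).to(torch.float16)

# LLM.int8(): build the 8-bit layer, load the fp16 weights, then move to
# CUDA -- the .to("cuda") call is what performs the actual quantization.
int8_linear = bnb.nn.Linear8bitLt(64, 64, has_fp16_weights=False)
int8_linear.load_state_dict(fp16_linear.state_dict())
int8_linear = int8_linear.to("cuda")

# QLoRA 4-bit: same pattern; quant_type selects the FP4 or NF4 datatype
# mentioned in the docstring, and compute_dtype sets the matmul datatype.
fp4_linear = bnb.nn.Linear4bit(64, 64, compute_dtype=torch.float16, quant_type="nf4")
fp4_linear.load_state_dict(fp16_linear.state_dict())
fp4_linear = fp4_linear.to("cuda")

x = torch.randn(1, 64, dtype=torch.float16, device="cuda")
print(int8_linear(x).shape, fp4_linear(x).shape)  # torch.Size([1, 64]) twice
```

Deferring quantization to `.to("cuda")`, rather than doing it at construction, is what lets the module first receive its full-precision weights via `load_state_dict` exactly as the docstrings describe.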