From 2d73f4d350ad7490c7bb5d0b5f64e25a69591819 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Sat, 3 Feb 2024 14:45:03 -0800
Subject: [PATCH] run pre-commit hooks

---
 .pre-commit-config.yaml      | 2 +-
 bitsandbytes/nn/modules.py   | 8 ++++----
 docs/source/quantization.mdx | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 039139b95..feb6c766e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.15
+    rev: v0.2.0
     hooks:
       - id: ruff
         args:
diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
index 4cd2da153..3b729b8da 100644
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -226,11 +226,11 @@ def to(self, *args, **kwargs):
 
 class Linear4bit(nn.Linear):
     """
-    This class is the base module for the 4-bit quantization algorithm presented in [QLoRA](https://arxiv.org/abs/2305.14314). 
+    This class is the base module for the 4-bit quantization algorithm presented in [QLoRA](https://arxiv.org/abs/2305.14314).
     QLoRA 4-bit linear layers uses blockwise k-bit quantization under the hood, with the possibility of selecting various
     compute datatypes such as FP4 and NF4.
 
-    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into 
+    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
     the Linear8bitLt module, then call `quantized_module.to("cuda")` to quantize the fp16 / bf16 weights.
 
     Example:
@@ -442,10 +442,10 @@ def maybe_rearrange_weight(state_dict, prefix, local_metadata, strict, missing_k
 
 class Linear8bitLt(nn.Linear):
     """
-    This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm. 
+    This class is the base module for the [LLM.int8()](https://arxiv.org/abs/2208.07339) algorithm.
     To read more about it, have a look at the paper.
 
-    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into 
+    In order to quantize a linear layer one should first load the original fp16 / bf16 weights into
     the Linear8bitLt module, then call `int8_module.to("cuda")` to quantize the fp16 weights.
 
     Example:
diff --git a/docs/source/quantization.mdx b/docs/source/quantization.mdx
index 8fbff809f..e106c4401 100644
--- a/docs/source/quantization.mdx
+++ b/docs/source/quantization.mdx
@@ -14,4 +14,4 @@ Below you will find the docstring of the quantization primitives exposed in bits
 
 ## StableEmbedding
 
-[[autodoc]] bitsandbytes.nn.StableEmbedding
\ No newline at end of file
+[[autodoc]] bitsandbytes.nn.StableEmbedding
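
Not part of the patch: a minimal sketch of the quantization workflow described in the docstrings touched above (build the module with 8-bit linear layers, load the original fp16 / bf16 weights, then call `.to("cuda")`). It assumes the public `bitsandbytes` API (`bnb.nn.Linear8bitLt` with `has_fp16_weights=False`) and an available CUDA device; layer sizes are illustrative only.

```python
# Sketch only, not taken from the patch: quantizing fp16 linear layers
# with Linear8bitLt, following the workflow the docstrings describe.
import torch
import torch.nn as nn

import bitsandbytes as bnb

# Original model with fp16 weights.
fp16_model = nn.Sequential(nn.Linear(64, 64), nn.Linear(64, 64)).half()

# Same architecture with 8-bit linear layers; has_fp16_weights=False
# keeps the layers in int8 for memory-efficient inference.
int8_model = nn.Sequential(
    bnb.nn.Linear8bitLt(64, 64, has_fp16_weights=False),
    bnb.nn.Linear8bitLt(64, 64, has_fp16_weights=False),
)

# Load the fp16 weights, then move to the GPU; quantization happens in .to().
int8_model.load_state_dict(fp16_model.state_dict())
int8_model = int8_model.to("cuda")
```

The same pattern applies to `Linear4bit`: construct the module with 4-bit layers, load the original fp16 / bf16 state dict, and the weights are quantized when the module is moved to the GPU.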