diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index feb6c766e..edcbc9b6b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,3 +17,7 @@ repos: - id: mixed-line-ending args: - --fix=lf + - repo: https://github.com/crate-ci/typos + rev: v1.17.2 + hooks: + - id: typos diff --git a/_typos.toml b/_typos.toml new file mode 100644 index 000000000..a04206b8d --- /dev/null +++ b/_typos.toml @@ -0,0 +1,11 @@ +[files] + +[default.extend-identifiers] + +[type.py.extend-words] +"BA" = "BA" # used as a commented-out variable in tests + +[type.cuda.extend-words] +"subtile" = "subtile" +"subtiles" = "subtiles" +"transation" = "transation" # TODO: is this transition, transaction, translation..? diff --git a/benchmarking/switchback/make_plot_with_jsonl.py b/benchmarking/switchback/make_plot_with_jsonl.py index 177270346..b23f63562 100644 --- a/benchmarking/switchback/make_plot_with_jsonl.py +++ b/benchmarking/switchback/make_plot_with_jsonl.py @@ -36,8 +36,8 @@ ('x_quantize_rowwise', 'P', '--', 'C4', 'Quantize rowwise X (switchback)'), ('g_quantize_rowwise', 'P', '-.', 'C4', 'Quantize rowwise G (switchback)'), - ('w_quantize_global', '.', '--', 'C4', 'Quatnize global W (switchback)'), - ('w_quantize_global_transpose', '.', '-.', 'C4', 'Quantize gloabl and\ntranspose W (switchback)'), + ('w_quantize_global', '.', '--', 'C4', 'Quantize global W (switchback)'), + ('w_quantize_global_transpose', '.', '-.', 'C4', 'Quantize global and\ntranspose W (switchback)'), ]: xs = [] ys = [] diff --git a/bitsandbytes/cuda_setup/main.py b/bitsandbytes/cuda_setup/main.py index a8792c1ad..4245a2842 100644 --- a/bitsandbytes/cuda_setup/main.py +++ b/bitsandbytes/cuda_setup/main.py @@ -4,7 +4,7 @@ [ ] TODO: Q - What if we have multiple GPUs of different makes? 
- CUDA version - Software: - - CPU-only: only CPU quantization functions (no optimizer, no matrix multipl) + - CPU-only: only CPU quantization functions (no optimizer, no matrix multiply) - CuBLAS-LT: full-build 8-bit optimizer - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`) @@ -263,7 +263,7 @@ def warn_in_case_of_duplicates(results_paths: Set[Path]) -> None: warning_msg = ( f"Found duplicate {CUDA_RUNTIME_LIBS} files: {results_paths}.. " "We select the PyTorch default libcudart.so, which is {torch.version.cuda}," - "but this might missmatch with the CUDA version that is needed for bitsandbytes." + "but this might mismatch with the CUDA version that is needed for bitsandbytes." "To override this behavior set the BNB_CUDA_VERSION= environmental variable" "For example, if you want to use the CUDA version 122" "BNB_CUDA_VERSION=122 python ..." diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py index 11db74859..9fc5e08f0 100644 --- a/bitsandbytes/functional.py +++ b/bitsandbytes/functional.py @@ -120,7 +120,7 @@ def get_instance(cls): return cls._instance def prefetch_all(self, to_cpu=False): - # assume the first added, will be hte + # assume the first added, will be the # ones that are used first, so swap them in last # in the case they are evicted again for t in self.paged_tensors[::-1]: @@ -219,7 +219,7 @@ def elementwise_func(func_name, A, B, value, prefetch=True): # paged function are fully asynchronous # if we return from this function, we want to the tensor # to be in the correct state, that is the final state after the - # operation occured. So we synchronize. + # operation occurred. So we synchronize. 
torch.cuda.synchronize() def fill(A, value, device=None, prefetch=True): elementwise_func('fill', A, None, value) @@ -589,7 +589,7 @@ def estimate_quantiles(A: Tensor, out: Optional[torch.Tensor] = None, offset: fl class QuantState: - """container for quantization state components to work with Params4bit and similar clases""" + """container for quantization state components to work with Params4bit and similar classes""" valid_quant_types = ('fp4', 'nf4') valid_qs_type_keys = [f"bitsandbytes__{x}" for x in valid_quant_types] valid_qs_keys = ['absmax', 'quant_map', 'nested_absmax', 'nested_quant_map', 'quant_state', 'quant_type', diff --git a/csrc/kernels.cu b/csrc/kernels.cu index f117547ed..df8488389 100644 --- a/csrc/kernels.cu +++ b/csrc/kernels.cu @@ -134,10 +134,10 @@ __device__ unsigned char dQuantizeFP4(float x) // we do a binary search // the pivots are divided by 12 (the FP4 absmax) - // since we assum input data is in [-1.0, 1.0] + // since we assume input data is in [-1.0, 1.0] // !be careful here, its easy to make a mistake - // that is difficult to noice if you add an extra + // that is difficult to notice if you add an extra // zero somewhere! int sign = x < 0 ? 0b1000 : 0b0000; @@ -2259,8 +2259,8 @@ template__global__ void kd // data is in 32 column-tile major with tile width 32 columns and numRows rows // L1. Load sub-tile row/col statistics. Each thread only holds 1 col, load rows into shared memory. - // L2. Load data in warp-striped arangement (t0 holds colidx [0, 0, 0, 0], rowidx [0, 1, 2, 3]) + // L2. Load data in warp-striped arrangement (t0 holds colidx [0, 0, 0, 0], rowidx [0, 1, 2, 3]) // C1. Compute val(row_stat*col_stat)/(127*127) (load 1/(127*127 into register)) // C2. Compute normalization values and store col values in register // S1. Store C1 into 16-bit output @@ -2383,7 +2383,7 @@ template __global__ void kd if(valid_items <= 0) // the sub-tile might have more elements than the tile itself break; - // L2. 
Load data in warp-striped arangement (t0 holds colidx [0, 0, 0, 0], rowidx [0, 1, 2, 3]) + // L2. Load data in warp-striped arrangement (t0 holds colidx [0, 0, 0, 0], rowidx [0, 1, 2, 3]) LoadInt32(loadint32).Load(&(A[subtile_idx]), local_values, valid_items, 0); ExchangeInt32(exchangeint32).BlockedToWarpStriped(local_values, local_values); @@ -2650,7 +2650,7 @@ template __global__ void kExtractOutliers(char *A, int *idx, char * //// 4. do dequantization from register of B into second pair of registers //// 5. store (4) into fragment //// 6. matmul aggregate into fragment C -//// 7. aggreecate files of C into shared memroy block C +//// 7. aggregate files of C into shared memory block C //// 8. sum (7) //// 9. write outputs to matmul output matrix //} diff --git a/deploy.sh b/deploy.sh index c261ee9a9..e60373627 100644 --- a/deploy.sh +++ b/deploy.sh @@ -5,7 +5,7 @@ echo "MAKE SURE LD_LIBRARY_PATH IS EMPTY!" echo $LD_LIBRARY_PATH if [[ ! -z "${LD_LIBRARY_PATH}" ]]; then - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -24,7 +24,7 @@ make cpuonly CUDA_VERSION="CPU" if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -34,7 +34,7 @@ make cuda110 CUDA_VERSION=110 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -44,7 +44,7 @@ make cuda11x CUDA_VERSION=111 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -54,7 +54,7 @@ make cuda11x CUDA_VERSION=114 if [ ! 
-f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -64,7 +64,7 @@ make cuda11x CUDA_VERSION=115 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -74,7 +74,7 @@ make cuda11x CUDA_VERSION=117 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -84,7 +84,7 @@ make cuda118 CUDA_VERSION=118 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -94,7 +94,7 @@ make cuda12x CUDA_VERSION=120 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -104,7 +104,7 @@ make cuda12x CUDA_VERSION=121 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -114,7 +114,7 @@ make cuda12x CUDA_VERSION=122 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -124,7 +124,7 @@ make cuda12x CUDA_VERSION=123 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 
1>&2 exit 64 fi @@ -138,7 +138,7 @@ make cuda110_nomatmul CUDA_VERSION=110 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -149,7 +149,7 @@ make cuda11x_nomatmul CUDA_VERSION=111 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -159,7 +159,7 @@ make cuda11x_nomatmul CUDA_VERSION=114 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -169,7 +169,7 @@ make cuda11x_nomatmul CUDA_VERSION=115 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -179,7 +179,7 @@ make cuda11x_nomatmul CUDA_VERSION=117 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -189,7 +189,7 @@ make cuda118_nomatmul CUDA_VERSION=118 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -199,7 +199,7 @@ make cuda12x_nomatmul CUDA_VERSION=120 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 
1>&2 exit 64 fi @@ -209,7 +209,7 @@ make cuda12x_nomatmul CUDA_VERSION=121 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -219,7 +219,7 @@ make cuda12x_nomatmul CUDA_VERSION=122 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi @@ -229,7 +229,7 @@ make cuda12x_nomatmul CUDA_VERSION=123 if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123_nocublaslt.so" ]; then # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessul!" 1>&2 + echo "Compilation unsuccessful!" 1>&2 exit 64 fi diff --git a/docs/source/contributing.mdx b/docs/source/contributing.mdx index b28e91936..b482364de 100644 --- a/docs/source/contributing.mdx +++ b/docs/source/contributing.mdx @@ -1,5 +1,5 @@ # Contributors guidelines -... stil under construction ... (feel free to propose materials, `bitsandbytes` is a community project) +... still under construction ... (feel free to propose materials, `bitsandbytes` is a community project) ## Setup pre-commit hooks - Install pre-commit hooks with `pip install pre-commit`. diff --git a/docs/source/integrations.mdx b/docs/source/integrations.mdx index 7857abf4c..7d47ede62 100644 --- a/docs/source/integrations.mdx +++ b/docs/source/integrations.mdx @@ -29,7 +29,7 @@ Please review the [bitsandbytes section in the Accelerate docs](https://huggingf # Trainer for the optimizers -You can use any of the 8-bit and/or paged optimizers by simple passing them to the `transformers.Trainer` class on intialization.All bnb optimizers are supported by passing the correct string in `TrainingArguments`'s `optim` attribute - e.g. (`paged_adamw_32bit`). 
+You can use any of the 8-bit and/or paged optimizers by simply passing them to the `transformers.Trainer` class on initialization. All bnb optimizers are supported by passing the correct string in `TrainingArguments`'s `optim` attribute - e.g. (`paged_adamw_32bit`). See the [official API docs for reference](https://huggingface.co/docs/transformers/main_classes/trainer#transformers.Trainer). diff --git a/docs/source/optimizers.mdx b/docs/source/optimizers.mdx index 18d20de1d..f74c89ae6 100644 --- a/docs/source/optimizers.mdx +++ b/docs/source/optimizers.mdx @@ -168,9 +168,9 @@ Possible options for the config override are: `betas, eps, weight_decay, lr, opt For overrides for particular layers, we recommend overriding locally in each module. You can do this by passing the module, the parameter, and its attribute name to the GlobalOptimManager: ```py class MyModule(torch.nn.Module): - def __init__(din, dout): + def __init__(self, d_in, d_out): super(MyModule, self).__init__() - self.linear = torch.nn.Linear(din, dout) + self.linear = torch.nn.Linear(d_in, d_out) # optimization will happen in 32-bit and # learning rate will be set to 0.0001 independent of the main learning rate config = {'optim_bits': 32, 'lr' : 0.0001} diff --git a/include/Algo-Direct2.h b/include/Algo-Direct2.h index d5fa58d12..4211c77bd 100644 --- a/include/Algo-Direct2.h +++ b/include/Algo-Direct2.h @@ -157,7 +157,7 @@ struct AlgoVecBase::val FVec vxp = _mm256_i32gather_ps(xi, idxp, sizeof(float)); IVec ip = idxm; -#else // do not use gather instrucions +#else // do not use gather instructions union U { __m256i vec; diff --git a/include/Portable.h b/include/Portable.h index 1710b0502..2cec1e7de 100644 --- a/include/Portable.h +++ b/include/Portable.h @@ -147,5 +147,5 @@ inline T prev(T x) return x; } -} // namepsace Details +} // namespace Details } // namespace BinSearch diff --git a/include/SIMD.h b/include/SIMD.h index d559e9f55..a2639d3ac 100644 --- a/include/SIMD.h +++ b/include/SIMD.h @@ -568,5 
+568,5 @@ FORCE_INLINE FVec mulSub(const FVec& a, const FVec int8 automatically l1 = module(32, 64).cuda()