refactor: adjust cli
Signed-off-by: thxCode <[email protected]>
thxCode committed Aug 9, 2024
1 parent 4938734 commit bb6f6f5
Showing 2 changed files with 2 additions and 2 deletions.
cmd/gguf-parser/README.md (1 addition, 1 deletion)
@@ -23,7 +23,7 @@ GLOBAL OPTIONS:
     --cache-type-k value, --ctk value        Specify the type of Key cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16")
     --cache-type-v value, --ctv value        Specify the type of Value cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16")
     --ctx-size value, -c value               Specify the size of prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default: -1)
-    --flash-attention, --fa                  Specify enabling Flash Attention, which is used to estimate the usage. Flash Attention can reduce the usage of RAM/VRAM. (default: false)
+    --flash-attention, --flash-attn, --fa    Specify enabling Flash Attention, which is used to estimate the usage. Flash Attention can reduce the usage of RAM/VRAM. (default: false)
     --gpu-layers value, --ngl value          Specify how many layers of the main model to offload, which is used to estimate the usage, default is full offloaded. (default: -1)
     --gpu-layers-draft value, --ngld value   Specify how many layers of the draft model to offload, which is used to estimate the usage, default is full offloaded. (default: -1)
     --gpu-layers-step value                  Specify the step of layers to offload, works with --gpu-layers. (default: 0)
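With this change, the long form --flash-attention, the new --flash-attn, and the short --fa all toggle the same option; for example, gguf-parser --flash-attn --ctx-size 8192 behaves the same as gguf-parser --fa --ctx-size 8192 (assuming the binary is invoked as gguf-parser, per the command directory this diff edits).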
cmd/gguf-parser/main.go (1 addition, 1 deletion)
@@ -369,7 +369,7 @@ func main() {
 			Value:    flashAttention,
 			Category: "Estimate",
 			Name:     "flash-attention",
-			Aliases:  []string{"fa"},
+			Aliases:  []string{"flash-attn", "fa"},
 			Usage: "Specify enabling Flash Attention, " +
 				"which is used to estimate the usage. " +
 				"Flash Attention can reduce the usage of RAM/VRAM.",
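The struct fields above (Name, Aliases, Usage, Category) match the urfave/cli/v2 flag API, which resolves any alias to the flag's canonical name at parse time. Below is a minimal, self-contained sketch under that assumption; the app name and action are illustrative stand-ins, not the project's actual wiring:

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/urfave/cli/v2"
)

func main() {
	app := &cli.App{
		Name: "flag-demo", // hypothetical; stands in for gguf-parser's real app setup
		Flags: []cli.Flag{
			&cli.BoolFlag{
				Category: "Estimate",
				Name:     "flash-attention",
				Aliases:  []string{"flash-attn", "fa"},
				Usage:    "Specify enabling Flash Attention.",
			},
		},
		Action: func(c *cli.Context) error {
			// Lookups always go through the canonical name; the library
			// maps --flash-attn and --fa onto --flash-attention.
			fmt.Println("flash attention:", c.Bool("flash-attention"))
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		log.Fatal(err)
	}
}
```

Running this with any of --flash-attention, --flash-attn, or --fa prints "flash attention: true", which is why the README change above only needed to list the new spelling alongside the existing ones.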
