Skip to content

Commit

Permalink
models(gallery): add mistral-0.3 and command-r, update functions (#2388)
Browse files Browse the repository at this point in the history
* models(gallery): add mistral-0.3 and command-r, update functions

Also add disable_parallel_new_lines to disable newlines in the JSON
output when forcing parallel tools. Some models (like mistral) can be
very sensitive to that when used for function calling.

Signed-off-by: Ettore Di Giacinto <[email protected]>

* models(gallery): add aya-23-8b

Signed-off-by: Ettore Di Giacinto <[email protected]>

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler authored May 23, 2024
1 parent eb11a46 commit ea330d4
Show file tree
Hide file tree
Showing 12 changed files with 266 additions and 9 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ get-sources
prepare-sources
/backend/cpp/llama/grpc-server
/backend/cpp/llama/llama.cpp
/backend/cpp/llama-*

*.log

go-ggml-transformers
go-gpt2
Expand Down Expand Up @@ -49,4 +52,4 @@ prepare
.scannerwork

# backend virtual environments
**/venv
**/venv
1 change: 1 addition & 0 deletions aio/cpu/text-to-text.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: gpt-4
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192

stopwords:
- "<|im_end|>"
Expand Down
1 change: 1 addition & 0 deletions aio/gpu-8g/text-to-text.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: gpt-4
mmap: true
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
context_size: 8192

stopwords:
- "<|im_end|>"
Expand Down
2 changes: 2 additions & 0 deletions aio/intel/text-to-text.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
name: gpt-4
mmap: false
context_size: 8192

f16: false
parameters:
model: huggingface://NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
Expand Down
69 changes: 69 additions & 0 deletions gallery/command-r.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
---
name: "command-r"

config_file: |
context_size: 131072
stopwords:
- "<|END_OF_TURN_TOKEN|>"
function:
# disable injecting the "answer" tool
disable_no_action: true
grammar:
# This allows the grammar to also return messages
mixed_mode: true
# Not all models have a sketchpad or something to write thoughts on.
# This option makes the model reply with EITHER a free string OR JSON, but never both in the same reply
#no_mixed_free_string: true
# Disable grammar
# Base instructor model doesn't work well with grammars
#disable: true
disable_parallel_new_lines: true
return_name_in_function_response: true
replace_function_results:
# Replace everything that is not JSON array or object
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
# Convert single quotes to double quotes
- key: "'([^']*?)'"
value: "_DQUOTE_${1}_DQUOTE_"
- key: '\\"'
value: "__TEMP_QUOTE__"
- key: "\'"
value: "'"
- key: "_DQUOTE_"
value: '"'
- key: "__TEMP_QUOTE__"
value: '"'
template:
join_chat_messages_by_character: "" ## No newlines between messages
chat: |-
{{.Input -}}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
# Renders a single chat message wrapped in the Command-R turn tokens, selected by message role.
chat_message: |-
{{if eq .RoleName "user" -}}
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "system" -}}
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "assistant" -}}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "tool" -}}
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if .FunctionCall -}}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{toJson .FunctionCall}}<|END_OF_TURN_TOKEN|>
{{- end -}}
completion: |
{{.Input}}
function: |-
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
You are a function calling AI model, you can call the following functions:
## Available Tools
{{range .Functions}}
- {"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}
{{end}}
When using a tool, reply with JSON, for instance {"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Input -}}
1 change: 1 addition & 0 deletions gallery/hermes-2-pro-mistral.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ name: "hermes-2-pro-mistral"

config_file: |
mmap: true
context_size: 8192
stopwords:
- "<|im_end|>"
- "<dummy32000>"
Expand Down
71 changes: 71 additions & 0 deletions gallery/index.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,35 @@
---
## START Mistral
- &mistral03
url: "github:mudler/LocalAI/gallery/mistral-0.3.yaml@master"
name: "mistral-7b-instruct-v0.3"
icon: https://cdn-avatars.huggingface.co/v1/production/uploads/62dac1c7a8ead43d20e3e17a/wrLf5yaGC6ng4XME70w6Z.png
license: apache-2.0
description: |
The Mistral-7B-Instruct-v0.3 Large Language Model (LLM) is an instruct fine-tuned version of the Mistral-7B-v0.3.
Mistral-7B-v0.3 has the following changes compared to Mistral-7B-v0.2
Extended vocabulary to 32768
Supports v3 Tokenizer
Supports function calling
urls:
- https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3
- https://huggingface.co/MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF
tags:
- llm
- gguf
- gpu
- mistral
- cpu
- function-calling
overrides:
parameters:
model: Mistral-7B-Instruct-v0.3.Q4_K_M.gguf
files:
- filename: "Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"
sha256: "14850c84ff9f06e9b51d505d64815d5cc0cea0257380353ac0b3d21b21f6e024"
uri: "huggingface://MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf"
### START mudler's LocalAI specific-models
- &mudler
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
Expand Down Expand Up @@ -1134,6 +1165,46 @@
- filename: Llama-3-Hercules-5.0-8B-Q4_K_M.gguf
sha256: 83647caf4a23a91697585cff391e7d1236fac867392f9e49a6dab59f81b5f810
uri: huggingface://bartowski/Llama-3-Hercules-5.0-8B-GGUF/Llama-3-Hercules-5.0-8B-Q4_K_M.gguf
### START Command-r
- &command-R
url: "github:mudler/LocalAI/gallery/command-r.yaml@master"
name: "command-r-v01:q1_s"
license: "cc-by-nc-4.0"
icon: https://cdn.sanity.io/images/rjtqmwfu/production/ae020d94b599cc453cc09ebc80be06d35d953c23-102x18.svg
urls:
- https://huggingface.co/CohereForAI/c4ai-command-r-v01
- https://huggingface.co/dranger003/c4ai-command-r-v01-iMat.GGUF
description: |
C4AI Command-R is a research release of a 35 billion parameter highly performant generative model. Command-R is a large language model with open weights optimized for a variety of use cases including reasoning, summarization, and question answering. Command-R has the capability for multilingual generation evaluated in 10 languages and highly performant RAG capabilities.
tags:
- llm
- gguf
- gpu
- command-r
- cpu
overrides:
parameters:
model: ggml-c4ai-command-r-v01-iq1_s.gguf
files:
- filename: "ggml-c4ai-command-r-v01-iq1_s.gguf"
sha256: "aad4594ee45402fe344d8825937d63b9fa1f00becc6d1cc912b016dbb020e0f0"
uri: "huggingface://dranger003/c4ai-command-r-v01-iMat.GGUF/ggml-c4ai-command-r-v01-iq1_s.gguf"
- !!merge <<: *command-R
name: "aya-23-8b"
urls:
- https://huggingface.co/CohereForAI/aya-23-8B
- https://huggingface.co/bartowski/aya-23-8B-GGUF
description: |
Aya 23 is an open weights research release of an instruction fine-tuned model with highly advanced multilingual capabilities. Aya 23 focuses on pairing a highly performant pre-trained Command family of models with the recently released Aya Collection. The result is a powerful multilingual large language model serving 23 languages.
This model card corresponds to the 8-billion version of the Aya 23 model. We also released a 35-billion version which you can find here.
overrides:
parameters:
model: aya-23-8B-Q4_K_M.gguf
files:
- filename: "aya-23-8B-Q4_K_M.gguf"
sha256: "21b3aa3abf067f78f6fe08deb80660cc4ee8ad7b4ab873a98d87761f9f858b0f"
uri: "huggingface://bartowski/aya-23-8B-GGUF/aya-23-8B-Q4_K_M.gguf"
- &phi-2-chat
### START Phi-2
url: "github:mudler/LocalAI/gallery/phi-2-chat.yaml@master"
Expand Down
67 changes: 67 additions & 0 deletions gallery/mistral-0.3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
---
name: "mistral-0.3"

config_file: |
context_size: 8192
mmap: true
stopwords:
- "<|im_end|>"
- "<dummy32000>"
- "</tool_call>"
- "<|eot_id|>"
- "<|end_of_text|>"
- "</s>"
- "[/TOOL_CALLS]"
- "[/ACTIONS]"
function:
# disable injecting the "answer" tool
disable_no_action: true
grammar:
# This allows the grammar to also return messages
#mixed_mode: true
# Not all models have a sketchpad or something to write thoughts on.
# This option makes the model reply with EITHER a free string OR JSON, but never both in the same reply
#no_mixed_free_string: true
# Disable grammar
# Base instructor model doesn't work well with grammars
disable: true
parallel_calls: true
disable_parallel_new_lines: true
return_name_in_function_response: true
# Without grammar uncomment the lines below
# Warning: this is relying only on the capability of the
# LLM model to generate the correct function call.
json_regex_match:
- "(?s)\\[TOOL\\_CALLS\\](.*)"
replace_function_results:
# Replace everything that is not JSON array or object
- key: '(?s)^[^{\[]*'
value: ""
- key: '(?s)[^}\]]*$'
value: ""
- key: "(?s)\\[TOOL\\_CALLS\\]"
value: ""
- key: "(?s)\\[\\/TOOL\\_CALLS\\]"
value: ""
template:
join_chat_messages_by_character: "" ## No newlines between messages
chat: |
{{.Input -}}
chat_message: |-
{{if eq .RoleName "user" -}}
[INST] {{.Content }} [/INST]
{{- else if .FunctionCall -}}
[TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
{{- else if eq .RoleName "tool" -}}
[TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
{{- else -}}
{{ .Content -}}
{{ end -}}
completion: |
{{.Input}}
function: |-
[AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}
18 changes: 16 additions & 2 deletions pkg/functions/grammar_json_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ var (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\"" space`,
// TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here,
// however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse.
"freestring": `(
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
Expand Down Expand Up @@ -111,19 +114,26 @@ func (sc *JSONSchemaConverter) addRule(name, rule string) string {
return key
}

const array = `arr ::=
// arrayNewLines is the "arr" grammar rule that puts a newline after the
// opening bracket and after every comma separating two values.
// finalizeGrammar appends it when DisableParallelNewLines is not set.
const arrayNewLines = `arr ::=
"[\n" (
realvalue
(",\n" realvalue)*
)? "]"`

// array is the "arr" grammar rule without any newline between the brackets
// and the comma-separated values.
// finalizeGrammar appends it when DisableParallelNewLines is set.
const array = `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`

func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string {

grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...)

suffix := grammarOpts.Suffix
maybeArray := grammarOpts.MaybeArray
disableParallelNewLines := grammarOpts.DisableParallelNewLines
maybeString := grammarOpts.MaybeString
noMixedFreeString := grammarOpts.NoMixedFreeString

Expand Down Expand Up @@ -177,7 +187,11 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption))
}

lines = append(lines, fmt.Sprintf("%s ::= %s", "root", newRoot))
lines = append(lines, array)
if disableParallelNewLines {
lines = append(lines, array)
} else {
lines = append(lines, arrayNewLines)
}

if maybeArray {
lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
Expand Down
17 changes: 17 additions & 0 deletions pkg/functions/grammar_json_schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,5 +427,22 @@ var _ = Describe("JSON schema grammar tests", func() {
}
Expect(len(results)).To(Equal(len(strings.Split(grammar, "\n"))), grammar)
})

// Checks that passing DisableParallelNewLines yields a grammar containing
// the "arr" rule that has no newlines between array elements.
It("generates parallel tools without newlines in JSON", func() {
structuredGrammar := JSONFunctionStructureName{
OneOf: testFunctionsName}
content := `arr ::=
"[" (
realvalue
("," realvalue)*
)? "]"`
grammar := structuredGrammar.Grammar(functions.EnableMaybeString, functions.EnableMaybeArray, functions.DisableParallelNewLines)
// Compare line by line: every non-empty line of the expected rule must
// appear somewhere in the generated grammar.
results := strings.Split(content, "\n")
for _, r := range results {
if r != "" {
Expect(grammar).To(ContainSubstring(r))
}
}
})
})
})
15 changes: 10 additions & 5 deletions pkg/functions/options.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package functions

// GrammarOption holds the settings that finalizeGrammar reads after the
// caller-supplied option functions have been applied.
type GrammarOption struct {
PropOrder string
Suffix string
MaybeArray bool
MaybeString bool
NoMixedFreeString bool
PropOrder string
Suffix string
MaybeArray bool
// DisableParallelNewLines selects the "arr" rule without newlines
// instead of the one that separates elements with "\n".
DisableParallelNewLines bool
MaybeString bool
NoMixedFreeString bool
}

func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
Expand All @@ -18,6 +19,10 @@ var EnableMaybeArray = func(o *GrammarOption) {
o.MaybeArray = true
}

// DisableParallelNewLines is a grammar option that switches on
// GrammarOption.DisableParallelNewLines.
var DisableParallelNewLines = func(opts *GrammarOption) {
	opts.DisableParallelNewLines = true
}

// EnableMaybeString is a grammar option that switches on
// GrammarOption.MaybeString.
var EnableMaybeString = func(opts *GrammarOption) {
	opts.MaybeString = true
}
Expand Down
Loading

0 comments on commit ea330d4

Please sign in to comment.