refactor: updates chat and generation options (#34)
kevinhermawan authored Jul 31, 2024
1 parent ab65fe3 commit 6c96dcb
Showing 5 changed files with 104 additions and 56 deletions.
74 changes: 50 additions & 24 deletions Sources/OllamaKit/RequestData/Completion/OKCompletionOptions.swift
@@ -7,53 +7,79 @@

import Foundation

/// A structure that encapsulates options for controlling the behavior of content generation in the Ollama API.
public struct OKCompletionOptions: Encodable {
/// Optional integer to enable Mirostat sampling for controlling perplexity. (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
/// Optional integer to enable Mirostat sampling for controlling perplexity.
/// (0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)
/// Mirostat sampling helps regulate the unpredictability of the output,
/// balancing coherence and diversity. The default value is 0, which disables Mirostat.
public var mirostat: Int?

/// Optional float influencing the adjustment speed of the Mirostat algorithm. (Lower = slower adjustment)
public var mirostatEta: Float?
/// Optional double influencing the adjustment speed of the Mirostat algorithm.
/// (Lower values result in slower adjustments, higher values increase responsiveness.)
/// This parameter, `mirostatEta`, adjusts how quickly the algorithm reacts to feedback
/// from the generated text. A default value of 0.1 provides a moderate adjustment speed.
public var mirostatEta: Double?

/// Optional float controlling the balance between coherence and diversity. (Lower = more focused text)
public var mirostatTau: Float?
/// Optional double controlling the balance between coherence and diversity.
/// (Lower values lead to more focused and coherent text)
/// The `mirostatTau` parameter sets the target perplexity level, influencing how
/// creative or constrained the text generation should be. Default is 5.0.
public var mirostatTau: Double?

/// Optional integer setting the size of the context window for token generation.
/// This defines the number of previous tokens the model considers when generating new tokens.
/// Larger values allow the model to use more context, with a default of 2048 tokens.
public var numCtx: Int?

/// Optional integer for the number of GQA groups in the transformer layer, specific to some models.
public var numGqa: Int?

/// Optional integer indicating the number of layers to send to the GPU(s).
public var numGpu: Int?

/// Optional integer for the number of threads used in computation, recommended to match physical CPU cores.
public var numThread: Int?

/// Optional integer setting how far back the model checks to prevent repetition.
/// Optional integer setting how far back the model looks to prevent repetition.
/// This parameter, `repeatLastN`, determines the number of tokens the model
/// reviews to avoid repeating phrases. A value of 64 is typical, while 0 disables this feature.
public var repeatLastN: Int?

/// Optional float setting the penalty strength for repetitions.
public var repeatPenalty: Float?
/// Optional double setting the penalty strength for repetitions.
/// A higher value increases the penalty for repeated tokens, discouraging repetition.
/// The default value is 1.1, providing moderate repetition control.
public var repeatPenalty: Double?

/// Optional float to control the model's creativity (higher = more creative).
public var temperature: Float?
/// Optional double to control the model's creativity.
/// (Higher values increase creativity and randomness)
/// The `temperature` parameter adjusts the randomness of predictions; higher values
/// like 0.8 make outputs more creative and diverse. The default is 0.7.
public var temperature: Double?

/// Optional integer for setting a random number seed for generation consistency.
/// Specifying a seed ensures the same output for the same prompt and parameters,
/// useful for testing or reproducing results. Default is 0, meaning no fixed seed.
public var seed: Int?

/// Optional string defining stop sequences for the model to cease generation.
/// The `stop` parameter specifies sequences that, when encountered, will halt further text generation.
/// Multiple stop sequences can be defined. For example, "AI assistant:".
public var stop: String?

/// Optional float for tail free sampling, reducing impact of less probable tokens.
public var tfsZ: Float?
/// Optional double for tail free sampling, reducing impact of less probable tokens.
/// `tfsZ` adjusts how much the model avoids unlikely tokens, with higher values
/// reducing their influence. A value of 1.0 disables this feature.
public var tfsZ: Double?

/// Optional integer for the maximum number of tokens to predict.
/// `numPredict` sets the upper limit for the number of tokens to generate.
/// A default of 128 tokens is typical, with special values like -1 for infinite generation.
public var numPredict: Int?

/// Optional integer to limit nonsense generation and control answer diversity.
/// The `topK` parameter limits the set of possible tokens to the top-k likely choices.
/// Lower values (e.g., 10) reduce diversity, while higher values (e.g., 100) increase it. Default is 40.
public var topK: Int?

/// Optional float working with top-k to balance text diversity and focus.
public var topP: Float?
/// Optional double working with top-k to balance text diversity and focus.
/// `topP` (nucleus sampling) retains tokens that cumulatively account for a certain
/// probability mass, adding flexibility beyond `topK`. A value like 0.9 increases diversity.
public var topP: Double?

/// Optional double for the minimum probability threshold for token inclusion.
/// `minP` ensures that tokens below a certain probability threshold are excluded,
/// focusing the model's output on more probable sequences. Default is 0.0, meaning no filtering.
public var minP: Double?
}
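Since every stored property on `OKCompletionOptions` is optional, unset fields are simply omitted from the request body. The sketch below illustrates this with a trimmed, self-contained copy of the struct; the field names match the diff above, but the snake_case key mapping is an assumption about how OllamaKit configures its encoder, not something shown in this commit.

```swift
import Foundation

// Trimmed sketch of the options struct above (not the real OllamaKit type).
// The snake_case wire keys are an assumption about the library's encoder.
struct CompletionOptionsSketch: Encodable {
    var mirostat: Int?
    var temperature: Double?
    var topK: Int?
    var topP: Double?
    var repeatPenalty: Double?
    var numPredict: Int?
}

var options = CompletionOptionsSketch()
options.temperature = 0.7
options.topK = 40
options.topP = 0.9
options.numPredict = 128

let encoder = JSONEncoder()
encoder.keyEncodingStrategy = .convertToSnakeCase
encoder.outputFormatting = .sortedKeys
let json = String(data: try! encoder.encode(options), encoding: .utf8)!
// nil fields (mirostat, repeatPenalty) are dropped; keys become snake_case
print(json)
```

Synthesized `Encodable` conformance uses `encodeIfPresent` for optionals, which is why `mirostat` and `repeatPenalty` never appear in the output here.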
2 changes: 1 addition & 1 deletion Sources/OllamaKit/RequestData/OKChatRequestData.swift
@@ -17,7 +17,7 @@ public struct OKChatRequestData: Encodable {
/// An array of ``Message`` instances representing the content to be sent to the Ollama API.
public let messages: [Message]

/// An optional array of `OKJSONValue` representing the tools available for tool calling in the chat.
/// An optional array of ``OKJSONValue`` representing the tools available for tool calling in the chat.
public let tools: [OKJSONValue]?

/// Optional ``OKCompletionOptions`` providing additional configuration for the chat request.
20 changes: 20 additions & 0 deletions Sources/OllamaKit/Responses/Completion/OKCompletionResponse.swift
@@ -7,15 +7,35 @@

import Foundation

/// A protocol that defines the response structure for a completion request in the Ollama API.
protocol OKCompletionResponse: Decodable {
/// The identifier of the model used for generating the response.
var model: String { get }

/// The date and time when the response was created.
var createdAt: Date { get }

/// A boolean indicating whether the completion process is done.
var done: Bool { get }

/// An optional string providing the reason why the process was completed.
var doneReason: String? { get }

/// An optional integer representing the total time spent generating the response, in nanoseconds.
var totalDuration: Int? { get }

/// An optional integer representing the time spent loading the model, in nanoseconds.
var loadDuration: Int? { get }

/// An optional integer indicating the number of tokens in the prompt that were evaluated.
var promptEvalCount: Int? { get }

/// An optional integer representing the time spent evaluating the prompt, in nanoseconds.
var promptEvalDuration: Int? { get }

/// An optional integer indicating the number of tokens in the generated response.
var evalCount: Int? { get }

/// An optional integer representing the time spent generating the response, in nanoseconds.
var evalDuration: Int? { get }
}
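All duration fields in this protocol are reported in nanoseconds, so deriving a human-readable throughput figure takes a small conversion. The helper below is an illustration (it is not part of OllamaKit) of turning `evalCount`/`evalDuration` into tokens per second:

```swift
// Durations in OKCompletionResponse are nanoseconds; convert to seconds
// before dividing. This helper is a sketch, not a library API.
func tokensPerSecond(evalCount: Int?, evalDuration: Int?) -> Double? {
    guard let count = evalCount, let nanos = evalDuration, nanos > 0 else {
        return nil
    }
    return Double(count) / (Double(nanos) / 1_000_000_000.0)
}

// 290 tokens generated over 2.9 s → roughly 100 tokens/s
let rate = tokensPerSecond(evalCount: 290, evalDuration: 2_900_000_000)
```

Returning `nil` when either field is missing mirrors the fact that both properties are optional on the protocol.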
40 changes: 21 additions & 19 deletions Sources/OllamaKit/Responses/OKChatResponse.swift
@@ -9,72 +9,74 @@ import Foundation

/// A structure that represents the response to a chat request from the Ollama API.
public struct OKChatResponse: OKCompletionResponse, Decodable {
/// A string representing the identifier of the model that processed the request.
/// The identifier of the model that processed the request.
public let model: String

/// A `Date` indicating when the response was created.
/// The date and time when the response was created.
public let createdAt: Date

/// An optional `Message` instance representing the content of the response.
/// An optional ``Message`` instance representing the content of the response.
/// Contains the main message data, including the role of the sender and the content.
public let message: Message?

/// A boolean indicating whether the chat session is complete.
public let done: Bool

/// An optional string indicating the cause for the completion of the chat session.
/// An optional string providing the reason for the completion of the chat session.
public let doneReason: String?

/// An optional integer representing the total duration of processing the request.
/// An optional integer representing the total duration of processing the request, in nanoseconds.
public let totalDuration: Int?

/// An optional integer indicating the duration of loading the model.
/// An optional integer representing the time taken to load the model, in nanoseconds.
public let loadDuration: Int?

/// An optional integer specifying the number of evaluations performed on the prompt.
/// An optional integer indicating the number of tokens in the prompt that were evaluated.
public let promptEvalCount: Int?

/// An optional integer indicating the duration of prompt evaluations.
/// An optional integer representing the duration of prompt evaluations, in nanoseconds.
public let promptEvalDuration: Int?

/// An optional integer representing the total number of evaluations performed.
/// An optional integer indicating the number of tokens generated in the response.
public let evalCount: Int?

/// An optional integer indicating the duration of all evaluations.
/// An optional integer representing the duration of all evaluations, in nanoseconds.
public let evalDuration: Int?

/// A structure that represents a single response message.
/// A structure that represents a single response message.
public struct Message: Decodable {
/// A ``Role`` value indicating the sender of the message (system, assistant, user).
/// The role of the message sender (system, assistant, user).
public var role: Role

/// A string containing the message's content.
/// The content of the message.
public var content: String

/// An optional array of ``ToolCall`` instances representing any tools invoked in the response.
public var toolCalls: [ToolCall]?

/// An enumeration that represents the role of the message sender.
/// An enumeration representing the role of the message sender.
public enum Role: String, Decodable {
/// Indicates the message is from the system.
/// The message is from the system.
case system

/// Indicates the message is from the assistant.
/// The message is from the assistant.
case assistant

/// Indicates the message is from the user.
/// The message is from the user.
case user
}

/// A structure that represents a tool call in the response.
public struct ToolCall: Decodable {
/// An optional `Function` structure that represents the details of the tool call.
/// An optional ``Function`` structure representing the details of the tool call.
public let function: Function?

/// A structure that represents the details of a tool call.
public struct Function: Decodable {
/// The name of the tool being called.
public let name: String?

/// An optional `OKJSONValue` that represents the arguments passed to the tool.
/// An optional ``OKJSONValue`` representing the arguments passed to the tool.
public let arguments: OKJSONValue?
}
}
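To show how the `Message`/`Role` types above behave when decoded, here is a trimmed, self-contained sketch; the JSON shape is an assumption about the Ollama API wire format, and `MessageSketch` stands in for the real nested type:

```swift
import Foundation

// Trimmed stand-in for OKChatResponse.Message and its Role enum above.
struct MessageSketch: Decodable {
    enum Role: String, Decodable {
        case system, assistant, user
    }
    var role: Role
    var content: String
}

// Hypothetical payload fragment; the real API response nests this
// under a larger response object.
let payload = #"{"role": "assistant", "content": "Hello!"}"#

let message = try! JSONDecoder().decode(
    MessageSketch.self,
    from: Data(payload.utf8)
)
print(message.role, message.content)
```

Because `Role` is a `String`-backed enum, an unknown role string in the payload would make decoding throw rather than fall back to a default case.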
24 changes: 12 additions & 12 deletions Sources/OllamaKit/Responses/OKGenerateResponse.swift
@@ -9,39 +9,39 @@ import Foundation

/// A structure that represents the response to a content generation request from the Ollama API.
public struct OKGenerateResponse: OKCompletionResponse, Decodable {
/// A string representing the identifier of the model used for generation.
/// The identifier of the model used for generating the content.
public let model: String

/// A `Date` indicating when the response was generated.
/// The date and time when the response was generated.
public let createdAt: Date

/// A string containing the generated content.
/// A string containing the generated content. This field will be empty if the response was streamed; otherwise, it contains the full response.
public let response: String

/// An optional array of integers representing contextual information used in the generation.
/// An optional array of integers providing contextual information used during generation.
public let context: [Int]?

/// A boolean indicating whether the generation process is complete.
/// A boolean indicating whether the content generation process is complete.
public let done: Bool

/// An optional string indicating the cause for the completion of the generation.
/// An optional string providing the reason for the completion of the generation process.
public let doneReason: String?

/// An optional integer representing the total duration of processing the request.
/// An optional integer representing the total time spent generating the response, in nanoseconds.
public let totalDuration: Int?

/// An optional integer indicating the duration of loading the model.
/// An optional integer representing the time spent loading the model, in nanoseconds.
public let loadDuration: Int?

/// An optional integer specifying the number of evaluations performed on the prompt.
/// An optional integer indicating the number of tokens in the prompt that were evaluated.
public let promptEvalCount: Int?

/// An optional integer indicating the duration of prompt evaluations.
/// An optional integer representing the time spent evaluating the prompt, in nanoseconds.
public let promptEvalDuration: Int?

/// An optional integer representing the total number of evaluations performed.
/// An optional integer indicating the number of tokens in the generated response.
public let evalCount: Int?

/// An optional integer indicating the duration of all evaluations.
/// An optional integer representing the time spent generating the response, in nanoseconds.
public let evalDuration: Int?
}
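A generate response like the one documented above arrives as JSON with snake_case keys (`done_reason`, `eval_count`, and so on). The sketch below decodes a minimal payload into a trimmed stand-in struct; the `.convertFromSnakeCase` strategy is an assumption about how OllamaKit configures its decoder, and the payload values are made up for illustration:

```swift
import Foundation

// Trimmed stand-in for OKGenerateResponse, covering a few of the
// fields documented above.
struct GenerateResponseSketch: Decodable {
    let model: String
    let response: String
    let done: Bool
    let doneReason: String?
    let evalCount: Int?
    let evalDuration: Int?
}

// Hypothetical payload; field values are illustrative only.
let payload = #"""
{"model": "llama3", "response": "Hi!", "done": true,
 "done_reason": "stop", "eval_count": 4, "eval_duration": 40000000}
"""#

let decoder = JSONDecoder()
decoder.keyDecodingStrategy = .convertFromSnakeCase
let res = try! decoder.decode(
    GenerateResponseSketch.self,
    from: Data(payload.utf8)
)
```

Fields absent from the payload (such as `total_duration` here) decode cleanly to `nil` because the corresponding properties are optional.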