chore(model): add image and extra_params for textToImage Task (#225)

Because - we need to support image-to-image before [INS-2574](https://linear.app/instill-ai/issue/INS-2574/make-ai-tasks-consistent) is complete. This commit - adds two new fields to the text-to-image task
instill-ai · Nov 22, 2023 · a8d29d6 · a8d29d6
1 parent 6d848af
commit a8d29d6
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 4 deletions.
diff --git a/model/model/v1alpha/task_text_to_image.proto b/model/model/v1alpha/task_text_to_image.proto
@@ -9,14 +9,23 @@ import "google/api/field_behavior.proto";
 message TextToImageInput {
   // The prompt text
   string prompt = 1 [(google.api.field_behavior) = REQUIRED];
+  // The Prompt Image, only for multimodal input
+  oneof type {
+    // Image type URL
+    string prompt_image_url = 2;
+    // Image type base64
+    string prompt_image_base64 = 3;
+  }
   // The steps, default is 5
-  optional int32 steps = 2 [(google.api.field_behavior) = OPTIONAL];
+  optional int32 steps = 4 [(google.api.field_behavior) = OPTIONAL];
   // The guidance scale, default is 7.5
-  optional float cfg_scale = 3 [(google.api.field_behavior) = OPTIONAL];
+  optional float cfg_scale = 5 [(google.api.field_behavior) = OPTIONAL];
   // The seed, default is 0
-  optional int32 seed = 4 [(google.api.field_behavior) = OPTIONAL];
+  optional int32 seed = 6 [(google.api.field_behavior) = OPTIONAL];
   // The number of generated samples, default is 1
-  optional int32 samples = 5 [(google.api.field_behavior) = OPTIONAL];
+  optional int32 samples = 7 [(google.api.field_behavior) = OPTIONAL];
+  // The extra parameters
+  optional string extra_params = 8 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // TextToImageOutput represents the output of text to image task

diff --git a/openapiv2/openapiv2.swagger.yaml b/openapiv2/openapiv2.swagger.yaml
@@ -9032,6 +9032,12 @@ definitions:
       prompt:
         type: string
         title: The prompt text
+      prompt_image_url:
+        type: string
+        title: Image type URL
+      prompt_image_base64:
+        type: string
+        title: Image type base64
       steps:
         type: integer
         format: int32
@@ -9048,6 +9054,9 @@ definitions:
         type: integer
         format: int32
         title: The number of generated samples, default is 1
+      extra_params:
+        type: string
+        title: The extra parameters
     title: TextToImageInput represents the input of text to image task
     required:
       - prompt