From a8d29d61d9ce84688b8558b5afe35be27bb0d6fe Mon Sep 17 00:00:00 2001
From: Tony Wang <78333580+tonywang10101@users.noreply.github.com>
Date: Wed, 22 Nov 2023 16:07:02 +0800
Subject: [PATCH] chore(model): add image and extra_params for textToImage Task
 (#225)

Because

- we need to support image-to-image before
[INS-2574](https://linear.app/instill-ai/issue/INS-2574/make-ai-tasks-consistent)
is completed

This commit

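- add two new inputs to the text-to-image task: a prompt image (URL or
  base64) and `extra_params`
- renumber the existing optional fields (`steps`, `cfg_scale`, `seed`,
  `samples`) from 2-5 to 4-7; note that this changes their wire tags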
---
 model/model/v1alpha/task_text_to_image.proto | 17 +++++++++++++----
 openapiv2/openapiv2.swagger.yaml             |  9 +++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/model/model/v1alpha/task_text_to_image.proto b/model/model/v1alpha/task_text_to_image.proto
index 107becee..760f2e48 100644
--- a/model/model/v1alpha/task_text_to_image.proto
+++ b/model/model/v1alpha/task_text_to_image.proto
@@ -9,14 +9,23 @@ import "google/api/field_behavior.proto";
 message TextToImageInput {
   // The prompt text
   string prompt = 1 [(google.api.field_behavior) = REQUIRED];
+  // The Prompt Image, only for multimodal input
+  oneof type {
+    // Image type URL
+    string prompt_image_url = 2;
+    // Image type base64
+    string prompt_image_base64 = 3;
+  }
   // The steps, default is 5
-  optional int32 steps = 2 [(google.api.field_behavior) = OPTIONAL];
+  optional int32 steps = 4 [(google.api.field_behavior) = OPTIONAL];
   // The guidance scale, default is 7.5
-  optional float cfg_scale = 3 [(google.api.field_behavior) = OPTIONAL];
+  optional float cfg_scale = 5 [(google.api.field_behavior) = OPTIONAL];
   // The seed, default is 0
-  optional int32 seed = 4 [(google.api.field_behavior) = OPTIONAL];
+  optional int32 seed = 6 [(google.api.field_behavior) = OPTIONAL];
   // The number of generated samples, default is 1
-  optional int32 samples = 5 [(google.api.field_behavior) = OPTIONAL];
+  optional int32 samples = 7 [(google.api.field_behavior) = OPTIONAL];
+  // The extra parameters
+  optional string extra_params = 8 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // TextToImageOutput represents the output of text to image task
diff --git a/openapiv2/openapiv2.swagger.yaml b/openapiv2/openapiv2.swagger.yaml
index cdcaa069..d8c70e17 100644
--- a/openapiv2/openapiv2.swagger.yaml
+++ b/openapiv2/openapiv2.swagger.yaml
@@ -9032,6 +9032,12 @@ definitions:
       prompt:
         type: string
         title: The prompt text
+      prompt_image_url:
+        type: string
+        title: Image type URL
+      prompt_image_base64:
+        type: string
+        title: Image type base64
       steps:
         type: integer
         format: int32
@@ -9048,6 +9054,9 @@ definitions:
         type: integer
         format: int32
         title: The number of generated samples, default is 1
+      extra_params:
+        type: string
+        title: The extra parameters
     title: TextToImageInput represents the input of text to image task
     required:
       - prompt