From a8d29d61d9ce84688b8558b5afe35be27bb0d6fe Mon Sep 17 00:00:00 2001 From: Tony Wang <78333580+tonywang10101@users.noreply.github.com> Date: Wed, 22 Nov 2023 16:07:02 +0800 Subject: [PATCH] chore(model): add image and extra_params for textToImage Task (#225) Because - to support image-to-image before [INS-2574](https://linear.app/instill-ai/issue/INS-2574/make-ai-tasks-consistent) is complete This commit - add two new fields on text-to-image task --- model/model/v1alpha/task_text_to_image.proto | 17 +++++++++++++---- openapiv2/openapiv2.swagger.yaml | 9 +++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/model/model/v1alpha/task_text_to_image.proto b/model/model/v1alpha/task_text_to_image.proto index 107becee..760f2e48 100644 --- a/model/model/v1alpha/task_text_to_image.proto +++ b/model/model/v1alpha/task_text_to_image.proto @@ -9,14 +9,23 @@ import "google/api/field_behavior.proto"; message TextToImageInput { // The prompt text string prompt = 1 [(google.api.field_behavior) = REQUIRED]; + // The Prompt Image, only for multimodal input + oneof type { + // Image type URL + string prompt_image_url = 2; + // Image type base64 + string prompt_image_base64 = 3; + } // The steps, default is 5 - optional int32 steps = 2 [(google.api.field_behavior) = OPTIONAL]; + optional int32 steps = 4 [(google.api.field_behavior) = OPTIONAL]; // The guidance scale, default is 7.5 - optional float cfg_scale = 3 [(google.api.field_behavior) = OPTIONAL]; + optional float cfg_scale = 5 [(google.api.field_behavior) = OPTIONAL]; // The seed, default is 0 - optional int32 seed = 4 [(google.api.field_behavior) = OPTIONAL]; + optional int32 seed = 6 [(google.api.field_behavior) = OPTIONAL]; // The number of generated samples, default is 1 - optional int32 samples = 5 [(google.api.field_behavior) = OPTIONAL]; + optional int32 samples = 7 [(google.api.field_behavior) = OPTIONAL]; + // The extra parameters + optional string extra_params = 8 [(google.api.field_behavior) = 
OPTIONAL]; } // TextToImageOutput represents the output of text to image task diff --git a/openapiv2/openapiv2.swagger.yaml b/openapiv2/openapiv2.swagger.yaml index cdcaa069..d8c70e17 100644 --- a/openapiv2/openapiv2.swagger.yaml +++ b/openapiv2/openapiv2.swagger.yaml @@ -9032,6 +9032,12 @@ definitions: prompt: type: string title: The prompt text + prompt_image_url: + type: string + title: Image type URL + prompt_image_base64: + type: string + title: Image type base64 steps: type: integer format: int32 @@ -9048,6 +9054,9 @@ definitions: type: integer format: int32 title: The number of generated samples, default is 1 + extra_params: + type: string + title: The extra parameters title: TextToImageInput represents the input of text to image task required: - prompt