From bde4a9bb8b7e5d52e4e6e0c44b8bd2723caf084e Mon Sep 17 00:00:00 2001 From: Konie Date: Mon, 9 Oct 2023 17:46:06 +0800 Subject: [PATCH] Merge for Fooocus v2.1.25 --- README.md | 2 +- fooocus_api_version.py | 2 +- fooocusapi/api.py | 28 +- fooocusapi/api_utils.py | 2 +- fooocusapi/models.py | 376 +++++++++----- fooocusapi/repositories_versions.py | 6 +- fooocusapi/worker.py | 756 +++++++++++++++++----------- main.py | 8 - requirements.txt | 1 + 9 files changed, 752 insertions(+), 429 deletions(-) diff --git a/README.md b/README.md index f0ce397..6e33ec9 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Fooocus-API FastAPI powered API for [Fooocus](https://github.com/lllyasviel/Fooocus) -Currently loaded Fooocus version: 2.0.93 +Currently loaded Fooocus version: 2.1.25 ### Install dependencies. Need python version >= 3.10 diff --git a/fooocus_api_version.py b/fooocus_api_version.py index 905c05b..85b15ca 100644 --- a/fooocus_api_version.py +++ b/fooocus_api_version.py @@ -1 +1 @@ -version = '0.1.9' \ No newline at end of file +version = '0.1.10' \ No newline at end of file diff --git a/fooocusapi/api.py b/fooocusapi/api.py index fcd766e..ac7bfde 100644 --- a/fooocusapi/api.py +++ b/fooocusapi/api.py @@ -1,8 +1,9 @@ -from typing import List -from fastapi import Depends, FastAPI, File, Header, Query, Response, UploadFile +from typing import List, Optional +from fastapi import Depends, FastAPI, Header, Query, UploadFile +from fastapi.params import File import uvicorn -from fooocusapi.api_utils import generation_output, narray_to_base64img, narray_to_bytesimg -from fooocusapi.models import GeneratedImageBase64, GenerationFinishReason, ImgInpaintOrOutpaintRequest, ImgUpscaleOrVaryRequest, Text2ImgRequest +from fooocusapi.api_utils import generation_output +from fooocusapi.models import GeneratedImageBase64, ImgInpaintOrOutpaintRequest, ImgPromptRequest, ImgUpscaleOrVaryRequest, Text2ImgRequest from fooocusapi.task_queue import TaskQueue from fooocusapi.worker import process_generate @@ -82,6 +83,25 @@ def img_inpaint_or_outpaint(input_image: UploadFile, req: ImgInpaintOrOutpaintRe return generation_output(results, streaming_output) +@app.post("/v1/generation/image-prompt", response_model=List[GeneratedImageBase64], responses=img_generate_responses) +def img_prompt(cn_img1: Optional[UploadFile] = File(None), + req: ImgPromptRequest = Depends(ImgPromptRequest.as_form), + accept: str = Header(None), + accept_query: str | None = Query(None, alias='accept', description="Parameter to override 'Accept' header, 'image/png' for output bytes")): + if accept_query is not None and len(accept_query) > 0: + accept = accept_query + + if accept == 'image/png': + streaming_output = True + # image_number auto set to 1 in streaming mode + req.image_number = 1 + else: + streaming_output = False + + results = process_generate(req) + return generation_output(results, streaming_output) + + def start_app(args): uvicorn.run("fooocusapi.api:app", host=args.host, port=args.port, log_level=args.log_level) diff --git a/fooocusapi/api_utils.py b/fooocusapi/api_utils.py index d0109e8..d817e04 100644 --- a/fooocusapi/api_utils.py +++ b/fooocusapi/api_utils.py @@ -39,7 +39,7 @@ def read_input_image(input_image: UploadFile) -> np.ndarray: input_image_bytes = input_image.file.read() pil_image = Image.open(io.BytesIO(input_image_bytes)) image = np.array(pil_image) - return HWC3(image) + return image def generation_output(results: List[GeneratedImage], streaming_output: bool) -> Response | List[GeneratedImageBase64]: diff 
--git a/fooocusapi/models.py b/fooocusapi/models.py index 172dcfe..8db63a7 100644 --- a/fooocusapi/models.py +++ b/fooocusapi/models.py @@ -1,10 +1,12 @@ -from fastapi import File, Form, UploadFile +from fastapi import Form, UploadFile +from fastapi.params import File from fastapi.exceptions import RequestValidationError from pydantic import BaseModel, ConfigDict, Field from typing import List from enum import Enum from pydantic_core import InitErrorDetails +import modules.flags as flags class Lora(BaseModel): @@ -24,110 +26,110 @@ class PerfomanceSelection(str, Enum): class FooocusStyle(str, Enum): fooocus_expansion = 'Fooocus V2' default = 'Default (Slightly Cinematic)', - sai_3d_model = 'sai-3d-model' - sai_analog_film = 'sai-analog film' - sai_anime = 'sai-anime' - sai_cinematic = 'sai-cinematic' - sai_comic_book = 'sai-comic book' - sai_ccraft_clay = 'sai-craft clay' - sai_digital_art = 'sai-digital art' - sai_enhance = 'sai-enhance' - sai_fantasy_art = 'sai-fantasy art' - sai_isometric = 'sai-isometric' - sai_line_art = 'sai-line art' - sai_lowpoly = 'sai-lowpoly' - sai_neonpunk = 'sai-neonpunk' - sai_origami = 'sai-origami' - sai_photographic = 'sai-photographic' - sai_pixel_art = 'sai-pixel art' - sai_texture = 'sai-texture' - ads_advertising = 'ads-advertising' - ads_automotive = 'ads-automotive' - ads_corporate = 'ads-corporate' - ads_fashion_editorial = 'ads-fashion editorial' - adsfood_photography = 'ads-food photography' - ads_luxury = 'ads-luxury' - ads_real_estate = 'ads-real estate' - ads_retail = 'ads-retail' - artstyle_abstract = 'artstyle-abstract' - artstyle_abstract_expressionism = 'artstyle-abstract expressionism' - artstyle_art_deco = 'artstyle-art deco' - artstyle_art_nouveau = 'artstyle-art nouveau' - artstyle_constructivist = 'artstyle-constructivist' - artstyle_cubist = 'artstyle-cubist' - artstyle_expressionist = 'artstyle-expressionist' - artstyle_graffiti = 'artstyle-graffiti' - artstyle_hyperrealism = 'artstyle-hyperrealism' - artstyle_impressionist = 'artstyle-impressionist' - artstyle_pointillism = 'artstyle-pointillism' - artstyle_pop_art = 'artstyle-pop art' - artstyle_psychedelic = 'artstyle-psychedelic' - artstyle_renaissance = 'artstyle-renaissance' - artstyle_steampunk = 'artstyle-steampunk' - artstyle_surrealist = 'artstyle-surrealist' - artstyle_typography = 'artstyle-typography' - artstyle_watercolor = 'artstyle-watercolor' - futuristic_biomechanical = 'futuristic-biomechanical' - futuristic_biomechanical_cyberpunk = 'futuristic-biomechanical cyberpunk' - futuristic_cybernetic = 'futuristic-cybernetic' - futuristic_cybernetic_robot = 'futuristic-cybernetic robot' - futuristic_cyberpunk_cityscape = 'futuristic-cyberpunk cityscape' - futuristic_futuristic = 'futuristic-futuristic' - futuristic_retro_cyberpunk = 'futuristic-retro cyberpunk' - futuristic_retro_futurism = 'futuristic-retro futurism' - futuristic_sci_fi = 'futuristic-sci-fi' - futuristic_vaporwave = 'futuristic-vaporwave' - game_bubble_bobble = 'game-bubble bobble' - game_cyberpunk_game = 'game-cyberpunk game' - game_fighting_game = 'game-fighting game' - game_gta = 'game-gta' - game_mario = 'game-mario' - game_minecraft = 'game-minecraft' - game_pokemon = 'game-pokemon' - game_retro_arcade = 'game-retro arcade' - game_retro_game = 'game-retro game' - game_rpg_fantasy_game = 'game-rpg fantasy game' - game_strategy_game = 'game-strategy game' - game_streetfighter = 'game-streetfighter' - game_zelda = 'game-zelda' - misc_architectural = 'misc-architectural' - misc_disco = 'misc-disco' - 
misc_dreamscape = 'misc-dreamscape' - misc_dystopian = 'misc-dystopian' - misc_fairy_tale = 'misc-fairy tale' - misc_gothic = 'misc-gothic' - misc_grunge = 'misc-grunge' - misc_horror = 'misc-horror' - misc_kawaii = 'misc-kawaii' - misc_lovecraftian = 'misc-lovecraftian' - misc_macabre = 'misc-macabre' - misc_manga = 'misc-manga' - misc_metropolis = 'misc-metropolis' - misc_minimalist = 'misc-minimalist' - misc_monochrome = 'misc-monochrome' - misc_nautical = 'misc-nautical' - misc_space = 'misc-space' - misc_stained_glass = 'misc-stained glass' - misc_techwear_fashion = 'misc-techwear fashion' - misc_tribal = 'misc-tribal' - misc_zentangle = 'misc-zentangle' - papercraft_collage = 'papercraft-collage' - papercraft_flat_papercut = 'papercraft-flat papercut' - papercraft_kirigami = 'papercraft-kirigami' - papercraft_paper_mache = 'papercraft-paper mache' - papercraft_paper_quilling = 'papercraft-paper quilling' - papercraft_papercut_collage = 'papercraft-papercut collage' - papercraft_papercut_shadow_box = 'papercraft-papercut shadow box' - papercraft_stacked_papercut = 'papercraft-stacked papercut' - papercraft_thick_layered_papercut = 'papercraft-thick layered papercut' - photo_alien = 'photo-alien' - photo_film_noir = 'photo-film noir' - photo_hdr = 'photo-hdr' - photo_long_exposure = 'photo-long exposure' - photo_neon_noir = 'photo-neon noir' - photo_silhouette = 'photo-silhouette' - photo_tilt_shift = 'photo-tilt-shift' - cinematic_diva = 'cinematic-diva' + sai_3d_model = 'SAI 3D Model' + sai_analog_film = 'SAI Analog Film' + sai_anime = 'SAI Anime' + sai_cinematic = 'SAI Cinematic' + sai_comic_book = 'SAI Comic Book' + sai_ccraft_clay = 'SAI Craft Clay' + sai_digital_art = 'SAI Digital Art' + sai_enhance = 'SAI Enhance' + sai_fantasy_art = 'SAI Fantasy Art' + sai_isometric = 'SAI Isometric' + sai_line_art = 'SAI Line Art' + sai_lowpoly = 'SAI Lowpoly' + sai_neonpunk = 'SAI Neonpunk' + sai_origami = 'SAI Origami' + sai_photographic = 'SAI Photographic' + sai_pixel_art = 'SAI Pixel Art' + sai_texture = 'SAI Texture' + ads_advertising = 'Ads Advertising' + ads_automotive = 'Ads Automotive' + ads_corporate = 'Ads Corporate' + ads_fashion_editorial = 'Ads Fashion Editorial' + adsfood_photography = 'Ads Food Photography' + ads_luxury = 'Ads Luxury' + ads_real_estate = 'Ads Real Estate' + ads_retail = 'Ads Retail' + artstyle_abstract = 'Artstyle Abstract' + artstyle_abstract_expressionism = 'Artstyle Abstract Expressionism' + artstyle_art_deco = 'Artstyle Art Deco' + artstyle_art_nouveau = 'Artstyle Art Nouveau' + artstyle_constructivist = 'Artstyle Constructivist' + artstyle_cubist = 'Artstyle Cubist' + artstyle_expressionist = 'Artstyle Expressionist' + artstyle_graffiti = 'Artstyle Graffiti' + artstyle_hyperrealism = 'Artstyle Hyperrealism' + artstyle_impressionist = 'Artstyle Impressionist' + artstyle_pointillism = 'Artstyle Pointillism' + artstyle_pop_art = 'Artstyle Pop Art' + artstyle_psychedelic = 'Artstyle Psychedelic' + artstyle_renaissance = 'Artstyle Renaissance' + artstyle_steampunk = 'Artstyle Steampunk' + artstyle_surrealist = 'Artstyle Surrealist' + artstyle_typography = 'Artstyle Typography' + artstyle_watercolor = 'Artstyle Watercolor' + futuristic_biomechanical = 'Futuristic Biomechanical' + futuristic_biomechanical_cyberpunk = 'Futuristic Biomechanical Cyberpunk' + futuristic_cybernetic = 'Futuristic Cybernetic' + futuristic_cybernetic_robot = 'Futuristic Cybernetic Robot' + futuristic_cyberpunk_cityscape = 'Futuristic Cyberpunk Cityscape' + futuristic_futuristic = 
'Futuristic Futuristic' + futuristic_retro_cyberpunk = 'Futuristic Retro Cyberpunk' + futuristic_retro_futurism = 'Futuristic Retro Futurism' + futuristic_sci_fi = 'Futuristic Sci Fi' + futuristic_vaporwave = 'Futuristic Vaporwave' + game_bubble_bobble = 'Game Bubble Bobble' + game_cyberpunk_game = 'Game Cyberpunk Game' + game_fighting_game = 'Game Fighting Game' + game_gta = 'Game Gta' + game_mario = 'Game Mario' + game_minecraft = 'Game Minecraft' + game_pokemon = 'Game Pokemon' + game_retro_arcade = 'Game Retro Arcade' + game_retro_game = 'Game Retro Game' + game_rpg_fantasy_game = 'Game Rpg Fantasy Game' + game_strategy_game = 'Game Strategy Game' + game_streetfighter = 'Game Streetfighter' + game_zelda = 'Game Zelda' + misc_architectural = 'Misc Architectural' + misc_disco = 'Misc Disco' + misc_dreamscape = 'Misc Dreamscape' + misc_dystopian = 'Misc Dystopian' + misc_fairy_tale = 'Misc Fairy Tale' + misc_gothic = 'Misc Gothic' + misc_grunge = 'Misc Grunge' + misc_horror = 'Misc Horror' + misc_kawaii = 'Misc Kawaii' + misc_lovecraftian = 'Misc Lovecraftian' + misc_macabre = 'Misc Macabre' + misc_manga = 'Misc Manga' + misc_metropolis = 'Misc Metropolis' + misc_minimalist = 'Misc Minimalist' + misc_monochrome = 'Misc Monochrome' + misc_nautical = 'Misc Nautical' + misc_space = 'Misc Space' + misc_stained_glass = 'Misc Stained Glass' + misc_techwear_fashion = 'Misc Techwear Fashion' + misc_tribal = 'Misc Tribal' + misc_zentangle = 'Misc Zentangle' + papercraft_collage = 'Papercraft Collage' + papercraft_flat_papercut = 'Papercraft Flat Papercut' + papercraft_kirigami = 'Papercraft Kirigami' + papercraft_paper_mache = 'Papercraft Paper Mache' + papercraft_paper_quilling = 'Papercraft Paper Quilling' + papercraft_papercut_collage = 'Papercraft Papercut Collage' + papercraft_papercut_shadow_box = 'Papercraft Papercut Shadow Box' + papercraft_stacked_papercut = 'Papercraft Stacked Papercut' + papercraft_thick_layered_papercut = 'Papercraft Thick Layered Papercut' + photo_alien = 'Photo Alien' + photo_film_noir = 'Photo Film Noir' + photo_hdr = 'Photo Hdr' + photo_long_exposure = 'Photo Long Exposure' + photo_neon_noir = 'Photo Neon Noir' + photo_silhouette = 'Photo Silhouette' + photo_tilt_shift = 'Photo Tilt Shift' + cinematic_diva = 'Cinematic Diva' abstract_expressionism = 'Abstract Expressionism' academia = 'Academia' action_figure = 'Action Figure' @@ -136,9 +138,9 @@ class FooocusStyle(str, Enum): art_deco = 'Art Deco' art_nouveau = 'Art Nouveau' astral_aura = 'Astral Aura' - avant_garde = 'Avant-garde' + avant_garde = 'Avant Garde' baroque = 'Baroque' - bauhaus_style_poster = 'Bauhaus-Style Poster' + bauhaus_style_poster = 'Bauhaus Style Poster' blueprint_schematic_drawing = 'Blueprint Schematic Drawing' caricature = 'Caricature' cel_shaded_art = 'Cel Shaded Art' @@ -157,13 +159,13 @@ class FooocusStyle(str, Enum): double_exposure = 'Double Exposure' dripping_paint_splatter_art = 'Dripping Paint Splatter Art' expressionism = 'Expressionism' - faded_polaroid_photo = 'Faded Polaroid Photo' + faded_polaroid_photo = 'Faded Polaroid Photo' fauvism = 'Fauvism' flat_2d_art = 'Flat 2D Art' fortnite_art_style = 'Fortnite Art Style' futurism = 'Futurism' glitchcore = 'Glitchcore' - glo_fi = 'Glo-fi' + glo_fi = 'Glo Fi' googie_art_style = 'Googie Art Style' graffiti_art = 'Graffiti Art' harlem_renaissance_art = 'Harlem Renaissance Art' @@ -182,14 +184,14 @@ class FooocusStyle(str, Enum): marker_drawing = 'Marker Drawing' medievalism = 'Medievalism' minimalism = 'Minimalism' - neo_baroque = 
'Neo-Baroque' - neo_byzantine = 'Neo-Byzantine' - neo_futurism = 'Neo-Futurism' - neo_impressionism = 'Neo-Impressionism' - neo_rococo = 'Neo-Rococo' + neo_baroque = 'Neo Baroque' + neo_byzantine = 'Neo Byzantine' + neo_futurism = 'Neo Futurism' + neo_impressionism = 'Neo Impressionism' + neo_rococo = 'Neo Rococo' neoclassicism = 'Neoclassicism' op_art = 'Op Art' - ornate_and_intricate = 'Ornate and Intricate' + ornate_and_intricate = 'Ornate And Intricate' pencil_sketch_drawing = 'Pencil Sketch Drawing' pop_art_2 = 'Pop Art 2' rococo = 'Rococo' @@ -205,7 +207,7 @@ class FooocusStyle(str, Enum): vibrant_rim_light = 'Vibrant Rim Light' volumetric_lighting = 'Volumetric Lighting' watercolor_2 = 'Watercolor 2' - whimsical_and_playful = 'Whimsical and Playful' + whimsical_and_playful = 'Whimsical And Playful' class AspectRatio(str, Enum): @@ -252,6 +254,20 @@ class OutpaintExpansion(str, Enum): bottom = 'Bottom' +class ControlNetType(str, Enum): + cn_ip = 'Image Prompt' + cn_canny = 'PyraCanny' + cn_cpds = 'CPDS' + + +class ImagePrompt(BaseModel): + cn_img: UploadFile | None = Field(default=None) + cn_stop: float = Field(default=0.4, min=0, max=1) + cn_weight: float | None = Field( + default=None, min=0, max=2, description="None for default value") + cn_type: ControlNetType = Field(default=ControlNetType.cn_ip) + + class Text2ImgRequest(BaseModel): prompt: str = '' negative_promit: str = '' @@ -261,7 +277,7 @@ class Text2ImgRequest(BaseModel): aspect_ratios_selection: AspectRatio = AspectRatio.a_1_29 image_number: int = Field( default=1, description="Image number", min=1, max=32) - image_seed: int | None = None + image_seed: int = Field(default=-1, description="Seed to generate image, -1 for random") sharpness: float = Field(default=2.0, min=0.0, max=30.0) guidance_scale: float = Field(default=7.0, min=1.0, max=30.0) base_model_name: str = 'sd_xl_base_1.0_0.9vae.safetensors' @@ -287,7 +303,7 @@ def as_form(cls, input_image: UploadFile = Form(description="Init image for upsa AspectRatio.a_1_29), image_number: int = Form( default=1, description="Image number", ge=1, le=32), - image_seed: int | None = Form(None), + image_seed: int = Form(default=-1, description="Seed to generate image, -1 for random"), sharpness: float = Form(default=2.0, ge=0.0, le=30.0), guidance_scale: float = Form(default=7.0, ge=1.0, le=30.0), base_model_name: str = Form( 'sd_xl_base_1.0_0.9vae.safetensors'), @@ -339,8 +355,10 @@ class ImgInpaintOrOutpaintRequest(Text2ImgRequest): @classmethod def as_form(cls, input_image: UploadFile = Form(description="Init image for inpaint or outpaint"), - input_mask: UploadFile = Form(File(None), description="Inpaint or outpaint mask"), - outpaint_selections: List[str] = Form([], description="Outpaint expansion selections, literal 'Left', 'Right', 'Top', 'Bottom' seperated by comma"), + input_mask: UploadFile = Form( + File(None), description="Inpaint or outpaint mask"), + outpaint_selections: List[str] = Form( + [], description="Outpaint expansion selections, literal 'Left', 'Right', 'Top', 'Bottom' separated by commas"), prompt: str = Form(''), negative_promit: str = Form(''), style_selections: List[str] = Form([ FooocusStyle.fooocus_expansion, FooocusStyle.default], description="Fooocus style selections, seperated by comma"), performance_selection: PerfomanceSelection = Form( PerfomanceSelection.speed), aspect_ratios_selection: AspectRatio = Form( AspectRatio.a_1_29), image_number: int = Form( default=1, description="Image number", ge=1, le=32), - image_seed: int | None = Form(None), + image_seed: int = Form(default=-1, description="Seed to generate image, -1 for random"), sharpness: float = Form(default=2.0, ge=0.0, le=30.0), guidance_scale: float = 
Form(default=7.0, ge=1.0, le=30.0), base_model_name: str = Form( @@ -371,6 +389,9 @@ def as_form(cls, input_image: UploadFile = Form(description="Init image for inpa w5: float = Form(default=0.5, ge=-2, le=2), ): + if isinstance(input_mask, File): + input_mask = None + outpaint_selections_arr: List[OutpaintExpansion] = [] for part in outpaint_selections: if len(part) > 0: @@ -381,7 +402,7 @@ def as_form(cls, input_image: UploadFile = Form(description="Init image for inpa except ValueError as ve: err = InitErrorDetails(type='enum', loc=['outpaint_selections'], input=outpaint_selections, ctx={ 'expected': "Literal 'Left', 'Right', 'Top', 'Bottom' seperated by comma"}) - raise RequestValidationError(errors=[err]) + raise RequestValidationError(errors=[err]) style_selection_arr: List[FooocusStyle] = [] for part in style_selections: @@ -393,7 +414,7 @@ def as_form(cls, input_image: UploadFile = Form(description="Init image for inpa except ValueError as ve: err = InitErrorDetails(type='enum', loc=['style_selections'], input=style_selections, ctx={ 'expected': 'Valid fooocus styles seperated by comma'}) - raise RequestValidationError(errors=[err]) + raise RequestValidationError(errors=[err]) loras: List[Lora] = [] lora_config = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)] @@ -409,6 +430,115 @@ def as_form(cls, input_image: UploadFile = Form(description="Init image for inpa loras=loras) +class ImgPromptRequest(Text2ImgRequest): + image_prompts: List[ImagePrompt] + + @classmethod + def as_form(cls, cn_img1: UploadFile = Form(File(None), description="Input image for image prompt"), + cn_stop1: float = Form( + default=0.4, ge=0, le=1, description="Stop-at ratio for image prompt"), + cn_weight1: float | None = Form( + default=None, ge=0, le=2, description="Weight for image prompt, None for default value"), + cn_type1: ControlNetType = Form( + default=ControlNetType.cn_ip, description="ControlNet type for image prompt"), + cn_img2: UploadFile = Form( + File(None), description="Input image for image prompt"), + cn_stop2: float = Form( + default=0.4, ge=0, le=1, description="Stop-at ratio for image prompt"), + cn_weight2: float | None = Form( + default=None, ge=0, le=2, description="Weight for image prompt, None for default value"), + cn_type2: ControlNetType = Form( + default=ControlNetType.cn_ip, description="ControlNet type for image prompt"), + cn_img3: UploadFile = Form( + File(None), description="Input image for image prompt"), + cn_stop3: float = Form( + default=0.4, ge=0, le=1, description="Stop-at ratio for image prompt"), + cn_weight3: float | None = Form( + default=None, ge=0, le=2, description="Weight for image prompt, None for default value"), + cn_type3: ControlNetType = Form( + default=ControlNetType.cn_ip, description="ControlNet type for image prompt"), + cn_img4: UploadFile = Form( + File(None), description="Input image for image prompt"), + cn_stop4: float = Form( + default=0.4, ge=0, le=1, description="Stop-at ratio for image prompt"), + cn_weight4: float | None = Form( + default=None, ge=0, le=2, description="Weight for image prompt, None for default value"), + cn_type4: ControlNetType = Form( + default=ControlNetType.cn_ip, description="ControlNet type for image prompt"), + prompt: str = Form(''), + negative_promit: str = Form(''), + style_selections: List[str] = Form([ + FooocusStyle.fooocus_expansion, FooocusStyle.default], description="Fooocus style selections, separated by commas"), + performance_selection: PerfomanceSelection = Form( + PerfomanceSelection.speed), + aspect_ratios_selection: 
AspectRatio = Form( + AspectRatio.a_1_29), + image_number: int = Form( + default=1, description="Image number", ge=1, le=32), + image_seed: int = Form(default=-1, description="Seed to generate image, -1 for random"), + sharpness: float = Form(default=2.0, ge=0.0, le=30.0), + guidance_scale: float = Form(default=7.0, ge=1.0, le=30.0), + base_model_name: str = Form( + 'sd_xl_base_1.0_0.9vae.safetensors'), + refiner_model_name: str = Form( + 'sd_xl_refiner_1.0_0.9vae.safetensors'), + l1: str | None = Form( + 'sd_xl_offset_example-lora_1.0.safetensors'), + w1: float = Form(default=0.5, ge=-2, le=2), + l2: str | None = Form(None), + w2: float = Form(default=0.5, ge=-2, le=2), + l3: str | None = Form(None), + w3: float = Form(default=0.5, ge=-2, le=2), + l4: str | None = Form(None), + w4: float = Form(default=0.5, ge=-2, le=2), + l5: str | None = Form(None), + w5: float = Form(default=0.5, ge=-2, le=2), + ): + if isinstance(cn_img1, File): + cn_img1 = None + if isinstance(cn_img2, File): + cn_img2 = None + if isinstance(cn_img3, File): + cn_img3 = None + if isinstance(cn_img4, File): + cn_img4 = None + + image_prompts: List[ImagePrompt] = [] + image_prompt_config = [(cn_img1, cn_stop1, cn_weight1, cn_type1), (cn_img2, cn_stop2, cn_weight2, cn_type2), + (cn_img3, cn_stop3, cn_weight3, cn_type3), (cn_img4, cn_stop4, cn_weight4, cn_type4)] + for config in image_prompt_config: + cn_img, cn_stop, cn_weight, cn_type = config + if cn_weight is None: + cn_weight = flags.default_parameters[cn_type.value][1] + image_prompts.append(ImagePrompt( + cn_img=cn_img, cn_stop=cn_stop, cn_weight=cn_weight, cn_type=cn_type)) + + style_selection_arr: List[FooocusStyle] = [] + for part in style_selections: + if len(part) > 0: + for s in part.split(','): + try: + expansion = FooocusStyle(s) + style_selection_arr.append(expansion) + except ValueError as ve: + err = InitErrorDetails(type='enum', loc=['style_selections'], input=style_selections, ctx={ + 'expected': 'Valid fooocus styles separated by commas'}) + raise RequestValidationError(errors=[err]) + + loras: List[Lora] = [] + lora_config = [(l1, w1), (l2, w2), (l3, w3), (l4, w4), (l5, w5)] + for config in lora_config: + lora_model, lora_weight = config + if lora_model is not None and len(lora_model) > 0: + loras.append(Lora(model_name=lora_model, weight=lora_weight)) + + return cls(image_prompts=image_prompts, prompt=prompt, negative_promit=negative_promit, style_selections=style_selection_arr, + performance_selection=performance_selection, aspect_ratios_selection=aspect_ratios_selection, + image_number=image_number, image_seed=image_seed, sharpness=sharpness, guidance_scale=guidance_scale, + base_model_name=base_model_name, refiner_model_name=refiner_model_name, + loras=loras) + + class GenerationFinishReason(str, Enum): success = 'SUCCESS' queue_is_full = 'QUEUE_IS_FULL' diff --git a/fooocusapi/repositories_versions.py b/fooocusapi/repositories_versions.py index 2925995..3db68e7 100644 --- a/fooocusapi/repositories_versions.py +++ b/fooocusapi/repositories_versions.py @@ -1,7 +1,7 @@ import os -fooocus_version = '2.0.93' +fooocus_version = '2.1.25' comfy_commit_hash = os.environ.get( - 'COMFY_COMMIT_HASH', "9bfec2bdbf0b0d778087a9b32f79e57e2d15b913") + 'COMFY_COMMIT_HASH', "1c5d6663faf1a33e00ec67240167b174a9cac655") fooocus_commit_hash = os.environ.get( - 'FOOOCUS_COMMIT_HASH', "b42e96a52d92265fb966954090aa43340f199008") + 'FOOOCUS_COMMIT_HASH', "71e9ebc7a26bbb6ccec08aa656c98f2e38a21425") diff --git a/fooocusapi/worker.py b/fooocusapi/worker.py index 
ab72296..e3ae857 100644 --- a/fooocusapi/worker.py +++ b/fooocusapi/worker.py @@ -5,34 +5,52 @@ import torch from typing import List from fooocusapi.api_utils import read_input_image -from fooocusapi.models import GeneratedImage, GenerationFinishReason, ImgInpaintOrOutpaintRequest, ImgUpscaleOrVaryRequest, PerfomanceSelection, TaskType, Text2ImgRequest +from fooocusapi.models import GeneratedImage, GenerationFinishReason, ImgInpaintOrOutpaintRequest, ImgPromptRequest, ImgUpscaleOrVaryRequest, PerfomanceSelection, TaskType, Text2ImgRequest from fooocusapi.task_queue import TaskQueue -from modules.expansion import safe_str -from modules.path import downloading_inpaint_models -from modules.sdxl_styles import apply_style, fooocus_expansion, aspect_ratios task_queue = TaskQueue() +@torch.no_grad() +@torch.inference_mode() def process_generate(req: Text2ImgRequest) -> List[GeneratedImage]: import modules.default_pipeline as pipeline import modules.patch as patch import modules.flags as flags import modules.core as core import modules.inpaint_worker as inpaint_worker + import modules.path as path import comfy.model_management as model_management - from modules.util import join_prompts, remove_empty_str, image_is_generated_in_current_ui, resize_image + import modules.advanced_parameters as advanced_parameters + import fooocus_extras.preprocessors as preprocessors + import fooocus_extras.ip_adapter as ip_adapter + from modules.util import join_prompts, remove_empty_str, image_is_generated_in_current_ui, resize_image, HWC3 from modules.private_logger import log from modules.upscaler import perform_upscale + from modules.expansion import safe_str + from modules.sdxl_styles import apply_style, fooocus_expansion, aspect_ratios + + outputs = [] + + def progressbar(number, text): + print(f'[Fooocus] {text}') + outputs.append(['preview', (number, text, None)]) + + def make_results_from_outputs(): + results: List[GeneratedImage] = [] + for item in outputs: + if item[0] == 'results': + for im in item[1]: + if isinstance(im, np.ndarray): + results.append(GeneratedImage(im=im, seed=item[2], finish_reason=GenerationFinishReason.success)) + return results task_seq = task_queue.add_task(TaskType.text2img, { 'body': req.__dict__}) if task_seq is None: print("[Task Queue] The task queue has reached limit") - results: List[GeneratedImage] = [] - for i in range(0, req.image_number): - results.append(GeneratedImage(im=None, seed=0, - finish_reason=GenerationFinishReason.queue_is_full)) + results = [GeneratedImage(im=None, seed=0, + finish_reason=GenerationFinishReason.queue_is_full)] return results try: @@ -56,330 +74,487 @@ def process_generate(req: Text2ImgRequest) -> List[GeneratedImage]: execution_start_time = time.perf_counter() - loras = [(l.model_name, l.weight) for l in req.loras] - loras_user_raw_input = copy.deepcopy(loras) - + # Transform parameters + prompt = req.prompt + negative_prompt = req.negative_promit style_selections = [s.value for s in req.style_selections] + performance_selection = req.performance_selection.value + aspect_ratios_selection = req.aspect_ratios_selection.value + image_number = req.image_number + image_seed = None if req.image_seed == -1 else req.image_seed + sharpness = req.sharpness + guidance_scale = req.guidance_scale + base_model_name = req.base_model_name + refiner_model_name = req.refiner_model_name + loras = [(lora.model_name, lora.weight) for lora in req.loras] + input_image_checkbox = isinstance(req, ImgUpscaleOrVaryRequest) or isinstance(req, ImgInpaintOrOutpaintRequest) 
or isinstance(req, ImgPromptRequest) + current_tab = 'uov' if isinstance(req, ImgUpscaleOrVaryRequest) else 'inpaint' if isinstance(req, ImgInpaintOrOutpaintRequest) else 'ip' if isinstance(req, ImgPromptRequest) else None + uov_method = flags.disabled if not isinstance(req, ImgUpscaleOrVaryRequest) else req.uov_method.value + uov_input_image = None if not isinstance(req, ImgUpscaleOrVaryRequest) else read_input_image(req.input_image) + outpaint_selections = [] if not isinstance(req, ImgInpaintOrOutpaintRequest) else [s.value for s in req.outpaint_selections] + + inpaint_input_image = None + if isinstance(req, ImgInpaintOrOutpaintRequest): + input_image = read_input_image(req.input_image) + if req.input_mask is not None: + input_mask = read_input_image(req.input_mask) + else: + input_mask = np.zeros(input_image.shape) + inpaint_input_image = { + 'image': input_image, + 'mask': input_mask + } + + cn_tasks = {flags.cn_ip: [], flags.cn_canny: [], flags.cn_cpds: []} + if isinstance(req, ImgPromptRequest): + for img_prompt in req.image_prompts: + if img_prompt.cn_img is not None: + cn_tasks[img_prompt.cn_type.value].append([read_input_image(img_prompt.cn_img), img_prompt.cn_stop, img_prompt.cn_weight]) + + def build_advanced_parameters(): + adm_scaler_positive=1.5 + adm_scaler_negative=0.8 + adm_scaler_end=0.3 + adaptive_cfg=7.0 + sampler_name=flags.default_sampler + scheduler_name=flags.default_scheduler + overwrite_step=-1 + overwrite_switch=-1 + overwrite_width=-1 + overwrite_height=-1 + overwrite_vary_strength=-1 + overwrite_upscale_strength=-1 + mixing_image_prompt_and_vary_upscale=False + mixing_image_prompt_and_inpaint=False + debugging_cn_preprocessor=False + controlnet_softness=0.25 + return [adm_scaler_positive, adm_scaler_negative, adm_scaler_end, adaptive_cfg, sampler_name, + scheduler_name, overwrite_step, overwrite_switch, overwrite_width, overwrite_height, + overwrite_vary_strength, overwrite_upscale_strength, + mixing_image_prompt_and_vary_upscale, mixing_image_prompt_and_inpaint, + debugging_cn_preprocessor, controlnet_softness] + + advanced_parameters.set_all_advanced_parameters(*build_advanced_parameters()) + + # Fooocus async_worker.py code start + + outpaint_selections = [o.lower() for o in outpaint_selections] + loras_raw = copy.deepcopy(loras) raw_style_selections = copy.deepcopy(style_selections) + uov_method = uov_method.lower() + if fooocus_expansion in style_selections: use_expansion = True style_selections.remove(fooocus_expansion) else: use_expansion = False - use_style = len(req. 
style_selections) > 0 + use_style = len(style_selections) > 0 - adaptive_cfg = 7 - patch.adaptive_cfg = adaptive_cfg + patch.adaptive_cfg = advanced_parameters.adaptive_cfg print(f'[Parameters] Adaptive CFG = {patch.adaptive_cfg}') - patch.sharpness = req.sharpness + patch.sharpness = sharpness print(f'[Parameters] Sharpness = {patch.sharpness}') - adm_scaler_positive = 1.5 - adm_scaler_negative = 0.8 - adm_scaler_end = 0.3 - patch.positive_adm_scale = adm_scaler_positive - patch.negative_adm_scale = adm_scaler_negative - patch.adm_scaler_end = adm_scaler_end - print( - f'[Parameters] ADM Scale = {patch.positive_adm_scale} : {patch.negative_adm_scale} : {patch.adm_scaler_end}') + patch.positive_adm_scale = advanced_parameters.adm_scaler_positive + patch.negative_adm_scale = advanced_parameters.adm_scaler_negative + patch.adm_scaler_end = advanced_parameters.adm_scaler_end + print(f'[Parameters] ADM Scale = {patch.positive_adm_scale} : {patch.negative_adm_scale} : {patch.adm_scaler_end}') - cfg_scale = req.guidance_scale + cfg_scale = float(guidance_scale) print(f'[Parameters] CFG = {cfg_scale}') initial_latent = None denoising_strength = 1.0 tiled = False - results: List[GeneratedImage] = [] + inpaint_worker.current_task = None + width, height = aspect_ratios[aspect_ratios_selection] + skip_prompt_processing = False + + raw_prompt = prompt + raw_negative_prompt = negative_prompt + + inpaint_image = None + input_mask = None + inpaint_head_model_path = None + controlnet_canny_path = None + controlnet_cpds_path = None + clip_vision_path, ip_negative_path, ip_adapter_path = None, None, None - if req.performance_selection == PerfomanceSelection.speed: + seed = image_seed + max_seed = int(1024 * 1024 * 1024) + if not isinstance(seed, int): + seed = random.randint(1, max_seed) + if seed < 0: + seed = - seed + seed = seed % max_seed + + if performance_selection == 'Speed': steps = 30 switch = 20 else: steps = 60 switch = 40 - pipeline.clear_all_caches() - width, height = aspect_ratios[req.aspect_ratios_selection.value] - - sampler_name = flags.default_sampler - scheduler_name = flags.default_scheduler - - if isinstance(req, ImgUpscaleOrVaryRequest): - uov_method = req.uov_method.value.lower() - uov_input_image = read_input_image(req.input_image) - if 'vary' in uov_method: - if not image_is_generated_in_current_ui(uov_input_image, ui_width=width, ui_height=height): - uov_input_image = resize_image( - uov_input_image, width=width, height=height) - print( - f'Resolution corrected - users are uploading their own images.') - else: - print(f'Processing images generated by Fooocus.') - if 'subtle' in uov_method: - denoising_strength = 0.5 - if 'strong' in uov_method: - denoising_strength = 0.85 - initial_pixels = core.numpy_to_pytorch(uov_input_image) - initial_latent = core.encode_vae( - vae=pipeline.xl_base_patched.vae, pixels=initial_pixels) - B, C, H, W = initial_latent['samples'].shape - width = W * 8 - height = H * 8 - print(f'Final resolution is {str((height, width))}.') - elif 'upscale' in uov_method: - H, W, C = uov_input_image.shape - - uov_input_image = core.numpy_to_pytorch(uov_input_image) - uov_input_image = perform_upscale(uov_input_image) - uov_input_image = core.pytorch_to_numpy(uov_input_image)[0] - print(f'Image upscaled.') - - if '1.5x' in uov_method: - f = 1.5 - elif '2x' in uov_method: - f = 2.0 - else: - f = 1.0 - - width_f = int(width * f) - height_f = int(height * f) - - if image_is_generated_in_current_ui(uov_input_image, ui_width=width_f, ui_height=height_f): - uov_input_image 
= resize_image( - uov_input_image, width=int(W * f), height=int(H * f)) - print(f'Processing images generated by Fooocus.') - else: - uov_input_image = resize_image( - uov_input_image, width=width_f, height=height_f) - print( - f'Resolution corrected - users are uploading their own images.') - - H, W, C = uov_input_image.shape - image_is_super_large = H * W > 2800 * 2800 - - if 'fast' in uov_method: - direct_return = True - elif image_is_super_large: - print('Image is too large. Directly returned the SR image. ' - 'Usually directly return SR image at 4K resolution ' - 'yields better results than SDXL diffusion.') - direct_return = True - else: - direct_return = False - - if direct_return: - d = [('Upscale (Fast)', '2x')] - log(uov_input_image, d, single_line_number=1) - for i in range(0, req.image_number): - results.append(GeneratedImage( - im=uov_input_image, seed=0, finish_reason=GenerationFinishReason.success)) - print(f"[Task Queue] Finish task, seq={task_seq}") - task_queue.finish_task(task_seq, results, False) - return results + sampler_name = advanced_parameters.sampler_name + scheduler_name = advanced_parameters.scheduler_name + + goals = [] + tasks = [] + + if input_image_checkbox: + if (current_tab == 'uov' or (current_tab == 'ip' and advanced_parameters.mixing_image_prompt_and_vary_upscale)) \ + and uov_method != flags.disabled and uov_input_image is not None: + uov_input_image = HWC3(uov_input_image) + if 'vary' in uov_method: + goals.append('vary') + elif 'upscale' in uov_method: + goals.append('upscale') + if 'fast' in uov_method: + skip_prompt_processing = True + else: + if performance_selection == 'Speed': + steps = 18 + switch = 12 + else: + steps = 36 + switch = 24 + progressbar(1, 'Downloading upscale models ...') + path.downloading_upscale_model() + if (current_tab == 'inpaint' or (current_tab == 'ip' and advanced_parameters.mixing_image_prompt_and_inpaint))\ + and isinstance(inpaint_input_image, dict): + inpaint_image = inpaint_input_image['image'] + input_mask = inpaint_input_image['mask'][:, :, 0] + inpaint_image = HWC3(inpaint_image) + if isinstance(inpaint_image, np.ndarray) and isinstance(input_mask, np.ndarray) \ + and (np.any(input_mask > 127) or len(outpaint_selections) > 0): + progressbar(1, 'Downloading inpainter ...') + inpaint_head_model_path, inpaint_patch_model_path = path.downloading_inpaint_models() + loras += [(inpaint_patch_model_path, 1.0)] + goals.append('inpaint') + sampler_name = 'dpmpp_fooocus_2m_sde_inpaint_seamless' + if current_tab == 'ip' or \ + advanced_parameters.mixing_image_prompt_and_inpaint or \ + advanced_parameters.mixing_image_prompt_and_vary_upscale: + goals.append('cn') + progressbar(1, 'Downloading control models ...') + if len(cn_tasks[flags.cn_canny]) > 0: + controlnet_canny_path = path.downloading_controlnet_canny() + if len(cn_tasks[flags.cn_cpds]) > 0: + controlnet_cpds_path = path.downloading_controlnet_cpds() + if len(cn_tasks[flags.cn_ip]) > 0: + clip_vision_path, ip_negative_path, ip_adapter_path = path.downloading_ip_adapters() + progressbar(1, 'Loading control models ...') + + # Load or unload CNs + pipeline.refresh_controlnets([controlnet_canny_path, controlnet_cpds_path]) + ip_adapter.load_ip_adapter(clip_vision_path, ip_negative_path, ip_adapter_path) + + if advanced_parameters.overwrite_step > 0: + steps = advanced_parameters.overwrite_step + + if advanced_parameters.overwrite_switch > 0: + switch = advanced_parameters.overwrite_switch + + if advanced_parameters.overwrite_width > 0: + width = 
advanced_parameters.overwrite_width + + if advanced_parameters.overwrite_height > 0: + height = advanced_parameters.overwrite_height - tiled = True - denoising_strength = 1.0 - 0.618 - steps = int(steps * 0.618) - switch = int(steps * 0.67) + print(f'[Parameters] Sampler = {sampler_name} - {scheduler_name}') + print(f'[Parameters] Steps = {steps} - {switch}') - initial_pixels = core.numpy_to_pytorch(uov_input_image) + progressbar(1, 'Initializing ...') - initial_latent = core.encode_vae( - vae=pipeline.xl_base_patched.vae, pixels=initial_pixels, tiled=True) - B, C, H, W = initial_latent['samples'].shape - width = W * 8 - height = H * 8 - print(f'Final resolution is {str((height, width))}.') + if not skip_prompt_processing: - elif isinstance(req, ImgInpaintOrOutpaintRequest): - inpaint_image = read_input_image(req.input_image) - if req.input_mask is not None: - inpaint_mask = read_input_image( - req.input_mask)[:, :, 0] - else: - inpaint_mask = np.zeros(inpaint_image.shape[:-1]) - outpaint_selections = [s.value.lower() - for s in req.outpaint_selections] - if isinstance(inpaint_image, np.ndarray) and isinstance(inpaint_mask, np.ndarray) \ - and (np.any(inpaint_mask > 127) or len(outpaint_selections) > 0): - if len(outpaint_selections) > 0: - H, W, C = inpaint_image.shape - if 'top' in outpaint_selections: - inpaint_image = np.pad( - inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge') - inpaint_mask = np.pad( - inpaint_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant', constant_values=255) - if 'bottom' in outpaint_selections: - inpaint_image = np.pad( - inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge') - inpaint_mask = np.pad( - inpaint_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant', constant_values=255) - - H, W, C = inpaint_image.shape - if 'left' in outpaint_selections: - inpaint_image = np.pad( - inpaint_image, [[0, 0], [int(H * 0.3), 0], [0, 0]], mode='edge') - inpaint_mask = np.pad( - inpaint_mask, [[0, 0], [int(H * 0.3), 0]], mode='constant', constant_values=255) - if 'right' in outpaint_selections: - inpaint_image = np.pad( - inpaint_image, [[0, 0], [0, int(H * 0.3)], [0, 0]], mode='edge') - inpaint_mask = np.pad(inpaint_mask, [[0, 0], [0, int( - H * 0.3)]], mode='constant', constant_values=255) - - inpaint_image = np.ascontiguousarray(inpaint_image.copy()) - inpaint_mask = np.ascontiguousarray(inpaint_mask.copy()) - - inpaint_worker.current_task = inpaint_worker.InpaintWorker(image=inpaint_image, mask=inpaint_mask, - is_outpaint=len(outpaint_selections) > 0) - - # print(f'Inpaint task: {str((height, width))}') - # outputs.append(['results', inpaint_worker.current_task.visualize_mask_processing()]) - # return - - inpaint_head_model_path, inpaint_patch_model_path = downloading_inpaint_models() - loras += [(inpaint_patch_model_path, 1.0)] - - inpaint_pixels = core.numpy_to_pytorch( - inpaint_worker.current_task.image_ready) - initial_latent = core.encode_vae( - vae=pipeline.xl_base_patched.vae, pixels=inpaint_pixels) - inpaint_latent = initial_latent['samples'] - B, C, H, W = inpaint_latent.shape - inpaint_mask = core.numpy_to_pytorch( - inpaint_worker.current_task.mask_ready[None]) - inpaint_mask = torch.nn.functional.avg_pool2d( - inpaint_mask, (8, 8)) - inpaint_mask = torch.nn.functional.interpolate( - inpaint_mask, (H, W), mode='bilinear') - inpaint_worker.current_task.load_latent( - latent=inpaint_latent, mask=inpaint_mask) - - inpaint_mask = ( - inpaint_worker.current_task.mask_ready > 0).astype(np.float32) - inpaint_mask = 
torch.tensor(inpaint_mask).float() - - vae_dict = core.encode_vae_inpaint( - mask=inpaint_mask, vae=pipeline.xl_base_patched.vae, pixels=inpaint_pixels) - - inpaint_latent = vae_dict['samples'] - inpaint_mask = vae_dict['noise_mask'] - inpaint_worker.current_task.load_inpaint_guidance( - latent=inpaint_latent, mask=inpaint_mask, model_path=inpaint_head_model_path) - - B, C, H, W = inpaint_latent.shape - height, width = inpaint_worker.current_task.image_raw.shape[:2] - print( - f'Final resolution is {str((height, width))}, latent is {str((H * 8, W * 8))}.') + prompts = remove_empty_str([safe_str(p) for p in prompt.split('\n')], default='') + negative_prompts = remove_empty_str([safe_str(p) for p in negative_prompt.split('\n')], default='') - sampler_name = 'dpmpp_fooocus_2m_sde_inpaint_seamless' + prompt = prompts[0] + negative_prompt = negative_prompts[0] - print(f'[Parameters] Sampler = {sampler_name} - {scheduler_name}') + extra_positive_prompts = prompts[1:] if len(prompts) > 1 else [] + extra_negative_prompts = negative_prompts[1:] if len(negative_prompts) > 1 else [] - raw_prompt = req.prompt - raw_negative_prompt = req.negative_promit + progressbar(3, 'Loading models ...') + pipeline.refresh_everything(refiner_model_name=refiner_model_name, base_model_name=base_model_name, loras=loras) - prompts = remove_empty_str([safe_str(p) - for p in req.prompt.split('\n')], default='') - negative_prompts = remove_empty_str( - [safe_str(p) for p in req.negative_promit.split('\n')], default='') + progressbar(3, 'Processing prompts ...') + positive_basic_workloads = [] + negative_basic_workloads = [] - prompt = prompts[0] - negative_prompt = negative_prompts[0] + if use_style: + for s in style_selections: + p, n = apply_style(s, positive=prompt) + positive_basic_workloads.append(p) + negative_basic_workloads.append(n) + else: + positive_basic_workloads.append(prompt) - extra_positive_prompts = prompts[1:] if len(prompts) > 1 else [] - extra_negative_prompts = negative_prompts[1:] if len( - negative_prompts) > 1 else [] + negative_basic_workloads.append(negative_prompt) # Always use independent workload for negative. 
- seed = req.image_seed - max_seed = int(1024 * 1024 * 1024) - if not isinstance(seed, int): - seed = random.randint(1, max_seed) - if seed < 0: - seed = - seed - seed = seed % max_seed - - pipeline.refresh_everything( - refiner_model_name=req.refiner_model_name, - base_model_name=req.base_model_name, - loras=loras - ) - pipeline.prepare_text_encoder(async_call=False) - - positive_basic_workloads = [] - negative_basic_workloads = [] - - if use_style: - for s in style_selections: - p, n = apply_style(s, positive=prompt) - positive_basic_workloads.append(p) - negative_basic_workloads.append(n) - else: - positive_basic_workloads.append(prompt) + positive_basic_workloads = positive_basic_workloads + extra_positive_prompts + negative_basic_workloads = negative_basic_workloads + extra_negative_prompts - positive_basic_workloads = positive_basic_workloads + extra_positive_prompts - negative_basic_workloads = negative_basic_workloads + extra_negative_prompts + positive_basic_workloads = remove_empty_str(positive_basic_workloads, default=prompt) + negative_basic_workloads = remove_empty_str(negative_basic_workloads, default=negative_prompt) - positive_basic_workloads = remove_empty_str( - positive_basic_workloads, default=prompt) - negative_basic_workloads = remove_empty_str( - negative_basic_workloads, default=negative_prompt) + positive_top_k = len(positive_basic_workloads) + negative_top_k = len(negative_basic_workloads) - positive_top_k = len(positive_basic_workloads) - negative_top_k = len(negative_basic_workloads) + tasks = [dict( + task_seed=seed + i, + positive=positive_basic_workloads, + negative=negative_basic_workloads, + expansion='', + c=None, + uc=None, + ) for i in range(image_number)] - tasks = [dict( - task_seed=seed + i, - positive=positive_basic_workloads, - negative=negative_basic_workloads, - expansion='', - c=[None, None], - uc=[None, None], - ) for i in range(req.image_number)] + if use_expansion: + for i, t in enumerate(tasks): + progressbar(5, f'Preparing Fooocus text #{i + 1} ...') + expansion = pipeline.final_expansion(prompt, t['task_seed']) + print(f'[Prompt Expansion] New suffix: {expansion}') + t['expansion'] = expansion + t['positive'] = copy.deepcopy(t['positive']) + [join_prompts(prompt, expansion)] # Deep copy. - if use_expansion: - for i, t in enumerate(tasks): - expansion = pipeline.expansion(prompt, t['task_seed']) - print(f'[Prompt Expansion] New suffix: {expansion}') - t['expansion'] = expansion - # Deep copy. 
- t['positive'] = copy.deepcopy( - t['positive']) + [join_prompts(prompt, expansion)] - - for i, t in enumerate(tasks): - t['c'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['positive'], - pool_top_k=positive_top_k) - - for i, t in enumerate(tasks): - t['uc'][0] = pipeline.clip_encode(sd=pipeline.xl_base_patched, texts=t['negative'], - pool_top_k=negative_top_k) - - if pipeline.xl_refiner is not None: for i, t in enumerate(tasks): - t['c'][1] = pipeline.clip_separate(t['c'][0]) + progressbar(7, f'Encoding positive #{i + 1} ...') + t['c'] = pipeline.clip_encode(texts=t['positive'], pool_top_k=positive_top_k) for i, t in enumerate(tasks): - t['uc'][1] = pipeline.clip_separate(t['uc'][0]) + progressbar(10, f'Encoding negative #{i + 1} ...') + t['uc'] = pipeline.clip_encode(texts=t['negative'], pool_top_k=negative_top_k) - all_steps = steps * req.image_number + if len(goals) > 0: + progressbar(13, 'Image processing ...') - def callback(step, x0, x, total_steps, y): - done_steps = current_task_id * steps + step - print(f"Finished {done_steps}/{all_steps}") + if 'vary' in goals: + if not image_is_generated_in_current_ui(uov_input_image, ui_width=width, ui_height=height): + uov_input_image = resize_image(uov_input_image, width=width, height=height) + print(f'Resolution corrected - users are uploading their own images.') + else: + print(f'Processing images generated by Fooocus.') + if 'subtle' in uov_method: + denoising_strength = 0.5 + if 'strong' in uov_method: + denoising_strength = 0.85 + if advanced_parameters.overwrite_vary_strength > 0: + denoising_strength = advanced_parameters.overwrite_vary_strength + initial_pixels = core.numpy_to_pytorch(uov_input_image) + progressbar(13, 'VAE encoding ...') + initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=initial_pixels) + B, C, H, W = initial_latent['samples'].shape + width = W * 8 + height = H * 8 + print(f'Final resolution is {str((height, width))}.') + + if 'upscale' in goals: + H, W, C = uov_input_image.shape + progressbar(13, f'Upscaling image from {str((H, W))} ...') + + uov_input_image = core.numpy_to_pytorch(uov_input_image) + uov_input_image = perform_upscale(uov_input_image) + uov_input_image = core.pytorch_to_numpy(uov_input_image)[0] + print(f'Image upscaled.') + + if '1.5x' in uov_method: + f = 1.5 + elif '2x' in uov_method: + f = 2.0 + else: + f = 1.0 + + width_f = int(width * f) + height_f = int(height * f) + + if image_is_generated_in_current_ui(uov_input_image, ui_width=width_f, ui_height=height_f): + uov_input_image = resize_image(uov_input_image, width=int(W * f), height=int(H * f)) + print(f'Processing images generated by Fooocus.') + else: + uov_input_image = resize_image(uov_input_image, width=width_f, height=height_f) + print(f'Resolution corrected - users are uploading their own images.') + + H, W, C = uov_input_image.shape + image_is_super_large = H * W > 2800 * 2800 + + if 'fast' in uov_method: + direct_return = True + elif image_is_super_large: + print('Image is too large. Directly returned the SR image. 
' + 'Usually directly return SR image at 4K resolution ' + 'yields better results than SDXL diffusion.') + direct_return = True + else: + direct_return = False + + if direct_return: + d = [('Upscale (Fast)', '2x')] + log(uov_input_image, d, single_line_number=1) + outputs.append(['results', [uov_input_image], -1 if len(tasks) == 0 else tasks[0]['task_seed']]) + results = make_results_from_outputs() + task_queue.finish_task(task_seq, results, False) + return results * image_number + + tiled = True + denoising_strength = 0.382 + + if advanced_parameters.overwrite_upscale_strength > 0: + denoising_strength = advanced_parameters.overwrite_upscale_strength + + initial_pixels = core.numpy_to_pytorch(uov_input_image) + progressbar(13, 'VAE encoding ...') + + initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=initial_pixels, tiled=True) + B, C, H, W = initial_latent['samples'].shape + width = W * 8 + height = H * 8 + print(f'Final resolution is {str((height, width))}.') + + if 'inpaint' in goals: + if len(outpaint_selections) > 0: + H, W, C = inpaint_image.shape + if 'top' in outpaint_selections: + inpaint_image = np.pad(inpaint_image, [[int(H * 0.3), 0], [0, 0], [0, 0]], mode='edge') + input_mask = np.pad(input_mask, [[int(H * 0.3), 0], [0, 0]], mode='constant', + constant_values=255) + if 'bottom' in outpaint_selections: + inpaint_image = np.pad(inpaint_image, [[0, int(H * 0.3)], [0, 0], [0, 0]], mode='edge') + input_mask = np.pad(input_mask, [[0, int(H * 0.3)], [0, 0]], mode='constant', + constant_values=255) + + H, W, C = inpaint_image.shape + if 'left' in outpaint_selections: + inpaint_image = np.pad(inpaint_image, [[0, 0], [int(H * 0.3), 0], [0, 0]], mode='edge') + input_mask = np.pad(input_mask, [[0, 0], [int(H * 0.3), 0]], mode='constant', + constant_values=255) + if 'right' in outpaint_selections: + inpaint_image = np.pad(inpaint_image, [[0, 0], [0, int(H * 0.3)], [0, 0]], mode='edge') + input_mask = np.pad(input_mask, [[0, 0], [0, int(H * 0.3)]], mode='constant', + constant_values=255) + + inpaint_image = np.ascontiguousarray(inpaint_image.copy()) + input_mask = np.ascontiguousarray(input_mask.copy()) + + inpaint_worker.current_task = inpaint_worker.InpaintWorker(image=inpaint_image, mask=input_mask, + is_outpaint=len(outpaint_selections) > 0) + + # print(f'Inpaint task: {str((height, width))}') + # outputs.append(['results', inpaint_worker.current_task.visualize_mask_processing()]) + # return + + progressbar(13, 'VAE encoding ...') + inpaint_pixels = core.numpy_to_pytorch(inpaint_worker.current_task.image_ready) + initial_latent = core.encode_vae(vae=pipeline.final_vae, pixels=inpaint_pixels) + inpaint_latent = initial_latent['samples'] + B, C, H, W = inpaint_latent.shape + input_mask = core.numpy_to_pytorch(inpaint_worker.current_task.mask_ready[None]) + input_mask = torch.nn.functional.avg_pool2d(input_mask, (8, 8)) + input_mask = torch.nn.functional.interpolate(input_mask, (H, W), mode='bilinear') + inpaint_worker.current_task.load_latent(latent=inpaint_latent, mask=input_mask) + + progressbar(13, 'VAE inpaint encoding ...') + + input_mask = (inpaint_worker.current_task.mask_ready > 0).astype(np.float32) + input_mask = torch.tensor(input_mask).float() + + vae_dict = core.encode_vae_inpaint( + mask=input_mask, vae=pipeline.final_vae, pixels=inpaint_pixels) + + inpaint_latent = vae_dict['samples'] + input_mask = vae_dict['noise_mask'] + inpaint_worker.current_task.load_inpaint_guidance(latent=inpaint_latent, mask=input_mask, + model_path=inpaint_head_model_path) + + B, C, 
H, W = inpaint_latent.shape + final_height, final_width = inpaint_worker.current_task.image_raw.shape[:2] + height, width = H * 8, W * 8 + print(f'Final resolution is {str((final_height, final_width))}, latent is {str((height, width))}.') + + if 'cn' in goals: + for task in cn_tasks[flags.cn_canny]: + cn_img, cn_stop, cn_weight = task + cn_img = resize_image(HWC3(cn_img), width=width, height=height) + cn_img = preprocessors.canny_pyramid(cn_img) + cn_img = HWC3(cn_img) + task[0] = core.numpy_to_pytorch(cn_img) + if advanced_parameters.debugging_cn_preprocessor: + outputs.append(['results', [cn_img], -1 if len(tasks) == 0 else tasks[0]['task_seed']]) + results = make_results_from_outputs() + task_queue.finish_task(task_seq, results, False) + return results + for task in cn_tasks[flags.cn_cpds]: + cn_img, cn_stop, cn_weight = task + cn_img = resize_image(HWC3(cn_img), width=width, height=height) + cn_img = preprocessors.cpds(cn_img) + cn_img = HWC3(cn_img) + task[0] = core.numpy_to_pytorch(cn_img) + if advanced_parameters.debugging_cn_preprocessor: + outputs.append(['results', [cn_img], -1 if len(tasks) == 0 else tasks[0]['task_seed']]) + results = make_results_from_outputs() + task_queue.finish_task(task_seq, results, False) + return results + for task in cn_tasks[flags.cn_ip]: + cn_img, cn_stop, cn_weight = task + cn_img = HWC3(cn_img) + + # https://github.com/tencent-ailab/IP-Adapter/blob/d580c50a291566bbf9fc7ac0f760506607297e6d/README.md?plain=1#L75 + cn_img = resize_image(cn_img, width=224, height=224, resize_mode=0) + + task[0] = ip_adapter.preprocess(cn_img) + if advanced_parameters.debugging_cn_preprocessor: + outputs.append(['results', [cn_img], -1 if len(tasks) == 0 else tasks[0]['task_seed']]) + results = make_results_from_outputs() + task_queue.finish_task(task_seq, results, False) + return results + + if len(cn_tasks[flags.cn_ip]) > 0: + pipeline.final_unet = ip_adapter.patch_model(pipeline.final_unet, cn_tasks[flags.cn_ip]) + + results = [] + all_steps = steps * image_number preparation_time = time.perf_counter() - execution_start_time print(f'Preparation time: {preparation_time:.2f} seconds') - process_with_error = False + outputs.append(['preview', (13, 'Moving model to GPU ...', None)]) + execution_start_time = time.perf_counter() + model_management.load_models_gpu([pipeline.final_unet]) + moving_time = time.perf_counter() - execution_start_time + print(f'Moving model to GPU: {moving_time:.2f} seconds') + + outputs.append(['preview', (13, 'Starting tasks ...', None)]) + + def callback(step, x0, x, total_steps, y): + done_steps = current_task_id * steps + step + outputs.append(['preview', ( + int(15.0 + 85.0 * float(done_steps) / float(all_steps)), + f'Step {step}/{total_steps} in the {current_task_id + 1}-th Sampling', + y)]) + for current_task_id, task in enumerate(tasks): execution_start_time = time.perf_counter() try: + positive_cond, negative_cond = task['c'], task['uc'] + + if 'cn' in goals: + for cn_flag, cn_path in [ + (flags.cn_canny, controlnet_canny_path), + (flags.cn_cpds, controlnet_cpds_path) + ]: + for cn_img, cn_stop, cn_weight in cn_tasks[cn_flag]: + positive_cond, negative_cond = core.apply_controlnet( + positive_cond, negative_cond, + pipeline.loaded_ControlNets[cn_path], cn_img, cn_weight, 0, cn_stop) + imgs = pipeline.process_diffusion( - positive_cond=task['c'], - negative_cond=task['uc'], + positive_cond=positive_cond, + negative_cond=negative_cond, steps=steps, switch=switch, width=width, @@ -394,39 +569,44 @@ def callback(step, x0, x, total_steps, y): cfg_scale=cfg_scale ) + del task['c'], task['uc'], positive_cond, negative_cond # Save 
memory + + if inpaint_worker.current_task is not None: + imgs = [inpaint_worker.current_task.post_process(x) for x in imgs] + for x in imgs: d = [ ('Prompt', raw_prompt), ('Negative Prompt', raw_negative_prompt), ('Fooocus V2 Expansion', task['expansion']), ('Styles', str(raw_style_selections)), - ('Performance', req.performance_selection), + ('Performance', performance_selection), ('Resolution', str((width, height))), - ('Sharpness', req.sharpness), - ('Guidance Scale', req.guidance_scale), - ('ADM Guidance', str((adm_scaler_positive, adm_scaler_negative))), - ('Base Model', req.base_model_name), - ('Refiner Model', req.refiner_model_name), + ('Sharpness', sharpness), + ('Guidance Scale', guidance_scale), + ('ADM Guidance', str((patch.positive_adm_scale, patch.negative_adm_scale))), + ('Base Model', base_model_name), + ('Refiner Model', refiner_model_name), ('Sampler', sampler_name), ('Scheduler', scheduler_name), ('Seed', task['task_seed']) ] - for n, w in loras_user_raw_input: + for n, w in loras_raw: if n != 'None': d.append((f'LoRA [{n}] weight', w)) log(x, d, single_line_number=3) + + # Fooocus async_worker.py code end results.append(GeneratedImage( im=imgs[0], seed=task['task_seed'], finish_reason=GenerationFinishReason.success)) except model_management.InterruptProcessingException as e: print('User stopped') - for i in range(current_task_id + 1, len(tasks)): - results.append(GeneratedImage( + results.append(GeneratedImage( im=None, seed=task['task_seed'], finish_reason=GenerationFinishReason.user_cancel)) break except Exception as e: print('Process failed:', e) - process_with_error = True results.append(GeneratedImage( im=None, seed=task['task_seed'], finish_reason=GenerationFinishReason.error)) @@ -436,7 +616,7 @@ def callback(step, x0, x, total_steps, y): pipeline.prepare_text_encoder(async_call=True) print(f"[Task Queue] Finish task, seq={task_seq}") - task_queue.finish_task(task_seq, results, process_with_error) + task_queue.finish_task(task_seq, results, False) return results except Exception as e: diff --git a/main.py b/main.py index b4454c3..d692661 100644 --- a/main.py +++ b/main.py @@ -174,11 +174,6 @@ def download_models(): 'https://huggingface.co/lllyasviel/misc/resolve/main/xlvaeapp.pth') ] - upscaler_filenames = [ - ('fooocus_upscaler_s409985e5.bin', - 'https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_upscaler_s409985e5.bin') - ] - from modules.model_loader import load_file_from_url from modules.path import (fooocus_expansion_path, lorafile_path, modelfile_path, upscale_models_path, @@ -193,9 +188,6 @@ def download_models(): for file_name, url in vae_approx_filenames: load_file_from_url( url=url, model_dir=vae_approx_path, file_name=file_name) - for file_name, url in upscaler_filenames: - load_file_from_url( - url=url, model_dir=upscale_models_path, file_name=file_name) load_file_from_url( url='https://huggingface.co/lllyasviel/misc/resolve/main/fooocus_expansion.bin', diff --git a/requirements.txt b/requirements.txt index bee6a9d..c22f832 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ numpy==1.23.5 pytorch_lightning==1.9.4 omegaconf==2.2.3 pygit2==1.12.2 +opencv-contrib-python==4.8.0.74 fastapi==0.103.1 python-multipart==0.0.6 uvicorn[standard]==0.23.2 \ No newline at end of file
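
Usage note (not part of the patch itself): below is a minimal sketch of calling the new /v1/generation/image-prompt endpoint added above in fooocusapi/api.py. The host and port are assumptions (adjust to wherever Fooocus-API is served), and the input/output file names are hypothetical; the field names (cn_img1, cn_stop1, cn_type1, prompt) and the 'accept' query override come from ImgPromptRequest.as_form and the endpoint definition.

import requests

# Assumed host/port -- adjust to wherever the API server is running.
url = 'http://127.0.0.1:8888/v1/generation/image-prompt'

with open('reference.png', 'rb') as f:  # hypothetical reference image
    resp = requests.post(
        url,
        files={'cn_img1': f},
        data={
            'prompt': 'a photo of a cat',
            'cn_stop1': 0.4,             # stop applying the control at 40% of the steps
            'cn_type1': 'Image Prompt',  # or 'PyraCanny' / 'CPDS'
        },
        params={'accept': 'image/png'},  # stream raw PNG bytes instead of base64 JSON
    )

with open('result.png', 'wb') as out:
    out.write(resp.content)

With accept=image/png the server forces image_number to 1 and returns the image bytes directly; without it, the response is a JSON list of base64-encoded images.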