# inference_fp16_Monetico.py
#
# Text-to-image inference example for the "Collov-Labs/Monetico" checkpoint:
# loads the pipeline components, generates images for a list of prompts and
# saves them as PNG files under ./output.
import os
import sys
sys.path.append("./")
import torch
from torchvision import transforms
from src.transformer import Transformer2DModel
from src.pipeline import Pipeline
from src.scheduler import Scheduler
from transformers import (
CLIPTextModelWithProjection,
CLIPTokenizer,
)
from diffusers import VQModel
# Run on the GPU when one is available; otherwise fall back to the CPU so the
# script still runs (slowly) instead of failing in `pipe.to(...)`.
device = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE: despite "fp16" in the file name, the weights are loaded as bfloat16.
dtype = torch.bfloat16
model_path = "Collov-Labs/Monetico"

# Every component is loaded from its own subfolder of `model_path`.
model = Transformer2DModel.from_pretrained(model_path, subfolder="transformer", torch_dtype=dtype)
vq_model = VQModel.from_pretrained(model_path, subfolder="vqvae", torch_dtype=dtype)
# Text encoder shipped with the checkpoint (better for Monetico).
text_encoder = CLIPTextModelWithProjection.from_pretrained(model_path, subfolder="text_encoder", torch_dtype=dtype)
# Alternative text encoder (more stable sampling for some cases):
# text_encoder = CLIPTextModelWithProjection.from_pretrained(
#     "laion/CLIP-ViT-H-14-laion2B-s32B-b79K", torch_dtype=dtype
# )
# NOTE(review): `torch_dtype` is presumably a no-op for the tokenizer and the
# scheduler; it is kept exactly as in the original call - confirm before removing.
tokenizer = CLIPTokenizer.from_pretrained(model_path, subfolder="tokenizer", torch_dtype=dtype)
scheduler = Scheduler.from_pretrained(model_path, subfolder="scheduler", torch_dtype=dtype)

# Assemble the pipeline from the loaded components and move it to `device`.
pipe = Pipeline(vq_model, tokenizer=tokenizer, text_encoder=text_encoder, transformer=model, scheduler=scheduler)
pipe.to(device)
# Sampling settings handed to the pipeline call below.
steps = 48  # passed as num_inference_steps
CFG = 9  # passed as guidance_scale
resolution = 512  # used for both height and width

# Repeated once per generated image when the pipeline is called.
negative_prompt = (
    "worst quality, low quality, low res, blurry, distortion, watermark, "
    "logo, signature, text, jpeg artifacts, signature, sketch, duplicate, "
    "ugly, identifying mark"
)

prompts = [
    "Two actors are posing for a pictur with one wearing a black and white face paint.",
    "A large body of water with a rock in the middle and mountains in the background.",
    "A white and blue coffee mug with a picture of a man on it.",
    "A statue of a man with a crown on his head.",
    "A man in a yellow wet suit is holding a big black dog in the water.",
    "A white table with a vase of flowers and a cup of coffee on top of it.",
    "A woman stands on a dock in the fog.",
    "A woman is standing next to a picture of another woman.",
]

# With batching enabled every prompt is rendered in one call; otherwise only
# the first prompt is used.
batched_generation = False
if batched_generation:
    num_images = len(prompts)
else:
    num_images = 1
# Generate one image per selected prompt; only the `.images` attribute of the
# pipeline output is kept.
active_prompts = prompts[:num_images]
negative_prompts = [negative_prompt for _ in range(num_images)]
generation = pipe(
    prompt=active_prompts,
    negative_prompt=negative_prompts,
    height=resolution,
    width=resolution,
    guidance_scale=CFG,
    num_inference_steps=steps,
)
images = generation.images
# Path separators and characters that are rejected in file names on common
# platforms; replacing them keeps every output file inside `output_dir`.
_UNSAFE_FILENAME_CHARS = frozenset('/\\:*?"<>|')
# Cap on the prompt-derived part of the file name so that long prompts do not
# exceed per-name file system limits (commonly 255 bytes).
_MAX_STEM_LENGTH = 200


def _prompt_to_file_stem(prompt):
    """Return *prompt* rewritten so it is safe to embed in a file name.

    Whitespace, non-printable characters and the characters in
    ``_UNSAFE_FILENAME_CHARS`` are each replaced by ``"_"``; the result is
    truncated to ``_MAX_STEM_LENGTH`` characters. Prompts made only of
    letters, digits, spaces and ordinary punctuation such as ``"."`` map to
    the same name as a plain ``replace(" ", "_")``.
    """
    cleaned = "".join(
        "_" if ch.isspace() or not ch.isprintable() or ch in _UNSAFE_FILENAME_CHARS else ch
        for ch in prompt
    )
    return cleaned[:_MAX_STEM_LENGTH]


# Save each generated image as
# <output_dir>/<prompt>_<resolution>_<steps>_<CFG>.png and report its path.
output_dir = "./output"
os.makedirs(output_dir, exist_ok=True)
for i, prompt in enumerate(prompts[:num_images]):
    sanitized_prompt = _prompt_to_file_stem(prompt)
    file_path = os.path.join(output_dir, f"{sanitized_prompt}_{resolution}_{steps}_{CFG}.png")
    images[i].save(file_path)
    print(f"The {i+1}/{num_images} image is saved to {file_path}")