Add files via upload

able2608 · Aug 1, 2024 · b390cc6 · b390cc6
1 parent 1972d79
commit b390cc6
Show file tree

Hide file tree

Showing 100 changed files with 5,880 additions and 0 deletions.
diff --git a/__pycache__/__init__.cpython-310.pyc b/__pycache__/__init__.cpython-310.pyc
diff --git a/__pycache__/run_align_pose.cpython-310.pyc b/__pycache__/run_align_pose.cpython-310.pyc
diff --git a/__pycache__/uniAnimate_Inference.cpython-310.pyc b/__pycache__/uniAnimate_Inference.cpython-310.pyc
diff --git a/configs/UniAnimate_infer.yaml b/configs/UniAnimate_infer.yaml
@@ -0,0 +1,101 @@
+# manual setting
+max_frames: 1
+resolution: [512, 768]  # or resolution: [768, 1216]
+# resolution: [768, 1216]
+round: 1 
+ddim_timesteps: 30  # among 25-50
+seed: 11 # 7
+test_list_path: [
+    # Format: [frame_interval, reference image, driving pose sequence]
+    # [2, "data/images/WOMEN-Blouses_Shirts-id_00004955-01_4_full.jpg", "data/saved_pose/WOMEN-Blouses_Shirts-id_00004955-01_4_full"],
+    # [2, "data/images/musk.jpg", "data/saved_pose/musk"],
+    # [2, "data/images/WOMEN-Blouses_Shirts-id_00005125-03_4_full.jpg", "data/saved_pose/WOMEN-Blouses_Shirts-id_00005125-03_4_full"],
+    # [2, "data/images/IMG_20240514_104337.jpg", "data/saved_pose/IMG_20240514_104337"]
+    [1, "data/images/i.png", "data/saved_pose/dancePoses"]
+
+]
+partial_keys: [
+                    # ['image','local_image', "dwpose"], # reference image as the first frame of the generated video (optional)
+                    ['image', 'randomref', "dwpose"],
+                ]
+
+
+
+
+# default settings
+TASK_TYPE: inference_unianimate_entrance
+use_fp16: True
+guide_scale: 2.5
+vit_resolution: [224, 224]
+batch_size: 1
+latent_random_ref: True
+chunk_size: 2
+decoder_bs: 2
+scale: 8
+use_fps_condition: False
+test_model: checkpoints/unianimate_16f_32f_non_ema_223000.pth
+embedder: {
+    'type': 'FrozenOpenCLIPTextVisualEmbedder',
+    'layer': 'penultimate',
+    'pretrained': 'checkpoints/open_clip_pytorch_model.bin'
+}
+
+
+auto_encoder: {
+    'type': 'AutoencoderKL',
+    'ddconfig': {
+        'double_z': True, 
+        'z_channels': 4,
+        'resolution': 256, 
+        'in_channels': 3,
+        'out_ch': 3, 
+        'ch': 128, 
+        'ch_mult': [1, 2, 4, 4],
+        'num_res_blocks': 2, 
+        'attn_resolutions': [], 
+        'dropout': 0.0,
+        'video_kernel_size': [3, 1, 1]
+    },
+    'embed_dim': 4,
+    'pretrained': 'checkpoints/v2-1_512-ema-pruned.ckpt'
+}
+
+UNet: {
+    'type': 'UNetSD_UniAnimate',
+    'config': None,
+    'in_dim': 4,
+    'dim': 320,
+    'y_dim': 1024,
+    'context_dim': 1024,
+    'out_dim': 4,
+    'dim_mult': [1, 2, 4, 4],
+    'num_heads': 8,
+    'head_dim': 64,
+    'num_res_blocks': 2,
+    'dropout': 0.1,
+    'temporal_attention': True,
+    'num_tokens': 4,
+    'temporal_attn_times': 1,
+    'use_checkpoint': True,
+    'use_fps_condition': False,
+    'use_sim_mask': False
+}
+video_compositions: ['image', 'local_image', 'dwpose', 'randomref', 'randomref_pose']
+Diffusion: {
+    'type': 'DiffusionDDIM',
+    'schedule': 'linear_sd', 
+    'schedule_param': {
+        'num_timesteps': 1000,
+        "init_beta": 0.00085, 
+        "last_beta": 0.0120,
+        'zero_terminal_snr': True,
+    },
+    'mean_type': 'v',
+    'loss_type': 'mse',
+    'var_type': 'fixed_small', # 'fixed_large',
+    'rescale_timesteps': False,
+    'noise_strength': 0.1
+}
+use_DiffusionDPM: False
+CPU_CLIP_VAE: True
diff --git a/configs/UniAnimate_infer_long.yaml b/configs/UniAnimate_infer_long.yaml
@@ -0,0 +1,101 @@
+# manual setting
+# resolution: [512, 768] # or [768, 1216]
+resolution: [768, 1216]
+round: 1
+ddim_timesteps: 30 # among 25-50
+context_size: 32
+context_stride: 1
+context_overlap: 8
+seed: 7
+max_frames: "None" # e.g. 64 or 96; "None" means the full length of the original pose sequence
+test_list_path: [
+    # Format: [frame_interval, reference image, driving pose sequence]
+    [2, "data/images/WOMEN-Blouses_Shirts-id_00004955-01_4_full.jpg", "data/saved_pose/WOMEN-Blouses_Shirts-id_00004955-01_4_full"],
+    [2, "data/images/musk.jpg", "data/saved_pose/musk"],
+    [2, "data/images/WOMEN-Blouses_Shirts-id_00005125-03_4_full.jpg", "data/saved_pose/WOMEN-Blouses_Shirts-id_00005125-03_4_full"],
+    [2, "data/images/IMG_20240514_104337.jpg", "data/saved_pose/IMG_20240514_104337"],
+    [2, "data/images/IMG_20240514_104337.jpg", "data/saved_pose/IMG_20240514_104337_dance"],
+    [2, "data/images/WOMEN-Blouses_Shirts-id_00005125-03_4_full.jpg", "data/saved_pose/WOMEN-Blouses_Shirts-id_00005125-03_4_full_dance"]
+]
+
+
+# default settings
+TASK_TYPE: inference_unianimate_long_entrance
+use_fp16: True
+guide_scale: 2.5
+vit_resolution: [224, 224]
+batch_size: 1
+latent_random_ref: True
+chunk_size: 2
+decoder_bs: 2
+scale: 8
+use_fps_condition: False
+test_model: checkpoints/unianimate_16f_32f_non_ema_223000.pth
+partial_keys: [
+                    ['image', 'randomref', "dwpose"],
+                ]
+embedder: {
+    'type': 'FrozenOpenCLIPTextVisualEmbedder',
+    'layer': 'penultimate',
+    'pretrained': 'checkpoints/open_clip_pytorch_model.bin'
+}
+
+
+auto_encoder: {
+    'type': 'AutoencoderKL',
+    'ddconfig': {
+        'double_z': True, 
+        'z_channels': 4,
+        'resolution': 256, 
+        'in_channels': 3,
+        'out_ch': 3, 
+        'ch': 128, 
+        'ch_mult': [1, 2, 4, 4],
+        'num_res_blocks': 2, 
+        'attn_resolutions': [], 
+        'dropout': 0.0,
+        'video_kernel_size': [3, 1, 1]
+    },
+    'embed_dim': 4,
+    'pretrained': 'checkpoints/v2-1_512-ema-pruned.ckpt'
+}
+
+UNet: {
+    'type': 'UNetSD_UniAnimate',
+    'config': None,
+    'in_dim': 4,
+    'dim': 320,
+    'y_dim': 1024,
+    'context_dim': 1024,
+    'out_dim': 4,
+    'dim_mult': [1, 2, 4, 4],
+    'num_heads': 8,
+    'head_dim': 64,
+    'num_res_blocks': 2,
+    'dropout': 0.1,
+    'temporal_attention': True,
+    'num_tokens': 4,
+    'temporal_attn_times': 1,
+    'use_checkpoint': True,
+    'use_fps_condition': False,
+    'use_sim_mask': False
+}
+video_compositions: ['image', 'local_image', 'dwpose', 'randomref', 'randomref_pose']
+Diffusion: {
+    'type': 'DiffusionDDIMLong',
+    'schedule': 'linear_sd', 
+    'schedule_param': {
+        'num_timesteps': 1000,
+        "init_beta": 0.00085, 
+        "last_beta": 0.0120,
+        'zero_terminal_snr': True,
+    },
+    'mean_type': 'v',
+    'loss_type': 'mse',
+    'var_type': 'fixed_small', 
+    'rescale_timesteps': False,
+    'noise_strength': 0.1
+}
+CPU_CLIP_VAE: True
+context_batch_size: 1
diff --git a/dwpose/__init__.py b/dwpose/__init__.py
diff --git a/dwpose/__pycache__/__init__.cpython-310.pyc b/dwpose/__pycache__/__init__.cpython-310.pyc
diff --git a/dwpose/__pycache__/__init__.cpython-39.pyc b/dwpose/__pycache__/__init__.cpython-39.pyc
diff --git a/dwpose/__pycache__/onnxdet.cpython-310.pyc b/dwpose/__pycache__/onnxdet.cpython-310.pyc
diff --git a/dwpose/__pycache__/onnxdet.cpython-39.pyc b/dwpose/__pycache__/onnxdet.cpython-39.pyc
diff --git a/dwpose/__pycache__/onnxpose.cpython-310.pyc b/dwpose/__pycache__/onnxpose.cpython-310.pyc
diff --git a/dwpose/__pycache__/onnxpose.cpython-39.pyc b/dwpose/__pycache__/onnxpose.cpython-39.pyc
diff --git a/dwpose/__pycache__/util.cpython-310.pyc b/dwpose/__pycache__/util.cpython-310.pyc
diff --git a/dwpose/__pycache__/util.cpython-39.pyc b/dwpose/__pycache__/util.cpython-39.pyc
diff --git a/dwpose/__pycache__/wholebody.cpython-310.pyc b/dwpose/__pycache__/wholebody.cpython-310.pyc
diff --git a/dwpose/__pycache__/wholebody.cpython-39.pyc b/dwpose/__pycache__/wholebody.cpython-39.pyc
diff --git a/dwpose/onnxdet.py b/dwpose/onnxdet.py
@@ -0,0 +1,127 @@
+import cv2
+import numpy as np
+
+import onnxruntime
+
+def nms(boxes, scores, nms_thr):
+    """Greedy single-class non-maximum suppression implemented in NumPy.
+
+    Args:
+        boxes: 2-D array whose columns 0-3 are read as x1, y1, x2, y2.
+        scores: 1-D array holding one score per row of ``boxes``.
+        nms_thr: IoU threshold; a candidate is discarded when its IoU with
+            an already kept box is greater than this value.
+
+    Returns:
+        List of row indices into ``boxes`` that survive suppression, ordered
+        from highest to lowest score.
+    """
+    left, top = boxes[:, 0], boxes[:, 1]
+    right, bottom = boxes[:, 2], boxes[:, 3]
+
+    # Widths and heights use the inclusive "+1" pixel convention.
+    box_areas = (right - left + 1) * (bottom - top + 1)
+
+    # Candidates ordered by descending score; the head is always kept.
+    remaining = scores.argsort()[::-1]
+    kept = []
+    while remaining.size > 0:
+        best, rest = remaining[0], remaining[1:]
+        kept.append(best)
+
+        # Intersection rectangle between the best box and every other one.
+        inter_left = np.maximum(left[best], left[rest])
+        inter_top = np.maximum(top[best], top[rest])
+        inter_right = np.minimum(right[best], right[rest])
+        inter_bottom = np.minimum(bottom[best], bottom[rest])
+        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
+        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
+        overlap = inter_w * inter_h
+        iou = overlap / (box_areas[best] + box_areas[rest] - overlap)
+
+        # Carry over only the candidates that overlap little enough.
+        remaining = rest[iou <= nms_thr]
+
+    return kept
+
+def multiclass_nms(boxes, scores, nms_thr, score_thr):
+    """Class-aware NMS in NumPy: each score column is suppressed on its own.
+
+    Args:
+        boxes: 2-D array of candidate boxes, one row per candidate.
+        scores: 2-D array with one row per candidate and one column per class.
+        nms_thr: IoU threshold forwarded to ``nms``.
+        score_thr: candidates whose class score is not greater than this
+            value are ignored for that class.
+
+    Returns:
+        Array whose rows are ``[box columns..., score, class index]`` for all
+        kept detections, or ``None`` when no class yields a detection.
+    """
+    per_class_dets = []
+    for cls_ind in range(scores.shape[1]):
+        cls_scores = scores[:, cls_ind]
+        above_thr = cls_scores > score_thr
+        if not above_thr.any():
+            continue
+
+        cand_scores = cls_scores[above_thr]
+        cand_boxes = boxes[above_thr]
+        keep = nms(cand_boxes, cand_scores, nms_thr)
+        if len(keep) == 0:
+            continue
+
+        # Column vector repeating the class index for every kept box.
+        cls_column = np.ones((len(keep), 1)) * cls_ind
+        per_class_dets.append(
+            np.concatenate(
+                [cand_boxes[keep], cand_scores[keep, None], cls_column], 1
+            )
+        )
+
+    if not per_class_dets:
+        return None
+    return np.concatenate(per_class_dets, 0)
+
+def demo_postprocess(outputs, img_size, p6=False):
+    """Decode grid-relative predictions into input-image units, in place.
+
+    For every stride a grid of cell coordinates is built; the first two
+    entries of the last axis are shifted by the cell position and scaled by
+    the stride, and entries 2-3 are exponentiated and scaled by the stride.
+
+    Args:
+        outputs: array whose second-to-last axis enumerates the grid cells of
+            all strides in order and whose last axis has at least 4 entries.
+            It is modified in place.
+        img_size: ``(height, width)`` of the network input.
+        p6: when true, a fourth stride of 64 is used in addition to 8/16/32.
+
+    Returns:
+        The same ``outputs`` array after decoding.
+    """
+    strides = [8, 16, 32, 64] if p6 else [8, 16, 32]
+
+    cell_chunks = []
+    stride_chunks = []
+    for stride in strides:
+        rows = img_size[0] // stride
+        cols = img_size[1] // stride
+        col_idx, row_idx = np.meshgrid(np.arange(cols), np.arange(rows))
+        cells = np.stack((col_idx, row_idx), 2).reshape(1, -1, 2)
+        cell_chunks.append(cells)
+        # One stride value per cell, shaped to broadcast against the cells.
+        stride_chunks.append(np.full((*cells.shape[:2], 1), stride))
+
+    all_cells = np.concatenate(cell_chunks, 1)
+    all_strides = np.concatenate(stride_chunks, 1)
+
+    outputs[..., :2] = (outputs[..., :2] + all_cells) * all_strides
+    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * all_strides
+    return outputs
+
+def preprocess(img, input_size, swap=(2, 0, 1)):
+    """Resize ``img`` to fit ``input_size`` and pad it onto a gray canvas.
+
+    The image is scaled by a single ratio so that it fits inside
+    ``input_size`` and is pasted into the top-left corner of a canvas filled
+    with the value 114; the canvas axes are then permuted with ``swap``.
+
+    Args:
+        img: image array; a 3-D input gets a 3-channel canvas.
+        input_size: ``(height, width)`` of the canvas.
+        swap: axis order passed to ``transpose`` on the padded canvas.
+            NOTE(review): the default has three axes, so it looks like only
+            3-D images work with it - confirm before passing 2-D input.
+
+    Returns:
+        Tuple ``(padded, ratio)``: the contiguous float32 canvas after the
+        axis swap, and the scale factor applied to the image.
+    """
+    if img.ndim == 3:
+        canvas = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
+    else:
+        canvas = np.ones(input_size, dtype=np.uint8) * 114
+
+    src_h, src_w = img.shape[0], img.shape[1]
+    r = min(input_size[0] / src_h, input_size[1] / src_w)
+    new_h = int(src_h * r)
+    new_w = int(src_w * r)
+
+    # cv2.resize takes the target size as (width, height).
+    scaled = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
+    canvas[:new_h, :new_w] = scaled.astype(np.uint8)
+
+    swapped = canvas.transpose(swap)
+    return np.ascontiguousarray(swapped, dtype=np.float32), r
+
+def inference_detector(session, oriImg):
+    """Run the ONNX detector on ``oriImg`` and return the class-0 boxes.
+
+    The image is letterboxed to 640x640, passed through ``session``, decoded,
+    converted from centre/size to corner format, rescaled to the original
+    image, and filtered with class-aware NMS. Only detections of class index
+    0 with a score above 0.3 are returned (presumably the "person" class of
+    the detector - confirm against the model's label map).
+
+    Args:
+        session: object exposing ``get_inputs()`` and ``run()`` as used
+            below (an ONNX Runtime inference session in this project).
+        oriImg: input image array, forwarded to ``preprocess``.
+
+    Returns:
+        Array of shape ``(k, 4)`` with ``[x1, y1, x2, y2]`` rows in original
+        image coordinates. ``k`` is 0 when nothing is detected; the result
+        then has shape ``(0, 4)`` so every return path has the same layout.
+    """
+    input_shape = (640, 640)
+    img, ratio = preprocess(oriImg, input_shape)
+
+    # Add the batch axis expected by the model input.
+    ort_inputs = {session.get_inputs()[0].name: img[None, :, :, :]}
+    output = session.run(None, ort_inputs)
+
+    predictions = demo_postprocess(output[0], input_shape)[0]
+
+    boxes = predictions[:, :4]
+    # Per-class score = column 4 multiplied by each remaining class column.
+    scores = predictions[:, 4:5] * predictions[:, 5:]
+
+    # Centre/size -> corner format, then undo the preprocessing scale.
+    boxes_xyxy = np.ones_like(boxes)
+    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
+    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
+    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
+    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
+    boxes_xyxy /= ratio
+
+    dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)
+    if dets is None:
+        # Keep the (k, 4) layout even when empty; len() is still 0.
+        return np.empty((0, 4))
+
+    final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
+    # Vectorized mask: confident enough AND belonging to class 0.
+    is_wanted = (final_scores > 0.3) & (final_cls_inds == 0)
+    return final_boxes[is_wanted]