hand_detection/hand_detection_pytorch/hand_detection_pytorch.py

import sys
import time

import cv2

import ailia
import hand_detection_pytorch_utils

# import original modules
sys.path.append('../../util')
from utils import get_base_parser, update_parser  # noqa: E402
from model_utils import check_and_download_models  # noqa: E402
import webcamera_utils  # noqa: E402


# ======================
# Parameters
# ======================
# Model files. WEIGHT_PATH and MODEL_PATH are handed to ailia.Net when the
# detector is built, and all three values are passed to
# check_and_download_models() from main().
WEIGHT_PATH = 'hand_detection_pytorch.onnx'
MODEL_PATH = 'hand_detection_pytorch.onnx.prototxt'
REMOTE_PATH = 'https://storage.googleapis.com/ailia-models/hand_detection_pytorch/'

# Passed to get_base_parser() (presumably as the default input / output
# paths -- confirm in utils). SAVE_IMAGE_PATH is additionally compared with
# args.savepath in video mode: a video writer is created only when they differ.
IMAGE_PATH = 'CARDS_OFFICE.jpg'
SAVE_IMAGE_PATH = 'CARDS_OFFICE_output.jpg'

# Forwarded unchanged to hand_detection_pytorch_utils.post_process().
# NOTE(review): presumably a confidence threshold and an NMS IoU threshold --
# confirm against post_process.
THRESHOLD = 0.2
IOU = 0.2


# ======================
# Argument Parser Config
# ======================
# Build the command-line parser from the shared helper and resolve the
# arguments once, at import time. The resulting module-level `args` is read by
# the functions below (input, savepath, video, benchmark, env_id).
parser = get_base_parser(
    'hand-detection.PyTorch hand detection model',
    IMAGE_PATH,
    SAVE_IMAGE_PATH,
)
args = update_parser(parser)


# ======================
# Main functions
# ======================
def recognize_from_image():
    """Detect hands in the image at ``args.input`` and save the result.

    The image is pre-processed, run through the detector (five timed runs
    when ``args.benchmark`` is set, otherwise a single run), and every
    detection returned by ``post_process`` is drawn as a red rectangle on
    the original image, which is then written to ``args.savepath``.

    Exits the process with status 1 when the input image cannot be read or
    the output image cannot be written.
    """
    # prepare input data
    to_show = cv2.imread(args.input, cv2.IMREAD_COLOR)
    if to_show is None:
        # cv2.imread signals failure by returning None rather than raising,
        # so fail here with a clear message instead of an AttributeError.
        print(f'[ERROR] could not read input image: {args.input}')
        sys.exit(1)
    print(f'input image shape: {to_show.shape}')
    img, scale = hand_detection_pytorch_utils.pre_process(to_show)

    # net initialize
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    detector.set_input_shape((1, 3, img.shape[2], img.shape[3]))

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for _ in range(5):
            start = int(round(time.time() * 1000))
            out = detector.predict({'input.1': img})
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        out = detector.predict({'input.1': img})

    dets = hand_detection_pytorch_utils.post_process(
        out, img, scale, THRESHOLD, IOU
    )
    for det in dets:
        # OpenCV drawing functions require plain integer pixel coordinates;
        # cast explicitly in case post_process returns floating-point values.
        x1, y1, x2, y2 = (int(v) for v in det[:4])
        cv2.rectangle(to_show, (x1, y1), (x2, y2), [0, 0, 255], 3)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(args.savepath, to_show):
        print(f'[ERROR] could not write output image: {args.savepath}')
        sys.exit(1)
    print('Script finished successfully.')


def recognize_from_video():
    """Detect hands frame by frame on the video source ``args.video``.

    Each frame is pre-processed, run through the detector, annotated with a
    red rectangle per detection and shown in a window named ``frame``. When
    ``args.savepath`` differs from ``SAVE_IMAGE_PATH`` the annotated frames
    are also written through a video writer. The loop ends when ``q`` is
    pressed or no further frame can be read.

    The capture, the writer (if any) and the OpenCV windows are released
    even when an error interrupts processing.
    """
    # net initialize
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    writer = None
    try:
        # create video writer if savepath is specified as video format
        if args.savepath != SAVE_IMAGE_PATH:
            f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)

        while True:
            ret, to_show = capture.read()
            # press q to end video capture
            if (cv2.waitKey(1) & 0xFF) == ord('q') or not ret:
                break

            img, scale = hand_detection_pytorch_utils.pre_process(to_show)
            detector.set_input_shape((1, 3, img.shape[2], img.shape[3]))
            out = detector.predict({'input.1': img})
            dets = hand_detection_pytorch_utils.post_process(
                out, img, scale, THRESHOLD, IOU
            )
            for det in dets:
                # OpenCV drawing functions require plain integer pixel
                # coordinates; cast explicitly in case post_process returns
                # floating-point values.
                x1, y1, x2, y2 = (int(v) for v in det[:4])
                cv2.rectangle(to_show, (x1, y1), (x2, y2), [0, 0, 255], 3)
            cv2.imshow('frame', to_show)
            # save results
            if writer is not None:
                writer.write(to_show)
    finally:
        # Always free the device/file handles and close the preview window,
        # including when inference or drawing raises.
        capture.release()
        cv2.destroyAllWindows()
        if writer is not None:
            writer.release()
    print('Script finished successfully.')


def main():
    """Ensure the model files are present, then run image or video mode.

    Video mode is chosen when ``args.video`` is set; otherwise the single
    image given by ``args.input`` is processed.
    """
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    if args.video is None:
        # no video source given: image mode
        recognize_from_image()
        return

    # video mode
    recognize_from_video()


# Run the sample only when this file is executed as a script.
if __name__ == '__main__':
    main()