diff --git a/.gitignore b/.gitignore index c7a17a0..a64f75e 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,5 @@ __pycache__/ /.mypy_cache/ /examples/*.png + +.pre-commit-config.yaml diff --git a/README.md b/README.md index af169a1..84220d4 100644 --- a/README.md +++ b/README.md @@ -208,9 +208,10 @@ Requirements ------------ - [Python](https://www.python.org/downloads/) 3.6.0 or newer -- [Pillow](https://github.com/python-pillow/Pillow) 6.0.0 - Python image library +- [Pillow](https://github.com/python-pillow/Pillow) 6.0.0 or newer - Python image library - [FlatBuffers](https://github.com/google/flatbuffers) - serialization library -- [dpkt](https://github.com/kbandla/dpkt) - TCP/IP packet parsing library +- [dpkt](https://github.com/kbandla/dpkt) - TCP/IP packet parsing library +- [OpenCV](https://opencv.org/) 4.0.0 or newer - computer vision library Installation diff --git a/docs/source/overview.md b/docs/source/overview.md index 4313fb9..50403dc 100644 --- a/docs/source/overview.md +++ b/docs/source/overview.md @@ -2,7 +2,7 @@ Overview ======== -https://github.com/zayfod/pycozmo +[https://github.com/zayfod/pycozmo](https://github.com/zayfod/pycozmo) `PyCozmo` is a pure-Python communication library, alternative SDK, and application for the [Cozmo robot](https://www.digitaldreamlabs.com/pages/cozmo) . It allows controlling a Cozmo robot directly, without @@ -69,9 +69,10 @@ Requirements ------------ - [Python](https://www.python.org/downloads/) 3.6.0 or newer -- [Pillow](https://github.com/python-pillow/Pillow) 6.0.0 - Python image library +- [Pillow](https://github.com/python-pillow/Pillow) 6.0.0 or newer - Python image library - [FlatBuffers](https://github.com/google/flatbuffers) - serialization library - [dpkt](https://github.com/kbandla/dpkt) - TCP/IP packet parsing library +- [OpenCV](https://opencv.org/) 4.0.0 or newer - computer vision library Installation diff --git a/examples/face_detection.py b/examples/face_detection.py new file mode 100644 index 0000000..58b54e8 --- /dev/null +++ b/examples/face_detection.py @@ -0,0 +1,136 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +from queue import Empty +import math +import time +import numpy as np +import cv2 as cv +import pycozmo as pc +from pycozmo.object_detection_n_tracking import ObjectDetectionNTracking +from pycozmo.object_tracker import TrackType +from pycozmo.object_detector import ObjCat +from pycozmo.display import Display + +# Instantiate a face detection and tracking object +TRACKER = ObjectDetectionNTracking(TrackType.MOSSE, skip_frames=5, obj_cats=[ObjCat.HEAD], conf_thres=0.5, + conf_decay_rate=0.995, img_w=480, img_h=480) + +# Instantiate a display +DISPLAY = Display(win_name='Tracker') + +# Those two constants are used when sharpening the image with the unsharp mask +# algorithm +SHARP_AMOUNT = 0.7 +SHARP_GAMMA = 2.2 + +# Some more constants to store the robots current status +HEAD_TILT = (pc.MAX_HEAD_ANGLE.radians - pc.MIN_HEAD_ANGLE.radians) * 0.1 +HEAD_INC = math.radians(4) +HEAD_LIGHT = False + +# If you want the camera to only be in grayscale, set this to False +COLOR_CAMERA = True + + +# Define a function for handling new frames received by the camera +def on_camera_img(cli, image): + """ + Detect and track any head that appear in the frame using yolov4-tiny. 
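+    The frame is converted to a BGR numpy array, upscaled, sharpened with
+    unsharp masking, and then handed to the tracker.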
+ :param cli: + :param image: + :return: None + """ + global TRACKER, SHARP_AMOUNT, SHARP_GAMMA + + # Convert the image to a numpy array + frame = np.array(image) + + # Check if the frame is in color + if frame.shape[-1] == 3: + # OpenCV mainly works with BGR formatted images so we need to convert + # the frame + frame = cv.cvtColor(frame, cv.COLOR_RGB2BGR) + + # Rescale the image + scaled_frame = cv.resize(frame, None, fx=2, fy=2, + interpolation=cv.INTER_LANCZOS4) + + # Try to sharpen the image as much as we can + blurred_frame = cv.GaussianBlur(scaled_frame, (3, 3), 0) + sharp_frame = cv.addWeighted(scaled_frame, 1 + SHARP_AMOUNT, + blurred_frame, -SHARP_AMOUNT, + gamma=SHARP_GAMMA) + + # Let the tracker detect the different faces + # (this is where the heavy lifting happens) + TRACKER.process_frame(sharp_frame) + + +if __name__ == "__main__": + # Connect to the robot + with pc.connect() as cli: + try: + # Look forward + cli.set_head_angle(HEAD_TILT) + + # Enable the camera + cli.enable_camera(enable=True, color=COLOR_CAMERA) + + # Wait a little bit for the image to stabilize + time.sleep(2) + + # Handle new incoming images + cli.add_handler(pc.event.EvtNewRawCameraImage, on_camera_img) + + # Loop forever + while True: + try: + # Get the next frame with the bounding boxes + # A timeout is applied so that the robot might still be + # controlled even if no image can be displayed + img = TRACKER.get_next_frame(block=False) + # Display the image in a dedicated window + DISPLAY.step(img) + except Empty: + pass + + # Read the next key event received by OpenCV's main window + key = cv.waitKey(25) + + # Act accordingly + if key == ord('q'): + # Exit the program + break + + elif key in [ord('k'), ord('j')]: + if key == ord('k'): + # Increase head tilt + HEAD_TILT = min(pc.MAX_HEAD_ANGLE.radians, + HEAD_TILT + HEAD_INC) + elif key == ord('j'): + # Decrease head tilt + HEAD_TILT = max(pc.MIN_HEAD_ANGLE.radians, + HEAD_TILT - HEAD_INC) + # Set the head angle + cli.set_head_angle(HEAD_TILT) + + elif key == ord('l'): + # Toggle the head light + HEAD_LIGHT = not HEAD_LIGHT + # Set the head light + cli.set_head_light(enable=HEAD_LIGHT) + + # Display the robot's status + print("Head angle: {:.2f} degrees, " + "Head light enabled: {}".format(math.degrees(HEAD_TILT), + HEAD_LIGHT), end='\r') + + finally: + # Set the head down + cli.set_head_angle(pc.MIN_HEAD_ANGLE.radians) + + # Close the display + DISPLAY.stop() + + # Stop the face detection and tracking process + TRACKER.stop() diff --git a/examples/opencv_rc.py b/examples/opencv_rc.py new file mode 100644 index 0000000..d98444e --- /dev/null +++ b/examples/opencv_rc.py @@ -0,0 +1,246 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import math +from queue import Queue, Empty +import logging +import numpy as np +import cv2 as cv +import pycozmo as pc + +# Set that to False if you want Cozmo's camera to return grayscale images +COLOR_IMG = True + +# Define the increments for the linear and angular velocities, as well as the +# head angle and lift height +LIN_INC = 20 +ANG_INC = math.radians(20) +HEAD_INC = math.radians(2) +LIFT_INC = 5 + +# This is required since Open CV cannot open displays from threads. 
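+# The camera handler below therefore only puts frames on this queue; the main
+# thread pops them and calls cv.imshow() itself.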
+IMG_QUEUE = Queue() + +# The linear velocity is expressed in mm/s and the angular velocity in rad/s +LIN_VELOCITY = 0 +ANG_VELOCITY = 0 +HEAD_TILT = (pc.MAX_HEAD_ANGLE.radians - pc.MIN_HEAD_ANGLE.radians) * 0.1 +HEAD_LIGHT = False +LIFT_HEIGHT = pc.MIN_LIFT_HEIGHT.mm + +# Those parameters are used to configure the unsharp masking algorithm +# Play with them to get the best image you can +SHARP_AMOUNT = 0.7 +SHARP_GAMMA = 2.2 + + +# NOTE: This could be used with cv.filter2D() in place of the unsharp masking +# However, controlling the amount of sharpening is more difficult +# UNSHARP_KERNEL = -1 / 256 * np.array([[1, 4, 6, 4, 1], +# [4, 16, 24, 16, 4], +# [6, 24, -476, 24, 6], +# [4, 16, 24, 16, 4], +# [1, 4, 6, 4, 1]]) + + +def on_camera_img(cli, image): + """ + A simple function that converts a frame from Cozmo's camera into a BGR + formatted image to be used by OpenCV. + :param cli: An instance of the pycozmo.Client class representing the robot. + :param image: A color/grayscale frame from Cozmo's camera. + :return: None + """ + global IMG_QUEUE + + # Convert the image into a numpy array so that OpenCV can manipulate it + orig_img = np.array(image) + + # Check if we got a color image + if orig_img.shape[-1] == 3: + # The thing about OpenCV is that it uses BGR formatted images for + # reasons + orig_img = cv.cvtColor(orig_img, cv.COLOR_RGB2BGR) + + # Resize the image + # The lanczos4 algorithm produces the best results, but might be slow + # you can use cv.INTER_LINEAR for poorer, but faster results + resized_img = cv.resize(orig_img, None, fx=2, fy=2, + interpolation=cv.INTER_LANCZOS4) + + # Try to reduce the noise using unsharp masking + # An explanation for this technique can be found here: + # https://en.wikipedia.org/wiki/Unsharp_masking#Digital_unsharp_masking + blurred_img = cv.GaussianBlur(resized_img, (3, 3), 0) + sharp_img = cv.addWeighted(resized_img, 1 + SHARP_AMOUNT, blurred_img, + -SHARP_AMOUNT, gamma=SHARP_GAMMA) + + # Send the image back to the main thread for display + IMG_QUEUE.put(sharp_img) + + +def stop_all(cli, state): + """ + This function simply stops all motors and resets the corresponding + velocities. It is used when Cozmo detects that it has been picked up, or is + about to fall off a cliff. + :param cli: An instance of the pycozmo.Client class representing the robot. + :param state: A boolean set to True if Cozmo has been picked up or is about + to fall off a cliff. False otherwise. 
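+    Note that only LIN_VELOCITY and ANG_VELOCITY are reset; HEAD_TILT and
+    LIFT_HEIGHT keep their last values.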
+ :return: None + """ + global LIN_VELOCITY, ANG_VELOCITY + + # Well as said above, if Cozmo is not touching the ground anymore we stop + # all motors to prevent any damage + if state: + # Stop the motors + cli.stop_all_motors() + + # Reset the linear and angular velocities + LIN_VELOCITY = 0 + ANG_VELOCITY = 0 + + +if __name__ == "__main__": + # Connect to the robot + with pc.connect() as cli: + try: + # Look forward + cli.set_head_angle(HEAD_TILT) + + # Enable the camera + cli.enable_camera(enable=True, color=COLOR_IMG) + + # Set the lift in its minimum position + cli.set_lift_height(height=LIFT_HEIGHT) + + # Handle new incoming images + cli.add_handler(pc.event.EvtNewRawCameraImage, on_camera_img) + + # Handle cliff and pick-up detection + cli.add_handler(pc.event.EvtCliffDetectedChange, stop_all) + cli.add_handler(pc.event.EvtRobotPickedUpChange, stop_all) + + # Loop forever + while True: + try: + # Get the next frame from the camera + # A timeout is applied so that the robot might still be + # controlled even if no image can be displayed + img = IMG_QUEUE.get(timeout=0.2) + # Display the frame in a window + cv.imshow('Camera', img) + IMG_QUEUE.task_done() + except Empty: + logging.warning('Did not get any image from the camera so ' + 'not displaying any.') + + # Read the next key event + # /!\ It should be noted that that if OpenCV's window displaying + # the image received from the camera loses focus, then Cozmo + # will not answer your commands anymore. + key = cv.waitKeyEx(1) + + # Act accordingly + if key == ord('q'): + # Exit the program + break + + # Losing a bit of computational time to prevent sending motor + # commands on each loop even when not necessary + if key in [ord('w'), ord('s'), ord('a'), ord('d')]: + if key == ord('w'): + print('up') + # Increase the linear velocity + LIN_VELOCITY = min(pc.MAX_WHEEL_SPEED.mmps, + LIN_VELOCITY + LIN_INC) + elif key == ord('s'): + # Decrease the linear velocity + LIN_VELOCITY = max(-pc.MAX_WHEEL_SPEED.mmps, + LIN_VELOCITY - LIN_INC) + elif key == ord('a'): + # Increase the angular velocity + ANG_VELOCITY = min(pc.MAX_WHEEL_SPEED.mmps / pc.TRACK_WIDTH.mm, + ANG_VELOCITY + ANG_INC) + elif key == ord('d'): + # Decrease the angular velocity + ANG_VELOCITY = max(-pc.MAX_WHEEL_SPEED.mmps / pc.TRACK_WIDTH.mm, + ANG_VELOCITY - ANG_INC) + + # Compute the velocity of the left and right wheels + # using the inverse kinematic equations for a differential + # drive robot + l_speed = min(pc.MAX_WHEEL_SPEED.mmps, + LIN_VELOCITY - ( + pc.TRACK_WIDTH.mm * ANG_VELOCITY) / 2) + r_speed = min(pc.MAX_WHEEL_SPEED.mmps, + LIN_VELOCITY + ( + pc.TRACK_WIDTH.mm * ANG_VELOCITY) / 2) + + # Send the command to the robot + cli.drive_wheels(lwheel_speed=l_speed, + rwheel_speed=r_speed) + + # Same as above, sacrificing a bit of computational time to + # prevent sending extraneous head tilt commands + elif key in [ord('k'), ord('j')]: + if key == ord('k'): + # Increase head tilt + HEAD_TILT = min(pc.MAX_HEAD_ANGLE.radians, + HEAD_TILT + HEAD_INC) + elif key == ord('j'): + # Decrease head tilt + HEAD_TILT = max(pc.MIN_HEAD_ANGLE.radians, + HEAD_TILT - HEAD_INC) + + # Set the head angle + cli.set_head_angle(HEAD_TILT) + + # You get the idea by now + elif key in [ord('n'), ord('m')]: + if key == ord('m'): + # Increase the lift height + LIFT_HEIGHT = min(pc.MAX_LIFT_HEIGHT.mm, + LIFT_HEIGHT + LIFT_INC) + elif key == ord('n'): + # Decrease lift height + LIFT_HEIGHT = max(pc.MIN_LIFT_HEIGHT.mm, + LIFT_HEIGHT - LIFT_INC) + + # Set the height of the lift + 
cli.set_lift_height(height=LIFT_HEIGHT) + elif key == ord('l'): + # Toggle the head light + HEAD_LIGHT = not HEAD_LIGHT + # Set the head light + cli.set_head_light(enable=HEAD_LIGHT) + + else: + # Other keys have no effect, so skip the rest + continue + + print("Velocities: {:.2f} mm/s, {:.2f} deg/s, " + "Head angle: {:.2f} deg, " + "Head Light enabled: {}".format(LIN_VELOCITY, + math.degrees(ANG_VELOCITY), + math.degrees(HEAD_TILT), + HEAD_LIGHT), end='\r') + finally: + # This is to make sure that whatever happens during execution, the + # robot will always stop driving before exiting + cli.stop_all_motors() + + # Bring the lift down + cli.set_lift_height(height=pc.MIN_LIFT_HEIGHT.mm) + + # Set the head down as well + cli.set_head_angle(pc.MIN_HEAD_ANGLE.radians) + + # Close any display open by OpenCv + cv.destroyAllWindows() + + # Make sure the queue is empty, even if this has no real impact + while not IMG_QUEUE.empty(): + IMG_QUEUE.get() + IMG_QUEUE.task_done() + IMG_QUEUE.join() diff --git a/pycozmo/__init__.py b/pycozmo/__init__.py index 13bdabd..b183602 100644 --- a/pycozmo/__init__.py +++ b/pycozmo/__init__.py @@ -37,6 +37,10 @@ from . import brain from . import audiokinetic from . import expressions +from . import object_detector +from . import object_tracker +from . import object_detection_n_tracking +from . import display __version__ = "0.8.0" diff --git a/pycozmo/activity.py b/pycozmo/activity.py index a00e23a..29f3255 100644 --- a/pycozmo/activity.py +++ b/pycozmo/activity.py @@ -63,13 +63,13 @@ def init_scores(self) -> None: self.total_score = sum(self.behavior_scores) def init_repetition_penalty(self) -> None: - self.repetition_penaltys = [] + self.repetition_penalties = [] for b in self.behaviors: if 'repetitionPenalty' in b['scoring']: - self.repetition_penaltys.append( + self.repetition_penalties.append( DecayGraph([Node(x=n['x'], y=n['y']) for n in b['scoring']['repetitionPenalty']['nodes']])) else: - self.repetition_penaltys.append(DecayGraph([Node(x=1, y=0)])) + self.repetition_penalties.append(DecayGraph([Node(x=1, y=0)])) def apply_repetition_penalty(self, ref) -> None: if self.choice_type == 'Scoring': @@ -80,7 +80,7 @@ def apply_repetition_penalty(self, ref) -> None: else: raise TypeError('Invalid behavior index: {}'.format(ref)) self.behavior_repetitions[idx] += 1 - self.behavior_scores[idx] -= self.repetition_penaltys[idx].get_increment(self.behavior_repetitions[idx]) + self.behavior_scores[idx] -= self.repetition_penalties[idx].evaluate(self.behavior_repetitions[idx]) self.behavior_scores[idx] = max(0, self.behavior_scores[idx]) self.total_score = sum(self.behavior_scores) diff --git a/pycozmo/brain.py b/pycozmo/brain.py index 7cb6bbc..d30436f 100644 --- a/pycozmo/brain.py +++ b/pycozmo/brain.py @@ -154,13 +154,23 @@ def reaction_thread_run(self) -> None: continue try: + logger_reaction.info("Processing {}".format(reaction_trigger)) + self.process_emotion_event(reaction_trigger) self.process_reaction(reaction_trigger) except Exception as e: logger.error("Failed to dispatch reaction trigger '{}'. {}".format(reaction_trigger, e)) continue + def process_emotion_event(self, reaction_trigger: str) -> None: + """ Process emotion events by applying affectors to emotion types. 
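+        Each affector delta is added to the matching EmotionType, which clamps its
+        value to that type's [min, max] range.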
""" + emotion_event = self.emotion_events.get(reaction_trigger) + if emotion_event: + for affector, delta in emotion_event.affectors.items(): + self.emotion_types[affector].add(delta) + else: + logger_reaction.warning("Failed to find emotion event for {}.".format(reaction_trigger)) + def process_reaction(self, reaction_trigger: str) -> None: - logger_reaction.info("Processing {}".format(reaction_trigger)) reaction = self.reaction_trigger_beahvior_map.get(reaction_trigger) if reaction: # TODO: Handle should_resume_last. diff --git a/pycozmo/camera.py b/pycozmo/camera.py index f6f6431..cd993eb 100644 --- a/pycozmo/camera.py +++ b/pycozmo/camera.py @@ -6,12 +6,15 @@ import numpy as np +from . import util from . import protocol_encoder __all__ = [ "RESOLUTIONS", + "CameraConfig", + "minigray_to_jpeg", "minicolor_to_jpeg", ] @@ -35,6 +38,39 @@ } +class CameraConfig: + """ Robot camera fixed property representation. """ + + def __init__(self, + focal_length_x: float, + focal_length_y: float, + center_x: float, + center_y: float, + fov_x_deg: float, + fov_y_deg: float, + min_exposure_time_ms: int, + max_exposure_time_ms: int, + min_gain: float, + max_gain: float): + self.focal_length = util.Vector2(focal_length_x, focal_length_y) + self.center = util.Vector2(center_x, center_y) + self.fov_x = util.Angle(degrees=fov_x_deg) + self.fov_y = util.Angle(degrees=fov_y_deg) + self.min_exposure_time_ms = int(min_exposure_time_ms) + self.max_exposure_time_ms = int(max_exposure_time_ms) + self.min_gain = float(min_gain) + self.max_gain = float(max_gain) + + def get_camera_matrix(self) -> np.array: + """ Return 3x3 camera matrix in format, suitable for use with OpenCV. """ + camera_matrix = np.array([ + [self.focal_length.x, 0.0, self.center.x], + [0.0, self.focal_length.y, self.center.y], + [0.0, 0.0, 1.0], + ]) + return camera_matrix + + def minigray_to_jpeg(minigray, width, height): """ Converts miniGrayToJpeg format to normal JPEG format. """ header50 = np.array([ diff --git a/pycozmo/client.py b/pycozmo/client.py index 348b5a2..9b0b7a0 100644 --- a/pycozmo/client.py +++ b/pycozmo/client.py @@ -15,6 +15,7 @@ from . import logger, logger_robot, logger_animation from . import protocol_encoder +from . import protocol_utils from . import event from . import camera from . import object @@ -62,6 +63,10 @@ def __init__(self, self.serial_number = None self.body_hw_version = None self.body_color = None + # Camera parameters. + self.camera_config = None + # Saved object IDs. + self.saved_objects = [] # Robot state # Heading in X-Y plane. self.pose_frame_id = 0 @@ -112,6 +117,7 @@ def start(self) -> None: self.add_handler(protocol_encoder.ObjectAvailable, self._on_object_available) self.add_handler(protocol_encoder.ObjectConnectionState, self._on_object_connection_state) self.add_handler(protocol_encoder.DebugData, self._on_debug_data) + self.add_handler(protocol_encoder.NvStorageOpResult, self._on_nv_storage_op_result) self.add_handler(event.EvtRobotPickedUpChange, self._on_robot_picked_up) self.add_handler(event.EvtRobotWheelsMovingChange, self._on_robot_moving) self.conn.start() @@ -137,6 +143,10 @@ def _enable_robot(self): self.conn.send(pkt) # This repetition seems to trigger BodyInfo def _initialize_robot(self): + # Get camera configuration + pkt = protocol_encoder.NvStorageOp( + tag=protocol_encoder.NvEntryTag.NVEntry_CameraCalib, length=1, op=protocol_encoder.NvOperation.NVOP_READ) + self.conn.send(pkt) # Set world frame origin to (0,0,0), frame ID to 0, and origin ID to 1. 
pkt = protocol_encoder.SetOrigin() self.conn.send(pkt) @@ -178,12 +188,6 @@ def _on_body_info(self, cli, pkt: protocol_encoder.BodyInfo): self._initialize_robot() self.dispatch(event.EvtRobotFound, self) - def wait_for(self, evt, timeout: Optional[float] = None): - e = Event() - self.add_handler(evt, lambda cli: e.set(), one_shot=True) - if not e.wait(timeout): - raise exception.Timeout("Timeout waiting for event {}".format(evt)) - def wait_for_robot(self, timeout: float = 5.0) -> None: if not self.robot_fw_sig: try: @@ -361,6 +365,30 @@ def _on_debug_data(self, cli, pkt: protocol_encoder.DebugData): msg = robot_debug.get_debug_message(pkt.name_id, pkt.format_id, pkt.args) logger_robot.log(robot_debug.get_log_level(pkt.level), msg) + def _on_nv_storage_op_result(self, cli, pkt: protocol_encoder.NvStorageOpResult): + print(pkt) + if pkt.op == protocol_encoder.NvOperation.NVOP_READ: + if pkt.result == protocol_encoder.NvResult.NV_OKAY: + if pkt.tag == protocol_encoder.NvEntryTag.NVEntry_CameraCalib and len(pkt.data) == 56: + values = protocol_utils.BinaryReader(pkt.data).read_farray("f", 14) + self.camera_config = camera.CameraConfig( + values[0], values[1], + values[2], values[3], + 57.82, 45.0, 1, 67, 0.1, 3.984375) + # Get saved cube IDs + pkt = protocol_encoder.NvStorageOp( + tag=protocol_encoder.NvEntryTag.NVEntry_SavedCubeIDs, length=28, + op=protocol_encoder.NvOperation.NVOP_READ) + self.conn.send(pkt) + elif pkt.tag == protocol_encoder.NvEntryTag.NVEntry_SavedCubeIDs and len(pkt.data) == 28: + values = protocol_utils.BinaryReader(pkt.data).read_farray("L", 7) + self.saved_objects = [value for value in values[-3:] if value] + print(self.saved_objects) + for i in self.saved_objects: + print("0x{:08x}".format(i)) + # Remove handler. + self.del_handler(protocol_encoder.NvStorageOpResult, self._on_nv_storage_op_result) + def set_head_angle(self, angle: float, accel: float = 10.0, max_speed: float = 10.0, duration: float = 0.0): pkt = protocol_encoder.SetHeadAngle(angle_rad=angle, accel_rad_per_sec2=accel, diff --git a/pycozmo/display.py b/pycozmo/display.py new file mode 100644 index 0000000..36caac0 --- /dev/null +++ b/pycozmo/display.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +import cv2 as cv + + +class Display(object): + """ + This is just a wrapper around the imshow() method from the OpenCV library to make things more convenient. + """ + + def __init__(self, win_name): + """ + Initialize the parameters required for displaying images. + :param win_name: String. The name of the window in which the video stream will be displayed. + """ + + # Call the constructor for the parent class + super(Display, self).__init__() + + # Store the name of the window + self._win_name = win_name + + # Initialize a display thread + cv.startWindowThread() + # And a named window inside it + cv.namedWindow(self._win_name) + + def step(self, frame): + """ + Take the frame give in parameter and display it in the window. + :param frame: Numpy.ndarray. A numpy array representing the next frame in the video stream. + :return: None + """ + + # Do what it says on the tin + cv.imshow(self._win_name, frame) + + def stop(self): + """ + Destroy the named window created upon initialization. 
+ :return: None + """ + + # Do what it says on the tin + cv.destroyWindow(self._win_name) diff --git a/pycozmo/emotions.py b/pycozmo/emotions.py index c02ba95..aefbfe3 100644 --- a/pycozmo/emotions.py +++ b/pycozmo/emotions.py @@ -6,7 +6,7 @@ import os import time -from typing import Dict, List, Tuple +from typing import Dict, List import numpy as np @@ -33,31 +33,15 @@ class DecayGraph: __slots__ = [ "nodes_x", "nodes_y", - "ext_line_params", ] def __init__(self, nodes: List[Node]) -> None: self.nodes_x = [node.x for node in nodes] self.nodes_y = [node.y for node in nodes] - self.ext_line_params = self.get_line_parameters(nodes[-2], nodes[-1]) if len(nodes) > 1 else None - def get_increment(self, val) -> float: - if self.ext_line_params is None: - f_out = self.nodes_y[0] - elif val <= self.nodes_x[-1]: - f_out = np.interp(val, self.nodes_x, self.nodes_y) - else: - f_out = self.ext_line_params[0] * val + self.ext_line_params[1] - return f_out - - @staticmethod - def get_line_parameters(p1: Node, p2: Node) -> Tuple[float]: - try: - m = (p1.y - p2.y) / (p1.x - p2.x) - b = p1.y - m * p1.x - except ZeroDivisionError: - m, b = 0, p1.y - return m, b + def evaluate(self, x: float) -> float: + y = np.interp(x, self.nodes_x, self.nodes_y) + return y class EmotionType: @@ -66,18 +50,46 @@ class EmotionType: __slots__ = [ "name", "decay_graph", - "repetition_penalty" + "repetition_penalty", + "min", + "max", + "last_change", + "last_value", + "value", ] - def __init__(self, name: str, decay_graph: DecayGraph, repetition_penaly: DecayGraph) -> None: + def __init__( + self, name: str, + decay_graph: DecayGraph, + repetition_penalty: DecayGraph, + default_value: float = 0.0, + min_value: float = -1.0, + max_value: float = 1.0) -> None: self.name = str(name) self.decay_graph = decay_graph - self.repetition_penalty = repetition_penaly - - def update(self): + self.repetition_penalty = repetition_penalty + self.min = float(min_value) + self.max = float(max_value) + self.last_change = time.perf_counter() + self.last_value = float(default_value) + self.value = self.last_value + + def set(self, value: float) -> None: + self.last_change = time.perf_counter() + self.last_value = np.clip(value, self.min, self.max) + self.value = self.last_value + + def add(self, delta: float) -> None: + self.last_change = time.perf_counter() + self.last_value = np.clip(self.last_value + delta, self.min, self.max) + self.value = self.last_value + + def update(self) -> None: """ Update from decay function. """ - # TODO - pass + now = time.perf_counter() + x = now - self.last_change + y = self.decay_graph.evaluate(x) + self.value = np.clip(self.last_value * y, self.min, self.max) class EmotionEvent: diff --git a/pycozmo/object_detection_n_tracking.py b/pycozmo/object_detection_n_tracking.py new file mode 100644 index 0000000..4b0daa6 --- /dev/null +++ b/pycozmo/object_detection_n_tracking.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from os import path +from pycozmo.object_tracker import ObjectTracker +from pycozmo.object_detector import ObjectDetector + + +# TODO: Replace this monstrosity with util.get_cozmo_asset_dir() +# or util.get_cozmo_dir(), depending on where the weights +# and such configuration should live. +BASE_DIR = path.abspath(path.dirname(__file__)) + + +class ObjectDetectionNTracking(object): + """ + A simple class that uses both an ObjectDetector to detect objects in a frame and multiple ObjectTrackers to track + detected objects across frames. 
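+    A minimal usage sketch (assuming ``frame`` is a BGR ``numpy.ndarray`` and that
+    ``TrackType`` and ``ObjCat`` are imported from ``pycozmo.object_tracker`` and
+    ``pycozmo.object_detector``):
+
+        odt = ObjectDetectionNTracking(TrackType.MOSSE, skip_frames=5, obj_cats=[ObjCat.HEAD])
+        odt.process_frame(frame)                    # hand a frame to the detector/tracker pair
+        annotated = odt.get_next_frame(block=True)  # frame with bounding boxes drawn on it
+        odt.stop()                                  # join both worker threads when done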
+ """ + + def __init__(self, tracker_type, skip_frames, obj_cats, conf_thres=0.5, nms_thres=0.3, + conf_decay_rate=0.998, img_w=704, img_h=704, + classes_name_file=path.join(BASE_DIR, 'trackerCfg', 'openimages.names'), + model_conf_file=path.join(BASE_DIR, 'trackerCfg', 'yolo.cfg'), + model_weight_file=path.join(BASE_DIR, 'trackerCfg', 'yolo.weights')): + """ + Initialize the different attributes required to detect and track objects in pictures. + :param tracker_type: TrackType. The type of tracker to use. Some are more precise than others, but take more + CPU/GPU power to run. + :param skip_frames: Int. The number of frames to skip in-between two detection event. Since running YoloV4 might + take quite a lot of GPU power it can be interesting for low-spec computers to only run the detection algorithm + every so often. + :param obj_cats: List. A list of categories of objects to detect and track in every picture. + :param conf_thres: Float. The confidence threshold above which an object is considered as detected. + :param nms_thres: Float. The non-maximum threshold which determines how dissimilar two objects should be to be + considered as separate. + :param conf_decay_rate: Float. The rate at which the confidence associated with each object decays. This prevent + the tracker from drifting too much by virtually making objects disappear and forcing the detector to find the + object again. + :param img_w: Int. The width of the frame passed to the detector network. This size should be a multiple of 32. + :param img_h: Int. The height of the frame passed to the detector network. This size should be a multiple of 32. + :param classes_name_file: String. The relative path to the text file containing the name of the categories of + object to be detected. + :param model_conf_file: String. The relative path to the file containing the configuration of the YoloV4 network + to be used as the object detector. + :param model_weight_file: String. The relative path to the file containing the pre-trained weights for the + detector network. + """ + + # Initialize the parent class + super(ObjectDetectionNTracking, self).__init__() + + # How many frames to skip between detection + assert skip_frames > 0, "The number of frames skipped between " \ + "detection event should be greater than or " \ + "equal to 1." + self._skip_frames = skip_frames + self._frame_cpt = 0 + + # Get an object tracker + self._tracker = ObjectTracker(tracker_type, conf_thres, nms_thres, conf_decay_rate) + + # Get an object detector + self._detector = ObjectDetector(obj_cats, conf_thres, nms_thres, img_w, img_h, classes_name_file, + model_conf_file, model_weight_file) + + # Add the tracker's update_trakers() function as a listener to the detector + self._detector.add_listener(self._tracker.update_trackers) + + # Start both the tracker and detector + self._tracker.start() + self._detector.start() + + def process_frame(self, frame): + """ + Capture next frame from video device and hand it to both the ObjectTracker, and ObjectDetector + (when appropriate). + :param frame: numpy.ndarray. An array representing the frame to be processed by both the detector and tracker. 
+ :return: None + """ + + # Pass the new frame to the tracker + self._tracker.track_objects(frame) + + # On the very first frame, or if no object is currently tracked, + # or on the nth frame + # /!\ Never reinitialize the number of skipped frames + if self._frame_cpt == 0 or self._tracker.nb_tracked_objects == 0 or \ + self._frame_cpt % self._skip_frames == 0: + # Pass the new frame to the detector + self._detector.detect_objects(frame) + + # Increase the number of frames processed + self._frame_cpt += 1 + + def get_next_frame(self, block=False, timeout=None): + """ + Get the next processed from from the ObjectTracker directly. + This method is not really required and adds layers to an already high cake, + but it hides away the 'complexities' of the implementation. + :param block: Boolean. If true the call to Queue.get() makes the program pause until a frame + is available. Otherwise, a frame is returned only if it is present at the right time. + If no frames are available and block is False, this method raises an Empty exception. + :param timeout: Float. If not None and block is True, the call to Queue.get() will wait timeout seconds for an + element to appear in the queue. + :return: None + """ + # Return next processed frame from the tracker + return self._tracker.get_next_frame(block=block, timeout=timeout) + + def stop(self): + """ + Cleanup before destroying the instance. + :return: None + """ + + # Ask the tracker to stop + self._tracker.stop() + self._tracker.join() + + # Ask the detector to stop + self._detector.stop() + self._detector.join() diff --git a/pycozmo/object_detector.py b/pycozmo/object_detector.py new file mode 100644 index 0000000..47ab6fd --- /dev/null +++ b/pycozmo/object_detector.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from os import path +from enum import IntEnum +from threading import Thread +from collections import deque +import numpy as np +import cv2 as cv + + +# TODO: Replace this monstrosity with util.get_cozmo_asset_dir() +# or util.get_cozmo_dir(), depending on where the weights +# and such configuration should live. +BASE_DIR = path.abspath(path.dirname(__file__)) + + +class ObjCat(IntEnum): + HAND = 0 + HEAD = 1 + + +class ObjectDetector(Thread): + """ + A class that uses a Yolo V4 neural network to detect objects of different categories in a frame. + """ + + def __init__(self, obj_cats, conf_thres=0.5, nms_thres=0.3, img_w=704, img_h=704, + classes_name_file=path.join(BASE_DIR, 'trackerCfg', 'openimages.names'), + model_conf_file=path.join(BASE_DIR, 'trackerCfg', 'yolo.cfg'), + model_weight_file=path.join(BASE_DIR, 'trackerCfg', 'yolo.weights')): + """ + Initialize the different attributes required to detect objects in pictures. + :param obj_cats: List. A list of categories of objects to detect and track in every picture. + :param conf_thres: Float. The confidence threshold above which an object is considered as detected. + :param nms_thres: Float. The non-maximum threshold which determines how dissimilar two objects should be to be + considered as separate. + :param img_w: Int. The width of the frame passed to the detector network. This size should be a multiple of 32. + :param img_h: Int. The height of the frame passed to the detector network. This size should be a multiple of 32. + :param classes_name_file: String. The relative path to the text file containing the name of the categories of + object to be detected. + :param model_conf_file: String. 
The relative path to the file containing the configuration of the YoloV4 network + to be used as the object detector. + :param model_weight_file: String. The relative path to the file containing the pre-trained weights for the + detector network. + """ + + # Call the constructor for the parent class + super(ObjectDetector, self).__init__() + + # Classes and categories management + assert hasattr(obj_cats, '__getitem__') + self._tracked_cats = obj_cats + self._avail_classes = [] + with open(classes_name_file, 'rt') as f: + self._avail_classes = f.read().rstrip('\n').split('\n') + + # Yolo parameters + self._conf_thres = conf_thres + self._nms_thres = nms_thres + self._img_size = (img_w, img_h) + + # Instantiate a yolo-based object detection network + self._dnn = cv.dnn.readNetFromDarknet(model_conf_file, + model_weight_file) + self._dnn.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA) + self._dnn.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA) + # Get the name of the output layers + self._out_layers_name = [self._dnn.getLayerNames()[i[0] - 1] for i in + self._dnn.getUnconnectedOutLayers()] + + # Initialize a deque in which the next frame to be processed will be put + # We limit the size of the deque to 1 since only the most recent frame is of interest to us + self._in_queue = deque(maxlen=1) + + # Initialize a list of callable observers to notify when detecting objects + self._observers = [] + + # Define a flag telling the thread when to stop + self._continue = True + + def add_listener(self, func): + """ + Add a function to the list of observers for the availability of new bounding boxes. + :param func: Callable. A callable that takes a dictionary of bounding boxes, and a frame + as parameters. + :return: None + """ + + # Make sure the given parameter is a callable + if callable(func): + # Add the listener to the list + self._observers.append(func) + else: + print('The parameter {} provided should be a callable.'.format(func)) + + def remove_listener(self, func): + """ + Remove a function from the list of observers. + :param func: Callable. A function previously added to the list of observers. + :return: None + """ + + try: + # Remove the corresponding listener from the list of observers + self._observers.remove(func) + except ValueError: + print('{} was not found in the list of observers, so not removed.'.format(func)) + + def clear_listeners(self): + """ + Remove all the observers. + :return: None + """ + + # Assign a new empty list to the observers attribute + self._observers = [] + + def run(self): + while self._continue: + try: + # Get the next frame + frame = self._in_queue.pop() + except IndexError: + continue + + # Detect objects and extract corresponding bboxes + outs = self._detect(frame) + frame_h, frame_w = frame.shape[0:2] + bboxes = self._extract_bboxes(outs, frame_w, frame_h) + + # Make sure the thread was not killed during processing + if self._continue and len(bboxes) != 0: + # Notify the listeners of the new bounding boxes + for obs in self._observers: + obs(bboxes, frame) + + def stop(self): + """ + Cleanup before destroying the instance. + :return: None + """ + + # Ask the thread to stop + self._continue = False + + # Empty the input queue + print('D: Emptying input queue ...') + self._in_queue.clear() + print('D: Done.') + + def detect_objects(self, frame): + """ + A simple wrapper around the append() method of the detector's input deque. + Therefore, it just adds the frame given in parameter to the deque. + :param frame: Numpy.ndarray. 
An array representing a frame from the video stream, in which + objects are to be detected. + :return: None + """ + + # Add the given frame to the input queue so that it is processed during the next cycle + self._in_queue.append(frame) + + def _detect(self, frame): + # Create a blob from the image + blob = cv.dnn.blobFromImage(frame, 1 / 255, (self._img_size[0], + self._img_size[1]), + [0, 0, 0], 1, crop=False) + + # Set the blob as the network's input + self._dnn.setInput(blob) + + # Run the forward pass to get a prediction + return self._dnn.forward(self._out_layers_name) + + def _extract_bboxes(self, outs, frame_w, frame_h): + # TODO: Add comment to describe what you are doing here + outs = np.vstack(outs) + confidences = np.max(outs[:, 5:], axis=1) + outs = outs[np.flatnonzero(confidences > self._conf_thres), :] + if outs.size == 0: + return {} + + # Compute the centers and the size of the bounding boxes + # center_x, center_y, width, height all relative to frame size + outs[:, 0:4] *= np.array([frame_w, frame_h, frame_w, frame_h]) + center = outs[:, 0:2] + size = outs[:, 2:4] + left_top = center - size/2 + boxes = np.hstack((left_top, size)).astype(np.intp).tolist() + + # Non Maximum suppression + confidences = np.max(outs[:, 5:], axis=-1) + indices = cv.dnn.NMSBoxes(boxes, confidences, self._conf_thres, + self._nms_thres) + bboxes = {} + class_ids = np.argmax(outs[:, 5:], axis=-1) + for i in indices.flatten(): + cat = ObjCat(class_ids[i]) + if cat in self._tracked_cats: + bboxes.setdefault(cat, []).append((boxes[i], confidences[i])) + return bboxes diff --git a/pycozmo/object_tracker.py b/pycozmo/object_tracker.py new file mode 100644 index 0000000..0a4c8e1 --- /dev/null +++ b/pycozmo/object_tracker.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +from queue import Queue, Empty +from threading import Thread +from enum import IntEnum +from itertools import chain +from random import randint +import cv2 as cv + + +class TrackType(IntEnum): + CSRT = 0 + KCF = 1 + MOSSE = 2 + + +class ObjectTracker(Thread): + """ + A class that uses multiple trackers to follow detected objects across frames. + """ + + def __init__(self, tracker_type, conf_thres=0.5, nms_thres=0.3, conf_decay_rate=0.998): + """ + Initialize the different attributes required to track objects in pictures. + :param tracker_type: TrackType. The type of tracker to use. Some are more precise than others, but take more + CPU/GPU power to run. + :param conf_thres: Float. The confidence threshold above which an object is considered as detected. + :param nms_thres: Float. The non-maximum threshold which determines how dissimilar two objects should be to be + considered as separate. + :param conf_decay_rate: Float. The rate at which the confidence associated with each object decays. This prevent + the tracker from drifting too much by virtually making objects disappear and forcing the detector to find the + object again. 
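+            For example, with conf_decay_rate=0.998 a detection that starts at a confidence
+            of 0.9 falls below a 0.5 threshold after roughly 294 tracked frames, since
+            0.9 * 0.998**294 is just under 0.5.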
+ """ + + # Initialize the parent class + super(ObjectTracker, self).__init__() + + # Tracker, colors, and bonding box management + self._tracker_type = tracker_type + self._trackers = {} + self._colors = {} + self._bboxes = {} + self._confidences = {} + self._next_id = 0 + + # Other miscellaneous parameters + self._conf_decay_rate = conf_decay_rate + self._conf_thres = conf_thres + self._nms_thres = nms_thres + + # Initialize a queue in which the next frame to be processed will be put + self._in_queue = Queue() + + # Initialize a queue in which processed images will be put + self._out_queue = Queue() + + # Initialize a flag telling the thread when to stop + self._continue = True + + def run(self): + while self._continue: + # Get the next frame from the input queue + try: + frame = self._in_queue.get(block=False) + + # Indicate to the input queue that the task has been done + self._in_queue.task_done() + except Empty: + continue + + # Update the bounding boxes of all tracked objects + dead = self._update(frame) + # Remove dead objects + if len(dead) != 0: + self._remove_trackers(dead) + + # Draw the bounding boxes + self._draw_bounding_boxes(frame) + + # Make sure the thread was not killed during processing + if self._continue: + # Put the processed frame in the output queue + self._out_queue.put(frame) + + def stop(self): + """ + Cleanup before destroying the instance. + :return: None + """ + + # Tell the thread to stop + self._continue = False + + # Empty the input queue + print('T: Emptying input queue ...') + while not self._in_queue.empty(): + self._in_queue.get() + self._in_queue.task_done() + # Stop the input queue + self._in_queue.join() + print('T: Done.') + + # Empty the output queue + print('T: Emptying output queue ...') + while not self._out_queue.empty(): + self._out_queue.get() + self._out_queue.task_done() + # Stop the output queue + self._out_queue.join() + print('T: Done.') + + def track_objects(self, frame): + """ + This is a simple wrapper around a Queue.put() method for the input queue of the ObjectTracker. + It is only here to hide some of the complexities of the implementation. + The only thing done here is to put the frame given in parameter in the tracker's input queue. + :param frame: Numpy.ndarray. An array representing the next frame in the video stream. + :return: None + """ + + # Add the given frame to the input queue to be processed during the next cycle + self._in_queue.put(frame) + + def get_next_frame(self, block=False, timeout=None): + """ + Get the next processed frame from the output queue of the tracker. + :param block: Boolean. If true the call to Queue.get() makes the program pause until a frame + is available. Otherwise, a frame is returned only if it is present at the right time. + If no frames are available and block is False, this method raises an Empty exception. + :param timeout: Float. If not None and block is True, the call to Queue.get() will wait timeout seconds for an + element to appear in the queue. + :return: Numpy.ndarray + """ + + # Get the processed frame from the output queue + frame = self._out_queue.get(block=block) + # Indicate to the output queue that the task is done + self._out_queue.task_done() + + # Return the processed frame + return frame + + def update_trackers(self, bboxes, frame): + """ + Given the bounding boxes and the frame in parameter update the list of trackers. + Adding new ones for new objects. No tracker is being removed here this is actually done in the run() method. + :param bboxes: dict. 
A dictionary whose keys are the categories of the objects detected, and the values are + lists of coordinates representing the top-left corner of the bounding box, as well as its size. + :param frame: Numpy.ndarray. An array corresponding to the frame in which the objects represented by the + bounding boxes where detected. This is necessary for the trackers to initialize correctly. + :return: None + """ + + if len(bboxes) != 0: + # Add previously untracked objects + for bbox, conf in chain(*bboxes.values()): + # To check that the object is not already tracked, + # we compute the IOU. If the iou is over a given + # threshold, the two boxes are considered to be linked to + # the same object. This is similar to the NMS algorithm, + # but without taking the confidences into account. + for tracked_bbox in self._bboxes.values(): + # Compute the area of the intersection between the + # new object and the tracked object + inter_area = max(0, min(bbox[0] + bbox[2], + tracked_bbox[0] + tracked_bbox[2]) - max(bbox[0], tracked_bbox[0])) * \ + max(0, min(bbox[1] + bbox[3], + tracked_bbox[1] + tracked_bbox[3]) - max(bbox[1], tracked_bbox[1])) + + # Compute the area of the new object + bbox_area = bbox[2] * bbox[3] + # Compute the area of the tracked object + tracked_bbox_area = tracked_bbox[2] * tracked_bbox[3] + + # Finally compute the iou + iou = inter_area / (bbox_area + tracked_bbox_area - inter_area) + if iou > self._nms_thres: + break + else: + # We did not find any similar tracked object, + # so this is a new one that should be tracked + self._add_trackers(frame, bbox, conf) + + def _draw_bounding_boxes(self, frame): + # Draw the bounding boxes of all tracked objects + for obj_id, bbox in self._bboxes.items(): + cv.rectangle(frame, (int(bbox[0]), int(bbox[1])), + (int(bbox[0]+bbox[2]), int(bbox[1]+bbox[3])), + self._colors[obj_id], thickness=2) + + def _add_trackers(self, frame, bbox, conf): + # Instantiate and initialize a new tracker for each detected + # object + tracker = self._get_tracker() + # The tuple conversion is required for python < 3.7 + tracker.init(frame, tuple(bbox)) + + # Get a unique random color + color = (randint(0, 255), randint(0, 255), randint(0, 255)) + while color in self._colors.values(): + color = (randint(0, 255), randint(0, 255), randint(0, 255)) + + # Store everything related to the object into the + # corresponding structure + self._bboxes[self._next_id] = bbox + self._trackers[self._next_id] = tracker + self._colors[self._next_id] = color + self._confidences[self._next_id] = conf + + # Increase the number of objects tracked so far + self._next_id += 1 + + def _remove_trackers(self, obj_ids): + # Remove everything related to each object in the list + for obj_id in obj_ids: + self._trackers.pop(obj_id) + self._colors.pop(obj_id) + self._bboxes.pop(obj_id) + self._confidences.pop(obj_id) + + def _update(self, frame): + # Declare a list of all the trackers that failed + dead = [] + + # To prevent modifications from happening during processing + tmp = list(self._trackers.items()) + + # Update the bounding boxes of all tracked objects + for obj_id, tracker in tmp: + # By default remove any object whose confidence is not above + # threshold anymore + if self._confidences[obj_id] < self._conf_thres: + dead.append(obj_id) + else: + # Slowly decay the confidence associated with the object + self._confidences[obj_id] *= self._conf_decay_rate + # Update the tracked object's bounding box + ok, bbox = tracker.update(frame) + if not ok: + # The tracker has failed + 
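+                    # on this frame, so mark its object for removal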
dead.append(obj_id) + else: + self._bboxes[obj_id] = bbox + + # Return the list of failed trackers + return dead + + def _get_tracker(self): + if self._tracker_type == TrackType.MOSSE: + return cv.legacy.TrackerMOSSE_create() + if self._tracker_type == TrackType.KCF: + return cv.TrackerKCF_create() + if self._tracker_type == TrackType.CSRT: + return cv.TrackerCSRT_create() + + @property + def bboxes(self): + return self._bboxes + + @property + def confidences(self): + return self._confidences + + @property + def colors(self): + return self._colors + + @property + def nb_tracked_objects(self): + return len(self._trackers) diff --git a/pycozmo/tests/test_decay_graph.py b/pycozmo/tests/test_decay_graph.py index f6a2677..64a786d 100644 --- a/pycozmo/tests/test_decay_graph.py +++ b/pycozmo/tests/test_decay_graph.py @@ -11,15 +11,15 @@ def setUp(self): Node(x=0, y=1), Node(x=10, y=1), Node(x=60, y=0.6), - Node(x=100, y=0.2), + Node(x=100, y=0), ]) def test_increment_calculation(self): # negative values - self.assertAlmostEqual(self.graph.get_increment(-1), 1.0) + self.assertAlmostEqual(self.graph.evaluate(-1), 1.0) # value between nodes - self.assertAlmostEqual(self.graph.get_increment(40), 0.76) + self.assertAlmostEqual(self.graph.evaluate(40), 0.76) # value matches node - self.assertAlmostEqual(self.graph.get_increment(60), 0.6) + self.assertAlmostEqual(self.graph.evaluate(60), 0.6) # value is higher than last node - self.assertAlmostEqual(self.graph.get_increment(200), -0.8) + self.assertAlmostEqual(self.graph.evaluate(200), 0.0) diff --git a/pycozmo/trackerCfg/openimages.names b/pycozmo/trackerCfg/openimages.names new file mode 100644 index 0000000..a479450 --- /dev/null +++ b/pycozmo/trackerCfg/openimages.names @@ -0,0 +1,2 @@ +Human hand +Human head diff --git a/pycozmo/trackerCfg/yolo.cfg b/pycozmo/trackerCfg/yolo.cfg new file mode 100644 index 0000000..e21cd81 --- /dev/null +++ b/pycozmo/trackerCfg/yolo.cfg @@ -0,0 +1,283 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +#batch=64 +#subdivisions=16 +width=704 +height=704 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.00261 +burn_in=1000 +# max_batches = nb_classes * 2000 +max_batches = 4000 +policy=steps +# steps = 0.8 * max_batches, 0.9 * max_batches +steps=3200,3600 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +################################## + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=21 +activation=linear + + + +[yolo] +mask = 3,4,5 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=2 +num=6 +jitter=.3 +scale_x_y = 1.05 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +ignore_thresh = .7 +truth_thresh = 1 +random=1 +resize=1.5 +nms_kind=greedynms +beta_nms=0.6 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 23 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=21 +activation=linear + +[yolo] +mask = 0,1,2 +anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 +classes=2 +num=6 +jitter=.3 +scale_x_y = 1.05 +cls_normalizer=1.0 +iou_normalizer=0.07 +iou_loss=ciou +ignore_thresh = .7 +truth_thresh = 1 +random=1 +resize=1.5 +nms_kind=greedynms +beta_nms=0.6 diff --git a/pycozmo/trackerCfg/yolo.weights b/pycozmo/trackerCfg/yolo.weights new file mode 100644 index 0000000..90b8617 Binary files /dev/null and b/pycozmo/trackerCfg/yolo.weights differ diff --git a/pycozmo/util.py b/pycozmo/util.py index c51da22..b545a0e 100644 --- a/pycozmo/util.py +++ b/pycozmo/util.py @@ -17,6 +17,7 @@ 'Angle', 'Distance', 'Speed', + 'Vector2', 'Vector3', 'angle_z_to_quaternion', 'Matrix44', @@ -223,6 +224,71 @@ def mmps(self) -> float: return self._mmps +class Vector2: + """ + Represents a 2D Vector (type/units aren't specified) + + Args: + x (float): X component + y (float): Y component + """ + + __slots__ = ('_x', '_y') + + def __init__(self, x: float, y: float): + self._x = x + self._y = y + + def set_to(self, rhs) -> None: + """ + Copy the x and y components of the given vector. + + Args: + rhs (:class:`Vector2`): The right-hand-side of this assignment - the + source vector to copy into this vector. + """ + self._x = rhs.x + self._y = rhs.y + + @property + def x(self) -> float: + """ The x component. """ + return self._x + + @property + def y(self) -> float: + """ The y component. """ + return self._y + + @property + def x_y(self) -> Tuple[float, float]: + """ The X, Y elements of the Vector2 (x,y). 
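+        For example, Vector2(1.0, 2.0).x_y returns (1.0, 2.0).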
""" + return self._x, self._y + + def __repr__(self): + return "<%s x: %.2f y: %.2f>" % (self.__class__.__name__, self.x, self.y) + + def __add__(self, other): + if not isinstance(other, Vector2): + raise TypeError("Unsupported operand for + expected Vector2") + return Vector2(self.x + other.x, self.y + other.y) + + def __sub__(self, other): + if not isinstance(other, Vector2): + raise TypeError("Unsupported operand for - expected Vector2") + return Vector2(self.x - other.x, self.y - other.y) + + def __mul__(self, other): + if not isinstance(other, (int, float)): + raise TypeError("Unsupported operand for * expected number") + return Vector2(self.x * other, self.y * other) + + def __truediv__(self, other): + if not isinstance(other, (int, float)): + raise TypeError("Unsupported operand for / expected number") + return Vector2(self.x / other, self.y / other) + + class Vector3: """ Represents a 3D Vector (type/units aren't specified). diff --git a/requirements.txt b/requirements.txt index 99babc3..4bd7c92 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ dpkt numpy Pillow>=6.0.0 flatbuffers +opencv-python>=4.0.0 diff --git a/setup.py b/setup.py index ab77ecf..44b9787 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ def get_readme(): author_email="zayfod@gmail.com", url="https://github.com/zayfod/pycozmo/", python_requires=">=3.6.0", - install_requires=["dpkt", "numpy", "Pillow>=6.0.0", "flatbuffers"], + install_requires=["dpkt", "numpy", "Pillow>=6.0.0", "flatbuffers", "opencv-python>=4.0.0"], keywords=["ddl", "anki", "cozmo", "robot", "robotics"], classifiers=[ "Development Status :: 4 - Beta",