diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 0d7cbb05..ef8ad797 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -2,9 +2,6 @@ { "name": "Existing Dockerfile", "image": "ghcr.io/tritonuas/obcpp:main", - // "build": { - // "dockerfile": "Dockerfile" - // }, "customizations": { "vscode": { diff --git a/.gitignore b/.gitignore index 30588b2f..572b559b 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,12 @@ _deps/ Testing/ imgs/ +models/* +!models/.gitkeep + +tests/integration/images/* +!tests/integration/images/.gitkeep + .vscode/* !.vscode/c_cpp_properties.json diff --git a/CMakeLists.txt b/CMakeLists.txt index f96a8574..6edc0223 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,11 +117,16 @@ target_add_mavsdk(playground) target_add_matplot(playground) target_add_protobuf(playground) target_add_loguru(playground) +# for some reason calling target_add_imagemagick here conflicts with, so we are including/linking without the function call +# target_add_imagemagick(path_plotting) +target_include_directories(playground PRIVATE ${ImageMagick_INCLUDE_DIRS}) +target_link_libraries(playground PRIVATE -Wl,--copy-dt-needed-entries ${ImageMagick_LIBRARIES}) # load_torchvision_model -add_executable(load_torchvision_model tests/integration/load_torchvision_model.cpp) +add_executable(load_torchvision_model "tests/integration/load_torchvision_model.cpp") target_add_torch(load_torchvision_model) target_add_torchvision(load_torchvision_model) +target_add_loguru(load_torchvision_model) # mavlink-client add_executable(mavlink_client ${SOURCES} "tests/integration/mavlink_client.cpp") @@ -135,6 +140,58 @@ target_add_httplib(mavlink_client) target_add_mavsdk(mavlink_client) target_add_matplot(mavlink_client) target_add_loguru(mavlink_client) +# for some reason calling target_add_imagemagick here conflicts with, so we are including/linking without the function call +# target_add_imagemagick(path_plotting) +target_include_directories(mavlink_client PRIVATE ${ImageMagick_INCLUDE_DIRS}) +target_link_libraries(mavlink_client PRIVATE -Wl,--copy-dt-needed-entries ${ImageMagick_LIBRARIES}) + +# cv_pipeline +add_executable(cv_pipeline ${SOURCES} "tests/integration/cv_pipeline.cpp") +target_add_json(cv_pipeline) +target_add_matplot(cv_pipeline) +target_add_opencv(cv_pipeline) +target_add_loguru(cv_pipeline) +target_add_httplib(cv_pipeline) +target_add_protobuf(cv_pipeline) +target_add_torch(cv_pipeline) +target_add_torchvision(cv_pipeline) +target_add_mavsdk(cv_pipeline) +target_add_loguru(cv_pipeline) +# for some reason calling target_add_imagemagick here conflicts with, so we are including/linking without the function call +# target_add_imagemagick(path_plotting) +target_include_directories(cv_pipeline PRIVATE ${ImageMagick_INCLUDE_DIRS}) +target_link_libraries(cv_pipeline PRIVATE -Wl,--copy-dt-needed-entries ${ImageMagick_LIBRARIES}) + +# cv_matching +add_executable(cv_matching ${SOURCES} "tests/integration/cv_matching.cpp") +target_add_json(cv_matching) +target_add_matplot(cv_matching) +target_add_opencv(cv_matching) +target_add_torch(cv_matching) +target_add_torchvision(cv_matching) +target_add_loguru(cv_matching) +target_add_httplib(cv_matching) +target_add_mavsdk(cv_matching) +# for some reason calling target_add_imagemagick here conflicts with, so we are including/linking without the function call +# target_add_imagemagick(path_plotting) +target_include_directories(cv_matching PRIVATE ${ImageMagick_INCLUDE_DIRS}) 
+target_link_libraries(cv_matching PRIVATE -Wl,--copy-dt-needed-entries ${ImageMagick_LIBRARIES}) + + +# cv_segmentation +add_executable(cv_segmentation ${SOURCES} "tests/integration/cv_segmentation.cpp") +target_add_json(cv_segmentation) +target_add_matplot(cv_segmentation) +target_add_opencv(cv_segmentation) +target_add_torch(cv_segmentation) +target_add_torchvision(cv_segmentation) +target_add_loguru(cv_segmentation) +target_add_httplib(cv_segmentation) +target_add_mavsdk(cv_segmentation) +# for some reason calling target_add_imagemagick here conflicts with, so we are including/linking without the function call +# target_add_imagemagick(path_plotting) +target_include_directories(cv_segmentation PRIVATE ${ImageMagick_INCLUDE_DIRS}) +target_link_libraries(cv_segmentation PRIVATE -Wl,--copy-dt-needed-entries ${ImageMagick_LIBRARIES}) # path_plotting add_executable(path_plotting ${SOURCES} tests/integration/path_plotting.cpp) @@ -154,6 +211,15 @@ target_link_libraries(path_plotting PRIVATE -Wl,--copy-dt-needed-entries ${Image # airdrop_sockets add_executable(airdrop_sockets src/network/airdrop_sockets.c tests/integration/airdrop_sockets.c) target_include_directories(airdrop_sockets PRIVATE ${INCLUDE_DIRECTORY}) + +add_executable(camera_playground tests/integration/camera_playground.cpp) +target_include_directories(camera_playground PRIVATE ${INCLUDE_DIRECTORY}) +target_add_arena(camera_playground) +add_custom_target( + run_camera_playground + ${CMAKE_COMMAND} + -E env LD_LIBRARY_s/target_siamese_1.pt +) # ============================= # ============================= @@ -163,25 +229,38 @@ add_subdirectory(${DEPS_DIRECTORY}/google-test) # ============================= # ============================= -# Integration tests -add_executable(camera_playground tests/integration/camera_playground.cpp) -target_include_directories(camera_playground PRIVATE ${INCLUDE_DIRECTORY}) -target_add_arena(camera_playground) -add_custom_target( - run_camera_playground - ${CMAKE_COMMAND} - -E env LD_LIBRARY_s/target_siamese_1.pt +# Pull models +add_custom_target(pull_models + DEPENDS pull_saliency pull_matching pull_segmentation ) +# Saliency model +add_custom_target(pull_saliency + COMMAND gdown 1S1IfXlGs_pCH49DwZmbD-tZA5YH0A1gx -O ${CMAKE_BINARY_DIR}/../models/torchscript_19.pth +) + +# Matching model +add_custom_target(pull_matching + COMMAND gdown 1NeFiAfSSLXAZWlehfd0ox7p_jFF4YdrO -O ${CMAKE_BINARY_DIR}/../models/target_siamese_1.pt +) + + # Segmentation model add_custom_target(pull_segmentation - COMMAND gdown https://drive.google.com/file/d/1U2EbfJFzcjVnjTuD6ud-bIf8YOiEassf -O ${CMAKE_BINARY_DIR}/../models/fcn-model_20-epochs_06-01-2023T21-16-02.pth + COMMAND gdown 1U2EbfJFzcjVnjTuD6ud-bIf8YOiEassf -O ${CMAKE_BINARY_DIR}/../models/fcn-model_20-epochs_06-01-2023T21-16-02.pth ) # ============================= # ============================= # Pull testing images -# + +# pull cropped images from fraternal_targets testing folder +add_custom_target(pull_matching_test_images + COMMAND gdown 1opxdXw75jSQZu9s61njE6hjQkfHiKvgp -O ${CMAKE_BINARY_DIR}/../tests/integration/images/test.zip && + mkdir -p ${CMAKE_BINARY_DIR}/../tests/integration/images/matching_cropped && + unzip ${CMAKE_BINARY_DIR}/../tests/integration/images/test.zip -d ${CMAKE_BINARY_DIR}/../tests/integration/images/matching_cropped +) + # ============================= # ============================= @@ -194,7 +273,8 @@ if(CPPLINT) add_custom_target(lint COMMAND cpplint # Do not require licenses, TODO assignment, Google versions of C++ libs - 
--filter=-legal,-readability/todo,-build/c++11 + # also don't check for runtime/references since Google's public style guidelines are behind https://github.com/cpplint/cpplint/issues/148 + --filter=-legal,-readability/todo,-build/c++11,-runtime/references --linelength=100 --recursive ../src diff --git a/include/camera/interface.hpp b/include/camera/interface.hpp index 7e79eafb..de4deb52 100644 --- a/include/camera/interface.hpp +++ b/include/camera/interface.hpp @@ -3,10 +3,26 @@ #include #include +#include #include #include +// class to contain all telemetry that should be tagged with an image. +// In the future this could be in a mavlink file. +class ImageTelemetry { + public: + ImageTelemetry(double latitude, double longitude, double altitude, double airspeed, double yaw, + double pitch, double roll); + const double latitude; + const double longitude; + const double altitude; + const double airspeed; + const double yaw; + const double pitch; + const double roll; +}; + /* * FYI: this is class that will standardize image data but * if all of our cameras have a uniform image output type @@ -19,14 +35,17 @@ class ImageData { private: const std::string NAME; - const std::string PATHS; + const std::string PATH; const cv::Mat DATA; + const ImageTelemetry TELEMETRY; public: - ImageData(std::string NAME, std::string PATH, cv::Mat DATA); - std::string getName(); - std::string getPath(); - cv::Mat getData(); + ImageData(std::string NAME, std::string PATH, cv::Mat DATA, ImageTelemetry TELEMETRY); + ImageData(const ImageData&) = default; + std::string getName() const; + std::string getPath() const; + cv::Mat getData() const; + ImageTelemetry getTelemetry() const; }; // ? possibly convert most common / important json fields to @@ -40,7 +59,7 @@ class CameraConfiguration { void updateConfig(nlohmann::json newSetting); - void updateConfigField(std::string key, T value); + // void updateConfigField(std::string key, T value); nlohmann::json getConfig(); @@ -50,8 +69,8 @@ class CameraConfiguration { class CameraInterface { private: CameraConfiguration config; - ImageData recentPicture; // might need to move it to public - bool doneTakingPicture; // overengineering time + std::unique_ptr recentPicture; // might need to move it to public + bool doneTakingPicture; // overengineering time std::string uploadPath; // Interpreter interp // TODO: SERVER CONNECTION HERE ? 
@@ -61,19 +80,21 @@ class CameraInterface { public: explicit CameraInterface(CameraConfiguration config); - void connect(); + virtual ~CameraInterface() = default; + + virtual void connect() = 0; - bool verifyConnection(); + virtual bool verifyConnection() = 0; - void takePicture(); + virtual void takePicture() = 0; - ImageData getLastPicture(); + virtual ImageData getLastPicture() = 0; - bool takePictureForSeconds(int sec); + virtual bool takePictureForSeconds(int sec) = 0; - void startTakingPictures(double intervalSec); + virtual void startTakingPictures(double intervalSec) = 0; - bool isDoneTakingPictures(); + virtual bool isDoneTakingPictures() = 0; CameraConfiguration getConfig(); diff --git a/include/camera/mock.hpp b/include/camera/mock.hpp new file mode 100644 index 00000000..0a66d76c --- /dev/null +++ b/include/camera/mock.hpp @@ -0,0 +1,24 @@ +#ifndef INCLUDE_CAMERA_MOCK_HPP_ +#define INCLUDE_CAMERA_MOCK_HPP_ + +#include <memory> + +#include "camera/interface.hpp" + +class MockCamera : public CameraInterface { + public: + explicit MockCamera(CameraConfiguration config); + ~MockCamera() = default; + void connect() override; + bool verifyConnection() override; + void takePicture() override; + ImageData getLastPicture() override; + bool takePictureForSeconds(int sec) override; + void startTakingPictures(double intervalSec) override; + bool isDoneTakingPictures() override; + + private: + std::unique_ptr<ImageData> lastPicture; +}; + +#endif // INCLUDE_CAMERA_MOCK_HPP_ diff --git a/include/cv/classification.hpp b/include/cv/classification.hpp new file mode 100644 index 00000000..d382bcc6 --- /dev/null +++ b/include/cv/classification.hpp @@ -0,0 +1,55 @@ +#ifndef INCLUDE_CV_CLASSIFICATION_HPP_ +#define INCLUDE_CV_CLASSIFICATION_HPP_ + +#include +#include + +struct ClassificationResults { + // TODO: replace with protobuf structs instead of strings + std::string shape; + std::string shapeColor; + std::string character; + std::string characterColor; +}; + +// Classification is responsible for predicting characteristics about +// ground competition targets. These characteristics include shape type, +// alphanumeric character type, shape color, and alphanumeric character color. +// Currently, the shape and character classifiers are implemented using a +// Convolutional Neural Network (CNN). +// The color classifier is implemented using a K-nearest neighbors model (KNN). +// The implementation of the models themselves can be found here: +// https://github.com/tritonuas/taxonomy-101 +// In this class we will take the pretrained models and use them to make +// inferences. +class Classification { + public: + // classify takes a cropped image of the target (saliency output) and + // two binary masks to represent which region of pixels correspond to + // shape and character respectively (output of segmentation). Using this + // data, the shape type, character type, shape color and character color + // will be predicted. + ClassificationResults classify(cv::Mat croppedImage, cv::Mat shapeMask, cv::Mat characterMask); + + private: + // classifyShape takes a cropped image of the target (output of saliency) + // and a binary mask (output of segmentation). The binary mask should + // represent which region of pixels correspond to the shape region of + // the target. + std::string classifyShape(cv::Mat croppedImage, cv::Mat shapeMask); + + // classifyCharacter takes a cropped image of the target (output of saliency) + // and a binary mask (output of segmentation). 
The binary mask should + // represent which region of pixels correspond to the character region of + // the target. + std::string classifyCharacter(cv::Mat croppedImage, cv::Mat characterMask); + + // classify the primary color of a region described by a binary mask. + // This can be used for finding both shape and character color since + // we will use the same algorithm to detect the primary color in + // whatever region the mask describes. All that changes is the mask + // that's passed in. + std::string classifyColor(cv::Mat croppedImage, cv::Mat mask); +}; + +#endif // INCLUDE_CV_CLASSIFICATION_HPP_ diff --git a/include/cv/localization.hpp b/include/cv/localization.hpp new file mode 100644 index 00000000..87e8fa2e --- /dev/null +++ b/include/cv/localization.hpp @@ -0,0 +1,92 @@ +#ifndef INCLUDE_CV_LOCALIZATION_HPP_ +#define INCLUDE_CV_LOCALIZATION_HPP_ + +#include "cv/utilities.hpp" + +#include "camera/interface.hpp" +#include "utilities/datatypes.hpp" + +// TODO: these should be constants in the config file +// (or maybe queried by camera) +#define PIXEL_SIZE_MM 0.0024 +#define FOCAL_LENGTH_MM 50 +#define IMG_WIDTH_PX 5472 +#define IMG_HEIGHT_PX 3648 + +// Localization is responsible for calculating the real world latitude/longitude +// of competition targets. +// See our Python implementation here: https://github.com/tritonuas/localization +class Localization { + public: + // localize is responsible for transforming the position of a target + // within a full resolution image (image coordinates) to it's position + // in the real world (latitude/longitude coords). We are given the + // pixel coordinates of the target from saliency and the GPS position of + // the plane at the time of image capture. + // + // TODO: also need to pass in camera/lens information such as sensor width, + // focal length, and image width/height + virtual GPSCoord localize(const ImageTelemetry& telemetry, const Bbox& targetBbox) = 0; + + protected: + struct CameraIntrinsics { + double pixelSize; // mm + double focalLength; // mm + double resolutionX; // Pixels + double resolutionY; // Pixels + }; + + CameraIntrinsics camera{ + .pixelSize = PIXEL_SIZE_MM, + .focalLength = FOCAL_LENGTH_MM, + .resolutionX = IMG_WIDTH_PX, + .resolutionY = IMG_HEIGHT_PX, + }; +}; + +// Localization by converting via ECEF coordinates (Earth Centered, Earth Fixed) +class ECEFLocalization : Localization { + public: + GPSCoord localize(const ImageTelemetry& telemetry, const Bbox& targetBbox) override; + + private: + // ECEF - Earth Centered, Earth Fixed coordinate system. 0,0,0 is the center of the Earth. + struct ECEFCoordinates { + double x; // Meters in the plane of the equator in the direction of the prime meridian + double y; // Meters in the plane of the equator in the direction of 90 degrees East + double z; // Meters in the direction of the North pole + }; + + // ENU - East, North, Up coordinate system. + // Used to show an offset from a certain location on the Earth. 
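+    // For reference, ENUtoECEF (src/cv/localization.cpp) rotates an ENU offset into ECEF
+    // using the standard rotation about the reference latitude/longitude (phi, lambda):
+    //   [dx]   [ -sin(lambda)   -sin(phi)*cos(lambda)   cos(phi)*cos(lambda) ] [e]
+    //   [dy] = [  cos(lambda)   -sin(phi)*sin(lambda)   cos(phi)*sin(lambda) ] [n]
+    //   [dz]   [      0               cos(phi)                sin(phi)       ] [u]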
+ struct ENUCoordinates { + double e; // Meters East from reference location + double n; // Meters North from reference location + double u; // Meters Up from reference location + }; + + + struct CameraVector { + double roll; // Radians + double pitch; // Radians + double heading; // Radians + }; + + ECEFCoordinates GPStoECEF(GPSCoord gps); + ECEFCoordinates ENUtoECEF(ENUCoordinates offset, GPSCoord originGPS); + GPSCoord ECEFtoGPS(ECEFCoordinates ecef); + CameraVector PixelsToAngle( + CameraIntrinsics camera, + CameraVector state, + double targetX, + double targetY); + ENUCoordinates AngleToENU(CameraVector target, GPSCoord aircraft, double terrainHeight); +}; + +// Localization using GSD (ground sample distance) ratio +class GSDLocalization : Localization { + public: + GPSCoord localize(const ImageTelemetry& telemetry, const Bbox& targetBbox) override; +}; + +#endif // INCLUDE_CV_LOCALIZATION_HPP_ diff --git a/include/cv/matching.hpp b/include/cv/matching.hpp new file mode 100644 index 00000000..b9984c43 --- /dev/null +++ b/include/cv/matching.hpp @@ -0,0 +1,60 @@ +#ifndef INCLUDE_CV_MATCHING_HPP_ +#define INCLUDE_CV_MATCHING_HPP_ + +#include +#include +#include + +#include +#include +#include + +#include +#include "cv/utilities.hpp" +#include "utilities/constants.hpp" +#include "utilities/datatypes.hpp" + +#include "protos/obc.pb.h" + +struct MatchResult { + BottleDropIndex bottleDropIndex; + bool foundMatch; + double similarity; +}; + +// Matching is used to match targets to a potential corresponding competition +// bottle drop objective. Before the mission starts, the competition tells us +// the characteristics of the ground targets that we should drop bottles on. +// Using this information we know which targets to look for. Then, we can +// calculate how similar the targets we capture are to the ones the competition +// enumerates for the bottle drop. +// This is implemented using a Siamese Neural Network that takes two images as +// input and outputs a similarity score. One of the input images will be the +// cropped target from saliency. The other input will be an artificially +// generated image that matches the description of one of the competition +// objectives. We will use something similar to not-stolen-israeli-code to do +// the generation (https://github.com/tritonuas/not-stolen-israeli-code). +// The matching Siamese model is implemented in this repo: +// https://github.com/tritonuas/fraternal-targets +// +// One important note is that matching is a replacement for the segmentation +// and classification stages of the pipeline. Instead of outright guessing +// the properties of the targets (as with segmentation/classification) we can +// see how close they are to known objectives. 
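+//
+// A rough usage sketch (illustrative only; the 0.5 threshold, the model path, and the way the
+// reference images are built below mirror tests/integration/cv_matching.cpp rather than anything
+// required by this header):
+//
+//   std::vector<std::pair<cv::Mat, BottleDropIndex>> refs;  // one generated image per bottle
+//   Matching matcher(competitionObjectives, 0.5, refs, "../models/target_siamese_1.pt");
+//   MatchResult result = matcher.match(croppedTarget);
+//   // result.foundMatch, result.bottleDropIndex, and result.similarity describe the best match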
+class Matching { + public: + Matching(std::array competitionObjectives, + double matchThreshold, + std::vector> referenceImages, + const std::string &modelPath); + + MatchResult match(const CroppedTarget& croppedTarget); + + private: + std::array competitionObjectives; + double matchThreshold; + std::vector> referenceFeatures; + torch::jit::script::Module module; +}; + +#endif // INCLUDE_CV_MATCHING_HPP_ diff --git a/include/cv/pipeline.hpp b/include/cv/pipeline.hpp index 0062c496..ac66e1c8 100644 --- a/include/cv/pipeline.hpp +++ b/include/cv/pipeline.hpp @@ -1,5 +1,55 @@ #ifndef INCLUDE_CV_PIPELINE_HPP_ #define INCLUDE_CV_PIPELINE_HPP_ +#include +#include +#include + +#include + +#include "camera/interface.hpp" +#include "cv/classification.hpp" +#include "cv/localization.hpp" +#include "cv/matching.hpp" +#include "cv/saliency.hpp" +#include "cv/segmentation.hpp" + +// Processed image holds all predictions made concerning a given image. +// +// A single aerial image can have multiple targets that are matches with +// competition bottle assignments. +// At the same time, an aerial image can have other targets that are not matched +// with a bottle (or at least the pipeline is not confident in a match). +struct PipelineResults { + ImageData imageData; + std::vector matchedTargets; + // Not sure if unmatchedTargets should hold a different struct than + // matchedTargets. Both have basically the same info except unmatched won't + // have a bottle index assigned to it. We could populate bottle index to -1 + // or leave it as the bottle of the target it was closest in similarity to. + std::vector unmatchedTargets; +}; + +// Pipeline handles all infrastructure within the CV pipeline +class Pipeline { + public: + Pipeline(std::array competitionObjectives, + std::vector> referenceImages, + const std::string &matchingModelPath, + const std::string &segmentationModelPath); + + PipelineResults run(const ImageData& imageData); + + private: + Saliency detector; + + Matching matcher; + + Segmentation segmentor; + Classification classifier; + + ECEFLocalization ecefLocalizer; + GSDLocalization gsdLocalizer; +}; #endif // INCLUDE_CV_PIPELINE_HPP_ diff --git a/include/cv/saliency.hpp b/include/cv/saliency.hpp new file mode 100644 index 00000000..94aa3d26 --- /dev/null +++ b/include/cv/saliency.hpp @@ -0,0 +1,28 @@ +#ifndef INCLUDE_CV_SALIENCY_HPP_ +#define INCLUDE_CV_SALIENCY_HPP_ + +#include +#include + +#include "cv/utilities.hpp" + +// Saliency is responsible for detecting targets within a full-size aerial image +// It can detect the presence of standard targets (with shape and character) +// as well as emergent targets (mannikin). It outputs the position of the +// targets within the image (using coordinates of a bounding box) and a +// classification between if a target is standard or emergent. Saliency is +// implemented as a convolutional neural network using the Faster RCNN +// model. The training/testing code for the model can be found here: +// https://github.com/tritonuas/garretts-new-lunchbox +class Saliency { + public: + // saliency takes a full-size aerial image and detects any potential + // targets within the image. The full-size image can have multiple + // targets which is why it returns a vector of targets. For each target, + // it will predict the location within the full-size image (using + // coordinates of bounding box) and a prediction of whether or not a + // target is emergent (mannikin) or not. 
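+    // (Note: the detector is not hooked up yet; the implementation in src/cv/saliency.cpp
+    // currently returns an empty vector, so downstream stages see no targets.)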
+ std::vector salience(cv::Mat image); +}; + +#endif // INCLUDE_CV_SALIENCY_HPP_ diff --git a/include/cv/segmentation.hpp b/include/cv/segmentation.hpp new file mode 100644 index 00000000..4078df6f --- /dev/null +++ b/include/cv/segmentation.hpp @@ -0,0 +1,45 @@ +#ifndef INCLUDE_CV_SEGMENTATION_HPP_ +#define INCLUDE_CV_SEGMENTATION_HPP_ + +#include + +#include +#include +#include +#include +#include + +#include + +#include "cv/utilities.hpp" + +struct SegmentationResults { + cv::Mat shapeMask; + cv::Mat characterMask; +}; + +// Segmentation is the stage of the pipeline responsible for detecting the +// region of a cropped target that corresponds to the shape/character +// attributes. This is useful for simplifying the job of the classifcation model. +// Assuming that segmentation is accurate, classification only needs to look at +// the binary mask of the target's shape and character. This simplifies +// classification to a simple MNIST classification problem where we need to +// classify a black and white shape/character image. The training/testing code +// for segmentation models can be found here: +// https://github.com/tritonuas/hutzler-571 +class Segmentation { + public: + explicit Segmentation(const std::string &modelPath); + SegmentationResults segment(const CroppedTarget &target); + private: + torch::jit::script::Module module; +}; + +std::string get_image_type(const cv::Mat& img, bool more_info); +void show_image(cv::Mat& img, std::string title); +at::Tensor transpose(at::Tensor tensor, c10::IntArrayRef dims); +std::vector ToInput(at::Tensor tensor_image); +at::Tensor ToTensor(cv::Mat img, bool show_output, bool unsqueeze, int unsqueeze_dim); +cv::Mat ToCvImage(at::Tensor tensor); + +#endif // INCLUDE_CV_SEGMENTATION_HPP_ diff --git a/include/cv/utilities.hpp b/include/cv/utilities.hpp new file mode 100644 index 00000000..dc6c03f2 --- /dev/null +++ b/include/cv/utilities.hpp @@ -0,0 +1,25 @@ +#ifndef INCLUDE_CV_UTILITIES_HPP_ +#define INCLUDE_CV_UTILITIES_HPP_ + +#include + +class Bbox { + public: + int x1; + int y1; + int x2; + int y2; + + int width(); + int height(); +}; + +struct CroppedTarget { + cv::Mat croppedImage; + Bbox bbox; + bool isMannikin; +}; + +cv::Mat crop(cv::Mat original, Bbox bbox); + +#endif // INCLUDE_CV_UTILITIES_HPP_ diff --git a/models/.gitkeep b/models/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/camera/interface.cpp b/src/camera/interface.cpp new file mode 100644 index 00000000..75f0113d --- /dev/null +++ b/src/camera/interface.cpp @@ -0,0 +1,25 @@ +#include "camera/interface.hpp" + +ImageTelemetry::ImageTelemetry(double latitude, double longitude, double altitude, double airspeed, + double yaw, double pitch, double roll) + : latitude(latitude), + longitude(longitude), + altitude(altitude), + airspeed(airspeed), + yaw(yaw), + pitch(pitch), + roll(roll) {} + +ImageData::ImageData(std::string NAME, std::string PATH, cv::Mat DATA, ImageTelemetry TELEMETRY) + : NAME(NAME), PATH(PATH), DATA(DATA), TELEMETRY(TELEMETRY) {} + +std::string ImageData::getName() const { return NAME; } +std::string ImageData::getPath() const { return PATH; } + +cv::Mat ImageData::getData() const { return DATA; } + +ImageTelemetry ImageData::getTelemetry() const { return TELEMETRY; } + +CameraConfiguration::CameraConfiguration(nlohmann::json config) : configJson(config) {} + +CameraInterface::CameraInterface(CameraConfiguration config) : config(config) {} diff --git a/src/camera/mock.cpp b/src/camera/mock.cpp new file mode 100644 index 00000000..c1662a6b 
--- /dev/null +++ b/src/camera/mock.cpp @@ -0,0 +1,29 @@ +#include "camera/mock.hpp" + +MockCamera::MockCamera(CameraConfiguration config) : CameraInterface(config) {} + +void MockCamera::connect() { return; } + +bool MockCamera::verifyConnection() { return true; } + +void MockCamera::takePicture() { + ImageData newImg("mock_image.jpg", "/real/path/mock_image.jpg", + cv::Mat(cv::Size(4000, 3000), CV_8UC3, cv::Scalar(255)), + ImageTelemetry(38.31568, 76.55006, 75, 20, 100, 5, 3)); + + lastPicture = std::make_unique<ImageData>(newImg); +} + +ImageData MockCamera::getLastPicture() { return *lastPicture; } + +bool MockCamera::takePictureForSeconds(int sec) { + // TODO: + return true; +} + +void MockCamera::startTakingPictures(double intervalSec) { + // TODO: + return; +} + +bool MockCamera::isDoneTakingPictures() { return false; } diff --git a/src/cv/localization.cpp b/src/cv/localization.cpp new file mode 100644 index 00000000..6e3f29b4 --- /dev/null +++ b/src/cv/localization.cpp @@ -0,0 +1,125 @@ +#include "cv/localization.hpp" +#include + +#define PI 3.14159265 + +// struct ECEFCoordinates; +// struct CameraVector; +// struct ENUCoordinates; + +ECEFLocalization::ECEFCoordinates ECEFLocalization::GPStoECEF(GPSCoord gps) { + double a = 6378137; // Earth semi-major axis in meters + double b = 6356752; // Earth semi-minor axis in meters + double e2 = 1 - (b*b)/(a*a); + ECEFCoordinates ecef; + ecef.x = (gps.altitude() + a/ + (sqrt(1-e2*sin(gps.latitude())*sin(gps.latitude()))))* + cos(gps.latitude())*cos(gps.longitude()); + ecef.y = (gps.altitude() + a/ + (sqrt(1-e2*sin(gps.latitude())*sin(gps.latitude()))))* + cos(gps.latitude())*sin(gps.longitude()); + ecef.z = (gps.altitude() + (1-e2)*a/ + (sqrt(1-e2*sin(gps.latitude())*sin(gps.latitude()))))*sin(gps.latitude()); + return ecef; +} + +// Converts a GPS location and ENU offset to ECEF coordinates +ECEFLocalization::ECEFCoordinates ECEFLocalization::ENUtoECEF( + ENUCoordinates offset, GPSCoord originGPS) { + ECEFCoordinates origin = GPStoECEF(originGPS); + ECEFCoordinates target; + target.x = origin.x - sin(originGPS.longitude())*offset.e - + sin(originGPS.latitude())*cos(originGPS.longitude())*offset.n + + cos(originGPS.latitude())*cos(originGPS.longitude())*offset.u; + target.y = origin.y + cos(originGPS.longitude())*offset.e - + sin(originGPS.latitude())*sin(originGPS.longitude())*offset.n + + cos(originGPS.latitude())*sin(originGPS.longitude())*offset.u; + target.z = origin.z + cos(originGPS.latitude())*offset.n + sin(originGPS.latitude())*offset.u; + return target; +} + +// Converts ECEF coordinates to GPS coordinates using Heikkinen's procedure +GPSCoord ECEFLocalization::ECEFtoGPS(ECEFCoordinates ecef) { + GPSCoord gps; + double a = 6378137; + double b = 6356752; + double e2 = 1 - ((b*b)/(a*a)); + double ep2 = ((a*a)/(b*b)) - 1; + double p = sqrt((ecef.x*ecef.x) + (ecef.y*ecef.y)); + double F = 54*(b*b)*(ecef.z*ecef.z); + double G = (p*p) + ((1 - e2)*(ecef.z*ecef.z)) - (e2*(a*a - b*b)); + double c = e2*e2*F*p*p/(G*G*G); + double s = cbrt(1 + c + sqrt((c*c) + (2*c))); + double k = s + 1 + (1/s); + double P = F/(3*k*k*G*G); + double Q = sqrt(1 + (2*e2*e2*P)); + double r0 = (-P*e2*p/(1 + Q)) + sqrt((0.5*a*a*(1 + (1/Q))) - + (P*(1 - e2)*ecef.z*ecef.z/(Q*(1 + Q))) - (0.5*P*p*p)); + double U = sqrt((p - (e2*r0))*(p - (e2*r0)) + (ecef.z*ecef.z)); + double V = sqrt((p - (e2*r0))*(p - (e2*r0)) + ((1 - e2)*ecef.z*ecef.z)); + double z0 = b*b*ecef.z/(a*V); + gps.set_latitude(atan((ecef.z + ep2*z0)/p)); + gps.set_longitude(atan2(ecef.y, ecef.x)); + 
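+    // Altitude above the ellipsoid from Heikkinen's closed form: h = U * (1 - b^2 / (a * V))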
gps.set_altitude(U*(1 - ((b*b)/(a*V)))); + return gps; +} + +// Calculate angle offset based on target pixel coordinates using pinhole camera model +ECEFLocalization::CameraVector ECEFLocalization::PixelsToAngle( + CameraIntrinsics camera, CameraVector state, double targetX, double targetY) { + CameraVector target; + target.roll = atan(camera.pixelSize*(targetX - (camera.resolutionX/2))/camera.focalLength); + target.pitch = atan(camera.pixelSize*(targetY - (camera.resolutionY/2))/camera.focalLength); + target.heading = state.heading; + return target; +} + +// Calculate the ENU offset of the intersection of a vector from +// the plane to the ground (assume flat) +ECEFLocalization::ENUCoordinates ECEFLocalization::AngleToENU( + CameraVector target, GPSCoord aircraft, double terrainHeight) { + double x = aircraft.altitude()*tan(target.roll); + double y = aircraft.altitude()*tan(target.pitch); + ENUCoordinates offset; + offset.e = x*cos(target.heading) + y*sin(target.heading); + offset.n = -x*sin(target.heading) + y*cos(target.heading); + offset.u = terrainHeight - aircraft.altitude(); + return offset; +} + +GPSCoord ECEFLocalization::localize(const ImageTelemetry& telemetry, const Bbox& targetBbox) { + double terrainHeight = 0; + + double targetX = (targetBbox.x1 + targetBbox.x2)/2; + double targetY = (targetBbox.y1 + targetBbox.y2)/2; + + + CameraVector gimbalState; + gimbalState.roll = telemetry.roll*PI/180; + gimbalState.pitch = telemetry.pitch*PI/180; + gimbalState.heading = telemetry.yaw*PI/180; + + GPSCoord aircraft; + aircraft.set_latitude(telemetry.latitude*PI/180); + aircraft.set_longitude(telemetry.longitude*PI/180); + aircraft.set_altitude(telemetry.altitude*1000); + + CameraVector targetVector = PixelsToAngle(camera, gimbalState, targetX, targetY); + ENUCoordinates offset = AngleToENU(targetVector, aircraft, terrainHeight); + ECEFCoordinates targetLocationECEF = ENUtoECEF(offset, aircraft); + GPSCoord targetLocationGPS = ECEFtoGPS(targetLocationECEF); + + double lat = targetLocationGPS.latitude()*180/PI; + double lon = targetLocationGPS.longitude()*180/PI; + double alt = targetLocationGPS.altitude()/1000; + + GPSCoord targetCoord; + targetCoord.set_latitude(lat); + targetCoord.set_longitude(lon); + targetCoord.set_altitude(alt); + return targetCoord; +} + +GPSCoord GSDLocalization::localize(const ImageTelemetry& telemetry, const Bbox& targetBbox) { + return GPSCoord(); +} diff --git a/src/cv/matching.cpp b/src/cv/matching.cpp new file mode 100644 index 00000000..9d6ea8ef --- /dev/null +++ b/src/cv/matching.cpp @@ -0,0 +1,107 @@ +#include "cv/matching.hpp" + +#include + +/* +* IMPORTANT NOTE: +* Using torch::from_blob to convert cv::Mat to torch::Tensor results in NaN tensors +* for certain images. The solution of using std::memcpy instead was found and corroborated +* in this post: +* https://discuss.pytorch.org/t/convert-torch-tensor-to-cv-mat/42751/4 +* The old broken line is given here for reference (formerly at line 33): +* torch::Tensor tensor_image = torch::from_blob(img_dst.data, {1, 3, img_dst.rows, img_dst.cols}, torch::kF32); + +* The reason for from_blob's misbehaving tendencies may have to do with it taking in +* a pointer as its first argument (img_dst.data) as shown here: +* https://pytorch.org/cppdocs/api/function_namespacetorch_1ad7fb2a7759ef8c9443b489ddde494787.html + +* Every time we exit toInput(), img_dst is deallocated and the pointer to the image's data +* is thus rendered invalid. This results in tensors filled with NaN. 
+* This hypothesis is corroborated by the following post: +* https://discuss.pytorch.org/t/std-vector-torch-tensor-turns-into-nans/81853 +* If we wanted to use from_blob, we would have to perhaps pass in a constant reference to +* img, and only use img (rather than creating a new img_dst to do operations on). +* std::memcpy works for the time being, so it will remain. +* +* Reference for resizing cv image: +* https://stackoverflow.com/questions/17533101/resize-a-matrix-after-created-it-in-opencv +*/ +auto toInput(cv::Mat img, bool show_output = false, bool unsqueeze = false, int unsqueeze_dim = 0) { + cv::Mat img_dst; + // scale the image and turn into Tensor, then input vector + cv::resize(img, img_dst, cv::Size(128, 128), 0, 0, cv::INTER_AREA); + img_dst.convertTo(img_dst, CV_32FC3); + torch::Tensor tensor_image = torch::ones({1, 3, img_dst.rows, img_dst.cols}, torch::kF32); + std::memcpy(tensor_image.data_ptr(), img_dst.data, sizeof(float)*tensor_image.numel()); + // convert back to CV image and display to verify fidelity of image if desired + if (unsqueeze) + tensor_image.unsqueeze_(unsqueeze_dim); + + if (show_output) { + std::ostringstream stream; + stream << tensor_image.slice(2, 0, 1); + LOG_F(INFO, "%s", stream.str().c_str()); + } + + return std::vector<torch::jit::IValue>{tensor_image}; +} + +Matching::Matching(std::array + competitionObjectives, + double matchThreshold, + std::vector> referenceImages, + const std::string &modelPath) + : competitionObjectives(competitionObjectives), + matchThreshold(matchThreshold) { + + try { + this->module = torch::jit::load(modelPath); + } + catch (const c10::Error& e) { + LOG_F(ERROR, "could not load the model, check if model file is present at %s. If the file is present, try to verify that its contents are valid model weights. Sometimes when pulling from google drive, an error html page will download with the same filename if the model fails to download. \n", modelPath.c_str()); // NOLINT + throw; + } + + if (referenceImages.size() == 0) { + LOG_F(WARNING, "Empty reference image vector passed as argument\n"); + } + + // Populate referenceFeatures by putting each refImg through model + for (int i = 0; i < referenceImages.size(); i++) { + cv::Mat image = referenceImages.at(i).first; + BottleDropIndex bottle = referenceImages.at(i).second; + std::vector<torch::jit::IValue> input = toInput(image); + torch::Tensor output = this->module.forward(input).toTensor(); + this->referenceFeatures.push_back(std::make_pair(output, bottle)); + } +} + +/* +* Matches the given image with one of the referenceFeature elements +* (each referenceFeature element corresponds to an element of referenceImages, see constructor). +* @param croppedTarget - wrapper for cv::Mat image which we extract and use +* @return MatchResult - struct of bottleIndex, boolean isMatch, and minimum distance found. +* Boolean isMatch set to true if minimum distance found is below threshold, false otherwise. +* +* NOTE: Matching only occurs if loading model and ref. images was successful. 
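+* (Naming note: the similarity field of MatchResult holds this minimum pairwise distance,
+* so a smaller value means a closer match; foundMatch is true when it falls below matchThreshold.)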
+*/ +MatchResult Matching::match(const CroppedTarget& croppedTarget) { + std::vector input = toInput(croppedTarget.croppedImage); + torch::Tensor output = this->module.forward(input).toTensor(); + int minIndex = 0; + double minDist = torch::pairwise_distance(output, + referenceFeatures.at(minIndex).first).item().to(); + + for (int i = 1; i < referenceFeatures.size(); i++) { + torch::Tensor curr = referenceFeatures.at(i).first; + torch::Tensor dist = torch::pairwise_distance(output, curr); + double tmp = dist.item().to(); + if (tmp < minDist) { + minDist = tmp; + minIndex = i; + } + } + BottleDropIndex bottleIdx = referenceFeatures.at(minIndex).second; + bool isMatch = minDist < this->matchThreshold; + return MatchResult{bottleIdx, isMatch, minDist}; +} diff --git a/src/cv/pipeline.cpp b/src/cv/pipeline.cpp new file mode 100644 index 00000000..429e1f07 --- /dev/null +++ b/src/cv/pipeline.cpp @@ -0,0 +1,72 @@ +#include "cv/pipeline.hpp" + +const double DEFAULT_MATCHING_THRESHOLD = 0.5; + +// TODO: eventually we will need to invoke non-default constructors for all the +// modules of the pipeline (to customize model filepath, etc...) +// TODO: I also want to have a way to customize if the model will use +// matching vs segmentation/classification +Pipeline::Pipeline(std::array competitionObjectives, + std::vector> referenceImages, + const std::string &matchingModelPath, + const std::string &segmentationModelPath) : + // assumes reference images passed to pipeline from not_stolen + matcher(competitionObjectives, DEFAULT_MATCHING_THRESHOLD, referenceImages, + matchingModelPath), + segmentor(segmentationModelPath) {} + +/* + * Entrypoint of CV Pipeline. At a high level, it will include the following + * stages: + * - Saliency takes the full size image and outputs any cropped targets + * that are detected + * - Cropped targets are passed into target matching where they will + * be compared against targets associated with each water bottle. + * All targets (regardless of match status) will be passed onto + * later stages. + * - Bounding boxes predicted from saliency and GPS telemetry from when + * the photo was captured are passed into the localization algorithm. + * Here, the real latitude/longitude coordinates of the target are + * calculated. + * - Note, we may use a slight variation of this pipeline where the + * target matching stage is replaced by segmentation/classification. + */ +PipelineResults Pipeline::run(const ImageData &imageData) { + std::vector saliencyResults = this->detector.salience(imageData.getData()); + + // if saliency finds no potential targets, end early with no results + if (saliencyResults.empty()) { + return PipelineResults{imageData, {}, {}}; + } + + // go through saliency results and determine if each potential target + // matches one of the targets assigned to a bottle for a competition + /// objective + std::vector matchedTargets; + std::vector unmatchedTargets; + for (CroppedTarget target : saliencyResults) { + MatchResult potentialMatch = this->matcher.match(target); + + GPSCoord* targetPosition = new GPSCoord(this->ecefLocalizer.localize( + imageData.getTelemetry(), target.bbox)); + // GPSCoord* targetPosition = new GPSCoord(this->gsdLocalizer.localize( + // imageData.getTelemetry(), target.bbox)); + + AirdropTarget airdropTarget; + // TODO: Call set_index using a BottleDropIndex type instead of uint8. 
+ // Requires modifying the output of matcher.match() + // airdropTarget.set_index(potentialMatch.bottleDropIndex); + // give ownership of targetPosition to protobuf, it should handle deallocation + airdropTarget.set_allocated_coordinate(targetPosition); + + // TODO: we should have some criteria to handle duplicate targets that + // show up in multiple images + if (potentialMatch.foundMatch) { + matchedTargets.push_back(airdropTarget); + } else { + unmatchedTargets.push_back(airdropTarget); + } + } + + return PipelineResults(imageData, matchedTargets, unmatchedTargets); +} diff --git a/src/cv/saliency.cpp b/src/cv/saliency.cpp new file mode 100644 index 00000000..3d5d7179 --- /dev/null +++ b/src/cv/saliency.cpp @@ -0,0 +1,3 @@ +#include "cv/saliency.hpp" + +std::vector Saliency::salience(cv::Mat image) { return {}; } diff --git a/src/cv/segmentation.cpp b/src/cv/segmentation.cpp new file mode 100644 index 00000000..7cd5e86c --- /dev/null +++ b/src/cv/segmentation.cpp @@ -0,0 +1,114 @@ +#include "cv/segmentation.hpp" + +#include + +Segmentation::Segmentation(const std::string &modelPath) { + try { + this->module = torch::jit::load(modelPath); + } + catch (const c10::Error& e) { + LOG_F(ERROR, "could not load the model, check if model file is present at %s. If the file is present, try to verify that it's contents are valid model weights. Sometimes when pulling from google drive, an error html page will download with the same filename if the model fails to download. \n", modelPath.c_str()); // NOLINT + throw; + } + } + +SegmentationResults Segmentation::segment(const CroppedTarget &target) { + auto device = torch::kCPU; + auto fcn = this->module; + fcn.to(device); + + auto target_tensor = ToTensor(target.croppedImage, false, false, 0); + target_tensor = target_tensor.clamp_max(c10::Scalar(50)); + target_tensor = target_tensor.toType(c10::kFloat).div(255); + target_tensor = transpose(target_tensor, { (2), (0), (1) }); + target_tensor.unsqueeze_(0); + + auto input_to_net = ToInput(target_tensor); + at::Tensor output = (fcn.forward(input_to_net).toGenericDict().begin()->value()).toTensor(); + torch::Tensor cpu_tensor = output.to(device); + cpu_tensor = cpu_tensor[0]; + cpu_tensor = torch::softmax(cpu_tensor, 0); + + auto character_mask = cpu_tensor[1]; + auto shape_mask = cpu_tensor[0]; + cv::Mat character_mask_mat(character_mask.sizes()[0], character_mask.sizes()[1], + CV_32FC1, character_mask.data_ptr()); + cv::Mat shape_mask_mat(shape_mask.sizes()[0], shape_mask.sizes()[1], + CV_32FC1, shape_mask.data_ptr()); + + SegmentationResults result; + result.shapeMask = character_mask_mat; + result.characterMask = shape_mask_mat; + return result; +} + + + +std::string get_image_type(const cv::Mat& img, bool more_info = true) { + std::string r; + int type = img.type(); + uchar depth = type & CV_MAT_DEPTH_MASK; + uchar chans = 1 + (type >> CV_CN_SHIFT); + + switch (depth) { + case CV_8U: r = "8U"; break; + case CV_8S: r = "8S"; break; + case CV_16U: r = "16U"; break; + case CV_16S: r = "16S"; break; + case CV_32S: r = "32S"; break; + case CV_32F: r = "32F"; break; + case CV_64F: r = "64F"; break; + default: r = "User"; break; + } + + r += "C"; + r += (chans + '0'); + + if (more_info) { + LOG_F(INFO, "depth: %d channels: %d\n", img.depth(), img.channels()); + } + return r; +} + +void show_image(cv::Mat& img, std::string title) { + std::string image_type = get_image_type(img); + cv::namedWindow(title + " type:" + image_type, cv::WINDOW_NORMAL); + cv::imshow(title, img); + cv::waitKey(0); +} + +at::Tensor 
transpose(at::Tensor tensor, c10::IntArrayRef dims = { 0, 3, 1, 2 }) { + tensor = tensor.permute(dims); + return tensor; +} + +at::Tensor ToTensor(cv::Mat img, bool show_output = false, bool unsqueeze = false, +int unsqueeze_dim = 0) { + at::Tensor tensor_image = torch::from_blob(img.data, { img.rows, img.cols, 3 }, at::kByte); + + if (unsqueeze) { + tensor_image.unsqueeze_(unsqueeze_dim); + } + + return tensor_image; +} + +std::vector ToInput(at::Tensor tensor_image) { + // Create a vector of inputs. + return std::vector{tensor_image}; +} + +cv::Mat ToCvImage(at::Tensor tensor) { + int width = tensor.sizes()[0]; + int height = tensor.sizes()[1]; + try { + cv::Mat output_mat(cv::Size{ height, width }, CV_8UC3, tensor.data_ptr()); + + show_image(output_mat, "converted image from tensor"); + return output_mat.clone(); + } + catch (const c10::Error& e) { + LOG_S(ERROR) << "error loading the model: : " << e.msg() << "\n"; + } + return cv::Mat(height, width, CV_8UC3); +} diff --git a/src/cv/utilities.cpp b/src/cv/utilities.cpp new file mode 100644 index 00000000..fb99ffa5 --- /dev/null +++ b/src/cv/utilities.cpp @@ -0,0 +1,18 @@ +#include "cv/utilities.hpp" + +int Bbox::width() { + return x2 - x1; +} + +int Bbox::height() { + return y2 - y1; +} + +// crop will crop an OpenCV image and return a new image cropped to the region +// specified by the given bounding box +cv::Mat crop(cv::Mat original, Bbox bbox) { + auto x = cv::Mat( + original.clone(), + cv::Rect(bbox.x1, bbox.y1, bbox.width(), bbox.height())); + return x; +} diff --git a/tests/integration/cv_matching.cpp b/tests/integration/cv_matching.cpp new file mode 100644 index 00000000..d4bd2b73 --- /dev/null +++ b/tests/integration/cv_matching.cpp @@ -0,0 +1,111 @@ +#include + +#include +#include + +#include "protos/obc.pb.h" + +#include "cv/matching.hpp" +#include "utilities/constants.hpp" + +// Download these test images by running "make pull_matching_test_images" or from the test.zip here https://drive.google.com/drive/u/1/folders/1opXBWx6bBF7J3-JSFtrhfFIkYis9qhGR +// Or, any cropped not-stolen images will work +// NOTE: images are given reverse order bottleIndexes, e.g. refImagePath0 -> index 4, etc. +const std::string imageTestDir = "../tests/integration/images/matching_cropped/test/"; +const std::string refImagePath0 = imageTestDir + "000000910.jpg"; // bottle 4 +const std::string refImagePath1 = imageTestDir + "000000920.jpg"; // bottle 3 +const std::string refImagePath2 = imageTestDir + "000000003.jpg"; // bottle 2 +const std::string refImagePath3 = imageTestDir + "000000004.jpg"; // bottle 1 +const std::string refImagePath4 = imageTestDir + "000000005.jpg"; // bottle 0 + +// model can be downloaded by running "make pull_matching" or from here: https://drive.google.com/drive/folders/1ciDfycNyJiLvRhJhwQZoeKH7vgV6dGHJ?usp=drive_link +const std::string modelPath = "../models/target_siamese_1.pt"; + +// These images can also come from the same source as the reference images. To accurately +// run this test, provide one image that is a positive match and one that doesn't match +// any of the references. 
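+// One way to run this test from the CMake build directory (target names are defined in this
+// repo's CMakeLists.txt; the exact invocation may differ on your setup):
+//   make pull_matching pull_matching_test_images
+//   make cv_matching && bin/cv_matching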
+const std::string imageMatchPath = imageTestDir + "000000920.jpg"; +const int matchIndex = 3; +const std::string imageNotMatchPath = imageTestDir + "000000016.jpg"; + +int main(int argc, char* argv[]) { + // purely for the constructor, doesn't do much in matching + std::array bottlesToDrop; + + Bottle bottle1; + bottle1.set_shapecolor(ODLCColor::Red); + bottle1.set_shape(ODLCShape::Circle); + bottle1.set_alphanumericcolor(ODLCColor::Orange); + bottle1.set_alphanumeric("J"); + bottlesToDrop[0] = bottle1; + + Bottle bottle2; + bottle2.set_shapecolor(ODLCColor::Blue); + bottle2.set_shape(ODLCShape::Circle); + bottle2.set_alphanumericcolor(ODLCColor::Orange); + bottle2.set_alphanumeric("G"); + bottlesToDrop[1] = bottle2; + + Bottle bottle3; + bottle3.set_shapecolor(ODLCColor::Red); + bottle3.set_shape(ODLCShape::Circle); + bottle3.set_alphanumericcolor(ODLCColor::Blue); + bottle3.set_alphanumeric("X"); + bottlesToDrop[2] = bottle3; + + Bottle bottle4; + bottle4.set_shapecolor(ODLCColor::Red); + bottle4.set_shape(ODLCShape::Circle); + bottle4.set_alphanumericcolor(ODLCColor::Blue); + bottle4.set_alphanumeric("F"); + bottlesToDrop[3] = bottle4; + + Bottle bottle5; + bottle5.set_shapecolor(ODLCColor::Green); + bottle5.set_shape(ODLCShape::Circle); + bottle5.set_alphanumericcolor(ODLCColor::Black); + bottle5.set_alphanumeric("F"); + bottlesToDrop[4] = bottle5; + + std::vector> referenceImages; + cv::Mat ref0 = cv::imread(refImagePath0); + referenceImages.push_back(std::make_pair(ref0, BottleDropIndex(4))); + cv::Mat ref1 = cv::imread(refImagePath1); + referenceImages.push_back(std::make_pair(ref1, BottleDropIndex(3))); + cv::Mat ref2 = cv::imread(refImagePath2); + referenceImages.push_back(std::make_pair(ref2, BottleDropIndex(2))); + cv::Mat ref3 = cv::imread(refImagePath3); + referenceImages.push_back(std::make_pair(ref3, BottleDropIndex(1))); + cv::Mat ref4 = cv::imread(refImagePath4); + referenceImages.push_back(std::make_pair(ref4, BottleDropIndex(0))); + + Matching matcher(bottlesToDrop, 0.5, referenceImages, modelPath); + cv::Mat image = cv::imread(imageMatchPath); + Bbox dummyBox(0, 0, 0, 0); + CroppedTarget cropped = { + image, + dummyBox, + false + }; + + cv::Mat falseImage = cv::imread(imageNotMatchPath); + CroppedTarget croppedFalse = { + falseImage, + dummyBox, + false + }; + + MatchResult result = matcher.match(cropped); + LOG_F(INFO, "\nTRUE MATCH TEST:\nClosest is bottle at index %d\nfoundMatch is %d\nThe similarity is %.3f\n", + int(result.bottleDropIndex), + result.foundMatch, + result.similarity); + + MatchResult resultFalse = matcher.match(croppedFalse); + LOG_F(INFO, "\nFALSE MATCH TEST:\nClosest is bottle at index %d\nfoundMatch is %d\nThe similarity is %.3f\n", + int(resultFalse.bottleDropIndex), + resultFalse.foundMatch, + resultFalse.similarity); + + return 0; +} \ No newline at end of file diff --git a/tests/integration/cv_pipeline.cpp b/tests/integration/cv_pipeline.cpp new file mode 100644 index 00000000..77e71d2a --- /dev/null +++ b/tests/integration/cv_pipeline.cpp @@ -0,0 +1,105 @@ +#include + +#include +#include + +#include "cv/pipeline.hpp" + +// Download these test images from one of the zips here https://drive.google.com/drive/u/1/folders/1opXBWx6bBF7J3-JSFtrhfFIkYis9qhGR +// Or, any cropped not-stolen images will work + +// this image should be located at a relative path to the CMake build dir +const std::string imagePath = "mock_image.jpg"; + +const std::string refImagePath0 = "../bin/test/test/000000910.jpg"; +const std::string refImagePath1 = 
"../bin/test/test/000000920.jpg"; +const std::string refImagePath2 = "../bin/test/test/000000003.jpg"; +const std::string refImagePath3 = "../bin/test/test/000000004.jpg"; +const std::string refImagePath4 = "../bin/test/test/000000005.jpg"; + +// matching model can be downloaded from here: https://drive.google.com/drive/folders/1ciDfycNyJiLvRhJhwQZoeKH7vgV6dGHJ?usp=drive_link +const std::string matchingModelPath = "../models/target_siamese_1.pt"; +// segmentation model can be downloaded from here: https://drive.google.com/file/d/1U2EbfJFzcjVnjTuD6ud-bIf8YOiEassf/view?usp=drive_link +const std::string segmentationModelPath = "../models/fcn.pth"; + +// mock telemetry data +const double latitude = 38.31568; +const double longitude = 76.55006; +const double altitude = 75; +const double airspeed = 20; +const double yaw = 100; +const double pitch = 5; +const double roll = 3; + +// integration test to test all stages of the CV pipeline +// with an arbitrary image as input +int main() { + cv::Mat image = cv::imread(imagePath); + ImageTelemetry mockTelemetry(latitude, longitude, altitude, airspeed, + yaw, pitch, roll); + ImageData imageData("mock_image", imagePath, image, mockTelemetry); + + std::array bottlesToDrop; + + Bottle bottle1; + bottle1.set_shapecolor(ODLCColor::Red); + bottle1.set_shape(ODLCShape::Circle); + bottle1.set_alphanumericcolor(ODLCColor::Orange); + bottle1.set_alphanumeric("J"); + bottlesToDrop[0] = bottle1; + + Bottle bottle2; + bottle2.set_shapecolor(ODLCColor::Blue); + bottle2.set_shape(ODLCShape::Circle); + bottle2.set_alphanumericcolor(ODLCColor::Orange); + bottle2.set_alphanumeric("G"); + bottlesToDrop[1] = bottle2; + + Bottle bottle3; + bottle3.set_shapecolor(ODLCColor::Red); + bottle3.set_shape(ODLCShape::Circle); + bottle3.set_alphanumericcolor(ODLCColor::Blue); + bottle3.set_alphanumeric("X"); + bottlesToDrop[2] = bottle3; + + Bottle bottle4; + bottle4.set_shapecolor(ODLCColor::Red); + bottle4.set_shape(ODLCShape::Circle); + bottle4.set_alphanumericcolor(ODLCColor::Blue); + bottle4.set_alphanumeric("F"); + bottlesToDrop[3] = bottle4; + + Bottle bottle5; + bottle5.set_shapecolor(ODLCColor::Green); + bottle5.set_shape(ODLCShape::Circle); + bottle5.set_alphanumericcolor(ODLCColor::Black); + bottle5.set_alphanumeric("F"); + bottlesToDrop[4] = bottle5; + + std::vector> referenceImages; + cv::Mat ref0 = cv::imread(refImagePath0); + referenceImages.push_back(std::make_pair(ref0, BottleDropIndex(4))); + cv::Mat ref1 = cv::imread(refImagePath1); + referenceImages.push_back(std::make_pair(ref1, BottleDropIndex(3))); + cv::Mat ref2 = cv::imread(refImagePath2); + referenceImages.push_back(std::make_pair(ref2, BottleDropIndex(2))); + cv::Mat ref3 = cv::imread(refImagePath3); + referenceImages.push_back(std::make_pair(ref3, BottleDropIndex(1))); + cv::Mat ref4 = cv::imread(refImagePath4); + referenceImages.push_back(std::make_pair(ref4, BottleDropIndex(0))); + + Pipeline pipeline(bottlesToDrop, referenceImages, matchingModelPath, segmentationModelPath); + + PipelineResults output = pipeline.run(imageData); + + size_t numTargets = output.matchedTargets.size() + + output.unmatchedTargets.size(); + size_t numMatches = output.matchedTargets.size(); + + LOG_F(INFO, "Found %ld targets", numTargets); + LOG_F(INFO, "Found %ld matches", numMatches); + + for (AirdropTarget& match: output.matchedTargets) { + LOG_F(INFO, "Found match assigned to bottle index %d\n", match.index()); + } +} \ No newline at end of file diff --git a/tests/integration/cv_segmentation.cpp 
b/tests/integration/cv_segmentation.cpp new file mode 100644 index 00000000..9bad7e01 --- /dev/null +++ b/tests/integration/cv_segmentation.cpp @@ -0,0 +1,72 @@ +#include + +#include + +#include "cv/segmentation.hpp" +#include "protos/obc.pb.h" +#include "utilities/constants.hpp" + +// target image paths, modify to use your targets +const std::vector targetPaths = { + "../tests/integration/images/target0.jpg", + "../tests/integration/images/target1.jpg", + "../tests/integration/images/target2.jpg", + "../tests/integration/images/target3.jpg", + "../tests/integration/images/target4.jpg" +}; + +// model weights path +// Can pull model weights by running "make pull_segmentation" +const std::string modelPath = "../models/fcn.pth"; + +int main(int argc, char* argv[]) { + Segmentation segmentation(modelPath); + CroppedTarget ct; + + int width = 0; + int height = 0; + std::vector targetImgs; + for (int i = 0; i < targetPaths.size(); i++) { + auto target = cv::imread(targetPaths[i]); + ct.croppedImage = target; + auto result = segmentation.segment(ct); + + cv::Mat characterMask(result.characterMask.size(), result.characterMask.type()); + result.characterMask.copyTo(characterMask); + cv::Mat shapeMask(result.shapeMask.size(), result.shapeMask.type()); + result.shapeMask.copyTo(shapeMask); + + targetImgs.push_back(new cv::Mat(target)); + targetImgs.push_back(new cv::Mat(characterMask)); + targetImgs.push_back(new cv::Mat(shapeMask)); + + height += target.rows; + if (width < target.cols) { + width = target.cols; + } + } + + cv::Mat canvas_input = cv::Mat::zeros(height, width, targetImgs[0]->type()); + cv::Mat canvas_masks = cv::Mat::zeros(height, width*2, targetImgs[1]->type()); + + int j = 0; + for (int i = 0; i < targetImgs.size(); i+=3) { + targetImgs[i]->copyTo(canvas_input(cv::Rect(cv::Point(0, j), targetImgs[i]->size()))); + targetImgs[i+1]->copyTo(canvas_masks(cv::Rect(cv::Point(0, j), targetImgs[i+1]->size()))); + targetImgs[i+2]->copyTo(canvas_masks(cv::Rect(cv::Point(targetImgs[i+1]->cols, j), targetImgs[i+2]->size()))); + + j += targetImgs[i]->rows; + + delete targetImgs[i]; + delete targetImgs[i+1]; + delete targetImgs[i+2]; + } + + cv::namedWindow("targets"); + cv::imshow("targets", canvas_input); + cv::namedWindow("character masks, shape masks"); + cv::imshow("character masks, shape masks", canvas_masks); + cv::waitKey(0); + + return 0; +} \ No newline at end of file diff --git a/tests/integration/images/.gitkeep b/tests/integration/images/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/load_torchvision_model.cpp b/tests/integration/load_torchvision_model.cpp index dfb36366..2e0e9340 100644 --- a/tests/integration/load_torchvision_model.cpp +++ b/tests/integration/load_torchvision_model.cpp @@ -3,6 +3,8 @@ #include #include +#include + /* * Test loading a model that uses torchvision operators * bin/load_torchvision_model [model_file] @@ -10,17 +12,17 @@ */ int main (int argc, char *argv[]) { if (argc < 2) { - std::cout << "did not provide model_file as argument" << std::endl; + LOG_F(ERROR, "did not provide model_file as argument"); return 1; } torch::jit::script::Module module; try { module = torch::jit::load(argv[1]); } catch (const c10::Error& e) { - std::cerr << "error loading the model: : " << e.msg() << std::endl; + LOG_S(ERROR) << "error loading the model: : " << e.msg() << "\n"; return 1; } - std::cout << "loaded model without crashing" << std::endl; + LOG_F(INFO, "loaded model without crashing"); return 0; } \ No newline at end of file diff 
--git a/tests/integration/playground.cpp b/tests/integration/playground.cpp index f250f2e4..713cfc85 100644 --- a/tests/integration/playground.cpp +++ b/tests/integration/playground.cpp @@ -7,26 +7,27 @@ #include #include #include +#include // using json = nlohmann::json; int main (int argc, char *argv[]) { // test opencv - std::cout << "Testing opencv installation" << std::endl; + LOG_F(INFO, "Testing opencv installation\n"); cv::Mat opencv_mat = cv::Mat::eye(300, 300, CV_32F); - std::cout << "Rows: " << opencv_mat.rows << " Cols: " << opencv_mat.cols << std::endl; - + LOG_F(INFO, "Rows: %d Cols: %d\n", opencv_mat.rows, opencv_mat.cols); + // test matplot - std::cout << "Testing matplot installation" << std::endl; + LOG_F(INFO, "Testing matplot installation\n"); matplot::color c = matplot::color::black; - std::cout << "Black: " << static_cast(c) << std::endl; + LOG_F(INFO, "Black: %d\n", static_cast(c)); // test mavsdk - std::cout << "Testing mavsdk installation" << std::endl; + LOG_F(INFO, "Testing mavsdk installation\n"); mavsdk::Mavsdk mavsdk; std::string connection_address = "tcp://127.0.0.1:5760"; mavsdk::ConnectionResult conn_result = mavsdk.add_any_connection(connection_address); - std::cout << "Expected connection to be refused" << std::endl; + LOG_F(INFO, "Expected connection to be refused\n"); return 0; } \ No newline at end of file diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index 55c538ad..308c6b42 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -17,6 +17,7 @@ target_link_libraries(${BINARY} PUBLIC gtest_main) target_add_protobuf(${BINARY}) target_add_matplot(${BINARY}) target_add_opencv(${BINARY}) +target_add_torch(${BINARY}) target_add_httplib(${BINARY}) target_add_json(${BINARY}) target_add_mavsdk(${BINARY}) diff --git a/tests/unit/camera/mock.cpp b/tests/unit/camera/mock.cpp new file mode 100644 index 00000000..e76fbbba --- /dev/null +++ b/tests/unit/camera/mock.cpp @@ -0,0 +1,20 @@ +#include + +#include "camera/interface.hpp" +#include "camera/mock.hpp" + +// test that the mock camera returns a valid image +TEST(MockCamera, TakePicture) { + CameraConfiguration config({ + {"SampleConfigKey", 100}, + {"ExposureTime", 1000}, + }); + MockCamera camera(config); + + camera.connect(); + + camera.takePicture(); + ImageData image = camera.getLastPicture(); + + EXPECT_EQ(image.getData().size(), cv::Size(4000, 3000)); +} \ No newline at end of file diff --git a/tests/unit/cv/localization.cpp b/tests/unit/cv/localization.cpp new file mode 100644 index 00000000..6959d75a --- /dev/null +++ b/tests/unit/cv/localization.cpp @@ -0,0 +1,56 @@ +#include + +#include "cv/localization.hpp" + + +void assertLocalizationAccuracy( + const GPSCoord& expectedCoord, + const GPSCoord& predictedCoord, + // TODO: maybe we could specify the threshold in feet (or have a function to convert feet to relative lat/lon) + double latitudeDegThreshold = 0.00005, + double longitudeDegThreshold = 0.00005) { + + ASSERT_NEAR(expectedCoord.latitude(), predictedCoord.latitude(), latitudeDegThreshold); + ASSERT_NEAR(expectedCoord.longitude(), predictedCoord.longitude(), longitudeDegThreshold); +} + + +TEST(CVLocalization, LocalizationAccuracy) { + struct TestCase { + std::string name; + + ImageTelemetry inputImageTelemetry; + Bbox inputTargetBbox; + + GPSCoord expectedTargetCoord; + }; + + const std::vector testCases{{ + { + "directly underneath", + ImageTelemetry(0, 0, 100, 50, 0, 0, 0), + Bbox(1951, 1483, 1951, 1483), + makeGPSCoord(0, 0, 0), + }, + // TODO: move 
blender localization test cases here + // { + // "another test case", + // ImageTelemetry(0, 0, 100, 50, 0, 0, 0), + // Bbox(1951, 1483, 1951, 1483), + // makeGPSCoord(0, 0, 0), + // } + }}; + + for (const auto &testCase : testCases) { + ECEFLocalization ecefLocalizer; + GSDLocalization gsdLocalization; + + // NOTE: temporarily commenting out this assertion until we debug the ECEF localization algorithm + // GPSCoord ecefTargetCoord = ecefLocalizer.localize(testCase.inputImageTelemetry, testCase.inputTargetBbox); + // assertLocalizationAccuracy(testCase.expectedTargetCoord, ecefTargetCoord); + + // NOTE: this assertion is blocked on GSD implementation getting merged in + // GPSCoord gsdTargetCoord = gsdLocalization.localize(testCase.inputImageTelemetry, testCase.inputTargetBbox); + // assertLocalizationAccuracy(testCase.expectedTargetCoord, gsdTargetCoord); + }; +} diff --git a/tests/unit/cv/utilities_test.cpp b/tests/unit/cv/utilities_test.cpp new file mode 100644 index 00000000..1be7f837 --- /dev/null +++ b/tests/unit/cv/utilities_test.cpp @@ -0,0 +1,64 @@ +#include + +#include "cv/utilities.hpp" + +#include + +TEST(CVUtilities, CropValidAndInvalidSizes) { + struct TestCase { + std::string name; + cv::Mat fullSizeImg; + Bbox bbox; + cv::Mat expectedCroppedImg; + std::string expectedExceptionMsg; + }; + + const std::vector testCases{{ + { + "basic crop to 100x100", + // Note that opencv Mat contructor takes (rows, cols, type) + cv::Mat(1080, 1920, CV_8UC3), + Bbox{0, 0, 100, 100}, + cv::Mat(100, 100, CV_8UC3), + "" + }, + { + "no crop", + cv::Mat(1080, 1920, CV_8UC3), + Bbox{0, 0, 1920, 1080}, + cv::Mat(1080, 1920, CV_8UC3), + "" + }, + { + "crop to 0x0", + cv::Mat(1080, 1920, CV_8UC3), + Bbox{0, 0, 0, 0}, + cv::Mat(0, 0, CV_8UC3), + "" + }, + { + "bbox larger than original image", + cv::Mat(50, 50, CV_8UC3), + Bbox{0, 0, 100, 150}, + cv::Mat(0, 0, CV_8UC3), + // idk if it's best to do an equality check on the exception message. + // maybe it would be enough to check if an exception is thrown or not + "OpenCV(4.5.4) ./modules/core/src/matrix.cpp:810: error: (-215:Assertion failed) 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows in function 'Mat'\n" + }, + }}; + + for (const auto &testCase : testCases) { + LOG_F(INFO, "Test name: %s\n", testCase.name.c_str()); + + cv::Mat cropped; + try { + cropped = crop(testCase.fullSizeImg, testCase.bbox); + } catch(std::exception& e) { + EXPECT_EQ(testCase.expectedExceptionMsg, e.what()); + continue; + } + + EXPECT_EQ(cropped.rows, testCase.expectedCroppedImg.rows); + EXPECT_EQ(cropped.cols, testCase.expectedCroppedImg.cols); + }; +} diff --git a/tests/unit/dubins_test.cpp b/tests/unit/pathing/dubins_test.cpp similarity index 100% rename from tests/unit/dubins_test.cpp rename to tests/unit/pathing/dubins_test.cpp diff --git a/tests/unit/tree_test.cpp b/tests/unit/pathing/tree_test.cpp similarity index 100% rename from tests/unit/tree_test.cpp rename to tests/unit/pathing/tree_test.cpp