From 9165a4b69737038bd941646776d3c1fafb576733 Mon Sep 17 00:00:00 2001 From: Jim Wang Date: Tue, 13 Feb 2024 11:37:23 -0700 Subject: [PATCH] feat: init add capi yolov8 ensemble profile implementation Signed-off-by: Jim Wang --- .gitignore | 1 + Makefile | 12 +- configs/opencv-ovms/cmd_client/main.go | 26 +- .../capi_yolov8_ensemble/configuration.yaml | 19 + .../yolov8_custom_node.json | 85 + .../cmd_client/server/customNode.go | 90 + .../cmd_client/server/customNode_test.go | 67 + .../server/test-yolov8_custom_node.json | 85 + .../opencv-ovms/envs/capi_yolov8_ensemble.env | 13 + configs/opencv-ovms/gst_capi/Makefile | 13 +- .../pipelines/capi_yolov8_ensemble/Makefile | 28 + .../capi_yolov8_ensemble/buffersqueue.cpp | 66 + .../capi_yolov8_ensemble/buffersqueue.hpp | 49 + .../custom_node_interface.h | 78 + .../custom_node_library_internal_manager.cpp | 78 + .../custom_node_library_internal_manager.hpp | 60 + .../efficientnetb0_node.cpp | 630 +++++ .../efficientnetb0_node_debug.cpp | 910 +++++++ .../pipelines/capi_yolov8_ensemble/main.cpp | 2239 +++++++++++++++++ .../capi_yolov8_ensemble/opencv_utils.hpp | 163 ++ .../pipelines/capi_yolov8_ensemble/queue.hpp | 141 ++ .../pipelines/capi_yolov8_ensemble/utils.hpp | 145 ++ .../capi_yolov8_ensemble/yolo_efficientnet.sh | 71 + .../yolo_efficientnet_dual.sh | 93 + .../models/2022/config_template.json | 13 +- download_models/Dockerfile.yolov8-download | 42 + download_models/downloadOVMSModels.sh | 19 + download_models/model_build/convert-model.py | 24 + download_models/model_build/convert-model.sh | 36 + download_models/model_build/download-tools.sh | 6 + download_models/model_build/quantize-model.py | 343 +++ 31 files changed, 5635 insertions(+), 10 deletions(-) create mode 100644 configs/opencv-ovms/cmd_client/res/capi_yolov8_ensemble/configuration.yaml create mode 100644 configs/opencv-ovms/cmd_client/res/capi_yolov8_ensemble/yolov8_custom_node.json create mode 100644 configs/opencv-ovms/cmd_client/server/customNode.go create mode 100644 configs/opencv-ovms/cmd_client/server/customNode_test.go create mode 100644 configs/opencv-ovms/cmd_client/server/test-yolov8_custom_node.json create mode 100644 configs/opencv-ovms/envs/capi_yolov8_ensemble.env create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/Makefile create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/buffersqueue.cpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/buffersqueue.hpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_interface.h create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_library_internal_manager.cpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_library_internal_manager.hpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/efficientnetb0_node.cpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/efficientnetb0_node_debug.cpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/main.cpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/opencv_utils.hpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/queue.hpp create mode 100644 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/utils.hpp create mode 100755 configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/yolo_efficientnet.sh create mode 100755 
configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/yolo_efficientnet_dual.sh create mode 100644 download_models/Dockerfile.yolov8-download create mode 100644 download_models/model_build/convert-model.py create mode 100755 download_models/model_build/convert-model.sh create mode 100755 download_models/model_build/download-tools.sh create mode 100644 download_models/model_build/quantize-model.py diff --git a/.gitignore b/.gitignore index 035455c4..23cd25b2 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ configs/opencv-ovms/models/2022/person-detection-retail-0013/ configs/opencv-ovms/models/2022/person_vehicle_bike_detection_2000/ configs/opencv-ovms/models/2022/person-vehicle-bike-detection-2000/ configs/opencv-ovms/models/2022/text-detect-0002/ +configs/opencv-ovms/models/2022/yolov8/ configs/opencv-ovms/grpc_go/results/ configs/opencv-ovms/cmd_client/ovms-client configs/opencv-ovms/cmd_client/profile-launcher diff --git a/Makefile b/Makefile index 2e8e81cb..bb9e2da9 100644 --- a/Makefile +++ b/Makefile @@ -2,12 +2,12 @@ # SPDX-License-Identifier: Apache-2.0 .PHONY: build-dlstreamer build-dlstreamer-realsense build-grpc-python build-grpc-go build-python-apps build-telegraf -.PHONY: build-capi_face_detection build-capi_yolov5 build-capi_yolov5_ensemble +.PHONY: build-capi_face_detection build-capi_yolov5 build-capi_yolov5_ensemble build-capi_yolov8_ensemble .PHONY: run-camera-simulator run-telegraf run-portainer run-pipelines .PHONY: clean-grpc-go clean-segmentation clean-ovms clean-all clean-results clean-telegraf clean-models clean-webcam .PHONY: clean-ovms-server-configs clean-ovms-server .PHONY: down-portainer down-pipelines -.PHONY: clean clean-simulator clean-object-detection clean-classification clean-gst clean-capi_face_detection clean-capi_yolov5 clean-capi_yolov5_ensemble +.PHONY: clean clean-simulator clean-object-detection clean-classification clean-gst clean-capi_face_detection clean-capi_yolov5 clean-capi_yolov5_ensemble clean-capi_yolov8_ensemble .PHONY: list-profiles .PHONY: unit-test-profile-launcher build-profile-launcher profile-launcher-status clean-profile-launcher webcam-rtsp .PHONY: clean-test @@ -60,7 +60,7 @@ build-profile-launcher: build-ovms-server: HTTPS_PROXY=${HTTPS_PROXY} HTTP_PROXY=${HTTP_PROXY} docker pull openvino/model_server:2023.1-gpu -clean-profile-launcher: clean-grpc-python clean-grpc-go clean-segmentation clean-object-detection clean-classification clean-gst clean-capi_face_detection clean-test clean-capi_yolov5 clean-capi_yolov5_ensemble +clean-profile-launcher: clean-grpc-python clean-grpc-go clean-segmentation clean-object-detection clean-classification clean-gst clean-capi_face_detection clean-test clean-capi_yolov5 clean-capi_yolov5_ensemble clean-capi_yolov8_ensemble @echo "containers launched by profile-launcher are cleaned up." 
@pkill -9 profile-launcher || true @@ -103,6 +103,9 @@ clean-capi_yolov5: clean-capi_yolov5_ensemble: ./clean-containers.sh capi_yolov5_ensemble +clean-capi_yolov8_ensemble: + ./clean-containers.sh capi_yolov8_ensemble + clean-telegraf: ./clean-containers.sh influxdb2 ./clean-containers.sh telegraf @@ -164,6 +167,9 @@ build-capi_yolov5: build-profile-launcher build-capi_yolov5_ensemble: build-profile-launcher cd configs/opencv-ovms/gst_capi && DGPU_TYPE=$(DGPU_TYPE) $(MAKE) build_capi_yolov5_ensemble +build-capi_yolov8_ensemble: build-profile-launcher + cd configs/opencv-ovms/gst_capi && DGPU_TYPE=$(DGPU_TYPE) $(MAKE) build_capi_yolov8_ensemble + clean-docs: rm -rf docs/ diff --git a/configs/opencv-ovms/cmd_client/main.go b/configs/opencv-ovms/cmd_client/main.go index 93119231..08286af4 100644 --- a/configs/opencv-ovms/cmd_client/main.go +++ b/configs/opencv-ovms/cmd_client/main.go @@ -1,5 +1,5 @@ // ---------------------------------------------------------------------------------- -// Copyright 2023 Intel Corp. +// Copyright 2024 Intel Corp. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -124,6 +124,7 @@ type DockerLauncherInfo struct { type OvmsClientInfo struct { DockerLauncher DockerLauncherInfo + OVMSCustomNodeJson string PipelineScript string PipelineInputArgs string PipelineStreamDensityRun string @@ -192,7 +193,7 @@ func main() { // as the client itself has it like C-Api case if ovmsClientConf.OvmsSingleContainer { log.Println("running in single container mode, no distributed client-server") - ovmsClientConf.generateConfigJsonForCApi() + ovmsClientConf.generateConfigJsonForCApi(flags.configDir) } else { // launcher ovms server ovmsClientConf.startOvmsServer() @@ -273,10 +274,26 @@ func (ovmsClientConf *OvmsClientConfig) setEnvContainerCountAndGrpcPort() { log.Println("GRPC_PORT=", os.Getenv(GRPC_PORT_ENV)) } -func (ovmsClientConf *OvmsClientConfig) generateConfigJsonForCApi() { +func (ovmsClientConf *OvmsClientConfig) generateConfigJsonForCApi(configDir string) { log.Println("generate and update config json file for C-API case...") - deviceUpdater := server.NewDeviceUpdater(ovmsConfigJsonDir, ovmsTemplateConfigJson) + customNodeJsonConfigFile := strings.TrimSpace(ovmsClientConf.OvmsClient.OVMSCustomNodeJson) + + deviceConfigJsonFileInput := ovmsTemplateConfigJson + if len(customNodeJsonConfigFile) > 0 { + log.Printf("use custom node json from %s", customNodeJsonConfigFile) + pipelineProfile := strings.TrimSpace(os.Getenv(pipelineProfileEnv)) + customNodeJsonConfigFilePath := filepath.Join(configDir, resourceDir, pipelineProfile, customNodeJsonConfigFile) + customNodeUpdater := server.NewCustomNodeUpdater(customNodeJsonConfigFilePath, ovmsConfigJsonDir, ovmsTemplateConfigJson) + newUpdateConfigJson := "config_ovms-server_" + ovmsClientConf.OvmsClient.DockerLauncher.ContainerName + os.Getenv(CID_COUNT_ENV) + ".json" + if err := customNodeUpdater.UpdateCustomNode(filepath.Join(ovmsConfigJsonDir, newUpdateConfigJson)); err != nil { + log.Printf("Error: failed to update custom node information and produce a new ovms server config json: %v", err) + os.Exit(1) + } + deviceConfigJsonFileInput = newUpdateConfigJson + } + + deviceUpdater := server.NewDeviceUpdater(ovmsConfigJsonDir, deviceConfigJsonFileInput) targetDevice := defaultTargetDevice if len(os.Getenv(TARGET_DEVICE_ENV)) > 0 { // only set the value from env if env is not empty; otherwise defaults to the default value in defaultTargetDevice 
@@ -287,7 +304,6 @@ func (ovmsClientConf *OvmsClientConfig) generateConfigJsonForCApi() { log.Println("Updating config with DEVICE environment variable:", targetDevice) newUpdateConfigJson := "config_ovms-server_" + ovmsClientConf.OvmsClient.DockerLauncher.ContainerName + os.Getenv(CID_COUNT_ENV) + ".json" - if err := deviceUpdater.UpdateDeviceAndCreateJson(targetDevice, filepath.Join(ovmsConfigJsonDir, newUpdateConfigJson)); err != nil { log.Printf("Error: failed to update device and produce a new ovms server config json: %v", err) os.Exit(1) diff --git a/configs/opencv-ovms/cmd_client/res/capi_yolov8_ensemble/configuration.yaml b/configs/opencv-ovms/cmd_client/res/capi_yolov8_ensemble/configuration.yaml new file mode 100644 index 00000000..21e01911 --- /dev/null +++ b/configs/opencv-ovms/cmd_client/res/capi_yolov8_ensemble/configuration.yaml @@ -0,0 +1,19 @@ +OvmsSingleContainer: true +OvmsClient: + DockerLauncher: + Script: docker-launcher.sh + DockerImage: openvino/model_server-capi-gst-ovms-capi_yolov8_ensemble:latest + ContainerName: capi_yolov8_ensemble + Volumes: + - "$cl_cache_dir:/home/intel/gst-ovms/.cl-cache" + - /tmp/.X11-unix:/tmp/.X11-unix + - "$RUN_PATH/sample-media/:/home/intel/gst-ovms/vids" + - "$RUN_PATH/configs/opencv-ovms/gst_capi/extensions:/home/intel/gst-ovms/extensions" + - "$RUN_PATH/results:/tmp/results" + - "$RUN_PATH/configs/opencv-ovms/models/2022/:/models" + # when the OVMSCustomNodeJson is not empty, then it will add or replace the existing customNode info for CAPI-OVMS server config + OVMSCustomNodeJson: yolov8_custom_node.json + PipelineScript: ./run_gst_capi.sh + PipelineInputArgs: "" # space delimited like we run the script in command and take those input arguments + EnvironmentVariableFiles: + - capi_yolov8_ensemble.env diff --git a/configs/opencv-ovms/cmd_client/res/capi_yolov8_ensemble/yolov8_custom_node.json b/configs/opencv-ovms/cmd_client/res/capi_yolov8_ensemble/yolov8_custom_node.json new file mode 100644 index 00000000..905b401f --- /dev/null +++ b/configs/opencv-ovms/cmd_client/res/capi_yolov8_ensemble/yolov8_custom_node.json @@ -0,0 +1,85 @@ +{ +"custom_node_library_config_list": [ + { + "name": "efficientnetb0_extractor", + "base_path": "/ovms/lib/libcustom_node_efficientnetb0-yolov8.so" + } +], +"pipeline_config_list": [ + { + "name": "detect_classify", + "inputs": ["images"], + "nodes": [ + { + "name": "detection_node", + "model_name": "yolov8", + "type": "DL model", + "inputs": [ + {"images": {"node_name": "request", + "data_item": "images"} + } + ], + "outputs": [ + {"data_item": "output0", + "alias": "boxes"} + ] + }, + { + "name": "extract_node", + "library_name": "efficientnetb0_extractor", + "type": "custom", + "demultiply_count": 0, + "params": { + "original_image_width": "416", + "original_image_height": "416", + "original_image_layout": "NCHW", + "target_image_width": "224", + "target_image_height": "224", + "target_image_layout": "NCHW", + "convert_to_gray_scale": "false", + "confidence_threshold": "0.5", + "max_output_batch": "100", + "debug": "false" + }, + "inputs": [ + {"images": {"node_name": "request", + "data_item": "images"}}, + {"boxes": {"node_name": "detection_node", + "data_item": "boxes"}} + ], + "outputs": [ + {"data_item": "roi_images", + "alias": "roi_images"}, + {"data_item": "roi_coordinates", + "alias": "roi_coordinates"}, + {"data_item": "confidence_levels", + "alias": "confidence_levels"} + ] + }, + { + "name": "classification_node", + "model_name": "efficientnetb0_FP32INT8", + "type": "DL model", + "inputs": 
[
+ {"sub": {"node_name": "extract_node",
+ "data_item": "roi_images"}}
+ ],
+ "outputs": [
+ {"data_item": "efficientnet-b0/model/head/dense/BiasAdd/Add",
+ "alias": "classify_output"}
+ ]
+ }
+ ],
+ "outputs": [
+ {"roi_images": {"node_name": "extract_node",
+ "data_item": "roi_images"}},
+ {"roi_coordinates": {"node_name": "extract_node",
+ "data_item": "roi_coordinates"}},
+ {"confidence_levels": {"node_name": "extract_node",
+ "data_item": "confidence_levels"}},
+ {"classify_output": {"node_name": "classification_node",
+ "data_item": "classify_output"}}
+ ]
+ }
+]
+}
\ No newline at end of file
diff --git a/configs/opencv-ovms/cmd_client/server/customNode.go b/configs/opencv-ovms/cmd_client/server/customNode.go
new file mode 100644
index 00000000..a9a45db0
--- /dev/null
+++ b/configs/opencv-ovms/cmd_client/server/customNode.go
@@ -0,0 +1,90 @@
+// ----------------------------------------------------------------------------------
+// Copyright 2024 Intel Corp.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// ----------------------------------------------------------------------------------
+
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+type CustomNodeUpdater struct {
+	customNodeConfigJsonFile   string
+	templateConfigJsonDir      string
+	templateConfigJsonFileName string
+}
+
+type OvmsCustomNodeConfig struct {
+	CustomNode     []map[string]interface{} `json:"custom_node_library_config_list"`
+	PipelineConfig []map[string]interface{} `json:"pipeline_config_list"`
+}
+
+func NewCustomNodeUpdater(customNodeConfigJsonFile string, templateConfigJsonDir, templateConfigJsonFileName string) *CustomNodeUpdater {
+	return &CustomNodeUpdater{
+		customNodeConfigJsonFile:   customNodeConfigJsonFile,
+		templateConfigJsonDir:      templateConfigJsonDir,
+		templateConfigJsonFileName: templateConfigJsonFileName,
+	}
+}
+
+// UpdateCustomNode adds (if absent) or replaces the custom_node_library_config_list and related json information from customNodeConfigJsonFile in
+// the template config json file and then produces a new config json file named by the input newConfigJsonFile;
+// it returns an error if the json cannot be parsed or the new config json file cannot be produced
+func (cu *CustomNodeUpdater) UpdateCustomNode(newConfigJsonFile string) error {
+	templateConfigJsonFile := filepath.Join(cu.templateConfigJsonDir, cu.templateConfigJsonFileName)
+	contents, err := os.ReadFile(templateConfigJsonFile)
+	if err != nil {
+		return fmt.Errorf("CustomNodeUpdater failed to read OVMS template config json file %s: %v", templateConfigJsonFile, err)
+	}
+
+	var ovmsConfigData OvmsConfig
+	err = json.Unmarshal(contents, &ovmsConfigData)
+	if err != nil {
+		return fmt.Errorf("CustomNodeUpdater parsing OVMS template config json error: %v", err)
+	}
+
+	// read also the customNodeConfigJsonFile:
+	customNodeConfig, err := os.ReadFile(cu.customNodeConfigJsonFile)
+	if err != nil {
+		return fmt.Errorf("CustomNodeUpdater failed to read OVMS custom node json file %s: %v",
			cu.customNodeConfigJsonFile, err)
+	}
+
+	var customNodeData OvmsCustomNodeConfig
+	err = json.Unmarshal(customNodeConfig, &customNodeData)
+	if err != nil {
+		return fmt.Errorf("CustomNodeUpdater parsing OVMS custom node json error: %v", err)
+	}
+
+	if len(customNodeData.CustomNode) > 0 {
+		// replacing:
+		ovmsConfigData.CustomNode = customNodeData.CustomNode
+		ovmsConfigData.PipelineConfig = customNodeData.PipelineConfig
+	}
+
+	updateConfig, err := json.MarshalIndent(ovmsConfigData, "", " ")
+	if err != nil {
+		return fmt.Errorf("CustomNodeUpdater could not marshal config to JSON: %v", err)
+	}
+
+	if err := os.WriteFile(newConfigJsonFile, updateConfig, 0644); err != nil {
+		return fmt.Errorf("CustomNodeUpdater could not write an updated config to a new JSON file: %v", err)
+	}
+
+	return nil
+}
diff --git a/configs/opencv-ovms/cmd_client/server/customNode_test.go b/configs/opencv-ovms/cmd_client/server/customNode_test.go
new file mode 100644
index 00000000..ca3b7dd6
--- /dev/null
+++ b/configs/opencv-ovms/cmd_client/server/customNode_test.go
@@ -0,0 +1,67 @@
+// ----------------------------------------------------------------------------------
+// Copyright 2024 Intel Corp.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// ----------------------------------------------------------------------------------
+
+package server
+
+import (
+	"os"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestCustomNodes(t *testing.T) {
+	tests := []struct {
+		name                  string
+		customNodeJsonFile    string
+		templateJsonFileDir   string
+		templateJsonFileName  string
+		newJsonFileName       string
+		expectError           bool
+		expectedCustomNodeLib string
+	}{
+		{"add a new test custom node", "./test-yolov8_custom_node.json", ".", "test-update-device.json", "./add-test-yolov8.json", false, "/ovms/lib/libcustom_node_efficientnetb0-yolov8.so"},
+		{"replacing an existing custom node", "./test-yolov8_custom_node.json", ".", "test-customnode.json", "./a-replacing-test-yolov8.json", false, "/ovms/lib/libcustom_node_efficientnetb0-yolov8.so"},
+		{"non-existing custom node file", "./non-existing", ".", "test-update-device.json", "./failed-non-existing.json", true, ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			customNodeUpdater := NewCustomNodeUpdater(tt.customNodeJsonFile, tt.templateJsonFileDir, tt.templateJsonFileName)
+
+			err := customNodeUpdater.UpdateCustomNode(tt.newJsonFileName)
+			defer func() {
+				_ = os.Remove(tt.newJsonFileName)
+			}()
+
+			if !tt.expectError {
+				require.NoError(t, err)
+				require.NotEmpty(t, customNodeUpdater)
+				require.FileExists(t, tt.newJsonFileName)
+				newData, readErr := os.ReadFile(tt.newJsonFileName)
+				require.NoError(t, readErr)
+				require.Contains(t, string(newData), "\"base_path\": \""+tt.expectedCustomNodeLib+"\"")
+				if tt.templateJsonFileName == "test-customnode.json" {
+					require.Contains(t, string(newData), "\"custom_node_library_config_list\":")
+					require.Contains(t, string(newData), "\"pipeline_config_list\":")
+				}
+			} else {
require.Error(t, err) + } + }) + } +} diff --git a/configs/opencv-ovms/cmd_client/server/test-yolov8_custom_node.json b/configs/opencv-ovms/cmd_client/server/test-yolov8_custom_node.json new file mode 100644 index 00000000..9d24d12f --- /dev/null +++ b/configs/opencv-ovms/cmd_client/server/test-yolov8_custom_node.json @@ -0,0 +1,85 @@ +{ +"custom_node_library_config_list": [ + { + "name": "efficientnetb0_extractor", + "base_path": "/ovms/lib/libcustom_node_efficientnetb0-yolov8.so" + } +], +"pipeline_config_list": [ + { + "name": "detect_classify", + "inputs": ["images"], + "nodes": [ + { + "name": "detection_node", + "model_name": "yolov8", + "type": "DL model", + "inputs": [ + {"images": {"node_name": "request", + "data_item": "images"} + } + ], + "outputs": [ + {"data_item": "output0", + "alias": "boxes"} + ] + }, + { + "name": "extract_node", + "library_name": "efficientnetb0_extractor", + "type": "custom", + "demultiply_count": 0, + "params": { + "original_image_width": "416", + "original_image_height": "416", + "original_image_layout": "NCHW", + "target_image_width": "224", + "target_image_height": "224", + "target_image_layout": "NCHW", + "convert_to_gray_scale": "false", + "confidence_threshold": "0.5", + "max_output_batch": "100", + "debug": "false" + }, + "inputs": [ + {"images": {"node_name": "request", + "data_item": "images"}}, + {"boxes": {"node_name": "detection_node", + "data_item": "boxes"}} + ], + "outputs": [ + {"data_item": "roi_images", + "alias": "roi_images"}, + {"data_item": "roi_coordinates", + "alias": "roi_coordinates"}, + {"data_item": "confidence_levels", + "alias": "confidence_levels"} + ] + }, + { + "name": "classification_node", + "model_name": "efficientnetb0", + "type": "DL model", + "inputs": [ + {"sub": {"node_name": "extract_node", + "data_item": "roi_images"}} + ], + "outputs": [ + {"data_item": "efficientnet-b0/model/head/dense/BiasAdd/Add", + "alias": "classify_output"} + ] + } + ], + "outputs": [ + {"roi_images": {"node_name": "extract_node", + "data_item": "roi_images"}}, + {"roi_coordinates": {"node_name": "extract_node", + "data_item": "roi_coordinates"}}, + {"confidence_levels": {"node_name": "extract_node", + "data_item": "confidence_levels"}}, + {"classify_output": {"node_name": "classification_node", + "data_item": "classify_output"}} + ] + } +] +} \ No newline at end of file diff --git a/configs/opencv-ovms/envs/capi_yolov8_ensemble.env b/configs/opencv-ovms/envs/capi_yolov8_ensemble.env new file mode 100644 index 00000000..8dc75512 --- /dev/null +++ b/configs/opencv-ovms/envs/capi_yolov8_ensemble.env @@ -0,0 +1,13 @@ +RENDER_PORTRAIT_MODE=0 +GST_DEBUG=1 +USE_ONEVPL=1 +PIPELINE_EXEC_PATH=pipelines/capi_yolov8_ensemble/capi_yolov8_ensemble +GST_VAAPI_DRM_DEVICE=/dev/dri/renderD128 +TARGET_GPU_DEVICE=--privileged +INPUT_TYPE=RTSP_H264 +LOG_LEVEL=0 +RENDER_MODE=1 +cl_cache_dir=/home/intel/gst-ovms/.cl-cache +WINDOW_WIDTH=1280 +WINDOW_HEIGHT=720 +DETECTION_THRESHOLD=0.5 diff --git a/configs/opencv-ovms/gst_capi/Makefile b/configs/opencv-ovms/gst_capi/Makefile index 2f39261a..4a11dc0a 100644 --- a/configs/opencv-ovms/gst_capi/Makefile +++ b/configs/opencv-ovms/gst_capi/Makefile @@ -40,4 +40,15 @@ build_capi_yolov5_ensemble: --build-arg DGPU_TYPE=$(DGPU_TYPE) \ --build-arg PIPELINE_NAME=capi_yolov5_ensemble \ --progress=plain \ - -t $(OVMS_CPP_DOCKER_IMAGE)-capi-gst-ovms-capi_yolov5_ensemble:$(OVMS_CPP_IMAGE_TAG) \ No newline at end of file + -t $(OVMS_CPP_DOCKER_IMAGE)-capi-gst-ovms-capi_yolov5_ensemble:$(OVMS_CPP_IMAGE_TAG) + 
+build_capi_yolov8_ensemble: + docker build $(NO_CACHE_OPTION) -f Dockerfile.ovms-capi-gst ../ \ + --build-arg http_proxy=$(HTTP_PROXY) \ + --build-arg https_proxy="$(HTTPS_PROXY)" \ + --build-arg no_proxy=$(NO_PROXY) \ + --build-arg BASE_IMAGE=ubuntu:22.04 \ + --build-arg DGPU_TYPE=$(DGPU_TYPE) \ + --build-arg PIPELINE_NAME=capi_yolov8_ensemble \ + --progress=plain \ + -t $(OVMS_CPP_DOCKER_IMAGE)-capi-gst-ovms-capi_yolov8_ensemble:$(OVMS_CPP_IMAGE_TAG) diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/Makefile b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/Makefile new file mode 100644 index 00000000..c74da114 --- /dev/null +++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/Makefile @@ -0,0 +1,28 @@ +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +CV_LIBS = -L/usr/lib/x86_64-linux-gnu/ -L/usr/local/lib/x86_64-linux-gnu/ -L/ovms/lib/ +CV_INCLUDES = -I/usr/include/opencv4 -I/usr/include/gstreamer-1.0 +CUSTOM_NODE_FLAGS = -fpic -O2 -U_FORTIFY_SOURCE -fstack-protector -fno-omit-frame-pointer -D_FORTIFY_SOURCE=1 -fno-strict-overflow -Wall -Wno-unknown-pragmas -Wno-error=sign-compare -fno-delete-null-pointer-checks -fwrapv -fstack-clash-protection -Wformat -Wformat-security + +build: + g++ -c -std=c++17 efficientnetb0_node.cpp ${CUSTOM_NODE_FLAGS} ${CV_INCLUDES} + + g++ -std=c++17 -shared ${CUSTOM_NODE_FLAGS} -o libcustom_node_efficientnetb0-yolov8.so efficientnetb0_node.o ${CV_LIBS} ${CV_INCLUDES} -lopencv_core -lopencv_dnn -lopencv_imgproc -lopencv_imgcodecs + cp libcustom_node_efficientnetb0-yolov8.so /ovms/lib/ + + g++ -std=c++17 main.cpp -I/usr/include/gstreamer-1.0/usr/lib/x86_64-linux-gnu/ -I/usr/local/include/gstreamer-1.0 -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -I/ovms/include $(CV_INCLUDES) $(CV_LIBS) -L/usr/lib/x86_64-linux-gnu/gstreamer-1.0 -lgstbase-1.0 -lgobject-2.0 -lglib-2.0 -lgstreamer-1.0 -lgstapp-1.0 -lopencv_imgcodecs -lopencv_imgproc -lopencv_core -lovms_shared -lopencv_highgui -lopencv_dnn -lpthread -fPIC --std=c++17 -o capi_yolov8_ensemble + diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/buffersqueue.cpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/buffersqueue.cpp new file mode 100644 index 00000000..d338eac2 --- /dev/null +++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/buffersqueue.cpp @@ -0,0 +1,66 @@ +//***************************************************************************** +// Copyright 2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include "buffersqueue.hpp"
+
+#include <iostream>
+
+namespace ovms {
+namespace custom_nodes_common {
+BuffersQueue::BuffersQueue(size_t singleBufferSize, int streamsLength) :
+    Queue(streamsLength),
+    singleBufferSize(singleBufferSize),
+    size(singleBufferSize * streamsLength),
+    memoryPool(std::make_unique<char[]>(size)) {
+    for (int i = 0; i < streamsLength; ++i) {
+        inferRequests.push_back(memoryPool.get() + i * singleBufferSize);
+    }
+}
+
+BuffersQueue::~BuffersQueue() {}
+
+void* BuffersQueue::getBuffer() {
+    // can be easily switched to async version if need arises
+    auto idleId = tryToGetIdleStream();
+    if (idleId.has_value()) {
+        return getInferRequest(idleId.value());
+    }
+    return nullptr;
+}
+
+bool BuffersQueue::returnBuffer(void* buffer) {
+    if ((static_cast<char*>(buffer) < memoryPool.get()) ||
+        ((memoryPool.get() + size - 1) < buffer) ||
+        ((static_cast<char*>(buffer) - memoryPool.get()) % singleBufferSize != 0)) {
+        return false;
+    }
+    returnStream(getBufferId(buffer));
+    return true;
+}
+
+int BuffersQueue::getBufferId(void* buffer) {
+    return (static_cast<char*>(buffer) - memoryPool.get()) / singleBufferSize;
+}
+
+const size_t BuffersQueue::getSize() {
+    return this->size;
+}
+
+const size_t BuffersQueue::getSingleBufferSize() {
+    return this->singleBufferSize;
+}
+}  // namespace custom_nodes_common
+}  // namespace ovms
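For context on the file above: BuffersQueue carves one contiguous allocation into fixed-size slices, so execute() calls can reuse output buffers instead of paying a malloc/free per request. A minimal standalone usage sketch follows (hypothetical caller, not part of this patch; the sizes are arbitrary):

    #include <cstring>
    #include <iostream>
    #include "buffersqueue.hpp"

    int main() {
        // pool of 4 slices, 1 KiB each, carved out of a single 4 KiB allocation
        ovms::custom_nodes_common::BuffersQueue pool(1024, 4);
        void* buf = pool.getBuffer();   // non-blocking; nullptr when all 4 slices are busy
        if (buf != nullptr) {
            std::memset(buf, 0, 1024);  // use the slice as scratch space
            pool.returnBuffer(buf);     // slice goes back to the idle set
        }
        std::cout << "pool bytes: " << pool.getSize() << std::endl;  // prints 4096
        return 0;
    }

Note that returnBuffer() rejects pointers that are outside the pool or not aligned to a slice start, so callers can safely probe whether a given pointer is pool-owned.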
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/buffersqueue.hpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/buffersqueue.hpp
new file mode 100644
index 00000000..8e31b316
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/buffersqueue.hpp
@@ -0,0 +1,49 @@
+//*****************************************************************************
+// Copyright 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <condition_variable>
+#include <cstdint>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <queue>
+#include <vector>
+
+#include "../../queue.hpp"
+
+namespace ovms {
+namespace custom_nodes_common {
+
+class BuffersQueue : protected Queue<char*> {
+    size_t singleBufferSize;
+    size_t size;
+    std::unique_ptr<char[]> memoryPool;
+
+public:
+    BuffersQueue(size_t singleBufferSize, int streamsLength);
+    ~BuffersQueue();
+    void* getBuffer();
+    bool returnBuffer(void* buffer);
+    const size_t getSize();
+    const size_t getSingleBufferSize();
+
+private:
+    int getBufferId(void* buffer);
+};
+}  // namespace custom_nodes_common
+}  // namespace ovms
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_interface.h b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_interface.h
new file mode 100644
index 00000000..7d6e6d50
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_interface.h
@@ -0,0 +1,78 @@
+//*****************************************************************************
+// Copyright 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <stdint.h>
+
+typedef enum {
+    UNSPECIFIED,
+    FP32,
+    FP16,
+    U8,
+    I8,
+    I16,
+    U16,
+    I32,
+    FP64,
+    I64
+} CustomNodeTensorPrecision;
+
+struct CustomNodeTensor {
+    const char* name;
+    uint8_t* data;
+    uint64_t dataBytes;
+    uint64_t* dims;
+    uint64_t dimsCount;
+    CustomNodeTensorPrecision precision;
+};
+
+struct CustomNodeTensorInfo {
+    const char* name;
+    uint64_t* dims;
+    uint64_t dimsCount;
+    CustomNodeTensorPrecision precision;
+};
+
+struct CustomNodeParam {
+    const char *key, *value;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief Custom node library initialize enables creation of resources to be reused between predictions.
+ * Potential use cases include optimized temporary buffers allocation.
+ * Using initialize is optional and not required for custom node to work.
+ * CustomNodeLibraryInternalManager should be created here if initialize is used.
+ * On initialize failure status not equal to zero is returned and error log is printed.
+ */
+int initialize(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount);
+/**
+ * @brief Custom node library deinitialize enables destruction of resources that were used between predictions.
+ * Using deinitialize is optional and not required for custom node to work.
+ * CustomNodeLibraryInternalManager should be destroyed here if deinitialize is used.
+ * On deinitialize failure only error log is printed.
+ */
+int deinitialize(void* customNodeLibraryInternalManager);
+/**
+ * @brief Runs the per-request processing: consumes the node's input tensors and allocates its output tensors.
+ */
+int execute(const struct CustomNodeTensor* inputs, int inputsCount, struct CustomNodeTensor** outputs, int* outputsCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager);
+/**
+ * @brief Declares the input tensor metadata (names, shapes, precisions) this node expects.
+ */
+int getInputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager);
+/**
+ * @brief Declares the output tensor metadata this node produces.
+ */
+int getOutputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager);
+/**
+ * @brief Frees memory that this library allocated for outputs or metadata.
+ */
+int release(void* ptr, void* customNodeLibraryInternalManager);
+
+#ifdef __cplusplus
+}
+#endif
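These five entry points are the entire contract the OVMS C-API server loads from the custom node shared library. For orientation, here is a minimal pass-through node written against this header — an illustrative sketch only, not part of this patch (the output name "out" and the empty metadata functions are placeholders; a real node fills in getInputsInfo/getOutputsInfo):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include "custom_node_interface.h"

    int initialize(void** manager, const struct CustomNodeParam* params, int paramsCount) { return 0; }
    int deinitialize(void* manager) { return 0; }

    // Copy the first input tensor into a single output named "out".
    int execute(const struct CustomNodeTensor* inputs, int inputsCount,
        struct CustomNodeTensor** outputs, int* outputsCount,
        const struct CustomNodeParam* params, int paramsCount, void* manager) {
        if (inputsCount < 1) return 1;
        *outputsCount = 1;
        *outputs = (struct CustomNodeTensor*)malloc(sizeof(struct CustomNodeTensor));
        if (*outputs == NULL) return 1;
        struct CustomNodeTensor* t = &(*outputs)[0];
        t->name = "out";
        t->precision = inputs[0].precision;
        t->dataBytes = inputs[0].dataBytes;
        t->dimsCount = inputs[0].dimsCount;
        t->data = (uint8_t*)malloc(t->dataBytes);
        t->dims = (uint64_t*)malloc(t->dimsCount * sizeof(uint64_t));
        if (t->data == NULL || t->dims == NULL) return 1;
        memcpy(t->data, inputs[0].data, t->dataBytes);
        memcpy(t->dims, inputs[0].dims, t->dimsCount * sizeof(uint64_t));
        return 0;
    }

    int getInputsInfo(struct CustomNodeTensorInfo** info, int* infoCount,
        const struct CustomNodeParam* params, int paramsCount, void* manager) {
        *infoCount = 0;  // a real node declares its expected inputs here
        *info = NULL;
        return 0;
    }

    int getOutputsInfo(struct CustomNodeTensorInfo** info, int* infoCount,
        const struct CustomNodeParam* params, int paramsCount, void* manager) {
        *infoCount = 0;  // a real node declares its produced outputs here
        *info = NULL;
        return 0;
    }

    // The server hands every buffer it received from this library back through release().
    int release(void* ptr, void* manager) {
        free(ptr);
        return 0;
    }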
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_library_internal_manager.cpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_library_internal_manager.cpp
new file mode 100644
index 00000000..58a90581
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_library_internal_manager.cpp
@@ -0,0 +1,78 @@
+//*****************************************************************************
+// Copyright 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+
+#include "custom_node_library_internal_manager.hpp"
+
+#include <memory>
+#include <shared_mutex>
+#include <string>
+#include <utility>
+
+namespace ovms {
+namespace custom_nodes_common {
+CustomNodeLibraryInternalManager::CustomNodeLibraryInternalManager() {
+}
+
+CustomNodeLibraryInternalManager::~CustomNodeLibraryInternalManager() {
+}
+
+bool CustomNodeLibraryInternalManager::createBuffersQueue(const std::string& name, size_t singleBufferSize, int streamsLength) {
+    auto it = outputBuffers.find(name);
+    if (it != outputBuffers.end()) {
+        return false;
+    }
+    outputBuffers.emplace(name, std::make_unique<BuffersQueue>(singleBufferSize, streamsLength));
+    return true;
+}
+
+bool CustomNodeLibraryInternalManager::recreateBuffersQueue(const std::string& name, size_t singleBufferSize, int streamsLength) {
+    auto it = outputBuffers.find(name);
+    if (it != outputBuffers.end()) {
+        if (!(it->second->getSize() == streamsLength * singleBufferSize &&
+                it->second->getSingleBufferSize() == singleBufferSize)) {
+            it->second.reset(new BuffersQueue(singleBufferSize, streamsLength));
+        }
+        return true;
+    }
+    return false;
+}
+
+BuffersQueue* CustomNodeLibraryInternalManager::getBuffersQueue(const std::string& name) {
+    auto it = outputBuffers.find(name);
+    if (it == outputBuffers.end())
+        return nullptr;
+    return it->second.get();
+}
+
+bool CustomNodeLibraryInternalManager::releaseBuffer(void* ptr) {
+    for (auto it = outputBuffers.begin(); it != outputBuffers.end(); ++it) {
+        if (it->second->returnBuffer(ptr)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+std::shared_timed_mutex& CustomNodeLibraryInternalManager::getInternalManagerLock() {
+    return this->internalManagerLock;
+}
+}  // namespace custom_nodes_common
+}  // namespace ovms
+
+void cleanup(CustomNodeTensor& tensor, ovms::custom_nodes_common::CustomNodeLibraryInternalManager* internalManager) {
+    release(tensor.data, internalManager);
+    release(tensor.dims, internalManager);
+}
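The manager above exists so that a node can set up named buffer pools once and share them across predictions. The efficientnetb0 node in this patch keeps initialize() as a no-op, but the managed wiring the class supports looks roughly like this — a hedged sketch, where the queue name "roi_images" and the sizing (100 ROIs of 224x224x3 FP32, 2 streams) are placeholders, not values taken from this patch:

    #include <new>
    #include "custom_node_library_internal_manager.hpp"

    using ovms::custom_nodes_common::CustomNodeLibraryInternalManager;

    int initialize(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) {
        auto* manager = new (std::nothrow) CustomNodeLibraryInternalManager();
        if (manager == nullptr) return 1;
        // preallocate one pooled output buffer set up front
        if (!manager->createBuffersQueue("roi_images", 100 * 224 * 224 * 3 * sizeof(float), 2)) {
            delete manager;
            return 2;
        }
        *customNodeLibraryInternalManager = manager;
        return 0;
    }

    int deinitialize(void* customNodeLibraryInternalManager) {
        delete static_cast<CustomNodeLibraryInternalManager*>(customNodeLibraryInternalManager);
        return 0;
    }

recreateBuffersQueue() then lets execute() resize a pool when request parameters change, without disturbing the other queues.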
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_library_internal_manager.hpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_library_internal_manager.hpp
new file mode 100644
index 00000000..197cffc4
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/custom_node_library_internal_manager.hpp
@@ -0,0 +1,60 @@
+//*****************************************************************************
+// Copyright 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <memory>
+#include <shared_mutex>
+#include <string>
+#include <unordered_map>
+
+#include "custom_node_interface.h"
+#include "buffersqueue.hpp"
+
+namespace ovms {
+namespace custom_nodes_common {
+
+class CustomNodeLibraryInternalManager {
+    std::unordered_map<std::string, std::unique_ptr<BuffersQueue>> outputBuffers;
+    std::shared_timed_mutex internalManagerLock;
+
+public:
+    CustomNodeLibraryInternalManager();
+    ~CustomNodeLibraryInternalManager();
+    bool createBuffersQueue(const std::string& name, size_t singleBufferSize, int streamsLength);
+    bool recreateBuffersQueue(const std::string& name, size_t singleBufferSize, int streamsLength);
+    BuffersQueue* getBuffersQueue(const std::string& name);
+    bool releaseBuffer(void* ptr);
+    std::shared_timed_mutex& getInternalManagerLock();
+};
+}  // namespace custom_nodes_common
+}  // namespace ovms
+
+template <typename T>
+bool get_buffer(ovms::custom_nodes_common::CustomNodeLibraryInternalManager* internalManager, T** buffer, const char* buffersQueueName, uint64_t byte_size) {
+    auto buffersQueue = internalManager->getBuffersQueue(buffersQueueName);
+    if (!(buffersQueue == nullptr)) {
+        *buffer = static_cast<T*>(buffersQueue->getBuffer());
+    }
+    if (*buffer == nullptr || buffersQueue == nullptr) {
+        *buffer = (T*)malloc(byte_size);
+        if (*buffer == nullptr) {
+            return false;
+        }
+    }
+    return true;
+}
+
+void cleanup(CustomNodeTensor& tensor, ovms::custom_nodes_common::CustomNodeLibraryInternalManager* internalManager);
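The get_buffer() template above tries the named pool first and silently falls back to a heap allocation when the pool is missing or exhausted; release() can later route the pointer back through releaseBuffer() or free(). A short usage sketch, assuming a queue named "roi_images" was created at initialize time (the name and helper are illustrative, not part of this patch):

    #include <cstdint>
    #include <shared_mutex>
    #include "custom_node_library_internal_manager.hpp"

    // Allocate one FP32 output buffer of byteSize bytes under a shared lock,
    // so concurrent execute() calls do not race a pool recreation.
    static float* allocate_fp32_output(
        ovms::custom_nodes_common::CustomNodeLibraryInternalManager* internalManager,
        uint64_t byteSize) {
        std::shared_lock<std::shared_timed_mutex> lock(internalManager->getInternalManagerLock());
        float* buffer = nullptr;
        if (!get_buffer<float>(internalManager, &buffer, "roi_images", byteSize)) {
            return nullptr;  // neither pooled nor heap memory was available
        }
        return buffer;
    }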
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/efficientnetb0_node.cpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/efficientnetb0_node.cpp
new file mode 100644
index 00000000..f13aaf86
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/efficientnetb0_node.cpp
@@ -0,0 +1,630 @@
+//*****************************************************************************
+// Copyright 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "custom_node_interface.h"
+#include "opencv_utils.hpp"
+#include "utils.hpp"
+#include "opencv2/opencv.hpp"
+
+bool debugMode = false;
+
+static constexpr const char* IMAGE_TENSOR_NAME = "images";
+static constexpr const char* GEOMETRY_TENSOR_NAME = "boxes";
+static constexpr const char* TEXT_IMAGES_TENSOR_NAME = "roi_images";
+static constexpr const char* COORDINATES_TENSOR_NAME = "roi_coordinates";
+static constexpr const char* CONFIDENCE_TENSOR_NAME = "confidence_levels";
+
+typedef struct DetectedResult {
+    int frameId;
+    int x;
+    int y;
+    int width;
+    int height;
+    float confidence;
+    int classId;
+    char classText[1024];
+} DetectedResult;
+
+float boxiou_threshold = 0.3;
+float iou_threshold = 0.3;
+
+static bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector<cv::Rect>& boxes, const cv::Mat& originalImage, int targetImageHeight, int targetImageWidth, const std::string& targetImageLayout, bool convertToGrayScale) {
+    const uint64_t outputBatch = boxes.size();
+    int channels = convertToGrayScale ? 1 : 3;
+
+    uint64_t byteSize = sizeof(float) * targetImageHeight * targetImageWidth * channels * outputBatch;
+    float* buffer = (float*)malloc(byteSize);
+
+    NODE_ASSERT(buffer != nullptr, "malloc has failed");
+
+    for (uint64_t i = 0; i < outputBatch; i++) {
+        cv::Size targetShape(targetImageWidth, targetImageHeight);
+        cv::Mat image;
+
+        cv::Rect box = boxes[i];
+        if (box.x < 0)
+            box.x = 0;
+        if (box.y < 0)
+            box.y = 0;
+
+        // std::string imgname = "/tmp/results/classifyimage" + std::to_string(i) + ".jpg";
+        // cv::imwrite(imgname.c_str(), originalImage);
+
+        cv::Mat cropped = originalImage(box);
+        cv::resize(cropped, image, targetShape);
+
+        // std::string imgname = "/tmp/results/classifyimage" + std::to_string(i) + ".jpg";
+        // cv::Mat tmp2;
+        // image.convertTo(tmp2, CV_8UC3);
+        // cv::imwrite(imgname.c_str(), tmp2);
+
+        if (convertToGrayScale) {
+            image = apply_grayscale(image);
+        }
+
+        if (targetImageLayout == "NCHW") {
+            auto imgBuffer = reorder_to_nchw((float*)image.data, image.rows, image.cols, image.channels());
+            std::memcpy(buffer + (i * channels * targetImageWidth * targetImageHeight), imgBuffer.data(), byteSize / outputBatch);
+        } else {
+            std::memcpy(buffer + (i * channels * targetImageWidth * targetImageHeight), image.data, byteSize / outputBatch);
+        }
+    }
+
+    output->data = reinterpret_cast<uint8_t*>(buffer);
+    output->dataBytes = byteSize;
+    output->dimsCount = 5;
+    output->dims = (uint64_t*)malloc(output->dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(output->dims != nullptr, "malloc has failed");
+    output->dims[0] = outputBatch;
+    output->dims[1] = 1;
+    if (targetImageLayout == "NCHW") {
+        output->dims[2] = channels;
+        output->dims[3] = targetImageHeight;
+        output->dims[4] = targetImageWidth;
+    } else {
+        output->dims[2] = targetImageHeight;
+        output->dims[3] = targetImageWidth;
+        output->dims[4] = channels;
+    }
+    output->precision = FP32;
+    return true;
+}
+
+static bool copy_coordinates_into_output(struct CustomNodeTensor* output, const std::vector<cv::Rect>& boxes) {
+    const uint64_t outputBatch = boxes.size();
+
+    //printf("---------->NumberOfDets by coords %li\n", outputBatch);
+
+    uint64_t byteSize = sizeof(int32_t) * 4 * outputBatch;
+
+    int32_t* buffer = (int32_t*)malloc(byteSize);
+    NODE_ASSERT(buffer != nullptr, "malloc has failed");
+
+    for (uint64_t i = 0; i < outputBatch; i++) {
+        int32_t entry[] = {
+            boxes[i].x,
+            boxes[i].y,
+            boxes[i].width,
+            boxes[i].height};
+
+        std::memcpy(buffer + (i * 4), entry, byteSize / outputBatch);
+    }
+    output->data = reinterpret_cast<uint8_t*>(buffer);
+    output->dataBytes = byteSize;
+    output->dimsCount = 3;
+    output->dims = (uint64_t*)malloc(output->dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(output->dims != nullptr, "malloc has failed");
+    output->dims[0] = outputBatch;
+    output->dims[1] = 1;
+    output->dims[2] = 4;
+    output->precision = I32;
+
+    return true;
+}
+
+static bool copy_scores_into_output(struct CustomNodeTensor* output, const std::vector<float>& scores) {
+    const uint64_t outputBatch = scores.size();
+    //printf("Number of scores------------------>%li\n", outputBatch);
+    uint64_t byteSize = sizeof(float) * outputBatch;
+
+    float* buffer = (float*)malloc(byteSize);
+    NODE_ASSERT(buffer != nullptr, "malloc has failed");
+    std::memcpy(buffer, scores.data(), byteSize);
+
+    output->data = reinterpret_cast<uint8_t*>(buffer);
+    output->dataBytes = byteSize;
+    output->dimsCount = 3;
+    output->dims = (uint64_t*)malloc(output->dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(output->dims != nullptr, "malloc has failed");
+    output->dims[0] = outputBatch;
+    output->dims[1] = 1;
+    output->dims[2] = 1;
+    output->precision = FP32;
+    return true;
+}
+
+int initialize(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) {
+    return 0;
+}
+
+int deinitialize(void* customNodeLibraryInternalManager) {
+    return 0;
+}
+
+// YoloV8 PostProcessing
+
+const std::string labels[80] = {
+    "person",
+    "bicycle",
+    "car",
+    "motorbike",
+    "aeroplane",
+    "bus",
+    "train",
+    "truck",
+    "boat",
+    "traffic light",
+    "fire hydrant",
+    "stop sign",
+    "parking meter",
+    "bench",
+    "bird",
+    "cat",
+    "dog",
+    "horse",
+    "sheep",
+    "cow",
+    "elephant",
+    "bear",
+    "zebra",
+    "giraffe",
+    "backpack",
+    "umbrella",
+    "handbag",
+    "tie",
+    "suitcase",
+    "frisbee",
+    "skis",
+    "snowboard",
+    "sports ball",
+    "kite",
+    "baseball bat",
+    "baseball glove",
+    "skateboard",
+    "surfboard",
+    "tennis racket",
+    "bottle",
+    "wine glass",
+    "cup",
+    "fork",
+    "knife",
+    "spoon",
+    "bowl",
+    "banana",
+    "apple",
+    "sandwich",
+    "orange",
+    "broccoli",
+    "carrot",
+    "hot dog",
+    "pizza",
+    "donut",
+    "cake",
+    "chair",
+    "sofa",
+    "pottedplant",
+    "bed",
+    "diningtable",
+    "toilet",
+    "tvmonitor",
+    "laptop",
+    "mouse",
+    "remote",
+    "keyboard",
+    "cell phone",
+    "microwave",
+    "oven",
+    "toaster",
+    "sink",
+    "refrigerator",
+    "book",
+    "clock",
+    "vase",
+    "scissors",
+    "teddy bear",
+    "hair drier",
+    "toothbrush"
+};
+
+std::vector<int> nms(const std::vector<DetectedResult>& res, const float thresh, bool includeBoundaries = false, size_t keep_top_k = 0) {
+    if (keep_top_k == 0) {
+        keep_top_k = 10;  //res.size();
+    }
+    std::vector<float> areas(res.size());
+    for (size_t i = 0; i < res.size(); ++i) {
+        areas[i] = (float)(res[i].width - res[i].x + includeBoundaries) * (res[i].height - res[i].y + includeBoundaries);
+    }
+    std::vector<int> order(res.size());
+    std::iota(order.begin(), order.end(), 0);
+    std::sort(order.begin(), order.end(), [&res](int o1, int o2) { return res[o1].confidence > res[o2].confidence; });
+
+    size_t ordersNum = 0;
+    for (; ordersNum < order.size() && res[order[ordersNum]].confidence >= 0 && ordersNum < keep_top_k; ordersNum++);
+
+    std::vector<int> keep;
+    bool shouldContinue = true;
+    for (size_t i = 0; shouldContinue && i < ordersNum; ++i) {
+        int idx1 = order[i];
+        if (idx1 >= 0) {
+            keep.push_back(idx1);
+            shouldContinue = false;
+            for (size_t j = i + 1; j < ordersNum; ++j) {
+                int idx2 = order[j];
+                if (idx2 >= 0) {
+                    shouldContinue = true;
+                    float overlappingWidth = std::fminf(res[idx1].width, res[idx2].width) - std::fmaxf(res[idx1].x, res[idx2].x);
+                    float overlappingHeight = std::fminf(res[idx1].height, res[idx2].height) - std::fmaxf(res[idx1].y, res[idx2].y);
+                    float intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
+                    float union_area = areas[idx1] + areas[idx2] - intersection;
+                    if (0.0f == union_area || intersection / union_area > thresh) {
+                        order[j] = -1;
+                    }
+                }
+            }
+        }
+    }
+    return keep;
+}
+
+// (a simplified standalone IoU/NMS example appears at the end of this patch excerpt)
+std::vector<int> multiclass_nms(const std::vector<DetectedResult>& res, const float iou_threshold, bool includeBoundaries, size_t maxNum) {
+    std::vector<DetectedResult> boxes_copy;
+    boxes_copy.reserve(res.size());
+
+    float max_coord = 0.f;
+    for (const auto& box : res) {
+        max_coord = std::max(max_coord, std::max((float)box.width, (float)box.height));
+    }
+    for (auto& box : res) {
+        // offset boxes per class so cross-class boxes can never overlap
+        float offset = box.classId * max_coord;
+        DetectedResult tmp;
+        tmp.x = box.x + offset;
+        tmp.y = box.y + offset;
+        tmp.width = box.width + offset;
+        tmp.height = box.height + offset;
+        tmp.classId = box.classId;
+        tmp.confidence = box.confidence;
+        boxes_copy.emplace_back(tmp);
+    }
+
+    return nms(boxes_copy, iou_threshold, includeBoundaries, maxNum);
+}
+
+void postprocess(const float confidence_threshold, const int imageWidth, const int imageHeight,
+    const uint64_t* output_shape, const void* voutputData, const uint32_t dimCount,
+    std::vector<cv::Rect>& rects, std::vector<float>& scores)
+{
+    if (!voutputData || !output_shape) {
+        // nothing to do
+        return;
+    }
+
+    /*
+    https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization/230-yolov8-object-detection.ipynb
+    detection box has the [x, y, w, h, class_no_1, ..., class_no_80] format, where:
+    (x, y) - raw coordinates of box center
+    w, h - raw width and height of the box
+    class_no_1, ..., class_no_80 - probability distribution over the classes
+    (a standalone decode sketch of this layout appears after this file)
+    */
+
+    std::vector<DetectedResult> detectedResults;
+    size_t num_proposals = output_shape[2];
+    std::vector<DetectedResult> boxes_with_class;
+    std::vector<float> confidences;
+    const float* const detections = (float*)(voutputData);
+    for (size_t i = 0; i < num_proposals; ++i) {
+        float confidence = 0.0f;
+        size_t max_id = 0;
+        constexpr size_t LABELS_START = 4;
+        // get max confidence level in the 80 classes for this detection
+        for (size_t j = LABELS_START; j < output_shape[1]; ++j) {
+            if (detections[j * num_proposals + i] > confidence) {
+                confidence = detections[j * num_proposals + i];
+                max_id = j;
+            }
+        }
+        // add the detection if the max confidence meets the threshold
+        if (confidence > confidence_threshold) {
+            DetectedResult obj;
+            obj.x = detections[0 * num_proposals + i] - detections[2 * num_proposals + i] / 2.0f;
+            obj.y = detections[1 * num_proposals + i] - detections[3 * num_proposals + i] / 2.0f;
+            obj.width = detections[0 * num_proposals + i] + detections[2 * num_proposals + i] / 2.0f;
+            obj.height = detections[1 * num_proposals + i] + detections[3 * num_proposals + i] / 2.0f;
+            obj.classId = max_id - LABELS_START;
+            obj.confidence = confidence;
+
+            boxes_with_class.emplace_back(obj);
+            confidences.push_back(confidence);
+        }
+    }
+
+    constexpr bool includeBoundaries = false;
+    constexpr size_t keep_top_k = 30000;
+    std::vector<int> keep;
+    bool agnostic_nms = true;
+
+    if (agnostic_nms) {
+        keep = nms(boxes_with_class, boxiou_threshold, includeBoundaries, keep_top_k);
+    } else {
+        keep = multiclass_nms(boxes_with_class, boxiou_threshold, includeBoundaries, keep_top_k);
+    }
+
+    int padLeft = 15, padTop = 0;  // adjust padding for optimal efficientnet inference
+    float floatInputImgWidth = float(imageWidth),
+          floatInputImgHeight = float(imageHeight),
+          netInputWidth = floatInputImgWidth,
+          netInputHeight = floatInputImgHeight,
+          invertedScaleX = floatInputImgWidth / netInputWidth,
+          invertedScaleY = floatInputImgHeight / netInputHeight;
+
+    for (size_t idx : keep) {
+        int x1 = std::clamp(
+            round((boxes_with_class[idx].x - padLeft) * invertedScaleX),
+            0.f,
+            floatInputImgWidth);
+        int y1 = std::clamp(
+            round((boxes_with_class[idx].y - padTop) * invertedScaleY),
+            0.f,
+            floatInputImgHeight);
+        int x2 = std::clamp(
+            round((boxes_with_class[idx].width + padLeft) * invertedScaleX) - x1,
+            0.f,
+            floatInputImgWidth - x1);
+        int y2 = std::clamp(
+            round((boxes_with_class[idx].height + padTop) * invertedScaleY) - y1,
+            0.f,
+            floatInputImgHeight - y1);
+
+        rects.emplace_back(x1, y1, x2, y2);
+        scores.emplace_back(confidences[idx]);
+    }
+}
+// End of Yolov8 PostProcessing
+
+int execute(const struct CustomNodeTensor* inputs, int inputsCount, struct CustomNodeTensor** outputs, int* outputsCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) {
+
+    // Parameters reading
+    int originalImageHeight = get_int_parameter("original_image_height", params, paramsCount, -1);
+    int originalImageWidth = get_int_parameter("original_image_width", params, paramsCount, -1);
+    NODE_ASSERT(originalImageHeight > 0, "original image height must be larger than 0");
+    NODE_ASSERT(originalImageWidth > 0, "original image width must be larger than 0");
+    int targetImageHeight = get_int_parameter("target_image_height", params, paramsCount, -1);
+    int targetImageWidth = get_int_parameter("target_image_width", params, paramsCount, -1);
+    NODE_ASSERT(targetImageHeight > 0, "target image height must be larger than 0");
+    NODE_ASSERT(targetImageWidth > 0, "target image width must be larger than 0");
+    std::string originalImageLayout = get_string_parameter("original_image_layout", params, paramsCount, "NCHW");
+    NODE_ASSERT(originalImageLayout == "NCHW" || originalImageLayout == "NHWC", "original image layout must be NCHW or NHWC");
+    std::string targetImageLayout = get_string_parameter("target_image_layout", params, paramsCount, "NCHW");
+    NODE_ASSERT(targetImageLayout == "NCHW" || targetImageLayout == "NHWC", "target image layout must be NCHW or NHWC");
+    bool convertToGrayScale = get_string_parameter("convert_to_gray_scale", params, paramsCount) == "true";
+    float confidenceThreshold = get_float_parameter("confidence_threshold", params, paramsCount, -1.0);
+    NODE_ASSERT(confidenceThreshold >= 0 && confidenceThreshold <= 1.0, "confidence threshold must be in 0-1 range");
+    uint64_t maxOutputBatch = get_int_parameter("max_output_batch", params, paramsCount, 100);
+    NODE_ASSERT(maxOutputBatch > 0, "max output batch must be larger than 0");
+    debugMode = get_string_parameter("debug", params, paramsCount) == "true";
+
+    const CustomNodeTensor* imageTensor = nullptr;
+    const CustomNodeTensor* boxesTensor = nullptr;
+
+    for (int i = 0; i < inputsCount; i++) {
+        if (std::strcmp(inputs[i].name, IMAGE_TENSOR_NAME) == 0) {
+            imageTensor = &(inputs[i]);
+        } else if (std::strcmp(inputs[i].name, GEOMETRY_TENSOR_NAME) == 0) {
+            boxesTensor = &(inputs[i]);
+        } else {
+            std::cout << "Unrecognized input: " << inputs[i].name << std::endl;
+            return 1;
+        }
+    }
+
+    NODE_ASSERT(imageTensor != nullptr, "Missing input image");
+    NODE_ASSERT(boxesTensor != nullptr, "Missing input boxes");
+    NODE_ASSERT(imageTensor->precision == FP32, "image input is not FP32");
+    NODE_ASSERT(boxesTensor->precision == FP32, "boxes input is not FP32");
+
+    NODE_ASSERT(imageTensor->dimsCount == 4, "input image shape must have 4 dimensions");
+    NODE_ASSERT(imageTensor->dims[0] == 1, "input image batch must be 1");
+    uint64_t _imageHeight = imageTensor->dims[originalImageLayout == "NCHW" ? 2 : 1];
+    uint64_t _imageWidth = imageTensor->dims[originalImageLayout == "NCHW" ? 3 : 2];
+    NODE_ASSERT(_imageHeight <= static_cast<uint64_t>(std::numeric_limits<int>::max()), "image height is too large");
+    NODE_ASSERT(_imageWidth <= static_cast<uint64_t>(std::numeric_limits<int>::max()), "image width is too large");
+    int imageHeight = static_cast<int>(_imageHeight);
+    int imageWidth = static_cast<int>(_imageWidth);
+
+    if (debugMode) {
+        std::cout << "Processing input tensor image resolution: " << cv::Size(imageHeight, imageWidth) << "; expected resolution: " << cv::Size(originalImageHeight, originalImageWidth) << std::endl;
+    }
+
+    NODE_ASSERT(imageHeight == originalImageHeight, "original image size parameter differs from original image tensor size");
+    NODE_ASSERT(imageWidth == originalImageWidth, "original image size parameter differs from original image tensor size");
+
+    cv::Mat image;
+    if (originalImageLayout == "NHWC") {
+        image = nhwc_to_mat(imageTensor);
+    } else {
+        image = nchw_to_mat(imageTensor);
+    }
+
+    NODE_ASSERT(image.cols == imageWidth, "Mat generation failed");
+    NODE_ASSERT(image.rows == imageHeight, "Mat generation failed");
+
+    std::vector<cv::Rect> rects;
+    std::vector<float> scores;
+    postprocess(confidenceThreshold, originalImageWidth, originalImageHeight, boxesTensor->dims, boxesTensor->data, boxesTensor->dimsCount, rects, scores);
+
+    NODE_ASSERT(rects.size() == scores.size(), "rects and scores are not equal length");
+    if (rects.size() > maxOutputBatch) {
+        rects.resize(maxOutputBatch);
+        scores.resize(maxOutputBatch);
+    }
+
+    if (debugMode)
+        std::cout << "Total findings: " << rects.size() << std::endl;
+
roi_images, roi_coordinates, confidence_levels + *outputs = (struct CustomNodeTensor*)malloc(*outputsCount * sizeof(CustomNodeTensor)); + + NODE_ASSERT((*outputs) != nullptr, "malloc has failed"); + CustomNodeTensor& textImagesTensor = (*outputs)[0]; + textImagesTensor.name = TEXT_IMAGES_TENSOR_NAME; + + if (!copy_images_into_output(&textImagesTensor, rects, image, targetImageHeight, targetImageWidth, targetImageLayout, convertToGrayScale)) { + free(*outputs); + return 1; + } + + CustomNodeTensor& coordinatesTensor = (*outputs)[1]; + coordinatesTensor.name = COORDINATES_TENSOR_NAME; + if (!copy_coordinates_into_output(&coordinatesTensor, rects)) { + free(*outputs); + cleanup(textImagesTensor); + return 1; + } + + + CustomNodeTensor& confidenceTensor = (*outputs)[2]; + confidenceTensor.name = CONFIDENCE_TENSOR_NAME; + if (!copy_scores_into_output(&confidenceTensor, scores)) { + free(*outputs); + cleanup(textImagesTensor); + cleanup(coordinatesTensor); + return 1; + } + + return 0; +} + +int getInputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) { + int originalImageHeight = get_int_parameter("original_image_height", params, paramsCount, -1); + int originalImageWidth = get_int_parameter("original_image_width", params, paramsCount, -1); + NODE_ASSERT(originalImageHeight > 0, "original image height must be larger than 0"); + NODE_ASSERT(originalImageWidth > 0, "original image width must be larger than 0"); + std::string originalImageLayout = get_string_parameter("original_image_layout", params, paramsCount, "NCHW"); + NODE_ASSERT(originalImageLayout == "NCHW" || originalImageLayout == "NHWC", "original image layout must be NCHW or NHWC"); + + *infoCount = 2; + *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo)); + NODE_ASSERT((*info) != nullptr, "malloc has failed"); + + (*info)[0].name = IMAGE_TENSOR_NAME; + (*info)[0].dimsCount = 4; + (*info)[0].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[0].dims) != nullptr, "malloc has failed"); + (*info)[0].dims[0] = 1; + if (originalImageLayout == "NCHW") { + (*info)[0].dims[1] = 3; + (*info)[0].dims[2] = originalImageHeight; + (*info)[0].dims[3] = originalImageWidth; + } else { + (*info)[0].dims[1] = originalImageHeight; + (*info)[0].dims[2] = originalImageWidth; + (*info)[0].dims[3] = 3; + } + (*info)[0].precision = FP32; + + (*info)[1].name = GEOMETRY_TENSOR_NAME; + (*info)[1].dimsCount = 3; + (*info)[1].dims = (uint64_t*)malloc((*info)[1].dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[1].dims) != nullptr, "malloc has failed"); + // 416x416 + (*info)[1].dims[0] = 1; + (*info)[1].dims[1] = 84; + (*info)[1].dims[2] = 3549; + + // 512x512 + // (*info)[1].dims[0] = 1; + // (*info)[1].dims[1] = 84; + // (*info)[1].dims[2] = 5376; + + //640x640 + // (*info)[1].dims[0] = 1; + // (*info)[1].dims[1] = 84; + // (*info)[1].dims[2] = 8400; + + (*info)[1].precision = FP32; + return 0; +} + +int getOutputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) { + int targetImageHeight = get_int_parameter("target_image_height", params, paramsCount, -1); + int targetImageWidth = get_int_parameter("target_image_width", params, paramsCount, -1); + NODE_ASSERT(targetImageHeight > 0, "target image height must be larger than 0"); + NODE_ASSERT(targetImageWidth > 0, "target image 
width must be larger than 0"); + std::string targetImageLayout = get_string_parameter("target_image_layout", params, paramsCount, "NCHW"); + NODE_ASSERT(targetImageLayout == "NCHW" || targetImageLayout == "NHWC", "target image layout must be NCHW or NHWC"); + bool convertToGrayScale = get_string_parameter("convert_to_gray_scale", params, paramsCount) == "true"; + + *infoCount = 3; + *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo)); + NODE_ASSERT((*info) != nullptr, "malloc has failed"); + + (*info)[0].name = TEXT_IMAGES_TENSOR_NAME; + (*info)[0].dimsCount = 5; + (*info)[0].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[0].dims) != nullptr, "malloc has failed"); + (*info)[0].dims[0] = 0; + (*info)[0].dims[1] = 1; + if (targetImageLayout == "NCHW") { + (*info)[0].dims[2] = convertToGrayScale ? 1 : 3; + (*info)[0].dims[3] = targetImageHeight; + (*info)[0].dims[4] = targetImageWidth; + } else { + (*info)[0].dims[2] = targetImageHeight; + (*info)[0].dims[3] = targetImageWidth; + (*info)[0].dims[4] = convertToGrayScale ? 1 : 3; + } + (*info)[0].precision = FP32; + + (*info)[1].name = COORDINATES_TENSOR_NAME; + (*info)[1].dimsCount = 3; + (*info)[1].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[1].dims) != nullptr, "malloc has failed"); + (*info)[1].dims[0] = 0; + (*info)[1].dims[1] = 1; + (*info)[1].dims[2] = 4; + (*info)[1].precision = I32; + + (*info)[2].name = CONFIDENCE_TENSOR_NAME; + (*info)[2].dimsCount = 3; + (*info)[2].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[2].dims) != nullptr, "malloc has failed"); + (*info)[2].dims[0] = 0; + (*info)[2].dims[1] = 1; + (*info)[2].dims[2] = 1; + (*info)[2].precision = FP32; + + return 0; +} + +int release(void* ptr, void* customNodeLibraryInternalManager) { + free(ptr); + return 0; +} diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/efficientnetb0_node_debug.cpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/efficientnetb0_node_debug.cpp new file mode 100644 index 00000000..43ec5c9e --- /dev/null +++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/efficientnetb0_node_debug.cpp @@ -0,0 +1,910 @@ +//***************************************************************************** +// Copyright 2021 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//*****************************************************************************
+#include
+#include
+#include
+#include
+
+#include "custom_node_interface.h"
+#include "opencv_utils.hpp"
+#include "utils.hpp"
+#include "opencv2/opencv.hpp"
+
+bool debugMode = false;
+
+static constexpr const char* IMAGE_TENSOR_NAME = "images";
+static constexpr const char* GEOMETRY_TENSOR_NAME = "boxes";
+static constexpr const char* TEXT_IMAGES_TENSOR_NAME = "roi_images";
+static constexpr const char* COORDINATES_TENSOR_NAME = "roi_coordinates";
+static constexpr const char* CONFIDENCE_TENSOR_NAME = "confidence_levels";
+
+typedef struct DetectedResult {
+    int frameId;
+    int x;
+    int y;
+    int width;
+    int height;
+    float confidence;
+    int classId;
+    char classText[1024];
+} DetectedResult;
+
+float boxiou_threshold = 0.3;
+float iou_threshold = 0.3;
+
+static bool copy_images_into_output(struct CustomNodeTensor* output, const std::vector<cv::Rect>& boxes, const cv::Mat& originalImage, int targetImageHeight, int targetImageWidth, const std::string& targetImageLayout, bool convertToGrayScale) {
+    const uint64_t outputBatch = boxes.size();
+    int channels = convertToGrayScale ? 1 : 3;
+
+    uint64_t byteSize = sizeof(float) * targetImageHeight * targetImageWidth * channels * outputBatch;
+    float* buffer = (float*)malloc(byteSize);
+
+    NODE_ASSERT(buffer != nullptr, "malloc has failed");
+
+    for (uint64_t i = 0; i < outputBatch; i++) {
+        cv::Size targetShape(targetImageWidth, targetImageHeight);
+        cv::Mat image;
+
+        cv::Rect box = boxes[i];
+        if (box.x < 0)
+            box.x = 0;
+        if (box.y < 0)
+            box.y = 0;
+
+        // std::string imgname = "/tmp/results/classifyimage" + std::to_string(i) + ".jpg";
+        // cv::imwrite(imgname.c_str(), originalImage);
+
+        cv::Mat tmp;
+        originalImage.convertTo(tmp, CV_32F, 255.0f);
+        cv::Mat cropped = tmp(box);
+        cv::resize(cropped, image, targetShape);
+
+        // std::string imgname = "/tmp/results/classifyimage" + std::to_string(i) + ".jpg";
+        // cv::Mat tmp2;
+        // image.convertTo(tmp2, CV_8UC3);
+        // cv::imwrite(imgname.c_str(), tmp2);
+
+        if (convertToGrayScale) {
+            image = apply_grayscale(image);
+        }
+
+        if (targetImageLayout == "NCHW") {
+            auto imgBuffer = reorder_to_nchw((float*)image.data, image.rows, image.cols, image.channels());
+            std::memcpy(buffer + (i * channels * targetImageWidth * targetImageHeight), imgBuffer.data(), byteSize / outputBatch);
+        } else {
+            std::memcpy(buffer + (i * channels * targetImageWidth * targetImageHeight), image.data, byteSize / outputBatch);
+        }
+    }
+
+    output->data = reinterpret_cast<uint8_t*>(buffer);
+    output->dataBytes = byteSize;
+    output->dimsCount = 5;
+    output->dims = (uint64_t*)malloc(output->dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(output->dims != nullptr, "malloc has failed");
+    output->dims[0] = outputBatch;
+    output->dims[1] = 1;
+    if (targetImageLayout == "NCHW") {
+        output->dims[2] = channels;
+        output->dims[3] = targetImageHeight;
+        output->dims[4] = targetImageWidth;
+    } else {
+        output->dims[2] = targetImageHeight;
+        output->dims[3] = targetImageWidth;
+        output->dims[4] = channels;
+    }
+    output->precision = FP32;
+    return true;
+}
+
+static bool copy_coordinates_into_output(struct CustomNodeTensor* output, const std::vector<cv::Rect>& boxes) {
+    const uint64_t outputBatch = boxes.size();
+
+    //printf("---------->NumberOfDets by coords %li\n", outputBatch);
+
+    uint64_t byteSize = sizeof(int32_t) * 4 * outputBatch;
+
+    int32_t* buffer = (int32_t*)malloc(byteSize);
+    NODE_ASSERT(buffer != nullptr, "malloc has failed");
+
+    for (uint64_t i = 0; i < outputBatch; i++) {
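+        // each ROI is serialized as four int32 values [x, y, width, height],
+        // i.e. byteSize / outputBatch == 16 bytes per detection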
+        int32_t entry[] = {
+            boxes[i].x,
+            boxes[i].y,
+            boxes[i].width,
+            boxes[i].height};
+
+        std::memcpy(buffer + (i * 4), entry, byteSize / outputBatch);
+    }
+    output->data = reinterpret_cast<uint8_t*>(buffer);
+    output->dataBytes = byteSize;
+    output->dimsCount = 3;
+    output->dims = (uint64_t*)malloc(output->dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(output->dims != nullptr, "malloc has failed");
+    output->dims[0] = outputBatch;
+    output->dims[1] = 1;
+    output->dims[2] = 4;
+    output->precision = I32;
+
+    return true;
+}
+
+static bool copy_scores_into_output(struct CustomNodeTensor* output, const std::vector<float>& scores) {
+    const uint64_t outputBatch = scores.size();
+    //printf("Number of scores------------------>%li\n", outputBatch);
+    uint64_t byteSize = sizeof(float) * outputBatch;
+
+    float* buffer = (float*)malloc(byteSize);
+    NODE_ASSERT(buffer != nullptr, "malloc has failed");
+    std::memcpy(buffer, scores.data(), byteSize);
+
+    output->data = reinterpret_cast<uint8_t*>(buffer);
+    output->dataBytes = byteSize;
+    output->dimsCount = 3;
+    output->dims = (uint64_t*)malloc(output->dimsCount * sizeof(uint64_t));
+    NODE_ASSERT(output->dims != nullptr, "malloc has failed");
+    output->dims[0] = outputBatch;
+    output->dims[1] = 1;
+    output->dims[2] = 1;
+    output->precision = FP32;
+    return true;
+}
+
+int initialize(void** customNodeLibraryInternalManager, const struct CustomNodeParam* params, int paramsCount) {
+    return 0;
+}
+
+int deinitialize(void* customNodeLibraryInternalManager) {
+    return 0;
+}
+
+// YoloV8 PostProcessing
+
+const std::string labels[80] = {
+    "person",
+    "bicycle",
+    "car",
+    "motorbike",
+    "aeroplane",
+    "bus",
+    "train",
+    "truck",
+    "boat",
+    "traffic light",
+    "fire hydrant",
+    "stop sign",
+    "parking meter",
+    "bench",
+    "bird",
+    "cat",
+    "dog",
+    "horse",
+    "sheep",
+    "cow",
+    "elephant",
+    "bear",
+    "zebra",
+    "giraffe",
+    "backpack",
+    "umbrella",
+    "handbag",
+    "tie",
+    "suitcase",
+    "frisbee",
+    "skis",
+    "snowboard",
+    "sports ball",
+    "kite",
+    "baseball bat",
+    "baseball glove",
+    "skateboard",
+    "surfboard",
+    "tennis racket",
+    "bottle",
+    "wine glass",
+    "cup",
+    "fork",
+    "knife",
+    "spoon",
+    "bowl",
+    "banana",
+    "apple",
+    "sandwich",
+    "orange",
+    "broccoli",
+    "carrot",
+    "hot dog",
+    "pizza",
+    "donut",
+    "cake",
+    "chair",
+    "sofa",
+    "pottedplant",
+    "bed",
+    "diningtable",
+    "toilet",
+    "tvmonitor",
+    "laptop",
+    "mouse",
+    "remote",
+    "keyboard",
+    "cell phone",
+    "microwave",
+    "oven",
+    "toaster",
+    "sink",
+    "refrigerator",
+    "book",
+    "clock",
+    "vase",
+    "scissors",
+    "teddy bear",
+    "hair drier",
+    "toothbrush"
+};
+
+int calculateEntryIndex(int totalCells, int lcoords, size_t lclasses, int location, int entry) {
+    int n = location / totalCells;
+    int loc = location % totalCells;
+    return (n * (lcoords + lclasses) + entry) * totalCells + loc;
+}
+
+static inline float sigmoid(float x) {
+    return 1.f / (1.f + std::exp(-x));
+}
+
+double intersectionOverUnion(const DetectedResult& o1, const DetectedResult& o2) {
+    double overlappingWidth = std::fmin(o1.x + o1.width, o2.x + o2.width) - std::fmax(o1.x, o2.x);
+    double overlappingHeight = std::fmin(o1.y + o1.height, o2.y + o2.height) - std::fmax(o1.y, o2.y);
+    double intersectionArea = (overlappingWidth < 0 || overlappingHeight < 0) ?
0 : overlappingHeight * overlappingWidth;
+    double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea;
+    return intersectionArea / unionArea;
+}
+
+// IOU postproc filter
+void postprocess(std::vector<DetectedResult>& detectedResults,
+    std::vector<cv::Rect>& rects, std::vector<float>& scores)
+{
+    bool useAdvancedPostprocessing = false;
+
+    if (useAdvancedPostprocessing) {
+        // Advanced postprocessing
+        // Checking IOU threshold conformance
+        // For every i-th object we're finding all objects it intersects with, and comparing confidence
+        // If i-th object has greater confidence than all others, we include it into result
+        for (const auto& obj1 : detectedResults) {
+            bool isGoodResult = true;
+            for (const auto& obj2 : detectedResults) {
+                if (obj1.classId == obj2.classId && obj1.confidence < obj2.confidence &&
+                    intersectionOverUnion(obj1, obj2) >= boxiou_threshold) { // if obj1 is the same as obj2, condition
+                                                                             // expression will evaluate to false anyway
+                    isGoodResult = false;
+                    break;
+                }
+            }
+            if (isGoodResult) {
+                //outDetectedResults.push_back(obj1);
+                rects.emplace_back(
+                    obj1.x,
+                    obj1.y,
+                    obj1.width,
+                    obj1.height);
+                scores.emplace_back(obj1.confidence);
+            }
+        }
+    } else {
+        // Classic postprocessing
+        printf("Checking %zu boxes for IOU\n", detectedResults.size());
+        std::sort(detectedResults.begin(), detectedResults.end(), [](const DetectedResult& x, const DetectedResult& y) {
+            return x.confidence > y.confidence;
+        });
+        for (size_t i = 0; i < detectedResults.size(); ++i) {
+            if (detectedResults[i].confidence == 0)
+                continue;
+
+            for (size_t j = i + 1; j < detectedResults.size(); ++j)
+            {
+                float iou = intersectionOverUnion(detectedResults[i], detectedResults[j]);
+
+                if (iou >= boxiou_threshold)
+                    detectedResults[j].confidence = 0;
+                // else
+                //     printf("\tiou saving: %f", iou);
+            }
+
+            if (detectedResults[i].confidence > 0)
+            {
+                //outDetectedResults.push_back(detectedResults[i]);
+                rects.emplace_back(
+                    detectedResults[i].x,
+                    detectedResults[i].y,
+                    detectedResults[i].width,
+                    detectedResults[i].height);
+                scores.emplace_back(detectedResults[i].confidence);
+            }
+        } //end for
+    } // end if
+} // end postprocess IOU filter
+
+std::vector<int> nms(const std::vector<DetectedResult>& res, const float thresh, bool includeBoundaries=false, size_t keep_top_k=0) {
+    if (keep_top_k == 0) {
+        keep_top_k = 10; //res.size();
+    }
+    std::vector<float> areas(res.size());
+    for (size_t i = 0; i < res.size(); ++i) {
+        // width/height hold x_max/y_max here, so this is (x2 - x1) * (y2 - y1)
+        areas[i] = (float)(res[i].width - res[i].x + includeBoundaries) * (res[i].height - res[i].y + includeBoundaries);
+    }
+    std::vector<int> order(res.size());
+    std::iota(order.begin(), order.end(), 0);
+    std::sort(order.begin(), order.end(), [&res](int o1, int o2) { return res[o1].confidence > res[o2].confidence; });
+
+    size_t ordersNum = 0;
+    for (; ordersNum < order.size() && res[order[ordersNum]].confidence >= 0 && ordersNum < keep_top_k; ordersNum++);
+
+    std::vector<int> keep;
+    bool shouldContinue = true;
+    for (size_t i = 0; shouldContinue && i < ordersNum; ++i) {
+        int idx1 = order[i];
+        if (idx1 >= 0) {
+            keep.push_back(idx1);
+            shouldContinue = false;
+            for (size_t j = i + 1; j < ordersNum; ++j) {
+                int idx2 = order[j];
+                if (idx2 >= 0) {
+                    shouldContinue = true;
+                    float overlappingWidth = std::fminf(res[idx1].width, res[idx2].width) - std::fmaxf(res[idx1].x, res[idx2].x);
+                    float overlappingHeight = std::fminf(res[idx1].height, res[idx2].height) - std::fmaxf(res[idx1].y, res[idx2].y);
+                    float intersection = overlappingWidth > 0 && overlappingHeight > 0 ? overlappingWidth * overlappingHeight : 0;
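+                    // greedy NMS: order[] is sorted by descending confidence, so
+                    // any later box overlapping the kept idx1 beyond thresh is
+                    // suppressed in place by setting its order slot to -1.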
+                    float union_area = areas[idx1] + areas[idx2] - intersection;
+                    if (0.0f == union_area || intersection / union_area > thresh) {
+                        order[j] = -1;
+                    }
+                }
+            }
+        }
+    }
+    return keep;
+}
+
+std::vector<int> multiclass_nms(const std::vector<DetectedResult>& res, const float iou_threshold, bool includeBoundaries, size_t maxNum) {
+    std::vector<DetectedResult> boxes_copy;
+    boxes_copy.reserve(res.size());
+
+    float max_coord = 0.f;
+    for (const auto& box : res) {
+        max_coord = std::max(max_coord, std::max((float)box.width, (float)box.height));
+    }
+    for (auto& box : res) {
+        // offset each class into its own coordinate range so one class-agnostic
+        // nms() pass never suppresses boxes across different classes
+        float offset = box.classId * max_coord;
+        DetectedResult tmp;
+        tmp.x = box.x + offset;
+        tmp.y = box.y + offset;
+        tmp.width = box.width + offset;
+        tmp.height = box.height + offset;
+        tmp.classId = box.classId;
+        tmp.confidence = box.confidence;
+        boxes_copy.emplace_back(tmp);
+    }
+
+    return nms(boxes_copy, iou_threshold, includeBoundaries, maxNum);
+}
+
+
+void postprocess(const float confidence_threshold, const int imageWidth, const int imageHeight,
+    const uint64_t* output_shape, const void* voutputData, const uint32_t dimCount,
+    std::vector<cv::Rect>& rects, std::vector<float>& scores)
+{
+    if (!voutputData || !output_shape) {
+        // nothing to do
+        return;
+    }
+
+    /*
+    https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/230-yolov8-optimization/230-yolov8-object-detection.ipynb
+    detection box has the [x, y, h, w, class_no_1, ..., class_no_80] format, where:
+    (x, y) - raw coordinates of box center
+    h, w - raw height and width of the box
+    class_no_1, ..., class_no_80 - probability distribution over the classes.
+    */
+
+    std::vector<DetectedResult> detectedResults;
+    size_t num_proposals = output_shape[2];
+    std::vector<DetectedResult> boxes_with_class;
+    std::vector<float> confidences;
+    const float* const detections = (float*)(voutputData);
+    for (size_t i = 0; i < num_proposals; ++i) {
+        float confidence = 0.0f;
+        size_t max_id = 0;
+        constexpr size_t LABELS_START = 4;
+        // get max confidence level in the 80 classes for this detection
+        for (size_t j = LABELS_START; j < output_shape[1]; ++j) {
+            if (detections[j * num_proposals + i] > confidence) {
+                confidence = detections[j * num_proposals + i];
+                max_id = j;
+            }
+        }
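+        // the raw yolov8 output is [1, 84, num_proposals]: 4 box values followed
+        // by 80 per-class scores for each proposal, with no separate objectness
+        // score, hence the transposed detections[j * num_proposals + i] indexing.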
+        // add the detection if the max confidence meets the threshold
+        if (confidence > confidence_threshold) {
+            DetectedResult obj;
+            obj.x = detections[0 * num_proposals + i] - detections[2 * num_proposals + i] / 2.0f;
+            obj.y = detections[1 * num_proposals + i] - detections[3 * num_proposals + i] / 2.0f;
+            obj.width = detections[0 * num_proposals + i] + detections[2 * num_proposals + i] / 2.0f;
+            obj.height = detections[1 * num_proposals + i] + detections[3 * num_proposals + i] / 2.0f;
+            obj.classId = max_id - LABELS_START;
+            obj.confidence = confidence;
+
+            // if (obj.classId != 39 && obj.classId != 0)
+            //     printf("Found: %i conf: %f\n", obj.classId, obj.confidence);
+
+            boxes_with_class.emplace_back(obj);
+            confidences.push_back(confidence);
+        }
+    }
+
+    constexpr bool includeBoundaries = false;
+    constexpr size_t keep_top_k = 30000;
+    std::vector<int> keep;
+    bool agnostic_nms = true;
+
+    if (agnostic_nms) {
+        keep = nms(boxes_with_class, boxiou_threshold, includeBoundaries, keep_top_k);
+    } else {
+        keep = multiclass_nms(boxes_with_class, boxiou_threshold, includeBoundaries, keep_top_k);
+    }
+
+    int padLeft = 15, padTop = 0; // adjust padding for optimal efficientnet inference
+    float floatInputImgWidth = float(imageWidth),
+        floatInputImgHeight = float(imageHeight),
+        netInputWidth = floatInputImgWidth,
+        netInputHeight = floatInputImgHeight,
+        invertedScaleX = floatInputImgWidth / netInputWidth,
+        invertedScaleY = floatInputImgHeight / netInputHeight;
+
+    for (size_t idx : keep) {
+        int x1 = std::clamp(
+            round((boxes_with_class[idx].x - padLeft) * invertedScaleX),
+            0.f,
+            floatInputImgWidth);
+        int y1 = std::clamp(
+            round((boxes_with_class[idx].y - padTop) * invertedScaleY),
+            0.f,
+            floatInputImgHeight);
+        int x2 = std::clamp(
+            round((boxes_with_class[idx].width + padLeft) * invertedScaleX) - x1,
+            0.f,
+            floatInputImgWidth - x1);
+        int y2 = std::clamp(
+            round((boxes_with_class[idx].height + padTop) * invertedScaleY) - y1,
+            0.f,
+            floatInputImgHeight - y1);
+
+        // if (boxes_with_class[idx].classId != 39 && boxes_with_class[idx].classId != 0)
+        //printf("Keeping: %i conf: %f %i,%i,%i,%i/%i\n", boxes_with_class[idx].classId, boxes_with_class[idx].confidence, x1, y1, x2,y2,y3);
+
+        //printf("Adding detection %i conf:%f/%f %ix%ix%ix%i\n",boxes_with_class[idx].classId, boxes_with_class[idx].confidence, confidences[idx], x1, y1, x2, y2);
+        rects.emplace_back(x1, y1, x2, y2);
+        scores.emplace_back(confidences[idx]);
+
+        //desc.classId = static_cast(boxes_with_class[idx].classId);
+        //desc.label = getLabelName(desc.labelID);
+        //detectedResults.push_back(desc);
+    }
+
+    //printf("Number results detected %i vs.
nms saved: %i\n", boxes_with_class.size(), rects.size()); + + + //printf ( " Number of dets is: %i obj size is: %i\n", numberOfDetections, objectSize); + //for (int i = 0; i < numberOfDetections; i++) + //{ + // printf("%fx%fx%fx%f -> %f %f %f %f, %f next box x: %f \n", + // outData[0], outData[1], outData[2],outData[3], + // outData[4], outData[5],outData[6], outData[82], outData[84], outData[85]); + // break; + // double bestProb; + // cv::Point classIdx; + // cv::Mat probs(1, objectSize-4, CV_32FC1, (void*)(outData + 4)); + // cv::minMaxLoc(probs, nullptr, &bestProb, nullptr, &classIdx); + + // if (bestProb > confidence_threshold) + // { + // float x, y, height, width; + // height = outData[3]; + // width = outData[2]; + // x = std::max((outData[0] - 0.5f * width + 0.5f), 0.0f); + // y = std::max((outData[1] - 0.5f * height + 0.5f), 0.0f); + + // // cv::Rect_ bbox = cv::Rect_ (out_left, out_top, (out_w + 0.5), (out_h + 0.5)); + // // cv::Rect_ scaled_bbox = scale_boxes(getCvSize(), bbox, image_info.raw_size); + + // // boxes.push_back(scaled_bbox); + // float bestResultConfidence = 0.0f; + // float currResultConfidence; + // int bestResultClassId = 0; + // auto postprocessRawData = sigmoid; //sigmoid or linear + + // // get the classIdx for the highest confidence in this detection + // printf("detection #%i ", i); + // for (int classSearchIdx = 0; classSearchIdx < (objectSize-4); classSearchIdx++) + // { + // currResultConfidence = *(outData+4+classSearchIdx); + // printf("%i prob: %f ", classSearchIdx, currResultConfidence); + // if ( currResultConfidence > bestResultConfidence ) + // { + // bestResultConfidence = currResultConfidence; + // bestResultClassId = classSearchIdx; + // } + // } + // printf("\n\n"); + + // continue; + + //printf("Best classId returned for detection %i is : Conf: %f -> %fx%fx%fx%f\n"); + // if (bestResultConfidence > confidence_threshold) + // { + // float x, y, height, width; + // height = outData[3]; + // width = outData[2]; + // x = std::max((outData[0] - 0.5f * width + 0.5f), 0.0f); + // y = std::max((outData[1] - 0.5f * height + 0.5f), 0.0f); + + // //printf("1---->>>> %f %f %f %f\n", outData[0], outData[1], outData[2], outData[3]); + // //printf("2---->>>> %f %f %f %f\n", outData[0], outData[1], outData[2], outData[3]); + + + // DetectedResult obj; + // obj.x = std::clamp(x, 0.f, static_cast(imageWidth)); + // obj.y = std::clamp(y, 0.f, static_cast(imageHeight)); + // obj.width = std::clamp(width+0.5f, 0.f, static_cast(imageWidth - obj.x)); + // obj.height = std::clamp(height+0.5f, 0.f, static_cast(imageHeight - obj.y)); + // obj.confidence = bestResultConfidence; + + // if (obj.width > 0.0f && obj.height > 0.0f && outData[0] > 0.0f && outData[1] > 0.0f) + // { + // printf("Adding detection #%i obj: classid:%i conf: %f -> %i %i %i %i vs. 
raw %f %f %f %f\n", + // i, + // bestResultClassId, + // obj.confidence, + // obj.x, obj.y, obj.width, obj.height, + // outData[0], outData[1], outData[2], outData[3]); + // detectedResults.push_back(obj); + // } + + + //masks.emplace_back(pdata + 4 + class_names_num, pdata + data_width); + //class_ids.push_back(class_id.x); + //confidences.push_back((float) max_conf); + + // float out_w = pdata[2]; + // float out_h = pdata[3]; + // float out_left = MAX((pdata[0] - 0.5 * out_w + 0.5), 0); + // float out_top = MAX((pdata[1] - 0.5 * out_h + 0.5), 0); + + // cv::Rect_ bbox = cv::Rect_ (out_left, out_top, (out_w + 0.5), (out_h + 0.5)); + // cv::Rect_ scaled_bbox = scale_boxes(getCvSize(), bbox, image_info.raw_size); + + // boxes.push_back(scaled_bbox); + // } + // outData += objectSize; + //} + + // filter IOU + //postprocess(detectedResults, rects, scores); + // std::vector nms_result; + // cv::dnn::NMSBoxes(rects, scores, confidence_threshold, iou_threshold, nms_result); // , nms_eta, top_k); // default is 1.0f, 0 + //detectedResults.clear(); + + // for (int idx : nms_result) + // { + // //boxes[idx] = boxes[idx] & cv::Rect(0, 0, image_info.raw_size.width, image_info.raw_size.height); + // YoloResults result = { class_ids[idx] ,confidences[idx] ,boxes[idx] }; + // output.push_back(result); + // } + + // std::vector nms_result; + // cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, iou_threshold, nms_result); // , nms_eta, top_k); + // for (int idx : nms_result) + // { + // boxes[idx] = boxes[idx] & cv::Rect(0, 0, image_info.raw_size.width, image_info.raw_size.height); + // YoloResults result = { class_ids[idx] ,confidences[idx] ,boxes[idx] }; + // output.push_back(result); + // } + + // + // Yolov5 anchor based postprocessing + // const int regionCoordsCount = dimCount; + // const int sideH = 13; //output_shape[2]; // NCHW + // const int sideW = 13; //output_shape[3]; // NCHW + // const int regionNum = 3; + + // const int scaleH = 416; + // const int scaleW = 416; + + // auto entriesNum = sideW * sideH; + // const float* outData = reinterpret_cast(voutputData); + // int original_im_w = imageWidth; + // int original_im_h = imageHeight; + // size_t classes = 80; // from yolo dataset + + // auto postprocessRawData = sigmoid; //sigmoid or linear + + // for (int i = 0; i < entriesNum; ++i) { + // int row = i / sideW; + // int col = i % sideW; + + // for (int n = 0; n < regionNum; ++n) { + + // int obj_index = calculateEntryIndex(entriesNum, regionCoordsCount, classes + 1 /* + confidence byte */, n * entriesNum + i,regionCoordsCount); + // int box_index = calculateEntryIndex(entriesNum, regionCoordsCount, classes + 1, n * entriesNum + i, 0); + // //float outdata = outData[obj_index]; + // float scale = postprocessRawData(outData[obj_index]); + + // if (scale >= confidence_threshold) { + // float x, y,height,width; + // x = static_cast((col + postprocessRawData(outData[box_index + 0 * entriesNum])) / sideW * original_im_w); + // y = static_cast((row + postprocessRawData(outData[box_index + 1 * entriesNum])) / sideH * original_im_h); + // height = static_cast(std::pow(2*postprocessRawData(outData[box_index + 3 * entriesNum]),2) * anchors_13[2 * n + 1] * original_im_h / scaleH ); + // width = static_cast(std::pow(2*postprocessRawData(outData[box_index + 2 * entriesNum]),2) * anchors_13[2 * n] * original_im_w / scaleW ); + + // //cv::Rect obj; + // DetectedResult obj; + + // obj.x = std::clamp(x - width / 2, 0.f, static_cast(original_im_w)); + // obj.y = std::clamp(y - height / 2, 0.f, 
static_cast(original_im_h)); + // obj.width = std::clamp(width, 0.f, static_cast(original_im_w - obj.x)); + // obj.height = std::clamp(height, 0.f, static_cast(original_im_h - obj.y)); + + // for (size_t j = 0; j < classes; ++j) { + // int class_index = calculateEntryIndex(entriesNum, regionCoordsCount, classes + 1, n * entriesNum + i, regionCoordsCount + 1 + j); + // float prob = scale * postprocessRawData(outData[class_index]); + + // if (prob >= confidence_threshold) { + // //obj.classId = j; + // //obj.classText = getClassLabelText(j).c_str(); + // obj.confidence = prob; + // detectedResults.push_back(obj); + // //rects.emplace_back(obj.x, obj.y, obj.width, obj.height); + // //scores.emplace_back(prob); + // } + // } + // } // end else + // } // end for + // } // end for + + // NMS + //postprocess(detectedResults, rects, scores); +} +// End of Yolov8 PostProcessing + +int execute(const struct CustomNodeTensor* inputs, int inputsCount, struct CustomNodeTensor** outputs, int* outputsCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) { + + // Parameters reading + int originalImageHeight = get_int_parameter("original_image_height", params, paramsCount, -1); + int originalImageWidth = get_int_parameter("original_image_width", params, paramsCount, -1); + NODE_ASSERT(originalImageHeight > 0, "original image height must be larger than 0"); + NODE_ASSERT(originalImageWidth > 0, "original image width must be larger than 0"); + int targetImageHeight = get_int_parameter("target_image_height", params, paramsCount, -1); + int targetImageWidth = get_int_parameter("target_image_width", params, paramsCount, -1); + NODE_ASSERT(targetImageHeight > 0, "target image height must be larger than 0"); + NODE_ASSERT(targetImageWidth > 0, "target image width must be larger than 0"); + std::string originalImageLayout = get_string_parameter("original_image_layout", params, paramsCount, "NCHW"); + NODE_ASSERT(originalImageLayout == "NCHW" || originalImageLayout == "NHWC", "original image layout must be NCHW or NHWC"); + std::string targetImageLayout = get_string_parameter("target_image_layout", params, paramsCount, "NCHW"); + NODE_ASSERT(targetImageLayout == "NCHW" || targetImageLayout == "NHWC", "target image layout must be NCHW or NHWC"); + bool convertToGrayScale = get_string_parameter("convert_to_gray_scale", params, paramsCount) == "true"; + float confidenceThreshold = get_float_parameter("confidence_threshold", params, paramsCount, -1.0); + NODE_ASSERT(confidenceThreshold >= 0 && confidenceThreshold <= 1.0, "confidence threshold must be in 0-1 range"); + uint64_t maxOutputBatch = get_int_parameter("max_output_batch", params, paramsCount, 100); + NODE_ASSERT(maxOutputBatch > 0, "max output batch must be larger than 0"); + debugMode = get_string_parameter("debug", params, paramsCount) == "true"; + + const CustomNodeTensor* imageTensor = nullptr; + const CustomNodeTensor* boxesTensor = nullptr; + + for (int i = 0; i < inputsCount; i++) { + if (std::strcmp(inputs[i].name, IMAGE_TENSOR_NAME) == 0) { + imageTensor = &(inputs[i]); + } else if (std::strcmp(inputs[i].name, GEOMETRY_TENSOR_NAME) == 0) { + boxesTensor = &(inputs[i]); + } else { + std::cout << "Unrecognized input: " << inputs[i].name << std::endl; + return 1; + } + } + + NODE_ASSERT(imageTensor != nullptr, "Missing input image"); + NODE_ASSERT(boxesTensor != nullptr, "Missing input boxes"); + NODE_ASSERT(imageTensor->precision == FP32, "-------------->>>>>>>image input is not FP32"); + 
NODE_ASSERT(boxesTensor->precision == FP32, "boxes input is not FP32");
+
+    NODE_ASSERT(imageTensor->dimsCount == 4, "input image shape must have 4 dimensions");
+    NODE_ASSERT(imageTensor->dims[0] == 1, "input image batch must be 1");
+    uint64_t _imageHeight = imageTensor->dims[originalImageLayout == "NCHW" ? 2 : 1];
+    uint64_t _imageWidth = imageTensor->dims[originalImageLayout == "NCHW" ? 3 : 2];
+    NODE_ASSERT(_imageHeight <= static_cast<uint64_t>(std::numeric_limits<int>::max()), "image height is too large");
+    NODE_ASSERT(_imageWidth <= static_cast<uint64_t>(std::numeric_limits<int>::max()), "image width is too large");
+    int imageHeight = static_cast<int>(_imageHeight);
+    int imageWidth = static_cast<int>(_imageWidth);
+
+    if (debugMode) {
+        std::cout << "Processing input tensor image resolution: " << cv::Size(imageWidth, imageHeight) << "; expected resolution: " << cv::Size(originalImageWidth, originalImageHeight) << std::endl;
+    }
+
+    NODE_ASSERT(imageHeight == originalImageHeight, "original image size parameter differs from original image tensor size");
+    NODE_ASSERT(imageWidth == originalImageWidth, "original image size parameter differs from original image tensor size");
+
+    cv::Mat image;
+    if (originalImageLayout == "NHWC") {
+        image = nhwc_to_mat(imageTensor);
+    } else {
+        image = nchw_to_mat(imageTensor);
+    }
+
+    NODE_ASSERT(image.cols == imageWidth, "Mat generation failed");
+    NODE_ASSERT(image.rows == imageHeight, "Mat generation failed");
+
+// TODO:
+    std::vector<cv::Rect> rects;
+    std::vector<float> scores;
+    postprocess(confidenceThreshold, originalImageWidth, originalImageHeight, boxesTensor->dims, boxesTensor->data, boxesTensor->dimsCount, rects, scores);
+
+    NODE_ASSERT(rects.size() == scores.size(), "rects and scores are not equal length");
+    if (rects.size() > maxOutputBatch) {
+        rects.resize(maxOutputBatch);
+        scores.resize(maxOutputBatch);
+    }
+
+    if (debugMode)
+        std::cout << "Total findings: " << rects.size() << std::endl;
+
+    *outputsCount = 3; // pipeline outputs for efficientnetb0_extractor e.g.
roi_images, roi_coordinates, confidence_levels + *outputs = (struct CustomNodeTensor*)malloc(*outputsCount * sizeof(CustomNodeTensor)); + + NODE_ASSERT((*outputs) != nullptr, "malloc has failed"); + CustomNodeTensor& textImagesTensor = (*outputs)[0]; + textImagesTensor.name = TEXT_IMAGES_TENSOR_NAME; + + if (!copy_images_into_output(&textImagesTensor, rects, image, targetImageHeight, targetImageWidth, targetImageLayout, convertToGrayScale)) { + free(*outputs); + return 1; + } + + CustomNodeTensor& coordinatesTensor = (*outputs)[1]; + coordinatesTensor.name = COORDINATES_TENSOR_NAME; + if (!copy_coordinates_into_output(&coordinatesTensor, rects)) { + free(*outputs); + cleanup(textImagesTensor); + return 1; + } + + + CustomNodeTensor& confidenceTensor = (*outputs)[2]; + confidenceTensor.name = CONFIDENCE_TENSOR_NAME; + if (!copy_scores_into_output(&confidenceTensor, scores)) { + free(*outputs); + cleanup(textImagesTensor); + cleanup(coordinatesTensor); + return 1; + } + + //printf("finished execute\n"); + return 0; +} + +int getInputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) { + int originalImageHeight = get_int_parameter("original_image_height", params, paramsCount, -1); + int originalImageWidth = get_int_parameter("original_image_width", params, paramsCount, -1); + NODE_ASSERT(originalImageHeight > 0, "original image height must be larger than 0"); + NODE_ASSERT(originalImageWidth > 0, "original image width must be larger than 0"); + std::string originalImageLayout = get_string_parameter("original_image_layout", params, paramsCount, "NCHW"); + NODE_ASSERT(originalImageLayout == "NCHW" || originalImageLayout == "NHWC", "original image layout must be NCHW or NHWC"); + + *infoCount = 2; + *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo)); + NODE_ASSERT((*info) != nullptr, "malloc has failed"); + + (*info)[0].name = IMAGE_TENSOR_NAME; + (*info)[0].dimsCount = 4; + (*info)[0].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[0].dims) != nullptr, "malloc has failed"); + (*info)[0].dims[0] = 1; + if (originalImageLayout == "NCHW") { + (*info)[0].dims[1] = 3; + (*info)[0].dims[2] = originalImageHeight; + (*info)[0].dims[3] = originalImageWidth; + } else { + (*info)[0].dims[1] = originalImageHeight; + (*info)[0].dims[2] = originalImageWidth; + (*info)[0].dims[3] = 3; + } + (*info)[0].precision = FP32; + + (*info)[1].name = GEOMETRY_TENSOR_NAME; + (*info)[1].dimsCount = 3; + (*info)[1].dims = (uint64_t*)malloc((*info)[1].dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[1].dims) != nullptr, "malloc has failed"); + // 416x416 + (*info)[1].dims[0] = 1; + (*info)[1].dims[1] = 84; + (*info)[1].dims[2] = 3549; + + // 512x512 + // (*info)[1].dims[0] = 1; + // (*info)[1].dims[1] = 84; + // (*info)[1].dims[2] = 5376; + //640x640 + // (*info)[1].dims[0] = 1; + // (*info)[1].dims[1] = 84; + // (*info)[1].dims[2] = 8400; + + (*info)[1].precision = FP32; + return 0; +} + +int getOutputsInfo(struct CustomNodeTensorInfo** info, int* infoCount, const struct CustomNodeParam* params, int paramsCount, void* customNodeLibraryInternalManager) { + int targetImageHeight = get_int_parameter("target_image_height", params, paramsCount, -1); + int targetImageWidth = get_int_parameter("target_image_width", params, paramsCount, -1); + NODE_ASSERT(targetImageHeight > 0, "target image height must be larger than 0"); + 
NODE_ASSERT(targetImageWidth > 0, "target image width must be larger than 0"); + std::string targetImageLayout = get_string_parameter("target_image_layout", params, paramsCount, "NCHW"); + NODE_ASSERT(targetImageLayout == "NCHW" || targetImageLayout == "NHWC", "target image layout must be NCHW or NHWC"); + bool convertToGrayScale = get_string_parameter("convert_to_gray_scale", params, paramsCount) == "true"; + + *infoCount = 3; + *info = (struct CustomNodeTensorInfo*)malloc(*infoCount * sizeof(struct CustomNodeTensorInfo)); + NODE_ASSERT((*info) != nullptr, "malloc has failed"); + + (*info)[0].name = TEXT_IMAGES_TENSOR_NAME; + (*info)[0].dimsCount = 5; + (*info)[0].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[0].dims) != nullptr, "malloc has failed"); + (*info)[0].dims[0] = 0; + (*info)[0].dims[1] = 1; + if (targetImageLayout == "NCHW") { + (*info)[0].dims[2] = convertToGrayScale ? 1 : 3; + (*info)[0].dims[3] = targetImageHeight; + (*info)[0].dims[4] = targetImageWidth; + } else { + (*info)[0].dims[2] = targetImageHeight; + (*info)[0].dims[3] = targetImageWidth; + (*info)[0].dims[4] = convertToGrayScale ? 1 : 3; + } + (*info)[0].precision = FP32; + + (*info)[1].name = COORDINATES_TENSOR_NAME; + (*info)[1].dimsCount = 3; + (*info)[1].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[1].dims) != nullptr, "malloc has failed"); + (*info)[1].dims[0] = 0; + (*info)[1].dims[1] = 1; + (*info)[1].dims[2] = 4; + (*info)[1].precision = I32; + + (*info)[2].name = CONFIDENCE_TENSOR_NAME; + (*info)[2].dimsCount = 3; + (*info)[2].dims = (uint64_t*)malloc((*info)->dimsCount * sizeof(uint64_t)); + NODE_ASSERT(((*info)[2].dims) != nullptr, "malloc has failed"); + (*info)[2].dims[0] = 0; + (*info)[2].dims[1] = 1; + (*info)[2].dims[2] = 1; + (*info)[2].precision = FP32; + + return 0; +} + +int release(void* ptr, void* customNodeLibraryInternalManager) { + free(ptr); + return 0; +} diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/main.cpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/main.cpp new file mode 100644 index 00000000..199b2d15 --- /dev/null +++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/main.cpp @@ -0,0 +1,2239 @@ +//***************************************************************************** +// Copyright 2024 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//*****************************************************************************
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+// Utilized for GStreamer hardware-accelerated decode and pre-processing
+#include
+#include
+#include
+
+// Utilized for OpenCV based Rendering only
+#include
+#include
+#include
+#include
+
+// Utilized for inference output layer post-processing
+#include
+
+#include "ovms.h" // NOLINT
+
+using namespace std;
+using namespace cv;
+
+std::mutex _mtx;
+std::mutex _infMtx;
+std::mutex _drawingMtx;
+std::condition_variable _cvAllDecodersInitd;
+bool _allDecodersInitd = false;
+
+typedef struct DetectedResult {
+    int frameId;
+    int x;
+    int y;
+    int width;
+    int height;
+    float confidence;
+    int classId;
+    char classText[1024];
+} DetectedResult;
+
+class MediaPipelineServiceInterface {
+public:
+    enum VIDEO_TYPE {
+        H265,
+        H264
+    };
+
+    virtual ~MediaPipelineServiceInterface() {}
+    virtual const std::string getVideoDecodedPreProcessedPipeline(std::string mediaLocation, VIDEO_TYPE videoType, int video_width, int video_height, bool use_onevpl) = 0;
+
+    const std::string updateVideoDecodedPreProcessedPipeline(int video_width, int video_height, bool use_onevpl)
+    {
+        return getVideoDecodedPreProcessedPipeline(m_mediaLocation, m_videoType, video_width, video_height, use_onevpl);
+    }
+
+protected:
+    std::string m_mediaLocation;
+    VIDEO_TYPE m_videoType;
+    int m_videoWidth;
+    int m_videoHeight;
+};
+
+OVMS_Server* _srv;
+OVMS_ServerSettings* _serverSettings = 0;
+OVMS_ModelsSettings* _modelsSettings = 0;
+int _server_grpc_port;
+int _server_http_port;
+
+std::string _videoStreamPipeline;
+std::string _videoStreamPipeline2;
+MediaPipelineServiceInterface::VIDEO_TYPE _videoType = MediaPipelineServiceInterface::VIDEO_TYPE::H264;
+MediaPipelineServiceInterface::VIDEO_TYPE _videoType2 = MediaPipelineServiceInterface::VIDEO_TYPE::H264;
+int _detectorModel = 0;
+bool _render = 0;
+bool _use_onevpl = 0;
+bool _renderPortrait = 0;
+cv::Mat _presentationImg;
+int _video_input_width = 0;  // Get from media _img
+int _video_input_height = 0; // Get from media _img
+std::vector _vidcaps;
+int _window_width = 1280;
+int _window_height = 720;
+float _detection_threshold = 0.5;
+
+class GStreamerMediaPipelineService : public MediaPipelineServiceInterface {
+public:
+    const std::string getVideoDecodedPreProcessedPipeline(std::string mediaLocation, VIDEO_TYPE videoType, int video_width, int video_height, bool use_onevpl) {
+        m_mediaLocation = mediaLocation;
+        m_videoType = videoType;
+        m_videoWidth = video_width;
+        m_videoHeight = video_height;
+
+        if (mediaLocation.find("rtsp") != std::string::npos) {
+            switch (videoType)
+            {
+            case H264:
+                if (use_onevpl)
+                    return "rtspsrc location=" + mediaLocation + " ! rtph264depay ! h264parse ! " +
+                        "msdkh264dec ! msdkvpp scaling-mode=lowpower ! " +
+                        "video/x-raw, width=" + std::to_string(video_width) +
+                        ", height=" + std::to_string(video_height) + " ! videoconvert ! video/x-raw,format=RGB ! queue ! appsink drop=1 sync=0";
+                else
+                    return "rtspsrc location=" + mediaLocation + " ! rtph264depay ! h264parse ! vah264dec ! video/x-raw(memory:VAMemory),format=NV12 " +
+                        " ! vapostproc ! " +
+                        " video/x-raw, width=" + std::to_string(video_width) +
+                        ", height=" + std::to_string(video_height) +
+                        " ! videoconvert ! video/x-raw,format=RGB ! queue ! appsink drop=1 sync=0";
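+            // the H.265 case below mirrors the H.264 path, swapping in the
+            // rtph265depay/h265parse elements and the matching decoders; both
+            // branches normalize to raw RGB video sized for the model input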
appsink drop=1 sync=0"; + case H265: + if (use_onevpl) + return "rtspsrc location=" + mediaLocation + " ! rtph265depay ! h265parse ! " + + "msdkh265dec ! " + + "msdkvpp scaling-mode=lowpower ! " + + "video/x-raw, width=" + std::to_string(video_width) + + ", height=" + std::to_string(video_height) + " ! videoconvert ! video/x-raw,format=RGB ! queue ! appsink drop=1 sync=0"; + else + return "rtspsrc location=" + mediaLocation + " ! rtph265depay ! h265parse ! vah265dec ! video/x-raw(memory:VAMemory),format=NV12 " + + " ! vapostproc ! " + + " video/x-raw, width=" + std::to_string(video_width) + + ", height=" + std::to_string(video_height) + + " ! videoconvert ! video/x-raw,format=RGB ! queue ! appsink drop=1 sync=0"; + default: + std::cout << "Video type not supported!" << videoType << std::endl; + return ""; + } + } + else if (mediaLocation.find(".mp4") != std::string::npos ) { + switch (videoType) + { + case H264: + if (use_onevpl) + return "filesrc location=" + mediaLocation + " ! qtdemux ! h264parse ! " + + "msdkh264dec ! msdkvpp scaling-mode=lowpower ! " + + "video/x-raw, width=" + std::to_string(video_width) + ", height=" + std::to_string(video_height) + + " ! videoconvert ! video/x-raw,format=RGB ! queue ! appsink drop=1 sync=0"; + else + return "filesrc location=" + mediaLocation + " ! qtdemux ! h264parse ! vaapidecodebin ! vaapipostproc" + + " width=" + std::to_string(video_width) + + " height=" + std::to_string(video_height) + + " scale-method=fast ! videoconvert ! video/x-raw,format=RGB ! appsink drop=1 sync=0"; + case H265: + if (use_onevpl) + return "filesrc location=" + mediaLocation + " ! qtdemux ! h265parse ! " + + "msdkh265dec ! msdkvpp scaling-mode=lowpower ! " + + " video/x-raw, width=" + std::to_string(video_width) + ", height=" + std::to_string(video_height) + + " ! videoconvert ! video/x-raw,format=RGB ! queue ! appsink drop=1 sync=0"; + else + return "filesrc location=" + mediaLocation + " ! qtdemux ! h265parse ! vaapidecodebin ! vaapipostproc" + + " width=" + std::to_string(video_width) + + " height=" + std::to_string(video_height) + + " scale-method=fast ! videoconvert ! video/x-raw,format=RGB ! appsink drop=1 sync=0"; + default: + std::cout << "Video type not supported!" << videoType << std::endl; + return ""; + } + } + else { + std::cout << "Unknown media source specified " << mediaLocation << " !!" << std::endl; + return ""; + } + } +protected: + +}; + +class ObjectDetectionInterface { +public: + const static size_t MODEL_DIM_COUNT = 4; + int64_t model_input_shape[MODEL_DIM_COUNT] = { 0 }; + + virtual ~ObjectDetectionInterface() {} + virtual const char* getModelName() = 0; + virtual const uint64_t getModelVersion() = 0; + virtual const char* getModelInputName() = 0; + virtual const size_t getModelDimCount() = 0; + virtual const std::vector getModelInputShape() = 0; + virtual const std::string getClassLabelText(int classIndex) = 0; + + double intersectionOverUnion(const DetectedResult& o1, const DetectedResult& o2) { + double overlappingWidth = std::fmin(o1.x + o1.width, o2.x + o2.width) - std::fmax(o1.x, o2.x); + double overlappingHeight = std::fmin(o1.y + o1.height, o2.y + o2.height) - std::fmax(o1.y, o2.y); + double intersectionArea = (overlappingWidth < 0 || overlappingHeight < 0) ? 
0 : overlappingHeight * overlappingWidth; + double unionArea = o1.width * o1.height + o2.width * o2.height - intersectionArea; + return intersectionArea / unionArea; + } + + virtual void postprocess(const int64_t* output_shape, const void* voutputData, const size_t bytesize, const uint32_t dimCount, std::vector &detectedResults) + { + // do nothing + } + + // Yolov8Ensemble detection/classification postprocess + virtual void postprocess( + const int64_t* output_shape_conf, const void* voutputData_conf, const size_t bytesize_conf, const uint32_t dimCount_conf, + const int64_t* output_shape_boxes, const void* voutputData_boxes, const size_t bytesize_boxes, const uint32_t dimCount_boxes, + const int64_t* output_shape_classification, const void* voutputData_classification, const size_t bytesize_classification, const uint32_t dimCount_classification, + std::vector &detectedResults) + { + // derived to implement + } + +protected: + float confidence_threshold = .9; + float boxiou_threshold = .4; + float iou_threshold = 0.4; + int classes = 80; + bool useAdvancedPostprocessing = false; + +}; + +class Yolov8Ensemble : public ObjectDetectionInterface { +public: + + Yolov8Ensemble() { + confidence_threshold = _detection_threshold; + // end of pipeline is efficientnet results + classes = 1000; + std::vector vmodel_input_shape = getModelInputShape(); + std::copy(vmodel_input_shape.begin(), vmodel_input_shape.end(), model_input_shape); + } + + const char* getModelName() { + return MODEL_NAME; + } + + const uint64_t getModelVersion() { + return MODEL_VERSION; + } + + const char* getModelInputName() { + return INPUT_NAME; + } + + const size_t getModelDimCount() { + return MODEL_DIM_COUNT; + } + + const std::vector getModelInputShape() { + std::vector shape{1, 3, 416, 416}; + return shape; + } + const std::string labels[1000] = { + "tench, Tinca tinca", + "goldfish, Carassius auratus", + "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias", + "tiger shark, Galeocerdo cuvieri", + "hammerhead, hammerhead shark", + "electric ray, crampfish, numbfish, torpedo", + "stingray", + "cock", + "hen", + "ostrich, Struthio camelus", + "brambling, Fringilla montifringilla", + "goldfinch, Carduelis carduelis", + "house finch, linnet, Carpodacus mexicanus", + "junco, snowbird", + "indigo bunting, indigo finch, indigo bird, Passerina cyanea", + "robin, American robin, Turdus migratorius", + "bulbul", + "jay", + "magpie", + "chickadee", + "water ouzel, dipper", + "kite", + "bald eagle, American eagle, Haliaeetus leucocephalus", + "vulture", + "great grey owl, great gray owl, Strix nebulosa", + "European fire salamander, Salamandra salamandra", + "common newt, Triturus vulgaris", + "eft", + "spotted salamander, Ambystoma maculatum", + "axolotl, mud puppy, Ambystoma mexicanum", + "bullfrog, Rana catesbeiana", + "tree frog, tree-frog", + "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui", + "loggerhead, loggerhead turtle, Caretta caretta", + "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea", + "mud turtle", + "terrapin", + "box turtle, box tortoise", + "banded gecko", + "common iguana, iguana, Iguana iguana", + "American chameleon, anole, Anolis carolinensis", + "whiptail, whiptail lizard", + "agama", + "frilled lizard, Chlamydosaurus kingi", + "alligator lizard", + "Gila monster, Heloderma suspectum", + "green lizard, Lacerta viridis", + "African chameleon, Chamaeleo chamaeleon", + "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus 
komodoensis", + "African crocodile, Nile crocodile, Crocodylus niloticus", + "American alligator, Alligator mississipiensis", + "triceratops", + "thunder snake, worm snake, Carphophis amoenus", + "ringneck snake, ring-necked snake, ring snake", + "hognose snake, puff adder, sand viper", + "green snake, grass snake", + "king snake, kingsnake", + "garter snake, grass snake", + "water snake", + "vine snake", + "night snake, Hypsiglena torquata", + "boa constrictor, Constrictor constrictor", + "rock python, rock snake, Python sebae", + "Indian cobra, Naja naja", + "green mamba", + "sea snake", + "horned viper, cerastes, sand viper, horned asp, Cerastes cornutus", + "diamondback, diamondback rattlesnake, Crotalus adamanteus", + "sidewinder, horned rattlesnake, Crotalus cerastes", + "trilobite", + "harvestman, daddy longlegs, Phalangium opilio", + "scorpion", + "black and gold garden spider, Argiope aurantia", + "barn spider, Araneus cavaticus", + "garden spider, Aranea diademata", + "black widow, Latrodectus mactans", + "tarantula", + "wolf spider, hunting spider", + "tick", + "centipede", + "black grouse", + "ptarmigan", + "ruffed grouse, partridge, Bonasa umbellus", + "prairie chicken, prairie grouse, prairie fowl", + "peacock", + "quail", + "partridge", + "African grey, African gray, Psittacus erithacus", + "macaw", + "sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita", + "lorikeet", + "coucal", + "bee eater", + "hornbill", + "hummingbird", + "jacamar", + "toucan", + "drake", + "red-breasted merganser, Mergus serrator", + "goose", + "black swan, Cygnus atratus", + "tusker", + "echidna, spiny anteater, anteater", + "platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus", + "wallaby, brush kangaroo", + "koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus", + "wombat", + "jellyfish", + "sea anemone, anemone", + "brain coral", + "flatworm, platyhelminth", + "nematode, nematode worm, roundworm", + "conch", + "snail", + "slug", + "sea slug, nudibranch", + "chiton, coat-of-mail shell, sea cradle, polyplacophore", + "chambered nautilus, pearly nautilus, nautilus", + "Dungeness crab, Cancer magister", + "rock crab, Cancer irroratus", + "fiddler crab", + "king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica", + "American lobster, Northern lobster, Maine lobster, Homarus americanus", + "spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish", + "crayfish, crawfish, crawdad, crawdaddy", + "hermit crab", + "isopod", + "white stork, Ciconia ciconia", + "black stork, Ciconia nigra", + "spoonbill", + "flamingo", + "little blue heron, Egretta caerulea", + "American egret, great white heron, Egretta albus", + "bittern", + "crane", + "limpkin, Aramus pictus", + "European gallinule, Porphyrio porphyrio", + "American coot, marsh hen, mud hen, water hen, Fulica americana", + "bustard", + "ruddy turnstone, Arenaria interpres", + "red-backed sandpiper, dunlin, Erolia alpina", + "redshank, Tringa totanus", + "dowitcher", + "oystercatcher, oyster catcher", + "pelican", + "king penguin, Aptenodytes patagonica", + "albatross, mollymawk", + "grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus", + "killer whale, killer, orca, grampus, sea wolf, Orcinus orca", + "dugong, Dugong dugon", + "sea lion", + "Chihuahua", + "Japanese spaniel", + "Maltese dog, Maltese terrier, Maltese", + "Pekinese, Pekingese, Peke", + "Shih-Tzu", + "Blenheim spaniel", + "papillon", + "toy terrier", + 
"Rhodesian ridgeback", + "Afghan hound, Afghan", + "basset, basset hound", + "beagle", + "bloodhound, sleuthhound", + "bluetick", + "black-and-tan coonhound", + "Walker hound, Walker foxhound", + "English foxhound", + "redbone", + "borzoi, Russian wolfhound", + "Irish wolfhound", + "Italian greyhound", + "whippet", + "Ibizan hound, Ibizan Podenco", + "Norwegian elkhound, elkhound", + "otterhound, otter hound", + "Saluki, gazelle hound", + "Scottish deerhound, deerhound", + "Weimaraner", + "Staffordshire bullterrier, Staffordshire bull terrier", + "American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier", + "Bedlington terrier", + "Border terrier", + "Kerry blue terrier", + "Irish terrier", + "Norfolk terrier", + "Norwich terrier", + "Yorkshire terrier", + "wire-haired fox terrier", + "Lakeland terrier", + "Sealyham terrier, Sealyham", + "Airedale, Airedale terrier", + "cairn, cairn terrier", + "Australian terrier", + "Dandie Dinmont, Dandie Dinmont terrier", + "Boston bull, Boston terrier", + "miniature schnauzer", + "giant schnauzer", + "standard schnauzer", + "Scotch terrier, Scottish terrier, Scottie", + "Tibetan terrier, chrysanthemum dog", + "silky terrier, Sydney silky", + "soft-coated wheaten terrier", + "West Highland white terrier", + "Lhasa, Lhasa apso", + "flat-coated retriever", + "curly-coated retriever", + "golden retriever", + "Labrador retriever", + "Chesapeake Bay retriever", + "German short-haired pointer", + "vizsla, Hungarian pointer", + "English setter", + "Irish setter, red setter", + "Gordon setter", + "Brittany spaniel", + "clumber, clumber spaniel", + "English springer, English springer spaniel", + "Welsh springer spaniel", + "cocker spaniel, English cocker spaniel, cocker", + "Sussex spaniel", + "Irish water spaniel", + "kuvasz", + "schipperke", + "groenendael", + "malinois", + "briard", + "kelpie", + "komondor", + "Old English sheepdog, bobtail", + "Shetland sheepdog, Shetland sheep dog, Shetland", + "collie", + "Border collie", + "Bouvier des Flandres, Bouviers des Flandres", + "Rottweiler", + "German shepherd, German shepherd dog, German police dog, alsatian", + "Doberman, Doberman pinscher", + "miniature pinscher", + "Greater Swiss Mountain dog", + "Bernese mountain dog", + "Appenzeller", + "EntleBucher", + "boxer", + "bull mastiff", + "Tibetan mastiff", + "French bulldog", + "Great Dane", + "Saint Bernard, St Bernard", + "Eskimo dog, husky", + "malamute, malemute, Alaskan malamute", + "Siberian husky", + "dalmatian, coach dog, carriage dog", + "affenpinscher, monkey pinscher, monkey dog", + "basenji", + "pug, pug-dog", + "Leonberg", + "Newfoundland, Newfoundland dog", + "Great Pyrenees", + "Samoyed, Samoyede", + "Pomeranian", + "chow, chow chow", + "keeshond", + "Brabancon griffon", + "Pembroke, Pembroke Welsh corgi", + "Cardigan, Cardigan Welsh corgi", + "toy poodle", + "miniature poodle", + "standard poodle", + "Mexican hairless", + "timber wolf, grey wolf, gray wolf, Canis lupus", + "white wolf, Arctic wolf, Canis lupus tundrarum", + "red wolf, maned wolf, Canis rufus, Canis niger", + "coyote, prairie wolf, brush wolf, Canis latrans", + "dingo, warrigal, warragal, Canis dingo", + "dhole, Cuon alpinus", + "African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus", + "hyena, hyaena", + "red fox, Vulpes vulpes", + "kit fox, Vulpes macrotis", + "Arctic fox, white fox, Alopex lagopus", + "grey fox, gray fox, Urocyon cinereoargenteus", + "tabby, tabby cat", + "tiger cat", + "Persian cat", + "Siamese cat, 
Siamese", + "Egyptian cat", + "cougar, puma, catamount, mountain lion, painter, panther, Felis concolor", + "lynx, catamount", + "leopard, Panthera pardus", + "snow leopard, ounce, Panthera uncia", + "jaguar, panther, Panthera onca, Felis onca", + "lion, king of beasts, Panthera leo", + "tiger, Panthera tigris", + "cheetah, chetah, Acinonyx jubatus", + "brown bear, bruin, Ursus arctos", + "American black bear, black bear, Ursus americanus, Euarctos americanus", + "ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus", + "sloth bear, Melursus ursinus, Ursus ursinus", + "mongoose", + "meerkat, mierkat", + "tiger beetle", + "ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle", + "ground beetle, carabid beetle", + "long-horned beetle, longicorn, longicorn beetle", + "leaf beetle, chrysomelid", + "dung beetle", + "rhinoceros beetle", + "weevil", + "fly", + "bee", + "ant, emmet, pismire", + "grasshopper, hopper", + "cricket", + "walking stick, walkingstick, stick insect", + "cockroach, roach", + "mantis, mantid", + "cicada, cicala", + "leafhopper", + "lacewing, lacewing fly", + "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", + "damselfly", + "admiral", + "ringlet, ringlet butterfly", + "monarch, monarch butterfly, milkweed butterfly, Danaus plexippus", + "cabbage butterfly", + "sulphur butterfly, sulfur butterfly", + "lycaenid, lycaenid butterfly", + "starfish, sea star", + "sea urchin", + "sea cucumber, holothurian", + "wood rabbit, cottontail, cottontail rabbit", + "hare", + "Angora, Angora rabbit", + "hamster", + "porcupine, hedgehog", + "fox squirrel, eastern fox squirrel, Sciurus niger", + "marmot", + "beaver", + "guinea pig, Cavia cobaya", + "sorrel", + "zebra", + "hog, pig, grunter, squealer, Sus scrofa", + "wild boar, boar, Sus scrofa", + "warthog", + "hippopotamus, hippo, river horse, Hippopotamus amphibius", + "ox", + "water buffalo, water ox, Asiatic buffalo, Bubalus bubalis", + "bison", + "ram, tup", + "bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis", + "ibex, Capra ibex", + "hartebeest", + "impala, Aepyceros melampus", + "gazelle", + "Arabian camel, dromedary, Camelus dromedarius", + "llama", + "weasel", + "mink", + "polecat, fitch, foulmart, foumart, Mustela putorius", + "black-footed ferret, ferret, Mustela nigripes", + "otter", + "skunk, polecat, wood pussy", + "badger", + "armadillo", + "three-toed sloth, ai, Bradypus tridactylus", + "orangutan, orang, orangutang, Pongo pygmaeus", + "gorilla, Gorilla gorilla", + "chimpanzee, chimp, Pan troglodytes", + "gibbon, Hylobates lar", + "siamang, Hylobates syndactylus, Symphalangus syndactylus", + "guenon, guenon monkey", + "patas, hussar monkey, Erythrocebus patas", + "baboon", + "macaque", + "langur", + "colobus, colobus monkey", + "proboscis monkey, Nasalis larvatus", + "marmoset", + "capuchin, ringtail, Cebus capucinus", + "howler monkey, howler", + "titi, titi monkey", + "spider monkey, Ateles geoffroyi", + "squirrel monkey, Saimiri sciureus", + "Madagascar cat, ring-tailed lemur, Lemur catta", + "indri, indris, Indri indri, Indri brevicaudatus", + "Indian elephant, Elephas maximus", + "African elephant, Loxodonta africana", + "lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens", + "giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca", + "barracouta, snoek", + "eel", + "coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch", + "rock beauty, 
Holocanthus tricolor", + "anemone fish", + "sturgeon", + "gar, garfish, garpike, billfish, Lepisosteus osseus", + "lionfish", + "puffer, pufferfish, blowfish, globefish", + "abacus", + "abaya", + "academic gown, academic robe, judge's robe", + "accordion, piano accordion, squeeze box", + "acoustic guitar", + "aircraft carrier, carrier, flattop, attack aircraft carrier", + "airliner", + "airship, dirigible", + "altar", + "ambulance", + "amphibian, amphibious vehicle", + "analog clock", + "apiary, bee house", + "apron", + "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", + "assault rifle, assault gun", + "backpack, back pack, knapsack, packsack, rucksack, haversack", + "bakery, bakeshop, bakehouse", + "balance beam, beam", + "balloon", + "ballpoint, ballpoint pen, ballpen, Biro", + "Band Aid", + "banjo", + "bannister, banister, balustrade, balusters, handrail", + "barbell", + "barber chair", + "barbershop", + "barn", + "barometer", + "barrel, cask", + "barrow, garden cart, lawn cart, wheelbarrow", + "baseball", + "basketball", + "bassinet", + "bassoon", + "bathing cap, swimming cap", + "bath towel", + "bathtub, bathing tub, bath, tub", + "beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon", + "beacon, lighthouse, beacon light, pharos", + "beaker", + "bearskin, busby, shako", + "beer bottle", + "beer glass", + "bell cote, bell cot", + "bib", + "bicycle-built-for-two, tandem bicycle, tandem", + "bikini, two-piece", + "binder, ring-binder", + "binoculars, field glasses, opera glasses", + "birdhouse", + "boathouse", + "bobsled, bobsleigh, bob", + "bolo tie, bolo, bola tie, bola", + "bonnet, poke bonnet", + "bookcase", + "bookshop, bookstore, bookstall", + "bottlecap", + "bow", + "bow tie, bow-tie, bowtie", + "brass, memorial tablet, plaque", + "brassiere, bra, bandeau", + "breakwater, groin, groyne, mole, bulwark, seawall, jetty", + "breastplate, aegis, egis", + "broom", + "bucket, pail", + "buckle", + "bulletproof vest", + "bullet train, bullet", + "butcher shop, meat market", + "cab, hack, taxi, taxicab", + "caldron, cauldron", + "candle, taper, wax light", + "cannon", + "canoe", + "can opener, tin opener", + "cardigan", + "car mirror", + "carousel, carrousel, merry-go-round, roundabout, whirligig", + "carpenter's kit, tool kit", + "carton", + "car wheel", + "cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM", + "cassette", + "cassette player", + "castle", + "catamaran", + "CD player", + "cello, violoncello", + "cellular telephone, cellular phone, cellphone, cell, mobile phone", + "chain", + "chainlink fence", + "chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour", + "chain saw, chainsaw", + "chest", + "chiffonier, commode", + "chime, bell, gong", + "china cabinet, china closet", + "Christmas stocking", + "church, church building", + "cinema, movie theater, movie theatre, movie house, picture palace", + "cleaver, meat cleaver, chopper", + "cliff dwelling", + "cloak", + "clog, geta, patten, sabot", + "cocktail shaker", + "coffee mug", + "coffeepot", + "coil, spiral, volute, whorl, helix", + "combination lock", + "computer keyboard, keypad", + "confectionery, confectionary, candy store", + "container ship, containership, container vessel", + "convertible", + "corkscrew, bottle screw", + "cornet, horn, trumpet, trump", + "cowboy boot", + "cowboy hat, ten-gallon hat", + "cradle", + "crane", + "crash helmet", + 
"crate", + "crib, cot", + "Crock Pot", + "croquet ball", + "crutch", + "cuirass", + "dam, dike, dyke", + "desk", + "desktop computer", + "dial telephone, dial phone", + "diaper, nappy, napkin", + "digital clock", + "digital watch", + "dining table, board", + "dishrag, dishcloth", + "dishwasher, dish washer, dishwashing machine", + "disk brake, disc brake", + "dock, dockage, docking facility", + "dogsled, dog sled, dog sleigh", + "dome", + "doormat, welcome mat", + "drilling platform, offshore rig", + "drum, membranophone, tympan", + "drumstick", + "dumbbell", + "Dutch oven", + "electric fan, blower", + "electric guitar", + "electric locomotive", + "entertainment center", + "envelope", + "espresso maker", + "face powder", + "feather boa, boa", + "file, file cabinet, filing cabinet", + "fireboat", + "fire engine, fire truck", + "fire screen, fireguard", + "flagpole, flagstaff", + "flute, transverse flute", + "folding chair", + "football helmet", + "forklift", + "fountain", + "fountain pen", + "four-poster", + "freight car", + "French horn, horn", + "frying pan, frypan, skillet", + "fur coat", + "garbage truck, dustcart", + "gasmask, respirator, gas helmet", + "gas pump, gasoline pump, petrol pump, island dispenser", + "goblet", + "go-kart", + "golf ball", + "golfcart, golf cart", + "gondola", + "gong, tam-tam", + "gown", + "grand piano, grand", + "greenhouse, nursery, glasshouse", + "grille, radiator grille", + "grocery store, grocery, food market, market", + "guillotine", + "hair slide", + "hair spray", + "half track", + "hammer", + "hamper", + "hand blower, blow dryer, blow drier, hair dryer, hair drier", + "hand-held computer, hand-held microcomputer", + "handkerchief, hankie, hanky, hankey", + "hard disc, hard disk, fixed disk", + "harmonica, mouth organ, harp, mouth harp", + "harp", + "harvester, reaper", + "hatchet", + "holster", + "home theater, home theatre", + "honeycomb", + "hook, claw", + "hoopskirt, crinoline", + "horizontal bar, high bar", + "horse cart, horse-cart", + "hourglass", + "iPod", + "iron, smoothing iron", + "jack-o'-lantern", + "jean, blue jean, denim", + "jeep, landrover", + "jersey, T-shirt, tee shirt", + "jigsaw puzzle", + "jinrikisha, ricksha, rickshaw", + "joystick", + "kimono", + "knee pad", + "knot", + "lab coat, laboratory coat", + "ladle", + "lampshade, lamp shade", + "laptop, laptop computer", + "lawn mower, mower", + "lens cap, lens cover", + "letter opener, paper knife, paperknife", + "library", + "lifeboat", + "lighter, light, igniter, ignitor", + "limousine, limo", + "liner, ocean liner", + "lipstick, lip rouge", + "Loafer", + "lotion", + "loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", + "loupe, jeweler's loupe", + "lumbermill, sawmill", + "magnetic compass", + "mailbag, postbag", + "mailbox, letter box", + "maillot", + "maillot, tank suit", + "manhole cover", + "maraca", + "marimba, xylophone", + "mask", + "matchstick", + "maypole", + "maze, labyrinth", + "measuring cup", + "medicine chest, medicine cabinet", + "megalith, megalithic structure", + "microphone, mike", + "microwave, microwave oven", + "military uniform", + "milk can", + "minibus", + "miniskirt, mini", + "minivan", + "missile", + "mitten", + "mixing bowl", + "mobile home, manufactured home", + "Model T", + "modem", + "monastery", + "monitor", + "moped", + "mortar", + "mortarboard", + "mosque", + "mosquito net", + "motor scooter, scooter", + "mountain bike, all-terrain bike, off-roader", + "mountain tent", + "mouse, computer mouse", + "mousetrap", + "moving van", + 
"muzzle", + "nail", + "neck brace", + "necklace", + "nipple", + "notebook, notebook computer", + "obelisk", + "oboe, hautboy, hautbois", + "ocarina, sweet potato", + "odometer, hodometer, mileometer, milometer", + "oil filter", + "organ, pipe organ", + "oscilloscope, scope, cathode-ray oscilloscope, CRO", + "overskirt", + "oxcart", + "oxygen mask", + "packet", + "paddle, boat paddle", + "paddlewheel, paddle wheel", + "padlock", + "paintbrush", + "pajama, pyjama, pj's, jammies", + "palace", + "panpipe, pandean pipe, syrinx", + "paper towel", + "parachute, chute", + "parallel bars, bars", + "park bench", + "parking meter", + "passenger car, coach, carriage", + "patio, terrace", + "pay-phone, pay-station", + "pedestal, plinth, footstall", + "pencil box, pencil case", + "pencil sharpener", + "perfume, essence", + "Petri dish", + "photocopier", + "pick, plectrum, plectron", + "pickelhaube", + "picket fence, paling", + "pickup, pickup truck", + "pier", + "piggy bank, penny bank", + "pill bottle", + "pillow", + "ping-pong ball", + "pinwheel", + "pirate, pirate ship", + "pitcher, ewer", + "plane, carpenter's plane, woodworking plane", + "planetarium", + "plastic bag", + "plate rack", + "plow, plough", + "plunger, plumber's helper", + "Polaroid camera, Polaroid Land camera", + "pole", + "police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria", + "poncho", + "pool table, billiard table, snooker table", + "pop bottle, soda bottle", + "pot, flowerpot", + "potter's wheel", + "power drill", + "prayer rug, prayer mat", + "printer", + "prison, prison house", + "projectile, missile", + "projector", + "puck, hockey puck", + "punching bag, punch bag, punching ball, punchball", + "purse", + "quill, quill pen", + "quilt, comforter, comfort, puff", + "racer, race car, racing car", + "racket, racquet", + "radiator", + "radio, wireless", + "radio telescope, radio reflector", + "rain barrel", + "recreational vehicle, RV, R.V.", + "reel", + "reflex camera", + "refrigerator, icebox", + "remote control, remote", + "restaurant, eating house, eating place, eatery", + "revolver, six-gun, six-shooter", + "rifle", + "rocking chair, rocker", + "rotisserie", + "rubber eraser, rubber, pencil eraser", + "rugby ball", + "rule, ruler", + "running shoe", + "safe", + "safety pin", + "saltshaker, salt shaker", + "sandal", + "sarong", + "sax, saxophone", + "scabbard", + "scale, weighing machine", + "school bus", + "schooner", + "scoreboard", + "screen, CRT screen", + "screw", + "screwdriver", + "seat belt, seatbelt", + "sewing machine", + "shield, buckler", + "shoe shop, shoe-shop, shoe store", + "shoji", + "shopping basket", + "shopping cart", + "shovel", + "shower cap", + "shower curtain", + "ski", + "ski mask", + "sleeping bag", + "slide rule, slipstick", + "sliding door", + "slot, one-armed bandit", + "snorkel", + "snowmobile", + "snowplow, snowplough", + "soap dispenser", + "soccer ball", + "sock", + "solar dish, solar collector, solar furnace", + "sombrero", + "soup bowl", + "space bar", + "space heater", + "space shuttle", + "spatula", + "speedboat", + "spider web, spider's web", + "spindle", + "sports car, sport car", + "spotlight, spot", + "stage", + "steam locomotive", + "steel arch bridge", + "steel drum", + "stethoscope", + "stole", + "stone wall", + "stopwatch, stop watch", + "stove", + "strainer", + "streetcar, tram, tramcar, trolley, trolley car", + "stretcher", + "studio couch, day bed", + "stupa, tope", + "submarine, pigboat, sub, U-boat", + "suit, suit of clothes", + "sundial", + "sunglass", + 
"sunglasses, dark glasses, shades", + "sunscreen, sunblock, sun blocker", + "suspension bridge", + "swab, swob, mop", + "sweatshirt", + "swimming trunks, bathing trunks", + "swing", + "switch, electric switch, electrical switch", + "syringe", + "table lamp", + "tank, army tank, armored combat vehicle, armoured combat vehicle", + "tape player", + "teapot", + "teddy, teddy bear", + "television, television system", + "tennis ball", + "thatch, thatched roof", + "theater curtain, theatre curtain", + "thimble", + "thresher, thrasher, threshing machine", + "throne", + "tile roof", + "toaster", + "tobacco shop, tobacconist shop, tobacconist", + "toilet seat", + "torch", + "totem pole", + "tow truck, tow car, wrecker", + "toyshop", + "tractor", + "trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi", + "tray", + "trench coat", + "tricycle, trike, velocipede", + "trimaran", + "tripod", + "triumphal arch", + "trolleybus, trolley coach, trackless trolley", + "trombone", + "tub, vat", + "turnstile", + "typewriter keyboard", + "umbrella", + "unicycle, monocycle", + "upright, upright piano", + "vacuum, vacuum cleaner", + "vase", + "vault", + "velvet", + "vending machine", + "vestment", + "viaduct", + "violin, fiddle", + "volleyball", + "waffle iron", + "wall clock", + "wallet, billfold, notecase, pocketbook", + "wardrobe, closet, press", + "warplane, military plane", + "washbasin, handbasin, washbowl, lavabo, wash-hand basin", + "washer, automatic washer, washing machine", + "water bottle", + "water jug", + "water tower", + "whiskey jug", + "whistle", + "wig", + "window screen", + "window shade", + "Windsor tie", + "wine bottle", + "wing", + "wok", + "wooden spoon", + "wool, woolen, woollen", + "worm fence, snake fence, snake-rail fence, Virginia fence", + "wreck", + "yawl", + "yurt", + "web site, website, internet site, site", + "comic book", + "crossword puzzle, crossword", + "street sign", + "traffic light, traffic signal, stoplight", + "book jacket, dust cover, dust jacket, dust wrapper", + "menu", + "plate", + "guacamole", + "consomme", + "hot pot, hotpot", + "trifle", + "ice cream, icecream", + "ice lolly, lolly, lollipop, popsicle", + "French loaf", + "bagel, beigel", + "pretzel", + "cheeseburger", + "hotdog, hot dog, red hot", + "mashed potato", + "head cabbage", + "broccoli", + "cauliflower", + "zucchini, courgette", + "spaghetti squash", + "acorn squash", + "butternut squash", + "cucumber, cuke", + "artichoke, globe artichoke", + "bell pepper", + "cardoon", + "mushroom", + "Granny Smith", + "strawberry", + "orange", + "lemon", + "fig", + "pineapple, ananas", + "banana", + "jackfruit, jak, jack", + "custard apple", + "pomegranate", + "hay", + "carbonara", + "chocolate sauce, chocolate syrup", + "dough", + "meat loaf, meatloaf", + "pizza, pizza pie", + "potpie", + "burrito", + "red wine", + "espresso", + "cup", + "eggnog", + "alp", + "bubble", + "cliff, drop, drop-off", + "coral reef", + "geyser", + "lakeside, lakeshore", + "promontory, headland, head, foreland", + "sandbar, sand bar", + "seashore, coast, seacoast, sea-coast", + "valley, vale", + "volcano", + "ballplayer, baseball player", + "groom, bridegroom", + "scuba diver", + "rapeseed", + "daisy", + "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", + "corn", + "acorn", + "hip, rose hip, rosehip", + "buckeye, horse chestnut, conker", + "coral fungus", + "agaric", + "gyromitra", + "stinkhorn, carrion fungus", + "earthstar", + "hen-of-the-woods, hen of the woods, Polyporus 
frondosus, Grifola frondosa",
+        "bolete",
+        "ear, spike, capitulum",
+        "toilet tissue, toilet paper, bathroom tissue"
+    };
+
+    const std::string getClassLabelText(int classIndex) {
+        return labels[classIndex];
+    }
+
+    int argmax(const float* tensor, int numberElements) {
+        float topConfidence = 0;
+        int topLabel = -1;
+        for (int i = 0; i < numberElements; i++) {
+            float confidence = tensor[i];
+            if (topLabel == -1 || topConfidence < confidence) {
+                topLabel = i;
+                topConfidence = confidence;
+            }
+        }
+        return topLabel;
+    }
+
+    void postprocess(
+        const int64_t* output_shape_conf, const void* voutputData_conf, const size_t bytesize_conf, const uint32_t dimCount_conf,
+        const int64_t* output_shape_boxes, const void* voutputData_boxes, const size_t bytesize_boxes, const uint32_t dimCount_boxes,
+        const int64_t* output_shape_classification, const void* voutputData_classification, const size_t bytesize_classification, const uint32_t dimCount_classification,
+        std::vector<DetectedResult>& detectedResults)
+    {
+        if (!voutputData_boxes || !voutputData_conf || !voutputData_classification) {
+            // nothing to do
+            return;
+        }
+
+        if (dimCount_conf != 3 || dimCount_boxes != 3 || dimCount_classification != 3)
+        {
+            printf("Unknown yolov8 detection and/or efficientnet-b0 model.\n");
+            return;
+        }
+
+        // Output info:
+        //   classify_output - 1,1,1000
+        //   confidence      - 1,1,1
+        //   boxes           - 1,1,4
+        const int numberOfDetections = output_shape_boxes[0];
+        const int boxesSize = output_shape_boxes[2];
+        const int* outData_boxes = reinterpret_cast<const int*>(voutputData_boxes);
+        const float* outData_confidence = reinterpret_cast<const float*>(voutputData_conf);
+        const float* outData_classify_labels = reinterpret_cast<const float*>(voutputData_classification);
+
+        std::vector<int> input_shape = getModelInputShape();
+        int network_h = input_shape[2];
+        int network_w = input_shape[3];
+
+        for (int i = 0; i < numberOfDetections; i++)
+        {
+            float confidence = outData_confidence[i];
+
+            //printf("Confidence found: %f NetworkW %i NetworkH: %i BoxSize %i \n", confidence, network_w, network_h, boxesSize);
+
+            if (confidence > confidence_threshold) {
+                int classId = argmax(outData_classify_labels, output_shape_classification[2]);
+                //printf("numberofTensors %li classId %i\n", output_shape_classification[2], classId);
+                DetectedResult obj;
+                obj.x = std::clamp(
+                    static_cast<int>(outData_boxes[i * boxesSize + 0] / ((float)network_w / (float)_video_input_width)),
+                    0,
+                    _video_input_width);
+                obj.y = std::clamp(
+                    static_cast<int>(outData_boxes[i * boxesSize + 1] / ((float)network_h / (float)_video_input_height)),
+                    0,
+                    _video_input_height);
+                obj.width = std::clamp(
+                    static_cast<int>(outData_boxes[i * boxesSize + 2] / ((float)network_w / (float)_video_input_width)),
+                    0,
+                    _video_input_width);
+                obj.height = std::clamp(
+                    static_cast<int>(outData_boxes[i * boxesSize + 3] / ((float)network_h / (float)_video_input_height)),
+                    0,
+                    _video_input_height);
+                obj.confidence = confidence;
+                obj.classId = classId;
+                strncpy(obj.classText, getClassLabelText(obj.classId).c_str(), sizeof(obj.classText));
+                obj.classText[sizeof(obj.classText) - 1] = '\0'; // strncpy does not guarantee termination
+
+                // printf("Actual found: %f %s...%i,%i,%i,%i vs. %i,%i,%i,%i...%ix%i \n",
+                //     confidence, obj.classText, obj.x, obj.y, obj.width, obj.height,
+                //     outData_boxes[i * boxesSize + 0], outData_boxes[i * boxesSize + 1],
+                //     outData_boxes[i * boxesSize + 2], outData_boxes[i * boxesSize + 3],
+                //     _video_input_width, _video_input_height);
+
+                detectedResults.push_back(obj);
+            } // end if confidence
+        } // end for
+    }
+
+private:
+    // yolov8 - ?x3x416x416 NCHW
+    const char* MODEL_NAME = "detect_classify";
+    const uint64_t MODEL_VERSION = 0;
+    const char* INPUT_NAME = "images";
+};
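The postprocess routine above maps each box coordinate from model space back to frame space by dividing by the network-to-frame ratio, then clamping to the frame bounds. A standalone sketch of that mapping (illustrative only, not part of the patch; the 416x416 network and 1920x1080 frame sizes are assumed):

    #include <algorithm>
    #include <cstdio>

    int main() {
        const int network_w = 416, network_h = 416;  // model input size (assumed)
        const int frame_w = 1920, frame_h = 1080;    // decoded frame size (assumed)
        const int box_x = 208, box_y = 104;          // detection in model space
        // Dividing by (network / frame) rescales into frame space; clamp guards the edges.
        int x = std::clamp(static_cast<int>(box_x / ((float)network_w / frame_w)), 0, frame_w);
        int y = std::clamp(static_cast<int>(box_y / ((float)network_h / frame_h)), 0, frame_h);
        printf("frame-space point: (%d, %d)\n", x, y);  // prints (960, 270)
        return 0;
    }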
+
+GStreamerMediaPipelineService* _mediaService = NULL;
+std::string _user_request;
+
+namespace {
+volatile sig_atomic_t shutdown_request = 0;
+}
+
+bool stringIsInteger(std::string strInput) {
+    std::string::const_iterator it = strInput.begin();
+    while (it != strInput.end() && std::isdigit(*it)) ++it;
+    return !strInput.empty() && it == strInput.end();
+}
+
+bool stringIsFloat(std::string strInput) {
+    std::istringstream iss(strInput);
+    float f;
+    iss >> std::noskipws >> f; // noskipws considers leading whitespace invalid
+    return iss.eof() && !iss.fail();
+}
+
+bool setActiveModel(int detectionType, ObjectDetectionInterface** objDet)
+{
+    if (objDet == NULL)
+        return false;
+
+    *objDet = new Yolov8Ensemble();
+    return true;
+}
+
+static void onInterrupt(int status) {
+    shutdown_request = 1;
+}
+
+static void onTerminate(int status) {
+    shutdown_request = 1;
+}
+
+static void onIllegal(int status) {
+    shutdown_request = 2;
+}
+
+static void installSignalHandlers() {
+    static struct sigaction sigIntHandler;
+    sigIntHandler.sa_handler = onInterrupt;
+    sigemptyset(&sigIntHandler.sa_mask);
+    sigIntHandler.sa_flags = 0;
+    sigaction(SIGINT, &sigIntHandler, NULL);
+
+    static struct sigaction sigTermHandler;
+    sigTermHandler.sa_handler = onTerminate;
+    sigemptyset(&sigTermHandler.sa_mask);
+    sigTermHandler.sa_flags = 0;
+    sigaction(SIGTERM, &sigTermHandler, NULL);
+
+    static struct sigaction sigIllHandler;
+    sigIllHandler.sa_handler = onIllegal;
+    sigemptyset(&sigIllHandler.sa_mask);
+    sigIllHandler.sa_flags = 0;
+    sigaction(SIGILL, &sigIllHandler, NULL);
+}
+
+void printInferenceResults(std::vector<DetectedResult>& results)
+{
+    for (auto& obj : results) {
+        std::cout << "Rect: [ " << obj.x << " , " << obj.y << " " << obj.width << ", " << obj.height
+                  << "] Class: " << obj.classText << "(" << obj.classId << ") Conf: " << obj.confidence << std::endl;
+    }
+}
+
+// TODO: Multiple references state that imshow can't be used in any thread other than main!
+void displayGUIInferenceResults(cv::Mat analytics_frame, std::vector<DetectedResult>& results, int latency, int throughput)
+{
+    auto ttid = std::this_thread::get_id();
+    std::stringstream ss;
+    ss << ttid;
+    std::string tid = ss.str();
+
+    for (auto& obj : results) {
+        const float x0 = obj.x;
+        const float y0 = obj.y;
+        const float x1 = obj.x + obj.width;
+        const float y1 = obj.y + obj.height;
+
+        //printf("--------->coords: %f %f %f %f\n", x0, y0, x1, y1);
+        cv::rectangle(analytics_frame,
+            cv::Point((int)x0, (int)y0),
+            cv::Point((int)x1, (int)y1),
+            cv::Scalar(255, 0, 0),
+            2, cv::LINE_8);
+
+        cv::Size textsize = cv::getTextSize(obj.classText, cv::FONT_HERSHEY_PLAIN, 1, 0, 0);
+
+        cv::rectangle(analytics_frame,
+            cv::Point((int)x0, (int)(y0 - 20)),
+            cv::Point((int)x0 + textsize.width, (int)y0 + textsize.height),
+            CV_RGB(0, 0, 0),
+            -1);
+
+        std::string putText = obj.classText;
+        putText += " " + std::to_string(obj.confidence);
+        cv::putText(analytics_frame,
+            putText,
+            cv::Point((int)x0, (int)y0),
+            cv::FONT_HERSHEY_PLAIN, 1, CV_RGB(255, 255, 255), 1);
+    } // end for
+
+    cv::Mat presenter;
+    {
+        std::lock_guard<std::mutex> lock(_drawingMtx);
+        // printf("drawing frame\n");
+        // cv::imwrite("result.jpg", analytics_frame);
+        cv::cvtColor(analytics_frame, analytics_frame, cv::COLOR_BGR2RGB);
+        cv::imshow("OpenVINO Results " + tid, analytics_frame);
+        cv::waitKey(1);
+    }
+}
+
+// This function is responsible for generating a GST pipeline that
+// decodes and resizes the video stream based on the desired window size or
+// the largest analytics frame size needed if running headless
+std::string getVideoPipelineText(std::string mediaPath, ObjectDetectionInterface* objDet, ObjectDetectionInterface* textDet)
+{
+    std::vector<int> modelFrameShape = objDet->getModelInputShape();
+    if (textDet) {
+        modelFrameShape = textDet->getModelInputShape();
+    }
+
+    int frame_width = modelFrameShape[3];
+    int frame_height = modelFrameShape[2];
+
+    if (_render)
+    {
+        frame_width = _window_width;
+        frame_height = _window_height;
+    }
+
+    return _mediaService->getVideoDecodedPreProcessedPipeline(
+        mediaPath,
+        _videoType,
+        frame_width,
+        frame_height,
+        _use_onevpl);
+}
+
+bool createModelServer()
+{
+    if (_srv == NULL)
+        return false;
+
+    OVMS_Status* res = OVMS_ServerStartFromConfigurationFile(_srv, _serverSettings, _modelsSettings);
+
+    if (res) {
+        uint32_t code = 0;
+        const char* details = nullptr;
+
+        OVMS_StatusCode(res, &code);
+        OVMS_StatusDetails(res, &details);
+        std::cerr << "ERROR: during start: code:" << code << "; details:" << details
+                  << "; grpc_port: " << _server_grpc_port
+                  << "; http_port: " << _server_http_port
+                  << ";" << std::endl;
+
+        OVMS_StatusDelete(res);
+
+        if (_srv)
+            OVMS_ServerDelete(_srv);
+
+        if (_modelsSettings)
+            OVMS_ModelsSettingsDelete(_modelsSettings);
+
+        if (_serverSettings)
+            OVMS_ServerSettingsDelete(_serverSettings);
+
+        return false;
+    }
+
+    return true;
+}
+
+bool loadGStreamer(GstElement** pipeline, GstElement** appsink, std::string mediaPath, ObjectDetectionInterface* _objDet)
+{
+    static int threadCnt = 0;
+
+    std::string videoPipelineText = getVideoPipelineText(mediaPath, _objDet, NULL);
+    std::cout << "--------------------------------------------------------------" << std::endl;
+    std::cout << "Opening Media Pipeline: " << videoPipelineText << std::endl;
+    std::cout << "--------------------------------------------------------------" << std::endl;
+
+    *pipeline = gst_parse_launch(videoPipelineText.c_str(), NULL);
+    if (*pipeline == NULL) {
+        std::cout << "ERROR: Failed to parse GST pipeline. Quitting." << std::endl;
+        return false;
+    }
+
+    std::string appsinkName = "appsink" + std::to_string(threadCnt++);
+
+    *appsink = gst_bin_get_by_name(GST_BIN(*pipeline), appsinkName.c_str());
+
+    // Check if all elements were created
+    if (!(*appsink))
+    {
+        printf("ERROR: Failed to initialize GST pipeline (missing %s) Quitting.\n", appsinkName.c_str());
+        return false;
+    }
+
+    GstStateChangeReturn gst_res;
+
+    // Start pipeline so it can process incoming data
+    gst_res = gst_element_set_state(*pipeline, GST_STATE_PLAYING);
+
+    if (gst_res != GST_STATE_CHANGE_SUCCESS && gst_res != GST_STATE_CHANGE_ASYNC) {
+        printf("ERROR: StateChange not successful. Error Code: %d\n", gst_res);
+        return false;
+    }
+
+    return true;
+}
+
+// OVMS C-API is a global process (singleton design) wide server so can't create many of them
+//"./models/config_active.json"
+bool loadOVMS()
+{
+    OVMS_Status* res = NULL;
+
+    OVMS_ServerSettingsNew(&_serverSettings);
+    OVMS_ModelsSettingsNew(&_modelsSettings);
+    OVMS_ServerNew(&_srv);
+    OVMS_ServerSettingsSetGrpcPort(_serverSettings, _server_grpc_port);
+    OVMS_ServerSettingsSetRestPort(_serverSettings, _server_http_port);
+    OVMS_ServerSettingsSetLogLevel(_serverSettings, OVMS_LOG_ERROR);
+
+    char* ovmsConfigJsonFilePath = std::getenv("OVMS_MODEL_CONFIG_JSON");
+    if (ovmsConfigJsonFilePath == NULL) {
+        std::cout << "ERROR: OVMS_MODEL_CONFIG_JSON is not set" << std::endl;
+        return false;
+    }
+    std::cout << "ovmsConfigJsonFilePath: " << ovmsConfigJsonFilePath << std::endl;
+    OVMS_ModelsSettingsSetConfigPath(_modelsSettings, ovmsConfigJsonFilePath);
+
+    if (!createModelServer()) {
+        std::cout << "ERROR: Failed to create model server" << std::endl;
+        return false;
+    }
+
+    return true;
+}
+
+void hwc_to_chw(cv::InputArray src, cv::OutputArray dst) {
+    std::vector<cv::Mat> channels;
+    cv::split(src, channels);
+
+    for (auto& img : channels) {
+        img = img.reshape(1, 1);
+    }
+
+    // Concatenate three vectors to one
+    cv::hconcat(channels, dst);
+}
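hwc_to_chw converts OpenCV's interleaved HWC layout into the planar CHW layout the FP32 model input expects: each channel is flattened to a single row, then the rows are concatenated. A quick standalone check (illustrative, not part of the patch):

    #include <opencv2/opencv.hpp>
    #include <cstdio>

    int main() {
        // 2x2, 3-channel float image: channel values offset by 0, 10, 20.
        cv::Mat src(2, 2, CV_32FC3);
        for (int i = 0; i < 4; ++i)
            src.at<cv::Vec3f>(i / 2, i % 2) = cv::Vec3f(i, 10 + i, 20 + i);

        std::vector<cv::Mat> channels;
        cv::split(src, channels);                             // HWC -> one plane per channel
        for (auto& img : channels) img = img.reshape(1, 1);   // each plane -> 1 row
        cv::Mat dst;
        cv::hconcat(channels, dst);                           // rows side by side: CHW order

        for (int i = 0; i < 12; ++i) printf("%.0f ", dst.at<float>(0, i));
        printf("\n");  // prints: 0 1 2 3 10 11 12 13 20 21 22 23
        return 0;
    }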
+
+void run_stream(std::string mediaPath, GstElement* pipeline, GstElement* appsink, ObjectDetectionInterface* objDet)
+{
+    auto ttid = std::this_thread::get_id();
+    std::stringstream ss;
+    ss << ttid;
+    std::string tid = ss.str();
+
+    // Wait for all decoder streams to init...otherwise it causes a segfault when OVMS loads
+    // https://stackoverflow.com/questions/48271230/using-condition-variablenotify-all-to-notify-multiple-threads
+    std::unique_lock<std::mutex> lk(_mtx);
+    _cvAllDecodersInitd.wait(lk, [] { return _allDecodersInitd; });
+    lk.unlock();
+
+    printf("Starting thread: %s\n", tid.c_str());
+
+    auto initTime = std::chrono::high_resolution_clock::now();
+    unsigned long numberOfFrames = 0;
+    long long numberOfSkipFrames = 0;
+    int highest_latency_frame = 0;
+    int lowest_latency_frame = 9999;
+    int avg_latency_frame = 0;
+    int total_latency_frames = 0;
+    OVMS_Status* res = NULL;
+
+    while (!shutdown_request) {
+        auto startTime = std::chrono::high_resolution_clock::now();
+
+        // classify_output
+        const void* voutputData1;
+        size_t bytesize1 = 0;
+        OVMS_DataType datatype1 = (OVMS_DataType)42;
+        const int64_t* shape1{nullptr};
+        size_t dimCount1 = 0;
+        OVMS_BufferType bufferType1 = (OVMS_BufferType)42;
+        uint32_t deviceId1 = 42;
+        const char* outputName1{nullptr};
+
+        // confidence_levels
+        const void* voutputData2;
+        size_t bytesize2 = 0;
+        OVMS_DataType datatype2 = (OVMS_DataType)42;
+        const int64_t* shape2{nullptr};
+        size_t dimCount2 = 0;
+        OVMS_BufferType bufferType2 = (OVMS_BufferType)42;
+        uint32_t deviceId2 = 42;
+        const char* outputName2{nullptr};
+
+        // roi_coordinates
+        const void* voutputData3;
+        size_t bytesize3 = 0;
+        OVMS_DataType datatype3 = (OVMS_DataType)42;
+        const int64_t* shape3{nullptr};
+        size_t dimCount3 = 0;
+        OVMS_BufferType bufferType3 = (OVMS_BufferType)42;
+        uint32_t deviceId3 = 42;
+        const char* outputName3{nullptr};
+
+        // Common across the get-output API
+        uint32_t outputCount = 0;
+        uint32_t outputId;
+
+        GstSample* sample;
+        GstStructure* s;
+        GstBuffer* buffer;
+        GstMapInfo m;
+
+        std::vector<DetectedResult> detectedResults;
+
+        auto metricStartTime = std::chrono::high_resolution_clock::now();
+        if (gst_app_sink_is_eos(GST_APP_SINK(appsink))) {
+            std::cout << "INFO: EOS " << std::endl;
+            return;
+        }
+        auto metricEndTime = std::chrono::high_resolution_clock::now();
+        auto metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+        //cout << "Get appsink latency (ms): " << metricLatencyTime << endl;
+
+        metricStartTime = std::chrono::high_resolution_clock::now();
+        sample = gst_app_sink_try_pull_sample(GST_APP_SINK(appsink), 50 * GST_SECOND);
+
+        if (sample == nullptr) {
+            std::cout << "ERROR: No sample found" << std::endl;
+            return;
+        }
+        metricEndTime = std::chrono::high_resolution_clock::now();
+        metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+        //cout << "Pull sample latency (ms): " << metricLatencyTime << endl;
+
+        GstCaps* caps;
+        caps = gst_sample_get_caps(sample);
+
+        if (caps == nullptr) {
+            std::cout << "ERROR: No caps found for sample" << std::endl;
+            return;
+        }
+
+        s = gst_caps_get_structure(caps, 0);
+        gst_structure_get_int(s, "width", &_video_input_width);
+        gst_structure_get_int(s, "height", &_video_input_height);
+
+        metricStartTime = std::chrono::high_resolution_clock::now();
+        buffer = gst_sample_get_buffer(sample);
+        metricEndTime = std::chrono::high_resolution_clock::now();
+        metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+        //cout << "Get sample buffer latency (ms): " << metricLatencyTime << endl;
+
+        metricStartTime = std::chrono::high_resolution_clock::now();
+        gst_buffer_map(buffer, &m, GST_MAP_READ);
+        metricEndTime = std::chrono::high_resolution_clock::now();
+        metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+        //cout << "Map sample buffer latency (ms): " << metricLatencyTime << endl;
+
+        if (m.size <= 0) {
+            std::cout << "ERROR: Invalid buffer size" << std::endl;
+            return;
+        }
+
+        cv::Mat analytics_frame;
+        cv::Mat floatImage;
+        std::vector<int> inputShape;
+
+        inputShape = objDet->getModelInputShape();
+
+        metricStartTime = std::chrono::high_resolution_clock::now();
+        cv::Mat img(_video_input_height, _video_input_width, CV_8UC3, (void*)m.data);
+        metricEndTime = std::chrono::high_resolution_clock::now();
+        metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+        //cout << "Copy decoded frame to mat latency (ms): " << metricLatencyTime << endl;
+
+        // When rendering is enabled the input frame is resized to the window size and not the needed model input size
+        if (_render) {
+            if (dynamic_cast<Yolov8Ensemble*>(objDet) != nullptr)
+            {
+                metricStartTime = std::chrono::high_resolution_clock::now();
+                resize(img, analytics_frame, cv::Size(inputShape[2], inputShape[3]), 0, 0, cv::INTER_AREA /*cv::INTER_LINEAR*/);
+                metricEndTime = std::chrono::high_resolution_clock::now();
+                metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+                //cout << "Resize decoded frame latency (ms): " << metricLatencyTime << endl;
+                //cv::imwrite("faceresized.jpg", analytics_frame);
+            }
+            else
+            {
+                printf("ERROR: Unknown model type\n");
+                return;
+            }
+            metricStartTime = std::chrono::high_resolution_clock::now();
+            analytics_frame.convertTo(analytics_frame, CV_32F);
+            metricEndTime = std::chrono::high_resolution_clock::now();
+            metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+            //cout << "DataType transform decoded frame latency (ms): " << metricLatencyTime << endl;
+
+            metricStartTime = std::chrono::high_resolution_clock::now();
+            hwc_to_chw(analytics_frame, floatImage);
+            metricEndTime = std::chrono::high_resolution_clock::now();
+            metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+            //cout << "Layout transform decoded frame latency (ms): " << metricLatencyTime << endl;
+        }
+        else {
+            img.convertTo(analytics_frame, CV_32F);
+            hwc_to_chw(analytics_frame, floatImage);
+        }
+
+        const int DATA_SIZE = floatImage.step[0] * floatImage.rows;
+
+        OVMS_InferenceResponse* response = nullptr;
+        OVMS_InferenceRequest* request{nullptr};
+
+        // OD Inference
+        {
+            //std::lock_guard<std::mutex> lock(_infMtx);
+
+            metricStartTime = std::chrono::high_resolution_clock::now();
+
+            OVMS_InferenceRequestNew(&request, _srv, objDet->getModelName(), objDet->getModelVersion());
+
+            OVMS_InferenceRequestAddInput(
+                request,
+                objDet->getModelInputName(),
+                OVMS_DATATYPE_FP32,
+                objDet->model_input_shape,
+                objDet->getModelDimCount());
+
+            // run sync request
+            OVMS_InferenceRequestInputSetData(
+                request,
+                objDet->getModelInputName(),
+                reinterpret_cast<void*>(floatImage.data),
+                DATA_SIZE,
+                OVMS_BUFFERTYPE_CPU,
+                0);
+
+            res = OVMS_Inference(_srv, request, &response);
+
+            metricEndTime = std::chrono::high_resolution_clock::now();
+            metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+            //cout << "Inference latency (ms): " << metricLatencyTime << endl;
+
+            if (res != nullptr) {
+                //std::cout << "OVMS_Inference failed " << std::endl;
+                uint32_t code = 0;
+                const char* details = 0;
+                OVMS_StatusCode(res, &code);
+                OVMS_StatusDetails(res, &details);
+
+                OVMS_StatusDelete(res);
+                if (request)
+                    OVMS_InferenceRequestDelete(request);
+
+                metricStartTime = std::chrono::high_resolution_clock::now();
+                gst_buffer_unmap(buffer, &m);
+                gst_sample_unref(sample);
+                metricEndTime = std::chrono::high_resolution_clock::now();
+                metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+                //cout << "Decoded frame release latency (ms): " << metricLatencyTime << endl;
+
+                if (code != 177 /* OVMS_Status::PIPELINE_DEMULTIPLEXER_NO_RESULTS */)
+                {
+                    std::cout << "Error occurred during inference. Code: " << code << std::endl;
+                    //std::cout << "Details: " << details << std::endl;
+                    break;
+                }
+                else
+                    continue;
+            }
+        } // end lock on inference request to server
+
+        metricStartTime = std::chrono::high_resolution_clock::now();
+        OVMS_InferenceResponseOutputCount(response, &outputCount);
+        outputId = 0;
+
+        // anchor-free yolov8 results
+        OVMS_InferenceResponseOutput(response, outputId, &outputName1, &datatype1, &shape1, &dimCount1, &voutputData1, &bytesize1, &bufferType1, &deviceId1);
+        // std::cout << "------------>" << tid << " : " << "DeviceID " << deviceId1
+        //     << ", OutputName " << outputName1 << ", DimCount " << dimCount1
+        //     << ", shape " << shape1[0] << " " << shape1[1] << " " << shape1[2]
+        //     << ", byteSize " << bytesize1 << ", OutputCount " << outputCount << std::endl;
+
+        // roi_coordinates
+        outputId = 1;
+        OVMS_InferenceResponseOutput(response, outputId, &outputName2, &datatype2, &shape2, &dimCount2, &voutputData2, &bytesize2, &bufferType2, &deviceId2);
+
+        // classify_output e.g. classification results
+        outputId = 2;
+        OVMS_InferenceResponseOutput(response, outputId, &outputName3, &datatype3, &shape3, &dimCount3, &voutputData3, &bytesize3, &bufferType3, &deviceId3);
+
+        // roi_images dims == 5: batch, 1, c, h, w
+
+        objDet->postprocess(
+            shape1, voutputData1, bytesize1, dimCount1,
+            shape2, voutputData2, bytesize2, dimCount2,
+            shape3, voutputData3, bytesize3, dimCount3,
+            detectedResults);
+        //objDet->postprocess(detectedResults, detectedResultsFiltered);
+
+        metricEndTime = std::chrono::high_resolution_clock::now();
+        metricLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(metricEndTime - metricStartTime).count();
+        //cout << "Post-processing latency (ms): " << metricLatencyTime << endl;
+
+        numberOfSkipFrames++;
+        float fps = 0;
+        int skip_frames = 120;
+        if (numberOfSkipFrames <= skip_frames) // allow warm up for latency/fps measurements
+        {
+            initTime = std::chrono::high_resolution_clock::now();
+            //printf("Too early...Skipping frames..\n");
+        }
+        else
+        {
+            numberOfFrames++;
+
+            auto endTime = std::chrono::high_resolution_clock::now();
+            auto latencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
+            auto runningLatencyTime = std::chrono::duration_cast<std::chrono::milliseconds>(endTime - initTime).count();
+            if (runningLatencyTime > 0) { // skip a few to account for init
+                fps = (float)numberOfFrames / (float)(runningLatencyTime / 1000); // convert to seconds
+            }
+
+            if (_render)
+                displayGUIInferenceResults(img, detectedResults, latencyTime, fps);
+
+            int frame_latency = chrono::duration_cast<chrono::milliseconds>(endTime - startTime).count();
+
+            if (frame_latency > highest_latency_frame)
+                highest_latency_frame = frame_latency;
+            if (frame_latency < lowest_latency_frame)
+                lowest_latency_frame = frame_latency;
+
+            total_latency_frames += frame_latency;
+
+            if (numberOfFrames % 30 == 0) {
+                avg_latency_frame = total_latency_frames / 30;
+
+                time_t currTime = time(0);
+                struct tm tstruct;
+                char bCurrTime[80];
+                tstruct = *localtime(&currTime);
+                strftime(bCurrTime, sizeof(bCurrTime), "%Y-%m-%d.%X", &tstruct);
+
+                cout << detectedResults.size() << " object(s) detected at " << bCurrTime << endl;
+                cout << "Avg. Pipeline Throughput FPS: " << ((isinf(fps)) ? "..." : std::to_string(fps)) << endl;
+                cout << "Avg. Pipeline Latency (ms): " << avg_latency_frame << endl;
+                cout << "Max. Pipeline Latency (ms): " << highest_latency_frame << endl;
+                cout << "Min. Pipeline Latency (ms): " << lowest_latency_frame << endl;
+                highest_latency_frame = 0;
+                lowest_latency_frame = 9999;
+                total_latency_frames = 0;
+            }
+        }
+
+        if (request) {
+            OVMS_InferenceRequestInputRemoveData(request, objDet->getModelInputName()); // doesn't help
+            OVMS_InferenceRequestRemoveInput(request, objDet->getModelInputName());
+            OVMS_InferenceRequestDelete(request);
+        }
+
+        if (response) {
+            OVMS_InferenceResponseDelete(response);
+        }
+
+        gst_buffer_unmap(buffer, &m);
+        gst_sample_unref(sample);
+
+        // DEBUG: TODO
+        //shutdown_request = 1;
+
+        if (shutdown_request > 0)
+            break;
+    } // end while get frames
+
+    std::cout << "Goodbye..." << std::endl;
+
+    if (res != NULL) {
+        OVMS_StatusDelete(res);
+        res = NULL;
+    }
+
+    if (objDet) {
+        delete objDet;
+        objDet = NULL;
+    }
+
+    gst_element_set_state(pipeline, GST_STATE_NULL);
+    if (pipeline)
+        gst_object_unref(pipeline);
+
+    if (appsink)
+        gst_object_unref(appsink);
+}
+
+void print_usage(const char* programName) {
+    std::cout << "Usage: ./" << programName << " \n\n"
+        << "\tmedia 1 location: an rtsp://127.0.0.1:8554/camera_0 url or a path to a *.mp4 file\n"
+        // << "\tmedia 2 location: an rtsp://127.0.0.1:8554/camera_0 url, a path to a *.mp4 file, or leave blank\n"
+        << "\tuse_onevpl: 0 (libva) or 1 for oneVPL\n"
+        << "\trender mode: 0 for headless or 1 to launch a render window\n"
+        << "\trender portrait mode: 0 for landscape or 1 for portrait orientation\n"
+        // << "\tmodel server config file name: config_yolov8_ensemble_cpu.json, config_yolov8_ensemble_gpu.json, or config_yolov8_ensemble_cpu_gpu.json\n"
+        << "\tvideo_type for media 1: 0 for HEVC or 1 for AVC\n"
+        // << "\tvideo_type for media 2: 0 for HEVC or 1 for AVC\n"
+        << "\twindow_width: display window width\n"
+        << "\twindow_height: display window height\n"
+        << "\tdetection_threshold: confidence threshold as a floating point value between 0.0 and 1.0\n";
+}
+
+//$INPUTSRC $INPUTSRC2 $USE_ONEVPL $RENDER_MODE $RENDER_PORTRAIT_MODE $CONFIG_NAME $CODEC_TYPE $CODEC_TYPE2
+bool parse_cl_args(int argc, char** argv, string& inputsrc, string& inputsrc2, bool& usevpl, bool& renderMode,
+    bool& renderPortraitMode, string& configName,
+    MediaPipelineServiceInterface::VIDEO_TYPE& codecType1,
+    MediaPipelineServiceInterface::VIDEO_TYPE& codecType2)
+{
+    int cli = 1;
+
+    if (argc < 2)
+        return false;
+
+    inputsrc = argv[cli++];
+    if (inputsrc.empty())
+        return false;
+
+    if (argc >= 9)
+        inputsrc2 = argv[cli++];
+
+    string useVPL = argv[cli++];
+    if (useVPL.empty())
+        return false;
+    usevpl = std::stoi(useVPL);
+
+    string render = argv[cli++];
+    if (render.empty())
+        return false;
+    renderMode = std::stoi(render);
+
+    string renderPortrait = argv[cli++];
+    if (renderPortrait.empty())
+        return false;
+    renderPortraitMode = std::stoi(renderPortrait);
+
+    configName = argv[cli++];
+    if (configName.empty())
+        return false;
+
+    string videoType = argv[cli++];
+    if (videoType.empty())
+        return false;
+    codecType1 = (MediaPipelineServiceInterface::VIDEO_TYPE) std::stoi(videoType);
+
+    if (inputsrc2.empty())
+        return true;
+
+    videoType = argv[cli++];
+    if (videoType.empty())
+        return false;
+    codecType2 = (MediaPipelineServiceInterface::VIDEO_TYPE) std::stoi(videoType);
+
+    return true;
+}
+
+int get_running_model_servers() {
+    const char* val = std::getenv("cid_count");
+    std::cout << "val: " << (val ? val : "(unset)") << std::endl;
+    if (val == NULL || !stringIsInteger(val))
+        return 0;
+    return std::stoi(val);
+}
+
+// Builds the decode pipeline and detection model for one input stream
+void getMAPipeline(std::string mediaPath, GstElement** pipeline, GstElement** appsink, ObjectDetectionInterface** objDet)
+{
+    if (!setActiveModel(0, objDet)) {
+        std::cout << "ERROR: failed to set the active detection model" << std::endl;
+        return;
+    }
+
+    if (!loadGStreamer(pipeline, appsink, mediaPath, *objDet)) {
+        std::cout << "ERROR: failed to load the GStreamer pipeline" << std::endl;
+        return;
+    }
+}
+
+int main(int argc, char** argv) {
+    installSignalHandlers();
+
+    // The launch arguments populate the pipeline globals used below:
+    // _videoStreamPipeline(2), _use_onevpl, _render, _renderPortrait,
+    // _videoType(2), _window_width, _window_height and _detection_threshold.
+    {
+        if (_detection_threshold > 1.0 || _detection_threshold < 0.0) {
+            std::cout << "detection_threshold: " << _detection_threshold
+                << " is invalid; the confidence threshold must be a floating point value between 0.0 and 1.0.\n" << endl;
+            return 1;
+        }
+
+        // Swap width/height to enable portrait mode
+        if (_renderPortrait) {
+            int tmp = _window_width;
+            _window_width = _window_height;
+            _window_height = tmp;
+        }
+    }
+
+    // if (!parse_cl_args(argc, argv, _videoStreamPipeline, _videoStreamPipeline2, _use_onevpl, _render, _renderPortrait, model_server_config, _videoType, _videoType2))
+    // {
+    //     print_usage(argv[0]);
+    //     return 1;
+    // }
+
+    // Use GST pipelines for media HWA decode and pre-processing
+    _mediaService = new GStreamerMediaPipelineService();
+
+    // get valid server port numbers
+    int running_servers = get_running_model_servers();
+    _server_grpc_port = 9178 + running_servers;
+    _server_http_port = 11338 + running_servers;
+
+    gst_init(NULL, NULL);
+
+    std::vector<std::thread> running_streams;
+    _allDecodersInitd = false;
+
+    GstElement* pipeline;
+    GstElement* appsink;
+    ObjectDetectionInterface* objDet;
+    getMAPipeline(_videoStreamPipeline, &pipeline, &appsink, &objDet);
+    running_streams.emplace_back(run_stream, _videoStreamPipeline, pipeline, appsink, objDet);
+
+    GstElement* pipeline2;
+    GstElement* appsink2;
+    ObjectDetectionInterface* objDet2;
+    if (!_videoStreamPipeline2.empty())
+    {
+        std::cout << "in the 2nd inputsrc..." << std::endl;
+
+        getMAPipeline(_videoStreamPipeline2, &pipeline2, &appsink2, &objDet2);
+        running_streams.emplace_back(run_stream, _videoStreamPipeline2, pipeline2, appsink2, objDet2);
+    }
+
+    if (!loadOVMS())
+        return -1;
+
+    _allDecodersInitd = true;
+    _cvAllDecodersInitd.notify_all();
+
+    for (auto& running_stream : running_streams)
+        running_stream.join();
+
+    if (_mediaService != NULL) {
+        delete _mediaService;
+        _mediaService = NULL;
+    }
+
+    if (_srv)
+        OVMS_ServerDelete(_srv);
+    if (_modelsSettings)
+        OVMS_ModelsSettingsDelete(_modelsSettings);
+    if (_serverSettings)
+        OVMS_ServerSettingsDelete(_serverSettings);
+
+    return 0;
+}
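main() starts every decode thread first, loads OVMS, and only then releases the threads through _cvAllDecodersInitd; the comment in run_stream notes that letting decoders initialize concurrently with server startup can segfault. A condensed sketch of that startup gate (illustrative only; names are hypothetical):

    #include <condition_variable>
    #include <mutex>
    #include <thread>
    #include <vector>

    std::mutex mtx;
    std::condition_variable cvReady;
    bool ready = false;  // set once heavyweight init (e.g., the model server) is done

    void worker() {
        std::unique_lock<std::mutex> lk(mtx);
        cvReady.wait(lk, [] { return ready; });  // park until main releases us
        lk.unlock();
        // per-stream processing loop would run here
    }

    int main() {
        std::vector<std::thread> threads;
        for (int i = 0; i < 2; ++i) threads.emplace_back(worker);
        // heavyweight initialization happens while the workers are parked
        { std::lock_guard<std::mutex> lk(mtx); ready = true; }
        cvReady.notify_all();
        for (auto& t : threads) t.join();
        return 0;
    }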
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/opencv_utils.hpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/opencv_utils.hpp
new file mode 100644
index 00000000..9eb478ad
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/opencv_utils.hpp
@@ -0,0 +1,163 @@
+//*****************************************************************************
+// Copyright 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <iostream>
+
+#include "custom_node_interface.h"
+#include "opencv2/opencv.hpp"
+
+template <typename T>
+void reorder_to_nhwc_2(const T* sourceNchwBuffer, T* destNhwcBuffer, int rows, int cols, int channels) {
+    for (int y = 0; y < rows; ++y) {
+        for (int x = 0; x < cols; ++x) {
+            for (int c = 0; c < channels; ++c) {
+                destNhwcBuffer[y * channels * cols + x * channels + c] = reinterpret_cast<const T*>(sourceNchwBuffer)[c * (rows * cols) + y * cols + x];
+            }
+        }
+    }
+}
+
+template <typename T>
+std::vector<T> reorder_to_nhwc(const T* nchwVector, int rows, int cols, int channels) {
+    std::vector<T> nhwcVector(rows * cols * channels);
+    reorder_to_nhwc_2(nchwVector, nhwcVector.data(), rows, cols, channels);
+    return nhwcVector;
+}
+
+template <typename T>
+void reorder_to_nchw_2(const T* sourceNhwcBuffer, T* destNchwBuffer, int rows, int cols, int channels) {
+    for (int y = 0; y < rows; ++y) {
+        for (int x = 0; x < cols; ++x) {
+            for (int c = 0; c < channels; ++c) {
+                destNchwBuffer[c * (rows * cols) + y * cols + x] = reinterpret_cast<const T*>(sourceNhwcBuffer)[y * channels * cols + x * channels + c];
+            }
+        }
+    }
+}
+
+template <typename T>
+std::vector<T> reorder_to_nchw(const T* nhwcVector, int rows, int cols, int channels) {
+    std::vector<T> nchwVector(rows * cols * channels);
+    reorder_to_nchw_2(nhwcVector, nchwVector.data(), rows, cols, channels);
+    return nchwVector;
+}
+
+const cv::Mat nhwc_to_mat(const CustomNodeTensor* input) {
+    uint64_t height = input->dims[1];
+    uint64_t width = input->dims[2];
+    return cv::Mat(height, width, CV_32FC3, input->data);
+}
+
+const cv::Mat nchw_to_mat(const CustomNodeTensor* input) {
+    uint64_t channels = input->dims[1];
+    uint64_t rows = input->dims[2];
+    uint64_t cols = input->dims[3];
+    auto nhwcVector = reorder_to_nhwc((float*)input->data, rows, cols, channels);
+
+    cv::Mat image(rows, cols, CV_32FC3);
+    std::memcpy(image.data, nhwcVector.data(), nhwcVector.size() * sizeof(float));
+    return image;
+}
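A round trip through the reorder helpers should reproduce the input exactly; a small self-check (illustrative only, assumes the helpers above are in scope):

    #include <cassert>
    #include <vector>

    int main() {
        const int rows = 2, cols = 2, channels = 3;
        // interleaved NHWC: (c0,c1,c2) per pixel
        std::vector<float> nhwc = {0, 10, 20,  1, 11, 21,  2, 12, 22,  3, 13, 23};
        auto nchw = reorder_to_nchw(nhwc.data(), rows, cols, channels);
        // planar layout: all channel-0 values first, then channel 1, then channel 2
        assert(nchw[0] == 0 && nchw[3] == 3 && nchw[4] == 10);
        auto back = reorder_to_nhwc(nchw.data(), rows, cols, channels);
        assert(back == nhwc);  // the round trip is lossless
        return 0;
    }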
+bool crop_rotate_resize(cv::Mat originalImage, cv::Mat& targetImage, cv::Rect roi, float angle, float originalTextWidth, float originalTextHeight, cv::Size targetShape) {
+    try {
+        // Limit the roi to the bounds of the original image;
+        // detections may extend beyond the original image.
+        roi.x = roi.x < 0 ? 0 : roi.x;
+        roi.y = roi.y < 0 ? 0 : roi.y;
+        roi.width = roi.width + roi.x > originalImage.size().width ? originalImage.size().width - roi.x : roi.width;
+        roi.height = roi.height + roi.y > originalImage.size().height ? originalImage.size().height - roi.y : roi.height;
+
+        cv::Mat cropped = originalImage(roi);
+
+        cv::Mat rotated;
+        if (angle != 0.0) {
+            cv::Mat rotationMatrix = cv::getRotationMatrix2D(cv::Point2f(cropped.size().width / 2, cropped.size().height / 2), angle, 1.0);
+            cv::warpAffine(cropped, rotated, rotationMatrix, cropped.size());
+        } else {
+            rotated = cropped;
+        }
+
+        cv::Mat rotatedSlicedImage;
+        if (angle != 0.0) {
+            int sliceOffset = (rotated.size().height - originalTextHeight) / 2;
+            rotatedSlicedImage = rotated(cv::Rect(0, sliceOffset, rotated.size().width, originalTextHeight));
+        } else {
+            rotatedSlicedImage = rotated;
+        }
+
+        cv::resize(rotatedSlicedImage, targetImage, targetShape);
+    } catch (const cv::Exception& e) {
+        std::cout << e.what() << std::endl;
+        return false;
+    }
+    return true;
+}
+
+cv::Mat apply_grayscale(cv::Mat image) {
+    cv::Mat grayscaled;
+    cv::cvtColor(image, grayscaled, cv::COLOR_BGR2GRAY);
+    return grayscaled;
+}
+
+bool scale_image(
+    bool isScaleDefined,
+    const float scale,
+    const std::vector<float>& meanValues,
+    const std::vector<float>& scaleValues,
+    cv::Mat& image) {
+    if (!isScaleDefined && scaleValues.size() == 0 && meanValues.size() == 0) {
+        return true;
+    }
+
+    size_t colorChannels = static_cast<size_t>(image.channels());
+    if (meanValues.size() > 0 && meanValues.size() != colorChannels) {
+        return false;
+    }
+    if (scaleValues.size() > 0 && scaleValues.size() != colorChannels) {
+        return false;
+    }
+
+    std::vector<cv::Mat> channels;
+    if (meanValues.size() > 0 || scaleValues.size() > 0) {
+        cv::split(image, channels);
+        if (channels.size() != colorChannels) {
+            return false;
+        }
+    } else {
+        channels.emplace_back(image);
+    }
+
+    for (size_t i = 0; i < meanValues.size(); i++) {
+        channels[i] -= meanValues[i];
+    }
+
+    if (scaleValues.size() > 0) {
+        for (size_t i = 0; i < channels.size(); i++) {
+            channels[i] /= scaleValues[i];
+        }
+    } else if (isScaleDefined) {
+        for (size_t i = 0; i < channels.size(); i++) {
+            channels[i] /= scale;
+        }
+    }
+
+    if (channels.size() == 1) {
+        image = channels[0];
+    } else {
+        cv::merge(channels, image);
+    }
+
+    return true;
+}
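scale_image normalizes in place: optional per-channel mean subtraction followed by per-channel division, or a single scalar divisor when isScaleDefined is set. A short usage sketch (illustrative; the mean/scale constants are assumed, not taken from this patch):

    #include <opencv2/opencv.hpp>
    // assumes scale_image from opencv_utils.hpp above is visible

    int main() {
        cv::Mat image(224, 224, CV_32FC3, cv::Scalar(128, 128, 128));
        // per-channel normalization: (pixel - mean) / scale, in the mat's channel order
        std::vector<float> mean = {123.675f, 116.28f, 103.53f};   // assumed constants
        std::vector<float> scale = {58.395f, 57.12f, 57.375f};    // assumed constants
        bool ok = scale_image(false, 0.0f, mean, scale, image);
        return ok ? 0 : 1;
    }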
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/queue.hpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/queue.hpp
new file mode 100644
index 00000000..88032518
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/queue.hpp
@@ -0,0 +1,141 @@
+//*****************************************************************************
+// Copyright 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <queue>
+#include <thread>
+#include <utility>
+#include <vector>
+
+// #include "profiler.hpp"
+
+namespace ovms {
+
+template <typename T>
+class Queue {
+public:
+    /**
+     * @brief Allocating idle stream for execution
+     */
+    std::future<int> getIdleStream() {
+        // OVMS_PROFILE_FUNCTION();
+        int value;
+        std::promise<int> idleStreamPromise;
+        std::future<int> idleStreamFuture = idleStreamPromise.get_future();
+        std::unique_lock<std::mutex> lk(front_mut);
+        if (streams[front_idx] < 0) {  // we need to wait for any idle stream to be returned
+            std::unique_lock<std::mutex> queueLock(queue_mutex);
+            promises.push(std::move(idleStreamPromise));
+        } else {  // we can give an idle stream right away
+            value = streams[front_idx];
+            streams[front_idx] = -1;  // negative value indicates a consumed slot
+            front_idx = (front_idx + 1) % streams.size();
+            lk.unlock();
+            idleStreamPromise.set_value(value);
+        }
+        return idleStreamFuture;
+    }
+
+    std::optional<int> tryToGetIdleStream() {
+        // OVMS_PROFILE_FUNCTION();
+        int value;
+        std::unique_lock<std::mutex> lk(front_mut);
+        if (streams[front_idx] < 0) {  // no idle stream available right now
+            return std::nullopt;
+        } else {
+            value = streams[front_idx];
+            streams[front_idx] = -1;  // negative value indicates a consumed slot
+            front_idx = (front_idx + 1) % streams.size();
+            lk.unlock();
+            return value;
+        }
+    }
+
+    /**
+     * @brief Release stream after execution
+     */
+    void returnStream(int streamID) {
+        // OVMS_PROFILE_FUNCTION();
+        std::unique_lock<std::mutex> lk(queue_mutex);
+        if (promises.size()) {
+            std::promise<int> promise = std::move(promises.front());
+            promises.pop();
+            lk.unlock();
+            promise.set_value(streamID);
+            return;
+        }
+        std::uint32_t old_back = back_idx.load();
+        while (!back_idx.compare_exchange_weak(
+            old_back,
+            (old_back + 1) % streams.size(),
+            std::memory_order_relaxed)) {
+        }
+        streams[old_back] = streamID;
+    }
+
+    /**
+     * @brief Constructor with initialization
+     */
+    Queue(int streamsLength) :
+        streams(streamsLength),
+        front_idx{0},
+        back_idx{0} {
+        for (int i = 0; i < streamsLength; ++i) {
+            streams[i] = i;
+        }
+    }
+
+    /**
+     * @brief Give InferRequest
+     */
+    T& getInferRequest(int streamID) {
+        return inferRequests[streamID];
+    }
+
+protected:
+    /**
+     * @brief Vector representing circular buffer for infer queue
+     */
+    std::vector<int> streams;
+
+    /**
+     * @brief Index of the front of the idle streams list
+     */
+    std::uint32_t front_idx;
+
+    /**
+     * @brief Index of the back of the idle streams list
+     */
+    std::atomic<std::uint32_t> back_idx;
+
+    /**
+     * @brief Mutexes guarding the front of the ring and the promise queue
+     */
+    std::mutex front_mut;
+    std::mutex queue_mutex;
+
+    /**
+     * @brief Inference requests indexed by stream id
+     */
+    std::vector<T> inferRequests;
+    std::queue<std::promise<int>> promises;
+};
+}  // namespace ovms
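Queue hands out integer stream ids from a circular buffer and, when none are idle, parks the caller on a promise that returnStream later fulfills. A minimal usage sketch (illustrative only; T=int for brevity):

    #include <iostream>
    #include "queue.hpp"

    int main() {
        ovms::Queue<int> queue(2);              // two stream slots
        int id1 = queue.getIdleStream().get();  // -> 0
        int id2 = queue.getIdleStream().get();  // -> 1
        auto pending = queue.getIdleStream();   // no idle slot: future stays unresolved
        queue.returnStream(id1);                // fulfills the waiting promise
        std::cout << "reused stream: " << pending.get() << std::endl;  // -> 0
        queue.returnStream(id2);
        return 0;
    }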
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/utils.hpp b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/utils.hpp
new file mode 100644
index 00000000..b066b1ff
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/utils.hpp
@@ -0,0 +1,145 @@
+//*****************************************************************************
+// Copyright 2021 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+
+#include <algorithm>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "custom_node_interface.h"
+
+#define NODE_ASSERT(cond, msg)                                                 \
+    if (!(cond)) {                                                             \
+        std::cout << "[" << __LINE__ << "] Assert: " << msg << std::endl;      \
+        return 1;                                                              \
+    }
+
+#define NODE_EXPECT(cond, msg)                                                 \
+    if (!(cond)) {                                                             \
+        std::cout << "[" << __LINE__ << "] Expect failed: " << msg << std::endl; \
+    }
+
+int get_int_parameter(const std::string& name, const struct CustomNodeParam* params, int paramsCount, int defaultValue = 0) {
+    for (int i = 0; i < paramsCount; i++) {
+        if (name == params[i].key) {
+            try {
+                return std::stoi(params[i].value);
+            } catch (std::invalid_argument& e) {
+                return defaultValue;
+            } catch (std::out_of_range& e) {
+                return defaultValue;
+            }
+        }
+    }
+    return defaultValue;
+}
+
+float get_float_parameter(const std::string& name, const struct CustomNodeParam* params, int paramsCount, float defaultValue = 0.0f) {
+    for (int i = 0; i < paramsCount; i++) {
+        if (name == params[i].key) {
+            try {
+                return std::stof(params[i].value);
+            } catch (std::invalid_argument& e) {
+                return defaultValue;
+            } catch (std::out_of_range& e) {
+                return defaultValue;
+            }
+        }
+    }
+    return defaultValue;
+}
+
+float get_float_parameter(const std::string& name, const struct CustomNodeParam* params, int paramsCount, bool& isDefined, float defaultValue = 0.0f) {
+    isDefined = true;
+    for (int i = 0; i < paramsCount; i++) {
+        if (name == params[i].key) {
+            try {
+                return std::stof(params[i].value);
+            } catch (std::invalid_argument& e) {
+                isDefined = false;
+                return defaultValue;
+            } catch (std::out_of_range& e) {
+                isDefined = false;
+                return defaultValue;
+            }
+        }
+    }
+    isDefined = false;
+    return defaultValue;
+}
+
+std::string get_string_parameter(const std::string& name, const struct CustomNodeParam* params, int paramsCount, const std::string& defaultValue = "") {
+    for (int i = 0; i < paramsCount; i++) {
+        if (name == params[i].key) {
+            return params[i].value;
+        }
+    }
+    return defaultValue;
+}
+
+std::vector<float> get_float_list_parameter(const std::string& name, const struct CustomNodeParam* params, int paramsCount) {
+    std::string listStr;
+    for (int i = 0; i < paramsCount; i++) {
+        if (name == params[i].key) {
+            listStr = params[i].value;
+            break;
+        }
+    }
+
+    if (listStr.length() < 2 || listStr.front() != '[' || listStr.back() != ']') {
+        return {};
+    }
+
+    listStr = listStr.substr(1, listStr.size() - 2);
+
+    std::vector<float> result;
+
+    std::stringstream lineStream(listStr);
+    std::string element;
+    while (std::getline(lineStream, element, ',')) {
+        try {
+            float e = std::stof(element);
+            result.push_back(e);
+        } catch (std::invalid_argument& e) {
+            NODE_EXPECT(false, "error parsing list parameter");
+            return {};
+        } catch (std::out_of_range& e) {
+            NODE_EXPECT(false, "error parsing list parameter");
+            return {};
+        }
+    }
+
+    return result;
+}
+
+std::string floatListToString(const std::vector<float>& values) {
+    std::stringstream ss;
+    ss << "[";
+    for (size_t i = 0; i < values.size(); ++i) {
+        if (i != 0)
+            ss << ",";
+        ss << values[i];
+    }
+    ss << "]";
+    return ss.str();
+}
+
+void cleanup(CustomNodeTensor& tensor) {
+    free(tensor.data);
+    free(tensor.dims);
+}
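These helpers give the custom node typed access to its key/value parameters, falling back to defaults when parsing fails. An illustrative call (the parameter names and values here are hypothetical, not taken from the patch):

    #include <cstdio>
    // assumes utils.hpp above is included

    int main() {
        CustomNodeParam params[] = {
            {"confidence_threshold", "0.7"},
            {"max_output_batch", "100"},
            {"scale_values", "[58.395,57.12,57.375]"},
        };
        int paramsCount = 3;
        float conf = get_float_parameter("confidence_threshold", params, paramsCount, 0.5f);
        int batch = get_int_parameter("max_output_batch", params, paramsCount, 10);
        std::vector<float> scales = get_float_list_parameter("scale_values", params, paramsCount);
        // prints: conf=0.70 batch=100 scales=3
        printf("conf=%.2f batch=%d scales=%zu\n", conf, batch, scales.size());
        return 0;
    }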
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/yolo_efficientnet.sh b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/yolo_efficientnet.sh
new file mode 100755
index 00000000..fa18d98c
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/yolo_efficientnet.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Copyright (C) 2023 Intel Corporation.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+cid_count=0
+
+# User-configured parameters
+if [ -z "$INPUT_TYPE" ]
+then
+    echo "INPUT_TYPE is required"
+    exit 1
+    #INPUT_TYPE="FILE_H264"
+    #INPUT_TYPE="RTSP_H265"
+fi
+
+if [ -z "$INPUTSRC" ]
+then
+    echo "INPUTSRC is required"
+    exit 1
+    #INPUTSRC="sample-video.mp4"
+    #INPUTSRC="rtsp://127.0.0.1:8554/camera_0"
+fi
+
+# H.264 input uses codec type 1, H.265 uses 0
+CODEC_TYPE=0
+if [ "$INPUT_TYPE" == "FILE_H264" ] || [ "$INPUT_TYPE" == "RTSP_H264" ]
+then
+    CODEC_TYPE=1
+elif [ "$INPUT_TYPE" == "FILE_H265" ] || [ "$INPUT_TYPE" == "RTSP_H265" ]
+then
+    CODEC_TYPE=0
+fi
+
+if [ -z "$USE_VPL" ]
+then
+    USE_VPL=0
+fi
+
+if [ -z "$RENDER_MODE" ]
+then
+    RENDER_MODE=0
+fi
+
+if [ -z "$RENDER_PORTRAIT_MODE" ]
+then
+    RENDER_PORTRAIT_MODE=0
+fi
+
+if [ "1" == "$LOW_POWER" ]
+then
+    echo "Enabled GPU based low power pipeline"
+    CONFIG_NAME="/app/gst-ovms/pipelines/yolov8_ensemble/models/config_yolov8_ensemble_gpu.json"
+elif [ "$CPU_ONLY" == "1" ]
+then
+    echo "Enabled CPU inference pipeline only"
+    CONFIG_NAME="/app/gst-ovms/pipelines/yolov8_ensemble/models/config_yolov8_ensemble_cpu.json"
+else
+    echo "Enabled CPU+iGPU pipeline"
+    CONFIG_NAME="/app/gst-ovms/pipelines/yolov8_ensemble/models/config_yolov8_ensemble_cpu_gpu.json"
+fi
+
+# Output goes to the results log file by default; set DC for direct console output
+if [ -z "$DC" ]
+then
+    cid_count=0 /app/gst-ovms/pipelines/yolov8_ensemble/yolov8_ensemble "$INPUTSRC" $USE_VPL $RENDER_MODE $RENDER_PORTRAIT_MODE "$CONFIG_NAME" $CODEC_TYPE > /app/yolov8_ensemble/results/pipeline$cid_count.log 2>&1
+else
+    cid_count=0 /app/gst-ovms/pipelines/yolov8_ensemble/yolov8_ensemble "$INPUTSRC" $USE_VPL $RENDER_MODE $RENDER_PORTRAIT_MODE "$CONFIG_NAME" $CODEC_TYPE
+fi
\ No newline at end of file
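The launch script above is driven entirely by environment variables. A minimal sketch of invoking it programmatically, assuming the container paths and variable names from the script (the specific values are illustrative):

import os
import subprocess

env = dict(os.environ,
           INPUT_TYPE="FILE_H264",       # or RTSP_H264 / FILE_H265 / RTSP_H265
           INPUTSRC="sample-video.mp4",  # file path or rtsp:// URL
           CPU_ONLY="1",                 # select the CPU-only OVMS config
           DC="1")                       # print to the console instead of the log file
subprocess.run(["./yolo_efficientnet.sh"], env=env, check=True)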
diff --git a/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/yolo_efficientnet_dual.sh b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/yolo_efficientnet_dual.sh
new file mode 100755
index 00000000..b6bdcca8
--- /dev/null
+++ b/configs/opencv-ovms/gst_capi/pipelines/capi_yolov8_ensemble/yolo_efficientnet_dual.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+#
+# Copyright (C) 2023 Intel Corporation.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+cid_count=0
+
+# User-configured parameters
+if [ -z "$INPUT_TYPE" ]
+then
+    echo "INPUT_TYPE is required"
+    exit 1
+    #INPUT_TYPE="FILE_H264"
+    #INPUT_TYPE="RTSP_H265"
+fi
+if [ -z "$INPUT_TYPE2" ]
+then
+    echo "INPUT_TYPE2 is required"
+    exit 1
+    #INPUT_TYPE2="FILE_H264"
+    #INPUT_TYPE2="RTSP_H265"
+fi
+
+if [ -z "$INPUTSRC" ]
+then
+    echo "INPUTSRC is required"
+    exit 1
+    #INPUTSRC="sample-video.mp4"
+    #INPUTSRC="rtsp://127.0.0.1:8554/camera_0"
+fi
+if [ -z "$INPUTSRC2" ]
+then
+    echo "INPUTSRC2 is required"
+    exit 1
+    #INPUTSRC2="sample-video.mp4"
+    #INPUTSRC2="rtsp://127.0.0.1:8554/camera_0"
+fi
+
+# H.264 input uses codec type 1, H.265 uses 0
+CODEC_TYPE2=""
+CODEC_TYPE=0
+if [ "$INPUT_TYPE" == "FILE_H264" ] || [ "$INPUT_TYPE" == "RTSP_H264" ]
+then
+    CODEC_TYPE=1
+elif [ "$INPUT_TYPE" == "FILE_H265" ] || [ "$INPUT_TYPE" == "RTSP_H265" ]
+then
+    CODEC_TYPE=0
+fi
+if [ "$INPUT_TYPE2" == "FILE_H264" ] || [ "$INPUT_TYPE2" == "RTSP_H264" ]
+then
+    CODEC_TYPE2=1
+elif [ "$INPUT_TYPE2" == "FILE_H265" ] || [ "$INPUT_TYPE2" == "RTSP_H265" ]
+then
+    CODEC_TYPE2=0
+fi
+
+if [ -z "$USE_VPL" ]
+then
+    USE_VPL=0
+fi
+
+if [ -z "$RENDER_MODE" ]
+then
+    RENDER_MODE=0
+fi
+
+if [ -z "$RENDER_PORTRAIT_MODE" ]
+then
+    RENDER_PORTRAIT_MODE=0
+fi
+
+if [ "1" == "$LOW_POWER" ]
+then
+    echo "Enabled GPU based low power pipeline"
+    CONFIG_NAME="/app/gst-ovms/pipelines/yolov8_ensemble/models/config_yolov8_ensemble_gpu.json"
+elif [ "$CPU_ONLY" == "1" ]
+then
+    echo "Enabled CPU inference pipeline only"
+    CONFIG_NAME="/app/gst-ovms/pipelines/yolov8_ensemble/models/config_yolov8_ensemble_cpu.json"
+else
+    echo "Enabled CPU+iGPU pipeline"
+    CONFIG_NAME="/app/gst-ovms/pipelines/yolov8_ensemble/models/config_yolov8_ensemble_cpu_gpu.json"
+fi
+
+# Output goes to the results log file by default; set DC for direct console output
+if [ -z "$DC" ]
+then
+    cid_count=0 /app/gst-ovms/pipelines/yolov8_ensemble/yolov8_ensemble "$INPUTSRC" "$INPUTSRC2" $USE_VPL $RENDER_MODE $RENDER_PORTRAIT_MODE "$CONFIG_NAME" $CODEC_TYPE $CODEC_TYPE2 > /app/yolov8_ensemble/results/pipeline$cid_count.log 2>&1
+else
+    cid_count=0 /app/gst-ovms/pipelines/yolov8_ensemble/yolov8_ensemble "$INPUTSRC" "$INPUTSRC2" $USE_VPL $RENDER_MODE $RENDER_PORTRAIT_MODE "$CONFIG_NAME" $CODEC_TYPE $CODEC_TYPE2
+fi
\ No newline at end of file
diff --git a/configs/opencv-ovms/models/2022/config_template.json b/configs/opencv-ovms/models/2022/config_template.json
index e548c18f..9f07160d 100644
--- a/configs/opencv-ovms/models/2022/config_template.json
+++ b/configs/opencv-ovms/models/2022/config_template.json
@@ -89,6 +89,18 @@
         "num_versions": 2
       }
     },
+    { "config": {
+        "name": "yolov8",
+        "base_path": "/models/yolov8/FP32-INT8",
+        "shape": "(1,3,416,416)",
+        "layout": "NCHW:NCHW",
+        "nireq": 3,
+        "batch_size": "1",
+        "plugin_config": { "PERFORMANCE_HINT": "LATENCY" },
+        "target_device": "{target_device}"
+      },
+      "latest": { "num_versions": 2 }
+    },
     {
       "config": {
         "name": "efficientnetb0_FP32INT8",
@@ -231,6 +243,5 @@
       }
     ]
   }
-  ]
 }
\ No newline at end of file
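Once the model server loads this template, the new yolov8 entry can be checked through OVMS's config status endpoint. A sketch of that check, assuming the server's REST port was set to 8080 via --rest_port (adjust to your deployment):

import requests

resp = requests.get("http://localhost:8080/v1/config", timeout=10)
resp.raise_for_status()
print(resp.json().get("yolov8"))  # expect version 1 reported as AVAILABLE once loaded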
diff --git a/download_models/Dockerfile.yolov8-download b/download_models/Dockerfile.yolov8-download
new file mode 100644
index 00000000..8b00f56c
--- /dev/null
+++ b/download_models/Dockerfile.yolov8-download
@@ -0,0 +1,42 @@
+#
+# Copyright (C) 2023-2024 Intel Corporation.
+#
+#
+# -----------------------------------------------------------
+
+# https://hub.docker.com/r/openvino/model_server
+ARG BASE_IMAGE=openvino/ubuntu22_dev:2023.2.0
+FROM $BASE_IMAGE as release
+
+USER root
+WORKDIR /
+ENV DEBIAN_FRONTEND=noninteractive
+# SHELL ["/bin/bash", "-c"]
+
+# Install build tools and media UMD
+ARG BUILD_DEPENDENCIES="cmake build-essential git-gui python3 python3-pip libopencv-dev python3-opencv wget"
+RUN apt -y update && \
+    apt install -y ${BUILD_DEPENDENCIES} && \
+    rm -rf /var/lib/apt/lists/* && rm -rf /tmp/*
+
+WORKDIR /app-build
+COPY model_build/ /app-build/
+
+# Install needed tools
+RUN ./download-tools.sh
+
+# 1. Downloads the latest release of yolov8n.pt -> converts it to ONNX -> converts that to an OpenVINO FP32 IR
+# 2. Downloads the needed quantization dependencies,
+#    if not already present in the current model_build directory
+# 3. Finally, converts the OpenVINO FP32 IR to INT8 with the necessary preprocessing layer
+# REFRESH_OPTION must be declared as a build arg so that --build-arg REFRESH_OPTION=--refresh takes effect
+ARG REFRESH_OPTION
+RUN ./convert-model.sh $REFRESH_OPTION
+
+ENTRYPOINT ["/bin/bash", "-c", "cp yolov8n-int8-416.* /savedir"]
+
+# Example builds:
+# Force refresh:
+# sudo docker build -t openvino_yolov8-download:1.1 --build-arg REFRESH_OPTION=--refresh -f Dockerfile.yolov8-download .
+#
+# Greenfield:
+# sudo docker build -t openvino_yolov8-download:1.1 -f Dockerfile.yolov8-download .
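The downloadYolov8FP32INT8 helper added below automates this image's build-then-extract flow; the same steps can be reproduced by hand. A sketch (the image tag and /savedir mount mirror the Dockerfile above; the local output directory is an arbitrary choice):

import subprocess
from pathlib import Path

out_dir = Path("models/yolov8/FP32-INT8/1").resolve()
out_dir.mkdir(parents=True, exist_ok=True)

# Build the downloader image; add "--build-arg", "REFRESH_OPTION=--refresh" to force a rebuild
subprocess.run(["docker", "build", "-t", "openvino_yolov8-download:1.1",
                "-f", "Dockerfile.yolov8-download", "."], check=True)

# Running the image copies yolov8n-int8-416.* into the mounted directory
subprocess.run(["docker", "run", "--rm", "-v", f"{out_dir}:/savedir",
                "openvino_yolov8-download:1.1"], check=True)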
diff --git a/download_models/downloadOVMSModels.sh b/download_models/downloadOVMSModels.sh
index 082016b7..a5a9c5cf 100755
--- a/download_models/downloadOVMSModels.sh
+++ b/download_models/downloadOVMSModels.sh
@@ -257,6 +257,24 @@ downloadTextRecognition() {
     fi
 }
 
+downloadYolov8FP32INT8() {
+    yolov8ModelDirName="yolov8"
+    yolov8ModelFile="$yolov8ModelDirName/$modelPrecisionFP32INT8/1/yolov8n-int8-416.bin"
+    if [ -f "$yolov8ModelFile" ]; then
+        echo "yolov8 $modelPrecisionFP32INT8 model already exists in $yolov8ModelFile, skip downloading..."
+    else
+        echo "download yolov8 $modelPrecisionFP32INT8 model..."
+        YOLOV8_MODEL_DOWNLOADER=$(docker images --format "{{.Repository}}" | grep "openvino_yolov8-download")
+        if [ -z "$YOLOV8_MODEL_DOWNLOADER" ]
+        then
+            docker build -t openvino_yolov8-download:1.1 $buildargs -f "$MODEL_EXEC_PATH"/Dockerfile.yolov8-download "$MODEL_EXEC_PATH"/
+        fi
+        docker run --rm -v "$modelDir/$yolov8ModelDirName/$modelPrecisionFP32INT8"/1/:/savedir openvino_yolov8-download:1.1
+        # make the yolov8 model directory owned by the local user instead of root
+        sudo chown -R "${USER:=$(/usr/bin/id -run)}:$USER" "$modelDir"/"$yolov8ModelDirName"
+        echo "yolov8 model downloaded in $yolov8ModelFile"
+    fi
+}
+
 ### Run normal downloader via omz model downloader:
 configFile="$modelDir"/config_template.json
@@ -301,3 +319,4 @@ downloadYolov5sFP16INT8
 downloadEfficientnetb0
 downloadHorizontalText
 downloadTextRecognition
+downloadYolov8FP32INT8
diff --git a/download_models/model_build/convert-model.py b/download_models/model_build/convert-model.py
new file mode 100644
index 00000000..562d7116
--- /dev/null
+++ b/download_models/model_build/convert-model.py
@@ -0,0 +1,24 @@
+from pathlib import Path
+
+from ultralytics import YOLO
+
+models_dir = Path('./models')
+models_dir.mkdir(exist_ok=True)
+
+DET_MODEL_NAME = "yolov8n"
+
+# Downloads yolov8n.pt on first use
+det_model = YOLO(models_dir / f'{DET_MODEL_NAME}.pt')
+label_map = det_model.model.names
+
+# Export an OpenVINO FP32 IR (the export also produces the intermediate ONNX model)
+det_model_path = models_dir / f"{DET_MODEL_NAME}.xml"
+if not det_model_path.exists():
+    det_model.export(format="openvino", dynamic=True, half=False)
diff --git a/download_models/model_build/convert-model.sh b/download_models/model_build/convert-model.sh
new file mode 100755
index 00000000..1134e7ee
--- /dev/null
+++ b/download_models/model_build/convert-model.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+if [ "$1" == "--refresh" ]
+then
+    rm -rf models/
+    rm -f yolov8n*.xml yolov8n*.bin
+fi
+
+if [ -f models/yolov8n.onnx ] || [ -f yolov8n-int8-416.bin ]
+then
+    echo "Converted models already exist; rerun with --refresh to rebuild"
+    exit 1
+fi
+
+python3 convert-model.py
+
+if [ ! -f models/yolov8n.onnx ]
+then
+    echo "Model conversion failed for onnx!"
+    exit 1
+fi
+
+if [ ! -f models/yolov8n_openvino_model/yolov8n.bin ]
+then
+    echo "Model conversion failed for IR!"
+    exit 1
+fi
+
+echo "OpenVINO FP32 yolov8n 416x416 creation successful!"
+mv models/yolov8n_openvino_model/* .
+mv yolov8n.xml yolov8n-fp32-416.xml
+mv yolov8n.bin yolov8n-fp32-416.bin
+
+echo "OpenVINO INT8 quantization starting..."
+python3 quantize-model.py
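Before the quantization step runs, it can help to confirm that the FP32 IR produced by convert-model.sh actually loads. A minimal sanity-check sketch (the file name comes from the script above; the printed shapes are indicative only):

import openvino as ov

core = ov.Core()
model = core.read_model("yolov8n-fp32-416.xml")
# The export used dynamic=True, so spatial dims stay dynamic until reshape
print(model.input(0).partial_shape)
print(model.output(0).partial_shape)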
diff --git a/download_models/model_build/download-tools.sh b/download_models/model_build/download-tools.sh
new file mode 100755
index 00000000..af7184fb
--- /dev/null
+++ b/download_models/model_build/download-tools.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# tqdm (used by quantize-model.py) is pulled in as an ultralytics dependency
+pip install -q "torch>=2.1" "torchvision>=0.16" "ultralytics==8.0.43" onnx --extra-index-url https://download.pytorch.org/whl/cpu
+# quoted so that ">=" is not parsed as a shell redirection
+pip install "nncf>=2.5.0"
diff --git a/download_models/model_build/quantize-model.py b/download_models/model_build/quantize-model.py
new file mode 100644
index 00000000..bd02ab73
--- /dev/null
+++ b/download_models/model_build/quantize-model.py
@@ -0,0 +1,343 @@
+import os
+import urllib.parse
+from os import PathLike
+from pathlib import Path
+from typing import Dict
+from zipfile import ZipFile
+
+import numpy as np
+import torch
+from tqdm import tqdm
+
+import nncf
+import openvino as ov
+
+from ultralytics import YOLO
+from ultralytics.yolo.cfg import get_cfg
+from ultralytics.yolo.data.utils import check_det_dataset
+from ultralytics.yolo.utils import DEFAULT_CFG, ops
+
+
+def download_file(
+    url: PathLike,
+    filename: PathLike = None,
+    directory: PathLike = None,
+    show_progress: bool = True,
+    silent: bool = False,
+    timeout: int = 65000,
+) -> PathLike:
+    """
+    Download a file from a url and save it to the local filesystem. The file is saved to the
+    current directory by default, or to `directory` if specified. If a filename is not given,
+    the filename of the URL will be used.
+
+    :param url: URL that points to the file to download
+    :param filename: Name of the local file to save. Should point to the name of the file only,
+                     not the full path. If None the filename from the url will be used
+    :param directory: Directory to save the file to. Will be created if it doesn't exist
+                      If None the file will be saved to the current working directory
+    :param show_progress: If True, show a tqdm progress bar
+    :param silent: If True, do not print a message if the file already exists
+    :param timeout: Number of seconds before cancelling the connection attempt
+    :return: path to downloaded file
+    """
+    import requests
+
+    filename = filename or Path(urllib.parse.urlparse(url).path).name
+    chunk_size = 16384  # make chunks bigger
+
+    filename = Path(filename)
+    if len(filename.parts) > 1:
+        raise ValueError(
+            "`filename` should refer to the name of the file, excluding the directory. "
+            "Use the `directory` parameter to specify a target directory for the downloaded file."
+        )
+
+    # create the directory if it does not exist, and add the directory to the filename
+    if directory is not None:
+        directory = Path(directory)
+        directory.mkdir(parents=True, exist_ok=True)
+        filename = directory / Path(filename)
+
+    try:
+        response = requests.get(url=url,
+                                headers={"User-agent": "Mozilla/5.0"},
+                                stream=True,
+                                timeout=timeout)
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as error:  # for non-200 codes, e.g. "404 Client Error: Not Found for url: {url}"
+        raise Exception(error) from None
+    except requests.exceptions.Timeout:
+        raise Exception(
+            "Connection timed out. If you access the internet through a proxy server, please "
+            "make sure the proxy is set in the shell from where you launched Jupyter."
+        ) from None
+    except requests.exceptions.RequestException as error:
+        raise Exception(f"File downloading failed with error: {error}") from None
+
+    # download the file if it does not exist, or if it exists with an incorrect file size
+    filesize = int(response.headers.get("Content-length", 0))
+    if not filename.exists() or (os.stat(filename).st_size != filesize):
+        with tqdm(range(filesize), unit="B",
+                  unit_scale=True,
+                  unit_divisor=1024,
+                  desc=str(filename),
+                  disable=not show_progress) as progress_bar:
+            with open(filename, "wb") as file_object:
+                for chunk in response.iter_content(chunk_size):
+                    file_object.write(chunk)
+                    progress_bar.update(len(chunk))
+                    progress_bar.refresh()
+    else:
+        if not silent:
+            print(f"'{filename}' already exists.")
+
+    response.close()
+
+    return filename.resolve()
+
+
+DATA_URL = "http://images.cocodataset.org/zips/val2017.zip"
+LABELS_URL = "https://github.com/ultralytics/yolov5/releases/download/v1.0/coco2017labels-segments.zip"
+CFG_URL = "https://raw.githubusercontent.com/ultralytics/ultralytics/8ebe94d1e928687feaa1fee6d5668987df5e43be/ultralytics/datasets/coco.yaml"
+
+OUT_DIR = Path('./datasets')
+
+DATA_PATH = OUT_DIR / "val2017.zip"
+LABELS_PATH = OUT_DIR / "coco2017labels-segments.zip"
+CFG_PATH = OUT_DIR / "coco.yaml"
+
+if not DATA_PATH.exists():
+    download_file(DATA_URL, DATA_PATH.name, DATA_PATH.parent)
+if not LABELS_PATH.exists():
+    download_file(LABELS_URL, LABELS_PATH.name, LABELS_PATH.parent)
+if not CFG_PATH.exists():
+    download_file(CFG_URL, CFG_PATH.name, CFG_PATH.parent)
+
+if not (OUT_DIR / "coco/labels").exists():
+    with ZipFile(LABELS_PATH, "r") as zip_ref:
+        zip_ref.extractall(OUT_DIR)
+    with ZipFile(DATA_PATH, "r") as zip_ref:
+        zip_ref.extractall(OUT_DIR / 'coco/images')
+
+
+def transform_fn(data_item: Dict):
+    """
+    Quantization transform function. Extracts and preprocesses input data from a dataloader item for quantization.
+    Parameters:
+        data_item: Dict with data item produced by DataLoader during iteration
+    Returns:
+        input_tensor: input data for quantization
+    """
+    input_tensor = det_validator.preprocess(data_item)['img'].numpy()
+    return input_tensor
+
+
+args = get_cfg(cfg=DEFAULT_CFG)
+args.data = str(CFG_PATH)
+#args.imgsz = 416
+
+models_dir = Path('./models')
+models_dir.mkdir(exist_ok=True)
+DET_MODEL_NAME = "yolov8n"
+det_model = YOLO(models_dir / f'{DET_MODEL_NAME}.pt')
+
+det_validator = det_model.ValidatorClass(args=args)
+det_validator.data = check_det_dataset(args.data)
+det_data_loader = det_validator.get_dataloader("datasets/coco", 1)
+
+det_validator.is_coco = True
+det_validator.class_map = ops.coco80_to_coco91_class()
+det_validator.names = det_model.model.names
+det_validator.metrics.names = det_validator.names
+det_validator.nc = det_model.model.model[-1].nc
+
+from ultralytics.yolo.utils.metrics import ConfusionMatrix
+
+
+def test(model: ov.Model, core: ov.Core, data_loader: torch.utils.data.DataLoader, validator, num_samples: int = None):
+    """
+    OpenVINO YOLOv8 model accuracy validation function. Runs model validation on the dataset and returns metrics.
+    Parameters:
+        model (Model): OpenVINO model
+        data_loader (torch.utils.data.DataLoader): dataset loader
+        validator: instance of validator class
+        num_samples (int, *optional*, None): validate the model on only the specified number of samples, if provided
+    Returns:
+        stats (Dict[str, float]): dictionary with aggregated accuracy metrics statistics; key is metric name, value is metric value
+    """
+    validator.seen = 0
+    validator.jdict = []
+    validator.stats = []
+    validator.batch_i = 1
+    validator.confusion_matrix = ConfusionMatrix(nc=validator.nc)
+    model.reshape({0: [1, 3, -1, -1]})
+    compiled_model = core.compile_model(model)
+    for batch_i, batch in enumerate(tqdm(data_loader, total=num_samples)):
+        if num_samples is not None and batch_i == num_samples:
+            break
+        batch = validator.preprocess(batch)
+        results = compiled_model(batch["img"])
+        preds = torch.from_numpy(results[compiled_model.output(0)])
+        preds = validator.postprocess(preds)
+        validator.update_metrics(preds, batch)
+    stats = validator.get_stats()
+    return stats
+
+
+def print_stats(stats: Dict[str, float], total_images: int, total_objects: int):
+    """
+    Helper function for printing accuracy statistics.
+    Parameters:
+        stats (Dict[str, float]): dictionary with aggregated accuracy metrics statistics; key is metric name, value is metric value
+        total_images (int): number of evaluated images
+        total_objects (int): number of evaluated objects
+    Returns:
+        None
+    """
+    print("Boxes:")
+    mp, mr, map50, mean_ap = stats['metrics/precision(B)'], stats['metrics/recall(B)'], stats['metrics/mAP50(B)'], stats['metrics/mAP50-95(B)']
+    # Print results
+    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Labels', 'Precision', 'Recall', 'mAP@.5', 'mAP@.5:.95')
+    print(s)
+    pf = '%20s' + '%12i' * 2 + '%12.3g' * 4  # print format
+    print(pf % ('all', total_images, total_objects, mp, mr, map50, mean_ap))
+    if 'metrics/precision(M)' in stats:
+        s_mp, s_mr, s_map50, s_mean_ap = stats['metrics/precision(M)'], stats['metrics/recall(M)'], stats['metrics/mAP50(M)'], stats['metrics/mAP50-95(M)']
+        print(s)
+        print(pf % ('all', total_images, total_objects, s_mp, s_mr, s_map50, s_mean_ap))
+
+
+quantization_dataset = nncf.Dataset(det_data_loader, transform_fn)
+
+# Keep the whole post-processing subgraph out of quantization to preserve accuracy;
+# NNCF additionally reports the /model.22 Add_3..Add_10 nodes as part of that subgraph
+ignored_scope = nncf.IgnoredScope(
+    types=["Multiply", "Subtract", "Sigmoid"],  # ignore operations
+    names=[
+        "/model.22/dfl/conv/Conv",  # in the post-processing subgraph
+        "/model.22/Add",
+        "/model.22/Add_1",
+        "/model.22/Add_2",
+        "/model.22/Add_3",
+        "/model.22/Add_4",
+        "/model.22/Add_5",
+        "/model.22/Add_6",
+        "/model.22/Add_7",
+        "/model.22/Add_8",
+        "/model.22/Add_9",
+        "/model.22/Add_10"
+    ]
+)
+
+# Detection model
+core = ov.Core()
+det_model_path = "./yolov8n-fp32-416.xml"
+det_ov_model = core.read_model(det_model_path)
+det_ov_model.reshape({0: [1, 3, 640, 640]})
+det_compiled_model = core.compile_model(det_ov_model, "CPU")
+
+NUM_TEST_SAMPLES = 300
+fp_det_stats = test(det_ov_model, core, det_data_loader, det_validator, num_samples=NUM_TEST_SAMPLES)
+print_stats(fp_det_stats, det_validator.seen, det_validator.nt_per_class.sum())
+
+quantized_det_model = nncf.quantize(
+    det_ov_model,
+    quantization_dataset,
+    preset=nncf.QuantizationPreset.MIXED,
+    ignored_scope=ignored_scope
+)
+
+quantized_det_model.reshape({0: [1, 3, 640, 640]})
+
+int8_det_stats = test(quantized_det_model, core, det_data_loader, det_validator, num_samples=NUM_TEST_SAMPLES)
+print("FP32 model accuracy")
+print_stats(fp_det_stats, det_validator.seen, det_validator.nt_per_class.sum())
+print("INT8 model accuracy")
+print_stats(int8_det_stats, det_validator.seen, det_validator.nt_per_class.sum())
+
+print("Built-in preprocessing")
+from openvino.preprocess import PrePostProcessor
+ppp = PrePostProcessor(quantized_det_model)
+
+# Fix the input to a static NCHW 1x3x416x416 tensor and move the 1/255 scaling into the model graph
+ppp.input(0).tensor().set_shape([1, 3, 416, 416]).set_layout(ov.Layout('NCHW'))
+ppp.input(0).preprocess().convert_element_type(ov.Type.f32).scale([255., 255., 255.])
+
+print(ppp)
+
+quantized_pp_model = ppp.build()
+
+from openvino.runtime import serialize
+int8_model_det_path = './yolov8n-int8-416.xml'
+print(f"Quantized detection model will be saved to {int8_model_det_path}")
+serialize(quantized_pp_model, str(int8_model_det_path))
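A quick smoke test of the serialized INT8 model is a useful final step. A minimal sketch, assuming only the yolov8n-int8-416.xml path and the 1x3x416x416 input contract established above (the printed output shape is indicative, not guaranteed):

import numpy as np
import openvino as ov

core = ov.Core()
compiled = core.compile_model(core.read_model("yolov8n-int8-416.xml"), "CPU")

# The built-in preprocessing expects NCHW float input in [0, 255];
# division by 255 now happens inside the model graph.
frame = np.random.randint(0, 256, size=(1, 3, 416, 416)).astype(np.float32)
preds = compiled(frame)[compiled.output(0)]
print(preds.shape)  # raw YOLOv8 head output, e.g. (1, 84, 3549) for a 416x416 input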