From 411fd2b761c7ad054d1bb9eeb496cdbd881da3fa Mon Sep 17 00:00:00 2001 From: Tiago De Gaspari Date: Mon, 7 Jun 2021 10:55:23 -0300 Subject: [PATCH 001/128] Add Thickness parameter in drawMatches function This commit adds the feature of selecting the thickness of the matches drawn by the drawMatches function. In larger images, the default thickness of 1 pixel creates images that are hard to visualize. --- .../features2d/include/opencv2/features2d.hpp | 7 ++++++ modules/features2d/src/draw.cpp | 25 ++++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index 86b5e935c8ad..cff09170c500 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -1314,6 +1314,13 @@ CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector& key const std::vector& matchesMask=std::vector(), int flags=DrawMatchesFlags::DEFAULT ); /** @overload */ +CV_EXPORTS_W void drawMatches( InputArray img1, const std::vector& keypoints1, + InputArray img2, const std::vector& keypoints2, + const std::vector& matches1to2, InputOutputArray outImg, + const int matchesThickness, const Scalar& matchColor=Scalar::all(-1), + const Scalar& singlePointColor=Scalar::all(-1), const std::vector& matchesMask=std::vector(), + int flags=DrawMatchesFlags::DEFAULT ); + CV_EXPORTS_AS(drawMatchesKnn) void drawMatches( InputArray img1, const std::vector& keypoints1, InputArray img2, const std::vector& keypoints2, const std::vector >& matches1to2, InputOutputArray outImg, diff --git a/modules/features2d/src/draw.cpp b/modules/features2d/src/draw.cpp index dc74ecb0809c..e4c75144fbec 100644 --- a/modules/features2d/src/draw.cpp +++ b/modules/features2d/src/draw.cpp @@ -183,7 +183,8 @@ static void _prepareImgAndDrawKeypoints( InputArray img1, const std::vector& keypoints1, @@ -207,6 +208,21 @@ void drawMatches( InputArray img1, 
const std::vector& keypoints1, const std::vector& matches1to2, InputOutputArray outImg, const Scalar& matchColor, const Scalar& singlePointColor, const std::vector& matchesMask, int flags ) +{ + drawMatches( img1, keypoints1, + img2, keypoints2, + matches1to2, outImg, + 1, matchColor, + singlePointColor, matchesMask, + flags); +} + +void drawMatches( InputArray img1, const std::vector& keypoints1, + InputArray img2, const std::vector& keypoints2, + const std::vector& matches1to2, InputOutputArray outImg, + const int matchesThickness, const Scalar& matchColor, + const Scalar& singlePointColor, const std::vector& matchesMask, + int flags ) { if( !matchesMask.empty() && matchesMask.size() != matches1to2.size() ) CV_Error( Error::StsBadSize, "matchesMask must have the same size as matches1to2" ); @@ -226,11 +242,12 @@ void drawMatches( InputArray img1, const std::vector& keypoints1, CV_Assert(i2 >= 0 && i2 < static_cast(keypoints2.size())); const KeyPoint &kp1 = keypoints1[i1], &kp2 = keypoints2[i2]; - _drawMatch( outImg, outImg1, outImg2, kp1, kp2, matchColor, flags ); + _drawMatch( outImg, outImg1, outImg2, kp1, kp2, matchColor, flags, matchesThickness ); } } } + void drawMatches( InputArray img1, const std::vector& keypoints1, InputArray img2, const std::vector& keypoints2, const std::vector >& matches1to2, InputOutputArray outImg, @@ -254,7 +271,7 @@ void drawMatches( InputArray img1, const std::vector& keypoints1, if( matchesMask.empty() || matchesMask[i][j] ) { const KeyPoint &kp1 = keypoints1[i1], &kp2 = keypoints2[i2]; - _drawMatch( outImg, outImg1, outImg2, kp1, kp2, matchColor, flags ); + _drawMatch( outImg, outImg1, outImg2, kp1, kp2, matchColor, flags, 1 ); } } } From 8f4f834ce6ff054b16aeb3c92f6ea1279185b632 Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Wed, 9 Jun 2021 18:43:42 +0300 Subject: [PATCH 002/128] applied modifier mask to the state --- modules/highgui/src/window_gtk.cpp | 1 + 1 file changed, 1 insertion(+) diff --git 
a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp index 17307ea7f988..78e78e12a290 100644 --- a/modules/highgui/src/window_gtk.cpp +++ b/modules/highgui/src/window_gtk.cpp @@ -1881,6 +1881,7 @@ static gboolean icvOnMouse( GtkWidget *widget, GdkEvent *event, gpointer user_da (unsigned)pt.y < (unsigned)(image_widget->original_image->height) )) { + state &= gtk_accelerator_get_default_mod_mask(); flags |= BIT_MAP(state, GDK_SHIFT_MASK, CV_EVENT_FLAG_SHIFTKEY) | BIT_MAP(state, GDK_CONTROL_MASK, CV_EVENT_FLAG_CTRLKEY) | BIT_MAP(state, GDK_MOD1_MASK, CV_EVENT_FLAG_ALTKEY) | From 7ee181661231a949d7998f3caaa9e4cba6250e3e Mon Sep 17 00:00:00 2001 From: rogday Date: Tue, 1 Jun 2021 17:05:27 +0300 Subject: [PATCH 003/128] split if into map of functions --- modules/dnn/src/tensorflow/tf_importer.cpp | 3789 +++++++++++--------- 1 file changed, 2006 insertions(+), 1783 deletions(-) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 084e4ac6daa5..39c230939474 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -510,2051 +510,2274 @@ class TFImporter private: void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId); + + typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&); + typedef std::map DispatchMap; + + const DispatchMap dispatch; + static const DispatchMap buildDispatchMap(); + + void parseConvolution (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseBias (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseMatMul (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseReshape (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseFlatten (tensorflow::GraphDef& net, const 
tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseTranspose (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseConstant (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseLrn (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseConcat (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseMaxPool (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseAvgPool (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseMaxPoolGrad (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parsePlaceholder (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseSplit (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseSlice (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseStridedSlice (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseMul (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseFusedBatchNorm (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseConv2DBackpropInput(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseBlockLSTM (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseResize (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseL2Normalize (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parsePriorBox (tensorflow::GraphDef& net, const 
tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseSoftmax (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseCropAndResize (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseMean (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parsePack (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseClipByValue (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseLeakyRelu (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + void parseActivation (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); + + void parseCustomLayer (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); }; -TFImporter::TFImporter(Net& net, const char *model, const char *config) - : dstNet(net) +const TFImporter::DispatchMap TFImporter::buildDispatchMap() { - if (model && model[0]) - { - CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from file: " << model); - ReadTFNetParamsFromBinaryFileOrDie(model, &netBin); - } - if (config && config[0]) - { - CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from file: " << config); - ReadTFNetParamsFromTextFileOrDie(config, &netTxt); - } - - populateNet(); + static DispatchMap dispatch; + dispatch["Conv2D"] = dispatch["SpaceToBatchND"] = dispatch["DepthwiseConv2dNative"] = + dispatch["Pad"] = dispatch["MirrorPad"] = dispatch["Conv3D"] = &TFImporter::parseConvolution; + dispatch["BiasAdd"] = dispatch["Add"] = dispatch["AddV2"] = dispatch["Sub"] = dispatch["AddN"] = &TFImporter::parseBias; + dispatch["MatMul"] = &TFImporter::parseMatMul; + dispatch["Reshape"] = &TFImporter::parseReshape; + dispatch["Flatten"] = dispatch["Squeeze"] = &TFImporter::parseFlatten; + dispatch["Transpose"] = 
&TFImporter::parseTranspose; + dispatch["Const"] = &TFImporter::parseConstant; + dispatch["LRN"] = &TFImporter::parseLrn; + dispatch["Concat"] = dispatch["ConcatV2"] = &TFImporter::parseConcat; + dispatch["MaxPool"] = dispatch["MaxPool3D"] = &TFImporter::parseMaxPool; + dispatch["AvgPool"] = dispatch["AvgPool3D"] = &TFImporter::parseAvgPool; + dispatch["MaxPoolGrad"] = &TFImporter::parseMaxPoolGrad; + dispatch["Placeholder"] = &TFImporter::parsePlaceholder; + dispatch["Split"] = &TFImporter::parseSplit; + dispatch["Slice"] = &TFImporter::parseSlice; + dispatch["StridedSlice"] = &TFImporter::parseStridedSlice; + dispatch["Mul"] = dispatch["RealDiv"] = &TFImporter::parseMul; + dispatch["FusedBatchNorm"] = dispatch["FusedBatchNormV3"] = &TFImporter::parseFusedBatchNorm; + dispatch["Conv2DBackpropInput"] = &TFImporter::parseConv2DBackpropInput; + dispatch["BlockLSTM"] = &TFImporter::parseBlockLSTM; + dispatch["ResizeNearestNeighbor"] = dispatch["ResizeBilinear"] = dispatch["FusedResizeAndPadConv2D"] = &TFImporter::parseResize; + dispatch["L2Normalize"] = &TFImporter::parseL2Normalize; + dispatch["PriorBox"] = &TFImporter::parsePriorBox; + dispatch["Softmax"] = &TFImporter::parseSoftmax; + dispatch["CropAndResize"] = &TFImporter::parseCropAndResize; + dispatch["Mean"] = dispatch["Sum"] = &TFImporter::parseMean; + dispatch["Pack"] = &TFImporter::parsePack; + dispatch["ClipByValue"] = &TFImporter::parseClipByValue; + dispatch["LeakyRelu"] = &TFImporter::parseLeakyRelu; + dispatch["Abs"] = dispatch["Tanh"] = dispatch["Sigmoid"] = dispatch["Relu"] = + dispatch["Elu"] = dispatch["Exp"] = dispatch["Identity"] = dispatch["Relu6"] = &TFImporter::parseActivation; + + return dispatch; } -TFImporter::TFImporter( - Net& net, - const char *dataModel, size_t lenModel, - const char *dataConfig, size_t lenConfig -) - : dstNet(net) +void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer_, LayerParams& layerParams) { - if (dataModel != NULL && 
lenModel > 0) - { - CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from memory (" << lenModel << " bytes)"); - ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin); - } - if (dataConfig != NULL && lenConfig > 0) + tensorflow::NodeDef layer = layer_; + std::string name = layer.name(); + std::string type = layer.op(); + int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + // The first node of dilated convolution subgraph. + // Extract input node, dilation rate and paddings. + std::string input = layer.input(0); + StrIntVector next_layers; + if (type == "SpaceToBatchND" || type == "Pad") { - CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from memory (" << lenConfig << " bytes)"); - ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt); + next_layers = getNextLayers(net, name, "Conv2D"); + if (next_layers.empty()) + next_layers = getNextLayers(net, name, "DepthwiseConv2dNative"); } - populateNet(); -} - -void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) -{ - MatShape shape; - blobShapeFromTensor(tensor, shape); - int dims = (int)shape.size(); - - // TODO: other blob types - CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT || - tensor.dtype() == tensorflow::DT_HALF); - CV_Assert(dims == 4 || dims == 5); - int out_c, input_c, depth, height, width; - if (dims == 4) + if (type == "SpaceToBatchND") { - // REORDER kernel HWIO to OIHW - swap(shape[0], shape[2]); // IWHO - swap(shape[1], shape[3]); // IOHW - swap(shape[0], shape[1]); // OIHW - depth = 1; height = shape[2]; width = shape[3]; + // op: "SpaceToBatchND" + // input: "input" + // input: "SpaceToBatchND/block_shape" + // input: "SpaceToBatchND/paddings" + CV_CheckEQ(num_inputs, 3, ""); + + DictValue dilation = parseDims(getConstBlob(layer, value_id, 1)); + CV_Assert(dilation.size() == 2); + layerParams.set("dilation_h", dilation.get(0)); + layerParams.set("dilation_w", dilation.get(1)); + + Mat paddings; + 
parseTensor(getConstBlob(layer, value_id, 2), paddings); + + // paddings is a 2x2 matrix: [[top, bot], [left, right]] + layerParams.set("pad_h", paddings.at(0)); + layerParams.set("pad_w", paddings.at(2)); + + CV_Assert(next_layers.size() == 1); + layers_to_ignore.insert(next_layers[0].first); + + // FIXIT don't override, rewrite this code + layer = net.node(next_layers[0].second); + name = layer.name(); + type = layer.op(); + num_inputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); } - else + else if (type == "Pad" || type == "MirrorPad") { - // REORDER kernel DHWIO to OIDHW - swap(shape[0], shape[4]); // OHWID - swap(shape[1], shape[3]); // OIWHD - swap(shape[2], shape[4]); // OIDHW - depth = shape[2]; height = shape[3]; width = shape[4]; - } - out_c = shape[0]; input_c = shape[1]; + Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(paddings.type() == CV_32SC1); + if (paddings.total() == 8) + { + // Perhaps, we have NHWC padding dimensions order. + // N H W C + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(2), paddings.at(6)); + std::swap(paddings.at(3), paddings.at(7)); + // N C W H + // 0 1 2 3 4 5 6 7 + std::swap(paddings.at(4), paddings.at(6)); + std::swap(paddings.at(5), paddings.at(7)); + // N C H W + // 0 1 2 3 4 5 6 7 + } - dstBlob.create(shape, CV_32F); + if (next_layers.empty() || paddings.total() != 8 || + paddings.at(4) != paddings.at(5) || + paddings.at(6) != paddings.at(7) || type == "MirrorPad") + { + // Just a single padding layer. 
+ layerParams.set("paddings", DictValue::arrayInt((int*)paddings.data, paddings.total())); + if (type == "MirrorPad") + layerParams.set("type", "reflect"); - Mat tensorContent = getTensorContent(tensor, /*no copy*/false); - int size = tensorContent.total(); - CV_Assert(size == (int)dstBlob.total()); + int id = dstNet.addLayer(name, "Padding", layerParams); + layer_id[name] = id; - float *dstData = dstBlob.ptr(); - const float *data = reinterpret_cast(tensorContent.data); + connect(layer_id, dstNet, parsePin(input), id, 0); + return; + } + else + { + // Merge with subsequent convolutional layer. + CV_Assert(next_layers.size() == 1); - int total = out_c * input_c * depth * height * width; - for (int i_oc = 0; i_oc < out_c; i_oc++) { - for (int i_ic = 0; i_ic < input_c; i_ic++) { - for (int i_d = 0; i_d < depth; i_d++) { - for (int i_h = 0; i_h < height; i_h++) { - for (int i_w = 0; i_w < width; i_w++) { - int dst_i = input_c * depth * height * width * i_oc + - depth * height * width * i_ic + height * width * i_d + width * i_h + i_w; - int src_i = out_c * input_c * width * height * i_d + - out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc; - CV_Assert(dst_i < total); - CV_Assert(src_i < total); - dstData[dst_i] = data[src_i]; - } - } - } + layerParams.set("pad_h", paddings.at(4)); + layerParams.set("pad_w", paddings.at(6)); + + layers_to_ignore.insert(next_layers[0].first); + + // FIXIT don't override, rewrite this code + layer = net.node(next_layers[0].second); + name = layer.name(); + type = layer.op(); + num_inputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); } } -} -void TFImporter::connect(const std::map& layers_name_id_map, Net& network, const Pin& outPin, - const int input_layer_id, const int input_blob_id) -{ - std::map::const_iterator it = layers_name_id_map.find(outPin.name); - if (it == layers_name_id_map.end()) - CV_Error(Error::StsError, 
"Input layer not found: " + outPin.name); + // For the object detection networks, TensorFlow Object Detection API + // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) + // order. We can manage it at DetectionOutput layer parsing predictions + // or shuffle last convolution's weights. + bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && + getLayerAttr(layer, "loc_pred_transposed").b(); - std::vector::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name); - int blobIndex; - if (inpNameIt == netInputsNames.end()) - blobIndex = outPin.blobIndex; - else - blobIndex = inpNameIt - netInputsNames.begin(); - network.connect(it->second, blobIndex, input_layer_id, input_blob_id); -} + layerParams.set("bias_term", false); + layerParams.blobs.resize(1); -void TFImporter::connectToAllBlobs(const std::map& layer_id, Net& network, const Pin& outPin, - const int input_layer_id, const int input_blobs_count) -{ - for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++) - connect(layer_id, network, outPin, input_layer_id, input_blob_id); -} + next_layers = getNextLayers(net, name, "BiasAdd"); + if (next_layers.size() == 1) { + layerParams.set("bias_term", true); + layerParams.blobs.resize(2); -const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map const_layers, - int input_blob_index, int* actual_inp_blob_idx) { - if (input_blob_index == -1) { - for(int i = 0; i < layer.input_size(); i++) { - Pin input = parsePin(layer.input(i)); - if (const_layers.find(input.name) != const_layers.end()) { - if (input_blob_index != -1) - CV_Error(Error::StsError, "More than one input is Const op"); + int weights_layer_index = next_layers[0].second; - input_blob_index = i; + blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); + ExcludeLayer(net, weights_layer_index, 0, false); + 
layers_to_ignore.insert(next_layers[0].first); + + // Shuffle bias from yxYX to xyXY. + if (locPredTransposed) + { + const int numWeights = layerParams.blobs[1].total(); + float* biasData = reinterpret_cast(layerParams.blobs[1].data); + CV_Assert(numWeights % 4 == 0); + for (int i = 0; i < numWeights; i += 2) + { + std::swap(biasData[i], biasData[i + 1]); } } } - if (input_blob_index == -1) - CV_Error(Error::StsError, "Const input blob for weights not found"); - - Pin kernel_inp = parsePin(layer.input(input_blob_index)); - if (const_layers.find(kernel_inp.name) == const_layers.end()) - CV_Error(Error::StsError, "Input [" + layer.input(input_blob_index) + - "] for node [" + layer.name() + "] not found"); - if (kernel_inp.blobIndex != 0) - CV_Error(Error::StsError, "Unsupported kernel input"); - - if(actual_inp_blob_idx) { - *actual_inp_blob_idx = input_blob_index; - } - - int nodeIdx = const_layers.at(kernel_inp.name); - if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name) + int kernelTensorInpId = -1; + const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernelTensorInpId); + const String kernelTensorName = layer.input(kernelTensorInpId); + std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); + if (sharedWeightsIt == sharedWeights.end()) { - return netBin.node(nodeIdx).attr().at("value").tensor(); + kernelFromTensor(kernelTensor, layerParams.blobs[0]); + releaseTensor(const_cast(&kernelTensor)); + + int* kshape = layerParams.blobs[0].size.p; + const int outCh = kshape[0]; + const int inCh = kshape[1]; + const int height = kshape[2]; + const int width = kshape[3]; + if (type == "DepthwiseConv2dNative") + { + CV_Assert(!locPredTransposed); + const int chMultiplier = kshape[0]; + + Mat copy = layerParams.blobs[0].clone(); + float* src = (float*)copy.data; + float* dst = (float*)layerParams.blobs[0].data; + for (int i = 0; i < chMultiplier; ++i) + for (int j = 0; j < inCh; ++j) + for (int 
s = 0; s < height * width; ++s) + { + int src_i = (i * inCh + j) * height * width + s; + int dst_i = (j * chMultiplier + i) * height* width + s; + dst[dst_i] = src[src_i]; + } + // TODO Use reshape instead + kshape[0] = inCh * chMultiplier; + kshape[1] = 1; + size_t* kstep = layerParams.blobs[0].step.p; + kstep[0] = kstep[1]; // fix steps too + } + + // Shuffle output channels from yxYX to xyXY. + if (locPredTransposed) + { + const int slice = height * width * inCh; + for (int i = 0; i < outCh; i += 2) + { + cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr(i)); + cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr(i + 1)); + std::swap_ranges(src.begin(), src.end(), dst.begin()); + } + } + sharedWeights[kernelTensorName] = layerParams.blobs[0]; } else { - CV_Assert_N(nodeIdx < netTxt.node_size(), - netTxt.node(nodeIdx).name() == kernel_inp.name); - return netTxt.node(nodeIdx).attr().at("value").tensor(); + layerParams.blobs[0] = sharedWeightsIt->second; } -} + Mat weights = layerParams.blobs[0]; + layerParams.set("kernel_size", DictValue::arrayInt(&weights.size[2], weights.dims - 2)); -static void addConstNodes(tensorflow::GraphDef& net, std::map& const_layers, - std::set& layers_to_ignore) -{ - CV_LOG_DEBUG(NULL, "DNN/TF: addConstNodes(): handling " << net.node_size() << " nodes..."); - for (int li = 0; li < net.node_size(); li++) - { - const tensorflow::NodeDef &layer = net.node(li); - String name = layer.name(); - String type = layer.op(); + layerParams.set("num_output", layerParams.blobs[0].size[0]); - //CV_LOG_DEBUG(NULL, "DNN/TF: layer_id=" << li << " - '" << name << "' @ " << type); + setStrides(layerParams, layer); + if (!layerParams.has("pad_w") && !layerParams.has("pad_h")) + setPadding(layerParams, layer); - try - { - if (type == "Dequantize") - { - // Example of Dequantize node: - // name: "conv2d_1/bias" - // op: "Dequantize" - // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8) - // input: "conv2d_1/bias_quantized_min" - // 
input: "conv2d_1/bias_quantized_max" - // attr { key: "T" value { type: DT_QUINT8 } } (quantized type) - // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique) - CV_CheckEQ(layer.input_size(), 3, "Dequantize: 3 inputs is supported only"); - for (int i = 0; i < 3; ++i) - CV_Assert(const_layers.find(layer.input(i)) != const_layers.end()); - CV_Assert(hasLayerAttr(layer, "mode") && - getLayerAttr(layer, "mode").s() == "MIN_FIRST"); + // The final node of dilated convolution subgraph. + next_layers = getNextLayers(net, name, "BatchToSpaceND"); + if (!next_layers.empty()) + { + CV_Assert(next_layers.size() == 1); + ExcludeLayer(net, next_layers[0].second, 0, false); + layers_to_ignore.insert(next_layers[0].first); + } - int tensorId = const_layers[layer.input(0)]; - int minId = const_layers[layer.input(1)]; - int maxId = const_layers[layer.input(2)]; + int id = dstNet.addLayer(name, "Convolution", layerParams); + layer_id[name] = id; - tensorflow::TensorProto* tensor = net.mutable_node(tensorId) - ->mutable_attr()->at("value") - .mutable_tensor(); - CV_CheckEQ((int)tensor->dtype(), (int)tensorflow::DT_QUINT8, ""); + // one input only + connect(layer_id, dstNet, parsePin(input), id, 0); - Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor()); - Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor()); - CV_CheckEQ(qMin.total(), (size_t)1, ""); - CV_CheckTypeEQ(qMin.type(), CV_32FC1, ""); - CV_CheckEQ(qMax.total(), (size_t)1, ""); - CV_CheckTypeEQ(qMax.type(), CV_32FC1, ""); - Mat content = getTensorContent(*tensor); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN) + data_layouts[name] = DATA_LAYOUT_NHWC; +} - float minVal = qMin.at(0); - float rangeScale = (qMax.at(0) - minVal) / 255; - CV_Assert(rangeScale >= 0); - content.convertTo(content, CV_32FC1, rangeScale, - rangeScale * cvRound(minVal / rangeScale)); +void TFImporter::parseBias(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, 
LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); - tensor->set_dtype(tensorflow::DT_FLOAT); - tensor->set_tensor_content(content.data, content.total() * content.elemSize1()); + CV_CheckGT(num_inputs, 0, ""); + bool haveConst = false; + for(int ii = 0; !haveConst && ii < num_inputs; ++ii) + { + Pin input = parsePin(layer.input(ii)); + haveConst = value_id.find(input.name) != value_id.end(); + } + CV_Assert(!haveConst || num_inputs == 2); - net.mutable_node(tensorId)->set_name(name); - CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second); - layers_to_ignore.insert(name); - continue; - } - else if (type != "Const") - continue; // only Const parameters are supported + if (haveConst) + { + Mat values = getTensorContent(getConstBlob(layer, value_id)); + CV_Assert(values.type() == CV_32FC1); + if (type == "Sub") + values *= -1.0f; - if (layer.attr().find("value") != layer.attr().end()) - { - CV_Assert(const_layers.insert(std::make_pair(name, li)).second); - } - layers_to_ignore.insert(name); + int id; + if (values.total() == 1) // is a scalar. + { + layerParams.set("shift", values.at(0)); + id = dstNet.addLayer(name, "Power", layerParams); } - catch (const std::exception& e) + else // is a vector { - CV_LOG_ERROR(NULL, "DNN/TF: Can't handle node='" << name << "'. 
Exception: " << e.what()); - throw; + layerParams.blobs.resize(1, values); + id = dstNet.addLayer(name, "Shift", layerParams); + } + layer_id[name] = id; + + // one input only + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + } + else + { + layerParams.set("operation", "sum"); + if (type == "Sub") + { + static float subCoeffs[] = {1.f, -1.f}; + layerParams.set("coeff", DictValue::arrayReal(subCoeffs, 2)); + } + + int id = dstNet.addLayer(name, "Eltwise", layerParams); + layer_id[name] = id; + + for (int ii = 0; ii < num_inputs; ii++) + { + Pin inp = parsePin(layer.input(ii)); + if (layer_id.find(inp.name) == layer_id.end()) + CV_Error(Error::StsError, "Input layer not found: " + inp.name); + connect(layer_id, dstNet, inp, id, ii); } } - CV_LOG_DEBUG(NULL, "DNN/TF: layers_to_ignore.size() = " << layers_to_ignore.size()); } -// If all inputs of specific layer have the same data layout we can say that -// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise. -DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) +void TFImporter::parseMatMul(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { - DataLayout layout = getDataLayout(layer); - if (layout != DATA_LAYOUT_UNKNOWN) + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 2, ""); + + // For the object detection networks, TensorFlow Object Detection API + // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) + // order. We can manage it at DetectionOutput layer parsing predictions + // or shuffle last Faster-RCNN's matmul weights. 
+ bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && + getLayerAttr(layer, "loc_pred_transposed").b(); + + layerParams.set("bias_term", false); + layerParams.blobs.resize(1); + + StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); // FIXIT Use layers fusion instead + if (next_layers.empty()) { - CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)"); - return layout; + next_layers = getNextLayers(net, name, "Add"); } + if (next_layers.size() == 1) { + layerParams.set("bias_term", true); + layerParams.blobs.resize(2); - // Determine layout by layer's inputs - for (int i = 0, n = layer.input_size(); i < n; ++i) - { - std::map::const_iterator it = data_layouts.find(getNodeName(layer.input(i))); - if (it != data_layouts.end()) + int weights_layer_index = next_layers[0].second; + blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); + ExcludeLayer(net, weights_layer_index, 0, false); + layers_to_ignore.insert(next_layers[0].first); + + if (locPredTransposed) { - if (layout != DATA_LAYOUT_UNKNOWN) + const int numWeights = layerParams.blobs[1].total(); + float* biasData = reinterpret_cast(layerParams.blobs[1].data); + CV_Assert(numWeights % 4 == 0); + for (int i = 0; i < numWeights; i += 2) { - if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN) - return DATA_LAYOUT_UNKNOWN; + std::swap(biasData[i], biasData[i + 1]); } - else - layout = it->second; } } - if (layout != DATA_LAYOUT_UNKNOWN) - { - CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)"); - return layout; - } - - // Determine layout by layer's consumers recursively. 
- std::map::const_iterator it = data_layouts.find(layer.name()); - CV_Assert(it != data_layouts.end()); - return it->second; -} - -void TFImporter::populateNet() -{ - CV_Assert(netBin.ByteSize() || netTxt.ByteSize()); - - CV_LOG_INFO(NULL, "DNN/TF: parsing model" - << (netBin.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netBin.versions().producer(), (int)netBin.versions().min_consumer()) : cv::String(" (N/A version info)")) - << ". Number of nodes = " << netBin.node_size() - ); - - if (netTxt.ByteSize()) + int kernel_blob_index = -1; + const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index); + const String kernelTensorName = layer.input(kernel_blob_index); + std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); + if (sharedWeightsIt == sharedWeights.end()) { - CV_LOG_INFO(NULL, "DNN/TF: parsing config" - << (netTxt.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netTxt.versions().producer(), (int)netTxt.versions().min_consumer()) : cv::String(" (N/A version info)")) - << ". 
Number of nodes = " << netTxt.node_size() - ); - - RemoveIdentityOps(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); - RemoveIdentityOps(netTxt); - CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(config) => " << netTxt.node_size() << " nodes"); - - sortByExecutionOrder(netTxt); - CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(config) => " << netTxt.node_size() << " nodes"); + blobFromTensor(kernelTensor, layerParams.blobs[0]); + releaseTensor(const_cast(&kernelTensor)); + sharedWeights[kernelTensorName] = layerParams.blobs[0]; } else { - removePhaseSwitches(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: removePhaseSwitches(model) => " << netBin.node_size() << " nodes"); + layerParams.blobs[0] = sharedWeightsIt->second; + } - RemoveIdentityOps(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed + Mat data = layerParams.blobs[0].t(); + layerParams.blobs[0] = data.clone(); + } - simplifySubgraphs(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: simplifySubgraphs(model) => " << netBin.node_size() << " nodes"); - sortByExecutionOrder(netBin); - CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(model) => " << netBin.node_size() << " nodes"); + layerParams.set("num_output", layerParams.blobs[0].size[0]); + if (locPredTransposed) + { + CV_Assert(layerParams.blobs[0].dims == 2); + for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2) + { + cv::Mat src = layerParams.blobs[0].row(i); + cv::Mat dst = layerParams.blobs[0].row(i + 1); + std::swap_ranges(src.begin(), src.end(), dst.begin()); + } } - tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; + int id = dstNet.addLayer(name, "InnerProduct", layerParams); + layer_id[name] = id; - int layersSize = net.node_size(); + // one input only + int input_blob_index = kernel_blob_index == 0 ? 
1 : 0; + connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0); + data_layouts[name] = DATA_LAYOUT_PLANAR; +} - // Pre-fill data layouts where they are set explicitly. - // Assuming that nodes are in topological order - for (int i = layersSize - 1; i >= 0; --i) +void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + Pin inpId = parsePin(layer.input(0)); + DataLayout inpLayout = getDataLayout(layer.input(0), data_layouts); + // There are two possible implementations: reshape an input using + // predefined sizes or use a second input blob as a source of new shape. + if (value_id.find(layer.input(1)) != value_id.end()) { - const tensorflow::NodeDef& layer = net.node(i); - std::string name = layer.name(); - - CV_LOG_DEBUG(NULL, "DNN/TF: node(" << i << " - '" << name << "') propagating layout..."); - - try + Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1)); + int newShapeSize = newShape.total(); + bool hasSwap = false; + if (newShapeSize == 4 && hasAllOnes(newShape, 0, 2)) { - DataLayout layout = getDataLayout(layer); - std::map::iterator it = data_layouts.find(name); - if (it != data_layouts.end()) + // NHWC->NCHW + std::swap(*newShape.ptr(0, 2), *newShape.ptr(0, 3)); + std::swap(*newShape.ptr(0, 1), *newShape.ptr(0, 2)); + hasSwap = true; + } + if (inpLayout == DATA_LAYOUT_NHWC) + { + if (newShapeSize >= 2 || newShape.at(1) == 1) { - if (layout != DATA_LAYOUT_UNKNOWN) + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. 
+ addPermuteLayer(order, name + "/nhwc", inpId); + if (newShapeSize < 4) { - if (it->second == DATA_LAYOUT_UNKNOWN) - it->second = layout; - else if (it->second != layout) - { - it->second = DATA_LAYOUT_UNKNOWN; - layout = DATA_LAYOUT_UNKNOWN; - } + inpLayout = DATA_LAYOUT_NCHW; } else - layout = it->second; - } - else - data_layouts[name] = layout; - - // Specify input layers to have the same data layout. - for (int j = 0; j < layer.input_size(); ++j) - { - name = getNodeName(layer.input(j)); - it = data_layouts.find(name); - if (it != data_layouts.end()) { - if (layout != DATA_LAYOUT_UNKNOWN) - { - if (it->second == DATA_LAYOUT_UNKNOWN) - it->second = layout; - else if (it->second != layout) - it->second = DATA_LAYOUT_UNKNOWN; - } + inpLayout = DATA_LAYOUT_NHWC; } - else - data_layouts[name] = layout; } } - catch (const std::exception& e) - { - CV_LOG_ERROR(NULL, "DNN/TF: Can't propagate layout for node='" << name << "'. Exception: " << e.what()); - throw; - } - } - - addConstNodes(netBin, value_id, layers_to_ignore); - addConstNodes(netTxt, value_id, layers_to_ignore); + layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShapeSize)); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; - for (int li = 0; li < layersSize; li++) - { - const tensorflow::NodeDef& layer = net.node(li); + // one input only + connect(layer_id, dstNet, inpId, id, 0); + inpId = Pin(name); - const std::string name = layer.name(); - const std::string type = layer.op(); - const int ninputs = layer.input_size(); - CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs"); + if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) && + newShapeSize == 4 && !hasSwap) + { + int order[] = {0, 3, 1, 2}; // Transform back to OpenCV's NCHW. 
+ addPermuteLayer(order, name + "/nchw", inpId); + inpLayout = DATA_LAYOUT_NCHW; + } - parseNode(layer); + data_layouts[name] = newShapeSize == 2 ? DATA_LAYOUT_PLANAR : inpLayout; } - - for (size_t i = 0; i < netInputsNames.size(); i++) + else { - CV_LOG_DEBUG(NULL, "DNN/TF: Model input: " << i << " - '" << netInputsNames[i] << "'"); - CV_Assert(!netInputsNames[i].empty()); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, inpId, id, 0); + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); + data_layouts[name] = inpLayout; } - dstNet.setInputsNames(netInputsNames); - CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed ====================="); } -void TFImporter::addPermuteLayer(const int* order, const std::string& permName, Pin& inpId) +void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { - LayerParams permLP; - permLP.set("order", DictValue::arrayInt(order, 4)); - CV_Assert(layer_id.find(permName) == layer_id.end()); - int permId = dstNet.addLayer(permName, "Permute", permLP); - layer_id[permName] = permId; - connect(layer_id, dstNet, inpId, permId, 0); - inpId = Pin(permName); + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + Pin inpId = parsePin(layer.input(0)); + int inpLayout = getDataLayout(layer.input(0), data_layouts); + if (type == "Squeeze") + { + CV_Assert(hasLayerAttr(layer, "squeeze_dims")); + const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims"); + std::vector dimsVector(dims.list().i_size()); + for (int i = 0; i < dimsVector.size(); ++i) + dimsVector[i] = dims.list().i(i); + + // Flatten layer can squeeze dimensions range into one. 
+ std::sort(dimsVector.begin(), dimsVector.end()); + for (int i = 1; i < dimsVector.size(); ++i) + { + if (dimsVector[i] != dimsVector[i - 1] + 1) + CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration"); + } + int start = dimsVector.front() - 1, end = dimsVector.back(); + if (start == -1 && end == 0) // squeeze 0th dimension + { + start = 0; + end = 1; + } + layerParams.set("axis", start); + layerParams.set("end_axis", end); + } + if (inpLayout == DATA_LAYOUT_NHWC) + { + LayerParams permLP; + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + permLP.set("order", DictValue::arrayInt(order, 4)); + + std::string permName = name + "/nchw"; + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, inpId, permId, 0); + inpId = Pin(permName); + } + int id = dstNet.addLayer(name, "Flatten", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, inpId, id, 0); + data_layouts[name] = DATA_LAYOUT_PLANAR; } -void TFImporter::parseNode(const tensorflow::NodeDef& layer_) +void TFImporter::parseTranspose(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { - tensorflow::NodeDef layer = layer_; - - tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; - - /*const*/ std::string name = layer.name(); - /*const*/ std::string type = layer.op(); - /*const*/ int num_inputs = layer.input_size(); - - try + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + Mat perm = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(perm.type() == CV_32SC1); + int* permData = (int*)perm.data; + if (perm.total() == 4) { - LayerParams layerParams; - - if (layers_to_ignore.find(name) != layers_to_ignore.end()) + // Only NHWC <-> NCHW permutations are allowed. OpenCV is always + // keep NCHW layout this way. 
+ int inpLayout = getDataLayout(layer.input(0), data_layouts); + std::string type = "Identity"; + if (inpLayout == DATA_LAYOUT_NHWC) { - CV_LOG_DEBUG(NULL, "DNN/TF: ignored"); - return; + if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2) + { + // in TensorFlow: NHWC->NCHW + // in OpenCV: NCHW->NCHW + data_layouts[name] = DATA_LAYOUT_NCHW; + } + else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) + { + // in TensorFlow: NHWC->NHWC + // in OpenCV: NCHW->NCHW + data_layouts[name] = DATA_LAYOUT_NHWC; + } + else if (permData[0] == 0 && permData[1] == 3 && permData[2] == 2 && permData[3] == 1) + { + // in TensorFlow: NHWC->NCWH + // in OpenCV: NCHW->NCWH + int permData[] = {0, 1, 3, 2}; + layerParams.set("order", DictValue::arrayInt(permData, perm.total())); + data_layouts[name] = DATA_LAYOUT_NCHW; // we keep track NCHW because channels position only matters + type = "Permute"; + } + else + CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); } - - DataLayout predictedLayout = predictOutputDataLayout(layer); - data_layouts[name] = predictedLayout; - - if (type == "Conv2D" || type == "SpaceToBatchND" || type == "DepthwiseConv2dNative" || type == "Pad" || type == "MirrorPad" || type == "Conv3D") + else if (inpLayout == DATA_LAYOUT_NCHW) { - CV_CheckGT(num_inputs, 0, ""); - // The first node of dilated convolution subgraph. - // Extract input node, dilation rate and paddings. 
- std::string input = layer.input(0); - StrIntVector next_layers; - if (type == "SpaceToBatchND" || type == "Pad") + if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1) { - next_layers = getNextLayers(net, name, "Conv2D"); - if (next_layers.empty()) - next_layers = getNextLayers(net, name, "DepthwiseConv2dNative"); + // in TensorFlow: NCHW->NHWC + // in OpenCV: NCHW->NCHW + data_layouts[name] = DATA_LAYOUT_NHWC; } - - if (type == "SpaceToBatchND") + else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) { - // op: "SpaceToBatchND" - // input: "input" - // input: "SpaceToBatchND/block_shape" - // input: "SpaceToBatchND/paddings" - CV_CheckEQ(num_inputs, 3, ""); + // in TensorFlow: NCHW->NCHW + // in OpenCV: NCHW->NCHW + data_layouts[name] = DATA_LAYOUT_NCHW; + } + else + CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); + } + int id = dstNet.addLayer(name, type, layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + } + else + { + layerParams.set("order", DictValue::arrayInt(permData, perm.total())); - DictValue dilation = parseDims(getConstBlob(layer, value_id, 1)); - CV_Assert(dilation.size() == 2); - layerParams.set("dilation_h", dilation.get(0)); - layerParams.set("dilation_w", dilation.get(1)); + int id = dstNet.addLayer(name, "Permute", layerParams); + layer_id[name] = id; - Mat paddings; - parseTensor(getConstBlob(layer, value_id, 2), paddings); + // one input only + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + data_layouts[name] = DATA_LAYOUT_UNKNOWN; + } +} - // paddings is a 2x2 matrix: [[top, bot], [left, right]] - layerParams.set("pad_h", paddings.at(0)); - layerParams.set("pad_w", paddings.at(2)); +void TFImporter::parseConstant(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ +} - CV_Assert(next_layers.size() == 1); - 
layers_to_ignore.insert(next_layers[0].first); +void TFImporter::parseLrn(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - // FIXIT don't override, rewrite this code - layer = net.node(next_layers[0].second); - name = layer.name(); - type = layer.op(); - num_inputs = layer.input_size(); - CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); - } - else if (type == "Pad" || type == "MirrorPad") - { - Mat paddings = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(paddings.type() == CV_32SC1); - if (paddings.total() == 8) - { - // Perhaps, we have NHWC padding dimensions order. - // N H W C - // 0 1 2 3 4 5 6 7 - std::swap(paddings.at(2), paddings.at(6)); - std::swap(paddings.at(3), paddings.at(7)); - // N C W H - // 0 1 2 3 4 5 6 7 - std::swap(paddings.at(4), paddings.at(6)); - std::swap(paddings.at(5), paddings.at(7)); - // N C H W - // 0 1 2 3 4 5 6 7 - } + CV_CheckGT(num_inputs, 0, ""); + if(hasLayerAttr(layer, "alpha")) { + layerParams.set("alpha", getLayerAttr(layer, "alpha").f()); + } + if(hasLayerAttr(layer, "beta")) { + layerParams.set("beta", getLayerAttr(layer, "beta").f()); + } + if(hasLayerAttr(layer, "depth_radius")) { + int radius = (int)getLayerAttr(layer, "depth_radius").i(); + layerParams.set("local_size", 2*radius + 1); + } + if(hasLayerAttr(layer, "bias")) { + layerParams.set("bias", getLayerAttr(layer, "bias").f()); + } + layerParams.set("norm_by_size", false); - if (next_layers.empty() || paddings.total() != 8 || - paddings.at(4) != paddings.at(5) || - paddings.at(6) != paddings.at(7) || type == "MirrorPad") - { - // Just a single padding layer. 
- layerParams.set("paddings", DictValue::arrayInt((int*)paddings.data, paddings.total())); - if (type == "MirrorPad") - layerParams.set("type", "reflect"); + int id = dstNet.addLayer(name, "LRN", layerParams); + layer_id[name] = id; - int id = dstNet.addLayer(name, "Padding", layerParams); - layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - connect(layer_id, dstNet, parsePin(input), id, 0); - return; - } - else - { - // Merge with subsequent convolutional layer. - CV_Assert(next_layers.size() == 1); +void TFImporter::parseConcat(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); - layerParams.set("pad_h", paddings.at(4)); - layerParams.set("pad_w", paddings.at(6)); + CV_CheckGT(num_inputs, 0, ""); + int axisId = (type == "Concat" ? 0 : num_inputs - 1); + int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0); - layers_to_ignore.insert(next_layers[0].first); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + axis = toNCHW(axis); + else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC) + axis = toNCDHW(axis); + layerParams.set("axis", axis); - // FIXIT don't override, rewrite this code - layer = net.node(next_layers[0].second); - name = layer.name(); - type = layer.op(); - num_inputs = layer.input_size(); - CV_LOG_DEBUG(NULL, "DNN/TF: switched to layer " << name << " @ " << type << ") with " << num_inputs << " inputs"); - } - } + // input(0) or input(n-1) is concat_dim + int from = (type == "Concat" ? 1 : 0); + int to = (type == "Concat" ? num_inputs : num_inputs - 1); - // For the object detection networks, TensorFlow Object Detection API - // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) - // order. 
We can manage it at DetectionOutput layer parsing predictions - // or shuffle last convolution's weights. - bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && - getLayerAttr(layer, "loc_pred_transposed").b(); + for (int ii = from; ii < to; ii++) + { + Pin inp = parsePin(layer.input(ii)); + if (layer_id.find(inp.name) == layer_id.end()) + { + // There are constant inputs. + LayerParams lp; + lp.name = inp.name; + lp.type = "Const"; + lp.blobs.resize(1); + blobFromTensor(getConstBlob(layer, value_id, ii), lp.blobs.back()); + CV_Assert_N(!lp.blobs[0].empty(), lp.blobs[0].type() == CV_32F); + + int constInpId = dstNet.addLayer(lp.name, lp.type, lp); + layer_id[lp.name] = constInpId; + } + } - layerParams.set("bias_term", false); - layerParams.blobs.resize(1); + int id = dstNet.addLayer(name, "Concat", layerParams); + layer_id[name] = id; - next_layers = getNextLayers(net, name, "BiasAdd"); - if (next_layers.size() == 1) { - layerParams.set("bias_term", true); - layerParams.blobs.resize(2); + for (int ii = from; ii < to; ii++) + { + Pin inp = parsePin(layer.input(ii)); + if (layer_id.find(inp.name) == layer_id.end()) + CV_Error(Error::StsError, "Input layer not found: " + inp.name); + connect(layer_id, dstNet, inp, id, ii - from); + } +} - int weights_layer_index = next_layers[0].second; +void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); - ExcludeLayer(net, weights_layer_index, 0, false); - layers_to_ignore.insert(next_layers[0].first); + CV_CheckGT(num_inputs, 0, ""); + layerParams.set("pool", "max"); - // Shuffle bias from yxYX to xyXY. 
- if (locPredTransposed) - { - const int numWeights = layerParams.blobs[1].total(); - float* biasData = reinterpret_cast(layerParams.blobs[1].data); - CV_Assert(numWeights % 4 == 0); - for (int i = 0; i < numWeights; i += 2) - { - std::swap(biasData[i], biasData[i + 1]); - } - } - } + setKSize(layerParams, layer); + setStrides(layerParams, layer); + setPadding(layerParams, layer); + // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU + layerParams.set("ceil_mode", false); - int kernelTensorInpId = -1; - const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernelTensorInpId); - const String kernelTensorName = layer.input(kernelTensorInpId); - std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); - if (sharedWeightsIt == sharedWeights.end()) - { - kernelFromTensor(kernelTensor, layerParams.blobs[0]); - releaseTensor(const_cast(&kernelTensor)); - - int* kshape = layerParams.blobs[0].size.p; - const int outCh = kshape[0]; - const int inCh = kshape[1]; - const int height = kshape[2]; - const int width = kshape[3]; - if (type == "DepthwiseConv2dNative") - { - CV_Assert(!locPredTransposed); - const int chMultiplier = kshape[0]; - - Mat copy = layerParams.blobs[0].clone(); - float* src = (float*)copy.data; - float* dst = (float*)layerParams.blobs[0].data; - for (int i = 0; i < chMultiplier; ++i) - for (int j = 0; j < inCh; ++j) - for (int s = 0; s < height * width; ++s) - { - int src_i = (i * inCh + j) * height * width + s; - int dst_i = (j * chMultiplier + i) * height* width + s; - dst[dst_i] = src[src_i]; - } - // TODO Use reshape instead - kshape[0] = inCh * chMultiplier; - kshape[1] = 1; - size_t* kstep = layerParams.blobs[0].step.p; - kstep[0] = kstep[1]; // fix steps too - } + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; - // Shuffle output channels from yxYX to xyXY. 
- if (locPredTransposed) - { - const int slice = height * width * inCh; - for (int i = 0; i < outCh; i += 2) - { - cv::Mat src(1, slice, CV_32F, layerParams.blobs[0].ptr(i)); - cv::Mat dst(1, slice, CV_32F, layerParams.blobs[0].ptr(i + 1)); - std::swap_ranges(src.begin(), src.end(), dst.begin()); - } - } - sharedWeights[kernelTensorName] = layerParams.blobs[0]; - } - else - { - layerParams.blobs[0] = sharedWeightsIt->second; - } - Mat weights = layerParams.blobs[0]; - layerParams.set("kernel_size", DictValue::arrayInt(&weights.size[2], weights.dims - 2)); + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - layerParams.set("num_output", layerParams.blobs[0].size[0]); +void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - setStrides(layerParams, layer); - if (!layerParams.has("pad_w") && !layerParams.has("pad_h")) - setPadding(layerParams, layer); + CV_CheckGT(num_inputs, 0, ""); + layerParams.set("pool", "ave"); + layerParams.set("ave_pool_padded_area", false); + setKSize(layerParams, layer); + setStrides(layerParams, layer); + setPadding(layerParams, layer); - // The final node of dilated convolution subgraph. 
- next_layers = getNextLayers(net, name, "BatchToSpaceND"); - if (!next_layers.empty()) - { - CV_Assert(next_layers.size() == 1); - ExcludeLayer(net, next_layers[0].second, 0, false); - layers_to_ignore.insert(next_layers[0].first); - } + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; - int id = dstNet.addLayer(name, "Convolution", layerParams); - layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - // one input only - connect(layer_id, dstNet, parsePin(input), id, 0); +void TFImporter::parseMaxPoolGrad(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + CV_CheckEQ(num_inputs, 3, ""); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_UNKNOWN) - data_layouts[name] = DATA_LAYOUT_NHWC; - } - else if (type == "BiasAdd" || type == "Add" || type == "AddV2" || type == "Sub" || type=="AddN") - { - CV_CheckGT(num_inputs, 0, ""); - bool haveConst = false; - for(int ii = 0; !haveConst && ii < num_inputs; ++ii) - { - Pin input = parsePin(layer.input(ii)); - haveConst = value_id.find(input.name) != value_id.end(); - } - CV_Assert(!haveConst || num_inputs == 2); + layerParams.set("pool_k_h", 0); + layerParams.set("pool_k_w", 0); + layerParams.set("pool_stride_h", 0); + layerParams.set("pool_stride_w", 0); + layerParams.set("pool_pad_h", 0); + layerParams.set("pool_pad_w", 0); - if (haveConst) - { - Mat values = getTensorContent(getConstBlob(layer, value_id)); - CV_Assert(values.type() == CV_32FC1); - if (type == "Sub") - values *= -1.0f; + int id = dstNet.addLayer(name, "MaxUnpool", layerParams); + layer_id[name] = id; - int id; - if (values.total() == 1) // is a scalar. 
- { - layerParams.set("shift", values.at(0)); - id = dstNet.addLayer(name, "Power", layerParams); - } - else // is a vector - { - layerParams.blobs.resize(1, values); - id = dstNet.addLayer(name, "Shift", layerParams); - } - layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); + connect(layer_id, dstNet, parsePin(layer.input(1) + ":1"), id, 1); + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 2); +} - // one input only - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - } - else - { - layerParams.set("operation", "sum"); - if (type == "Sub") - { - static float subCoeffs[] = {1.f, -1.f}; - layerParams.set("coeff", DictValue::arrayReal(subCoeffs, 2)); - } +void TFImporter::parsePlaceholder(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); - int id = dstNet.addLayer(name, "Eltwise", layerParams); - layer_id[name] = id; + DataLayout predictedLayout = data_layouts[name]; - for (int ii = 0; ii < num_inputs; ii++) - { - Pin inp = parsePin(layer.input(ii)); - if (layer_id.find(inp.name) == layer_id.end()) - CV_Error(Error::StsError, "Input layer not found: " + inp.name); - connect(layer_id, dstNet, inp, id, ii); - } - } + if (!hasLayerAttr(layer, "dtype") || + getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag. 
+ { + netInputsNames.push_back(name); + layer_id[name] = 0; + } + tensorflow::TensorShapeProto shape; + if (hasLayerAttr(layer, "shape")) + shape = getLayerAttr(layer, "shape").shape(); + else if (hasLayerAttr(layer, "_output_shapes")) + { + tensorflow::AttrValue_ListValue list = getLayerAttr(layer, "_output_shapes").list(); + if (list.shape_size()) + shape = list.shape()[0]; + } + if (shape.dim_size()) + { + MatShape dims(shape.dim_size()); + for (int i = 0; i < dims.size(); ++i) + dims[i] = shape.dim(i).size(); + if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC) + { + std::swap(dims[1], dims[3]); // NHWC->NCWH + std::swap(dims[2], dims[3]); // NCWH->NCHW + if (dims[0] == -1) // It's OK to have undetermined batch size + dims[0] = 1; } - else if (type == "MatMul") + bool hasNeg = false; + for (int i = 0; i < dims.size() && !hasNeg; ++i) { - CV_CheckEQ(num_inputs, 2, ""); + hasNeg = dims[i] < 0; + } + if (!hasNeg) + netInputShapes.push_back(dims); + } +} - // For the object detection networks, TensorFlow Object Detection API - // predicts deltas for bounding boxes in yxYX (ymin, xmin, ymax, xmax) - // order. We can manage it at DetectionOutput layer parsing predictions - // or shuffle last Faster-RCNN's matmul weights. 
- bool locPredTransposed = hasLayerAttr(layer, "loc_pred_transposed") && - getLayerAttr(layer, "loc_pred_transposed").b(); +void TFImporter::parseSplit(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // TODO: determining axis index remapping by input dimensions order of input blob + // TODO: slicing input may be Const op + // TODO: slicing kernels for convolutions - in current implementation it is impossible + // TODO: add parsing num of slices parameter + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 2, ""); + // num_split + // 1st blob is dims tensor + int axis = getConstBlob(layer, value_id, 0).int_val().Get(0); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + axis = toNCHW(axis); + layerParams.set("axis", axis); + + if (hasLayerAttr(layer, "num_split")) + layerParams.set("num_split", getLayerAttr(layer, "num_split").i()); + + int id = dstNet.addLayer(name, "Slice", layerParams); + layer_id[name] = id; + + // one input only + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); +} - layerParams.set("bias_term", false); - layerParams.blobs.resize(1); +void TFImporter::parseSlice(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "Slice" + // input: "input_node" + // input: "Slice/begin" + // input: "Slice/size" + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 3, ""); + Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_Assert_N(!begins.empty(), !sizes.empty()); + CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); + CV_CheckTypeEQ(sizes.type(), CV_32SC1, ""); + + if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + { + // Swap NHWC parameters' order to NCHW. 
+ std::swap(*begins.ptr(0, 2), *begins.ptr(0, 3)); + std::swap(*begins.ptr(0, 1), *begins.ptr(0, 2)); + std::swap(*sizes.ptr(0, 2), *sizes.ptr(0, 3)); + std::swap(*sizes.ptr(0, 1), *sizes.ptr(0, 2)); + } + layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); + layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total())); - StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); // FIXIT Use layers fusion instead - if (next_layers.empty()) - { - next_layers = getNextLayers(net, name, "Add"); - } - if (next_layers.size() == 1) { - layerParams.set("bias_term", true); - layerParams.blobs.resize(2); + int id = dstNet.addLayer(name, "Slice", layerParams); + layer_id[name] = id; - int weights_layer_index = next_layers[0].second; - blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); - ExcludeLayer(net, weights_layer_index, 0, false); - layers_to_ignore.insert(next_layers[0].first); + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); +} - if (locPredTransposed) - { - const int numWeights = layerParams.blobs[1].total(); - float* biasData = reinterpret_cast(layerParams.blobs[1].data); - CV_Assert(numWeights % 4 == 0); - for (int i = 0; i < numWeights; i += 2) - { - std::swap(biasData[i], biasData[i + 1]); - } - } - } +void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 4, ""); + Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat ends = getTensorContent(getConstBlob(layer, value_id, 2)); + Mat strides = getTensorContent(getConstBlob(layer, value_id, 3)); + CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); + CV_CheckTypeEQ(ends.type(), CV_32SC1, ""); + CV_CheckTypeEQ(strides.type(), CV_32SC1, ""); + const int num = begins.total(); + CV_Assert_N(num == ends.total(), num == 
strides.total()); + + int end_mask = getLayerAttr(layer, "end_mask").i(); + for (int i = 0; i < num; ++i) + { + if (ends.at(i) < 0) + ends.at(i) -= 1; + if (end_mask & (1 << i)) + ends.at(i) = -1; + if (strides.at(i) != 1) + CV_Error(Error::StsNotImplemented, + format("StridedSlice with stride %d", strides.at(i))); + } + if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + { + // Swap NHWC parameters' order to NCHW. + std::swap(begins.at(2), begins.at(3)); + std::swap(begins.at(1), begins.at(2)); + std::swap(ends.at(2), ends.at(3)); + std::swap(ends.at(1), ends.at(2)); + } + layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); + layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total())); - int kernel_blob_index = -1; - const tensorflow::TensorProto& kernelTensor = getConstBlob(layer, value_id, -1, &kernel_blob_index); - const String kernelTensorName = layer.input(kernel_blob_index); - std::map::iterator sharedWeightsIt = sharedWeights.find(kernelTensorName); - if (sharedWeightsIt == sharedWeights.end()) - { - blobFromTensor(kernelTensor, layerParams.blobs[0]); - releaseTensor(const_cast(&kernelTensor)); - sharedWeights[kernelTensorName] = layerParams.blobs[0]; - } - else - { - layerParams.blobs[0] = sharedWeightsIt->second; - } + int id = dstNet.addLayer(name, "Slice", layerParams); + layer_id[name] = id; - if (kernel_blob_index == 1) { // In this case output is computed by x*W formula - W should be transposed - Mat data = layerParams.blobs[0].t(); - layerParams.blobs[0] = data.clone(); - } + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); +} - layerParams.set("num_output", layerParams.blobs[0].size[0]); - if (locPredTransposed) - { - CV_Assert(layerParams.blobs[0].dims == 2); - for (int i = 0; i < layerParams.blobs[0].size[0]; i += 2) - { - cv::Mat src = layerParams.blobs[0].row(i); - cv::Mat dst = layerParams.blobs[0].row(i + 1); - std::swap_ranges(src.begin(), src.end(), 
dst.begin()); - } - } +void TFImporter::parseMul(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); - int id = dstNet.addLayer(name, "InnerProduct", layerParams); - layer_id[name] = id; + CV_CheckGT(num_inputs, 0, ""); + int constId = -1; + for(int ii = 0; ii < num_inputs; ++ii) + { + Pin input = parsePin(layer.input(ii)); + if (value_id.find(input.name) != value_id.end()) + { + constId = ii; + break; + } + } + CV_Assert((constId != -1) || (num_inputs == 2)); - // one input only - int input_blob_index = kernel_blob_index == 0 ? 1 : 0; - connect(layer_id, dstNet, parsePin(layer.input(input_blob_index)), id, 0); - data_layouts[name] = DATA_LAYOUT_PLANAR; + if (constId != -1) + { + // Multiplication by constant. + CV_CheckEQ(num_inputs, 2, ""); + Mat scaleMat = getTensorContent(getConstBlob(layer, value_id)); + CV_Assert(scaleMat.type() == CV_32FC1); + if (type == "RealDiv") + { + if (constId == 0) + CV_Error(Error::StsNotImplemented, "Division of constant over variable"); + scaleMat = 1.0f / scaleMat; } - else if (type == "Reshape") + + int id; + if (scaleMat.total() == 1) // is a scalar. { - CV_CheckGT(num_inputs, 0, ""); - Pin inpId = parsePin(layer.input(0)); - DataLayout inpLayout = getDataLayout(layer.input(0), data_layouts); - // There are two possible implementations: reshape an input using - // predefined sizes or use a second input blob as a source of new shape. 
- if (value_id.find(layer.input(1)) != value_id.end()) + // Try to match with a LeakyRelu: + // node { + // name: "LeakyRelu/mul" + // op: "Mul" + // input: "LeakyRelu/alpha" + // input: "input" + // } + // node { + // name: "LeakyRelu/Maximum" + // op: "Maximum" + // input: "LeakyRelu/mul" + // input: "input" + // } + StrIntVector next_layers = getNextLayers(net, name, "Maximum"); + if (!next_layers.empty()) { - Mat newShape = getTensorContent(getConstBlob(layer, value_id, 1)); - int newShapeSize = newShape.total(); - bool hasSwap = false; - if (newShapeSize == 4 && hasAllOnes(newShape, 0, 2)) - { - // NHWC->NCHW - std::swap(*newShape.ptr(0, 2), *newShape.ptr(0, 3)); - std::swap(*newShape.ptr(0, 1), *newShape.ptr(0, 2)); - hasSwap = true; - } - if (inpLayout == DATA_LAYOUT_NHWC) - { - if (newShapeSize >= 2 || newShape.at(1) == 1) - { - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. - addPermuteLayer(order, name + "/nhwc", inpId); - if (newShapeSize < 4) - { - inpLayout = DATA_LAYOUT_NCHW; - } - else - { - inpLayout = DATA_LAYOUT_NHWC; - } - } - } - layerParams.set("dim", DictValue::arrayInt(newShape.ptr(), newShapeSize)); + int maximumLayerIdx = next_layers[0].second; - int id = dstNet.addLayer(name, "Reshape", layerParams); - layer_id[name] = id; + CV_Assert(net.node(maximumLayerIdx).input_size() == 2); - // one input only - connect(layer_id, dstNet, inpId, id, 0); - inpId = Pin(name); + // The input from the Mul layer can also be at index 1. + int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 0 : 1; - if ((inpLayout == DATA_LAYOUT_NHWC || inpLayout == DATA_LAYOUT_UNKNOWN || inpLayout == DATA_LAYOUT_PLANAR) && - newShapeSize == 4 && !hasSwap) - { - int order[] = {0, 3, 1, 2}; // Transform back to OpenCV's NCHW. 
- addPermuteLayer(order, name + "/nchw", inpId); - inpLayout = DATA_LAYOUT_NCHW; - } + ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false); + layers_to_ignore.insert(next_layers[0].first); - data_layouts[name] = newShapeSize == 2 ? DATA_LAYOUT_PLANAR : inpLayout; + layerParams.set("negative_slope", scaleMat.at(0)); + id = dstNet.addLayer(name, "ReLU", layerParams); } else { - int id = dstNet.addLayer(name, "Reshape", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, inpId, id, 0); - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); - data_layouts[name] = inpLayout; + // Just a multiplication. + layerParams.set("scale", scaleMat.at(0)); + id = dstNet.addLayer(name, "Power", layerParams); } } - else if (type == "Flatten" || type == "Squeeze") + else // is a vector { - CV_CheckGT(num_inputs, 0, ""); - Pin inpId = parsePin(layer.input(0)); - int inpLayout = getDataLayout(layer.input(0), data_layouts); - if (type == "Squeeze") - { - CV_Assert(hasLayerAttr(layer, "squeeze_dims")); - const tensorflow::AttrValue& dims = getLayerAttr(layer, "squeeze_dims"); - std::vector dimsVector(dims.list().i_size()); - for (int i = 0; i < dimsVector.size(); ++i) - dimsVector[i] = dims.list().i(i); - - // Flatten layer can squeeze dimensions range into one. - std::sort(dimsVector.begin(), dimsVector.end()); - for (int i = 1; i < dimsVector.size(); ++i) - { - if (dimsVector[i] != dimsVector[i - 1] + 1) - CV_Error(Error::StsNotImplemented, "Unsupported squeeze configuration"); - } - int start = dimsVector.front() - 1, end = dimsVector.back(); - if (start == -1 && end == 0) // squeeze 0th dimension - { - start = 0; - end = 1; - } - layerParams.set("axis", start); - layerParams.set("end_axis", end); - } - if (inpLayout == DATA_LAYOUT_NHWC) + layerParams.blobs.resize(1, scaleMat); + + StrIntVector next_layers = getNextLayers(net, name, "Add"); + if (!next_layers.empty()) { - LayerParams permLP; - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. 
- permLP.set("order", DictValue::arrayInt(order, 4)); + layerParams.set("bias_term", true); + layerParams.blobs.resize(2); - std::string permName = name + "/nchw"; - CV_Assert(layer_id.find(permName) == layer_id.end()); - int permId = dstNet.addLayer(permName, "Permute", permLP); - layer_id[permName] = permId; - connect(layer_id, dstNet, inpId, permId, 0); - inpId = Pin(permName); + int weights_layer_index = next_layers[0].second; + blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back()); + ExcludeLayer(net, weights_layer_index, 0, false); + layers_to_ignore.insert(next_layers[0].first); } - int id = dstNet.addLayer(name, "Flatten", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, inpId, id, 0); - data_layouts[name] = DATA_LAYOUT_PLANAR; + + if (hasLayerAttr(layer, "axis")) + layerParams.set("axis", getLayerAttr(layer, "axis").i()); + + id = dstNet.addLayer(name, "Scale", layerParams); } - else if (type == "Transpose") + layer_id[name] = id; + + Pin inp0 = parsePin(layer.input(0)); + if (layer_id.find(inp0.name) != layer_id.end()) + // First operand is a constant. + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + else + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); + } + else + { + // Check if all the inputs have the same shape. 
+ bool equalInpShapes = true; + bool isShapeOnes = false; + MatShape outShape0; + for (int ii = 0; ii < num_inputs && !netInputShapes.empty(); ii++) { - CV_CheckGT(num_inputs, 0, ""); - Mat perm = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(perm.type() == CV_32SC1); - int* permData = (int*)perm.data; - if (perm.total() == 4) + Pin pin = parsePin(layer.input(ii)); + int inpId = layer_id.find(pin.name)->second; + + // Get input shape + MatShape outShape; + std::vector inpShapes, outShapes; + dstNet.getLayerShapes(netInputShapes, inpId, inpShapes, outShapes); + CV_CheckGT(static_cast(outShapes.size()), pin.blobIndex, ""); + outShape = outShapes[pin.blobIndex]; + + if (ii == 0) { - // Only NHWC <-> NCHW permutations are allowed. OpenCV is always - // keep NCHW layout this way. - int inpLayout = getDataLayout(layer.input(0), data_layouts); - std::string type = "Identity"; - if (inpLayout == DATA_LAYOUT_NHWC) - { - if (permData[0] == 0 && permData[1] == 3 && permData[2] == 1 && permData[3] == 2) - { - // in TensorFlow: NHWC->NCHW - // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NCHW; - } - else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) - { - // in TensorFlow: NHWC->NHWC - // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NHWC; - } - else if (permData[0] == 0 && permData[1] == 3 && permData[2] == 2 && permData[3] == 1) - { - // in TensorFlow: NHWC->NCWH - // in OpenCV: NCHW->NCWH - int permData[] = {0, 1, 3, 2}; - layerParams.set("order", DictValue::arrayInt(permData, perm.total())); - data_layouts[name] = DATA_LAYOUT_NCHW; // we keep track NCHW because channels position only matters - type = "Permute"; - } - else - CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); - } - else if (inpLayout == DATA_LAYOUT_NCHW) - { - if (permData[0] == 0 && permData[1] == 2 && permData[2] == 3 && permData[3] == 1) - { - // in TensorFlow: NCHW->NHWC - // in OpenCV: NCHW->NCHW - 
data_layouts[name] = DATA_LAYOUT_NHWC; - } - else if (permData[0] == 0 && permData[1] == 1 && permData[2] == 2 && permData[3] == 3) - { - // in TensorFlow: NCHW->NCHW - // in OpenCV: NCHW->NCHW - data_layouts[name] = DATA_LAYOUT_NCHW; - } - else - CV_Error(Error::StsParseError, "Only NHWC <-> NCHW permutations are allowed."); - } - int id = dstNet.addLayer(name, type, layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + outShape0 = outShape; } - else + else if (outShape != outShape0) { - layerParams.set("order", DictValue::arrayInt(permData, perm.total())); - - int id = dstNet.addLayer(name, "Permute", layerParams); - layer_id[name] = id; - - // one input only - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + equalInpShapes = false; + isShapeOnes = isAllOnes(outShape, 2, outShape.size()) || + isAllOnes(outShape0, 2, outShape0.size()); + break; } } - else if (type == "Const") + + int id; + if (equalInpShapes || netInputShapes.empty() || (!equalInpShapes && isShapeOnes)) { + layerParams.set("operation", type == "RealDiv" ? 
"div" : "prod"); + id = dstNet.addLayer(name, "Eltwise", layerParams); } - else if (type == "LRN") + else { - CV_CheckGT(num_inputs, 0, ""); - if(hasLayerAttr(layer, "alpha")) { - layerParams.set("alpha", getLayerAttr(layer, "alpha").f()); - } - if(hasLayerAttr(layer, "beta")) { - layerParams.set("beta", getLayerAttr(layer, "beta").f()); - } - if(hasLayerAttr(layer, "depth_radius")) { - int radius = (int)getLayerAttr(layer, "depth_radius").i(); - layerParams.set("local_size", 2*radius + 1); - } - if(hasLayerAttr(layer, "bias")) { - layerParams.set("bias", getLayerAttr(layer, "bias").f()); - } - layerParams.set("norm_by_size", false); + if (type == "RealDiv") + CV_Error(Error::StsNotImplemented, "Division of non equal tensors"); + id = dstNet.addLayer(name, "Scale", layerParams); + } - int id = dstNet.addLayer(name, "LRN", layerParams); - layer_id[name] = id; + layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "Concat" || type == "ConcatV2") + for (int ii = 0; ii < num_inputs; ii++) { - CV_CheckGT(num_inputs, 0, ""); - int axisId = (type == "Concat" ? 
0 : num_inputs - 1); - int axis = getConstBlob(layer, value_id, axisId).int_val().Get(0); + Pin inp = parsePin(layer.input(ii)); + if (layer_id.find(inp.name) == layer_id.end()) + CV_Error(Error::StsError, "Input layer not found: " + inp.name); + connect(layer_id, dstNet, inp, id, ii); + } + } +} - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - axis = toNCHW(axis); - else if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NDHWC) - axis = toNCDHW(axis); - layerParams.set("axis", axis); +void TFImporter::parseFusedBatchNorm(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "FusedBatchNorm" + // input: "input" + // input: "BatchNorm/gamma" + // input: "BatchNorm/beta" + // input: "BatchNorm/moving_mean" + // input: "BatchNorm/moving_variance" - // input(0) or input(n-1) is concat_dim - int from = (type == "Concat" ? 1 : 0); - int to = (type == "Concat" ? num_inputs : num_inputs - 1); + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - for (int ii = from; ii < to; ii++) - { - Pin inp = parsePin(layer.input(ii)); - if (layer_id.find(inp.name) == layer_id.end()) - { - // There are constant inputs. 
- LayerParams lp; - lp.name = inp.name; - lp.type = "Const"; - lp.blobs.resize(1); - blobFromTensor(getConstBlob(layer, value_id, ii), lp.blobs.back()); - CV_Assert_N(!lp.blobs[0].empty(), lp.blobs[0].type() == CV_32F); - - int constInpId = dstNet.addLayer(lp.name, lp.type, lp); - layer_id[lp.name] = constInpId; - } - } + CV_CheckEQ(num_inputs, 5, "Expected gamma, beta, mean and std"); + Pin inpId = parsePin(layer.input(0)); - int id = dstNet.addLayer(name, "Concat", layerParams); - layer_id[name] = id; + bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b(); - for (int ii = from; ii < to; ii++) - { - Pin inp = parsePin(layer.input(ii)); - if (layer_id.find(inp.name) == layer_id.end()) - CV_Error(Error::StsError, "Input layer not found: " + inp.name); - connect(layer_id, dstNet, inp, id, ii - from); - } - } - else if (type == "MaxPool" || type == "MaxPool3D") - { - CV_CheckGT(num_inputs, 0, ""); - layerParams.set("pool", "max"); + layerParams.blobs.resize(2); - setKSize(layerParams, layer); - setStrides(layerParams, layer); - setPadding(layerParams, layer); - // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU - layerParams.set("ceil_mode", false); + const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1); + if (!gammaTensor.tensor_content().empty()) + { + layerParams.blobs.resize(layerParams.blobs.size() + 1); + layerParams.set("has_weight", true); + blobFromTensor(gammaTensor, layerParams.blobs.back()); + } + else + layerParams.set("has_weight", false); - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; + const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2); + if (!betaTensor.tensor_content().empty()) + { + layerParams.blobs.resize(layerParams.blobs.size() + 1); + layerParams.set("has_bias", true); + blobFromTensor(betaTensor, layerParams.blobs.back()); + } + else + layerParams.set("has_bias", false); - connectToAllBlobs(layer_id, dstNet, 
parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "AvgPool" || type == "AvgPool3D") - { - CV_CheckGT(num_inputs, 0, ""); - layerParams.set("pool", "ave"); - layerParams.set("ave_pool_padded_area", false); - setKSize(layerParams, layer); - setStrides(layerParams, layer); - setPadding(layerParams, layer); + Mat mean, std; + if (isTraining) + { + if (layerParams.blobs.size() == 2) + CV_Error(Error::StsNotImplemented, "Cannot determine number " + "of parameters for batch normalization layer."); + mean = Mat::zeros(1, layerParams.blobs[2].total(), CV_32F); + std = Mat::ones(1, layerParams.blobs[2].total(), CV_32F); + + // Add an extra layer: Mean-Variance normalization + LayerParams mvnParams; + std::string mvnName = name + "/MVN"; + CV_Assert(layer_id.find(mvnName) == layer_id.end()); + int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams); + layer_id[mvnName] = mvnId; + connect(layer_id, dstNet, inpId, mvnId, 0); + inpId = Pin(mvnName); + } + else + { + blobFromTensor(getConstBlob(layer, value_id, 3), mean); + blobFromTensor(getConstBlob(layer, value_id, 4), std); + } + layerParams.blobs[0] = mean; + layerParams.blobs[1] = std; - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; + if (hasLayerAttr(layer, "epsilon")) + layerParams.set("eps", getLayerAttr(layer, "epsilon").f()); - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "MaxPoolGrad") - { - CV_CheckEQ(num_inputs, 3, ""); + int id = dstNet.addLayer(name, "BatchNorm", layerParams); + layer_id[name] = id; - layerParams.set("pool_k_h", 0); - layerParams.set("pool_k_w", 0); - layerParams.set("pool_stride_h", 0); - layerParams.set("pool_stride_w", 0); - layerParams.set("pool_pad_h", 0); - layerParams.set("pool_pad_w", 0); + // one input only + connect(layer_id, dstNet, inpId, id, 0); +} - int id = dstNet.addLayer(name, "MaxUnpool", layerParams); - layer_id[name] = id; +void 
TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "Conv2DBackpropInput" + // input: "conv2d_transpose/output_shape" + // input: "weights" + // input: "input" - connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); - connect(layer_id, dstNet, parsePin(layer.input(1) + ":1"), id, 1); - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 2); - } - else if (type == "Placeholder") - { - if (!hasLayerAttr(layer, "dtype") || - getLayerAttr(layer, "dtype").type() != tensorflow::DT_BOOL) // If input is not a train/test flag. - { - netInputsNames.push_back(name); - layer_id[name] = 0; - } - tensorflow::TensorShapeProto shape; - if (hasLayerAttr(layer, "shape")) - shape = getLayerAttr(layer, "shape").shape(); - else if (hasLayerAttr(layer, "_output_shapes")) - { - tensorflow::AttrValue_ListValue list = getLayerAttr(layer, "_output_shapes").list(); - if (list.shape_size()) - shape = list.shape()[0]; - } - if (shape.dim_size()) - { - MatShape dims(shape.dim_size()); - for (int i = 0; i < dims.size(); ++i) - dims[i] = shape.dim(i).size(); - if (dims.size() == 4 && predictedLayout == DATA_LAYOUT_NHWC) - { - std::swap(dims[1], dims[3]); // NHWC->NCWH - std::swap(dims[2], dims[3]); // NCWH->NCHW - if (dims[0] == -1) // It's OK to have undetermined batch size - dims[0] = 1; - } - bool hasNeg = false; - for (int i = 0; i < dims.size() && !hasNeg; ++i) - { - hasNeg = dims[i] < 0; - } - if (!hasNeg) - netInputShapes.push_back(dims); - } - } - else if (type == "Split") { - // TODO: determining axis index remapping by input dimensions order of input blob - // TODO: slicing input may be Const op - // TODO: slicing kernels for convolutions - in current implementation it is impossible - // TODO: add parsing num of slices parameter - CV_CheckEQ(num_inputs, 2, ""); - // num_split - // 1st blob is dims tensor - int axis = getConstBlob(layer, value_id, 0).int_val().Get(0); - if (getDataLayout(name, 
data_layouts) == DATA_LAYOUT_NHWC) - axis = toNCHW(axis); - layerParams.set("axis", axis); - - if (hasLayerAttr(layer, "num_split")) - layerParams.set("num_split", getLayerAttr(layer, "num_split").i()); - - int id = dstNet.addLayer(name, "Slice", layerParams); - layer_id[name] = id; + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - // one input only - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); - } - else if (type == "Slice") - { - // op: "Slice" - // input: "input_node" - // input: "Slice/begin" - // input: "Slice/size" - CV_CheckEQ(num_inputs, 3, ""); - Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); - Mat sizes = getTensorContent(getConstBlob(layer, value_id, 2)); - CV_Assert_N(!begins.empty(), !sizes.empty()); - CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); - CV_CheckTypeEQ(sizes.type(), CV_32SC1, ""); - - if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - { - // Swap NHWC parameters' order to NCHW. 
- std::swap(*begins.ptr(0, 2), *begins.ptr(0, 3)); - std::swap(*begins.ptr(0, 1), *begins.ptr(0, 2)); - std::swap(*sizes.ptr(0, 2), *sizes.ptr(0, 3)); - std::swap(*sizes.ptr(0, 1), *sizes.ptr(0, 2)); - } - layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); - layerParams.set("size", DictValue::arrayInt((int*)sizes.data, sizes.total())); + CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes"); - int id = dstNet.addLayer(name, "Slice", layerParams); - layer_id[name] = id; + layerParams.set("bias_term", false); + layerParams.blobs.resize(1); - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - } - else if (type == "StridedSlice") + StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); + if (next_layers.size() == 1) + { + layerParams.set("bias_term", true); + layerParams.blobs.resize(2); + + int weights_layer_index = next_layers[0].second; + + blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); + ExcludeLayer(net, weights_layer_index, 0, false); + layers_to_ignore.insert(next_layers[0].first); + } + + kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]); + + const int* kshape = layerParams.blobs[0].size.p; + const int kernelH = kshape[2]; + const int kernelW = kshape[3]; + layerParams.set("kernel_h", kernelH); + layerParams.set("kernel_w", kernelW); + layerParams.set("num_output", kshape[1]); + + setStrides(layerParams, layer); + setPadding(layerParams, layer); + + // For convolution layer, output shape computes as + // o = 1 + (i - k + 2*p) / s + // i - input size, o - output size, k - kernel size, p - pad, s - stride + // In TensorFlow, p == 0 is padMode == 'VALID' or p == (k - 1) / 2 + // considering that k is odd. 
+ // SAME: o = 1 + (i - 1) / s + // VALID: o = 1 + i / s + // Deconvolution's layer output shape computes as + // SAME: o = 1 + (i - 1)*s + // VALID: o = (i - 1)*s + // If output_shape differs from formulas above then adjust padding is applied. + + const int strideY = layerParams.get("stride_h"); + const int strideX = layerParams.get("stride_w"); + Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); + const int outH = outShape.at(1); + const int outW = outShape.at(2); + if (layerParams.get("pad_mode") == "SAME") + { + layerParams.set("adj_w", (outW - 1) % strideX); + layerParams.set("adj_h", (outH - 1) % strideY); + } + else if (layerParams.get("pad_mode") == "VALID") + { + layerParams.set("adj_w", (outW - kernelW) % strideX); + layerParams.set("adj_h", (outH - kernelH) % strideY); + } + int id = dstNet.addLayer(name, "Deconvolution", layerParams); + layer_id[name] = id; + + // one input only + connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); +} + +void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "BlockLSTM" + // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps) + // input: "input" + // input: "lstm_block_wrapper/zeros" (ignore) + // input: "lstm_block_wrapper/zeros" (ignore) + // input: "lstm_block_wrapper/kernel" + // input: "lstm_block_wrapper/w_i_diag" + // input: "lstm_block_wrapper/w_f_diag" + // input: "lstm_block_wrapper/w_o_diag" + // input: "lstm_block_wrapper/bias" + + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 9, "Unexpected number of input nodes"); + + if (hasLayerAttr(layer, "forget_bias")) + layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f()); + + if (hasLayerAttr(layer, "forget_bias")) + { + float cellClip = getLayerAttr(layer, "cell_clip").f(); + // Cell clip disabled if it's negative. 
+ if (cellClip >= 0) { - CV_CheckEQ(num_inputs, 4, ""); - Mat begins = getTensorContent(getConstBlob(layer, value_id, 1)); - Mat ends = getTensorContent(getConstBlob(layer, value_id, 2)); - Mat strides = getTensorContent(getConstBlob(layer, value_id, 3)); - CV_CheckTypeEQ(begins.type(), CV_32SC1, ""); - CV_CheckTypeEQ(ends.type(), CV_32SC1, ""); - CV_CheckTypeEQ(strides.type(), CV_32SC1, ""); - const int num = begins.total(); - CV_Assert_N(num == ends.total(), num == strides.total()); - - int end_mask = getLayerAttr(layer, "end_mask").i(); - for (int i = 0; i < num; ++i) - { - if (ends.at(i) < 0) - ends.at(i) -= 1; - if (end_mask & (1 << i)) - ends.at(i) = -1; - if (strides.at(i) != 1) - CV_Error(Error::StsNotImplemented, - format("StridedSlice with stride %d", strides.at(i))); - } - if (begins.total() == 4 && getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - { - // Swap NHWC parameters' order to NCHW. - std::swap(begins.at(2), begins.at(3)); - std::swap(begins.at(1), begins.at(2)); - std::swap(ends.at(2), ends.at(3)); - std::swap(ends.at(1), ends.at(2)); - } - layerParams.set("begin", DictValue::arrayInt((int*)begins.data, begins.total())); - layerParams.set("end", DictValue::arrayInt((int*)ends.data, ends.total())); + layerParams.set("use_cell_clip", true); + layerParams.set("cell_clip", cellClip); + } + } - int id = dstNet.addLayer(name, "Slice", layerParams); - layer_id[name] = id; + Mat W, Wh, Wx, b; + blobFromTensor(getConstBlob(layer, value_id, 4), W); + blobFromTensor(getConstBlob(layer, value_id, 8), b); + const int outSize = W.cols / 4; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + // IGFO->IFOG + float* weightData = (float*)W.data; + for (int i = 0; i < W.rows; ++i) + for (int j = 0; j < outSize; ++j) + { + std::swap(weightData[i * W.cols + 1 * outSize + j], + weightData[i * W.cols + 2 * outSize + j]); + std::swap(weightData[i * W.cols + 2 * outSize + j], + weightData[i * W.cols + 3 * outSize + j]); } - else if (type == "Mul" || 
type == "RealDiv") + Wx = W.rowRange(0, W.rows - outSize).t(); + Wh = W.rowRange(W.rows - outSize, W.rows).t(); + + layerParams.blobs.resize(3); + layerParams.blobs[0] = Wh; + layerParams.blobs[1] = Wx; + layerParams.blobs[2] = b; + + if (hasLayerAttr(layer, "use_peephole")) + { + bool usePeephole = getLayerAttr(layer, "use_peephole").b(); + if (usePeephole) { - CV_CheckGT(num_inputs, 0, ""); - int constId = -1; - for(int ii = 0; ii < num_inputs; ++ii) + layerParams.set("use_peephole", true); + layerParams.blobs.resize(6); + for (int i = 0; i < 3; ++i) { - Pin input = parsePin(layer.input(ii)); - if (value_id.find(input.name) != value_id.end()) - { - constId = ii; - break; - } + Mat w; + blobFromTensor(getConstBlob(layer, value_id, 5 + i), w); + w = w.reshape(1, w.total()); // Single column. + w = Mat::diag(w); // Make a diagonal matrix. + layerParams.blobs[3 + i] = w; } - CV_Assert((constId != -1) || (num_inputs == 2)); + } + } - if (constId != -1) - { - // Multiplication by constant. - CV_CheckEQ(num_inputs, 2, ""); - Mat scaleMat = getTensorContent(getConstBlob(layer, value_id)); - CV_Assert(scaleMat.type() == CV_32FC1); - if (type == "RealDiv") - { - if (constId == 0) - CV_Error(Error::StsNotImplemented, "Division of constant over variable"); - scaleMat = 1.0f / scaleMat; - } + int id = dstNet.addLayer(name, "LSTM", layerParams); + layer_id[name] = id; - int id; - if (scaleMat.total() == 1) // is a scalar. 
- { - // Try to match with a LeakyRelu: - // node { - // name: "LeakyRelu/mul" - // op: "Mul" - // input: "LeakyRelu/alpha" - // input: "input" - // } - // node { - // name: "LeakyRelu/Maximum" - // op: "Maximum" - // input: "LeakyRelu/mul" - // input: "input" - // } - StrIntVector next_layers = getNextLayers(net, name, "Maximum"); - if (!next_layers.empty()) - { - int maximumLayerIdx = next_layers[0].second; + // one input only + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); + data_layouts[name] = DATA_LAYOUT_UNKNOWN; +} - CV_Assert(net.node(maximumLayerIdx).input_size() == 2); +void TFImporter::parseResize(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer_, LayerParams& layerParams) +{ + tensorflow::NodeDef layer = layer_; + std::string name = layer.name(); + const std::string& type = layer.op(); + int num_inputs = layer.input_size(); - // The input from the Mul layer can also be at index 1. - int mulInputIdx = (net.node(maximumLayerIdx).input(0) == name) ? 0 : 1; + CV_CheckGT(num_inputs, 0, ""); + std::string convWeights = ""; + if (type == "FusedResizeAndPadConv2D") + { + // input: "mul_1" + // input: "decoder/ResizeBilinear/size" + // input: "decoder/decoder_conv0/Conv2D_dummy_paddings" + // input: "decoder/decoder_conv0/weights" + CV_CheckEQ(num_inputs, 4, "Number of input for FusedResizeAndPadConv2D"); - ExcludeLayer(net, maximumLayerIdx, mulInputIdx, false); - layers_to_ignore.insert(next_layers[0].first); + Mat paddings = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_CheckEQ(countNonZero(paddings), 0, "Unsupported mode"); - layerParams.set("negative_slope", scaleMat.at(0)); - id = dstNet.addLayer(name, "ReLU", layerParams); - } - else - { - // Just a multiplication. 
- layerParams.set("scale", scaleMat.at(0)); - id = dstNet.addLayer(name, "Power", layerParams); - } - } - else // is a vector - { - layerParams.blobs.resize(1, scaleMat); - - StrIntVector next_layers = getNextLayers(net, name, "Add"); - if (!next_layers.empty()) - { - layerParams.set("bias_term", true); - layerParams.blobs.resize(2); - - int weights_layer_index = next_layers[0].second; - blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs.back()); - ExcludeLayer(net, weights_layer_index, 0, false); - layers_to_ignore.insert(next_layers[0].first); - } + convWeights = layer.input(3); + layer.mutable_input()->DeleteSubrange(2, 2); // FIXIT do NOT modify input model + num_inputs = layer.input_size(); + name = name + "/resize"; - if (hasLayerAttr(layer, "axis")) - layerParams.set("axis", getLayerAttr(layer, "axis").i()); + if (hasLayerAttr(layer, "resize_align_corners")) + { + // FIXIT do NOT modify input model + layer.mutable_attr()->insert( + ::google::protobuf::MapPair("align_corners", + getLayerAttr(layer, "resize_align_corners"))); + } + } + if (num_inputs == 2) + { + Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, ""); + layerParams.set("height", outSize.at(0, 0)); + layerParams.set("width", outSize.at(0, 1)); + } + else if (num_inputs == 3) + { + Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); + factorHeight.convertTo(factorHeight, CV_32F); + factorWidth.convertTo(factorWidth, CV_32F); + layerParams.set("zoom_factor_x", factorWidth.at(0)); + layerParams.set("zoom_factor_y", factorHeight.at(0)); + } + else + CV_Check(num_inputs, num_inputs == 2 || num_inputs == 3, ""); - id = dstNet.addLayer(name, "Scale", layerParams); - } - layer_id[name] = id; + if (type == "ResizeNearestNeighbor") + layerParams.set("interpolation", "nearest"); + 
else + layerParams.set("interpolation", "bilinear"); - Pin inp0 = parsePin(layer.input(0)); - if (layer_id.find(inp0.name) != layer_id.end()) - // First operand is a constant. - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - else - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); - } - else - { - // Check if all the inputs have the same shape. - bool equalInpShapes = true; - bool isShapeOnes = false; - MatShape outShape0; - for (int ii = 0; ii < num_inputs && !netInputShapes.empty(); ii++) - { - Pin pin = parsePin(layer.input(ii)); - int inpId = layer_id.find(pin.name)->second; + if (hasLayerAttr(layer, "align_corners")) + layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b()); - // Get input shape - MatShape outShape; - std::vector inpShapes, outShapes; - dstNet.getLayerShapes(netInputShapes, inpId, inpShapes, outShapes); - CV_CheckGT(static_cast(outShapes.size()), pin.blobIndex, ""); - outShape = outShapes[pin.blobIndex]; + if (hasLayerAttr(layer, "half_pixel_centers")) + layerParams.set("half_pixel_centers", getLayerAttr(layer, "half_pixel_centers").b()); - if (ii == 0) - { - outShape0 = outShape; - } - else if (outShape != outShape0) - { - equalInpShapes = false; - isShapeOnes = isAllOnes(outShape, 2, outShape.size()) || - isAllOnes(outShape0, 2, outShape0.size()); - break; - } - } + int id = dstNet.addLayer(name, "Resize", layerParams); + layer_id[name] = id; - int id; - if (equalInpShapes || netInputShapes.empty() || (!equalInpShapes && isShapeOnes)) - { - layerParams.set("operation", type == "RealDiv" ? 
"div" : "prod"); - id = dstNet.addLayer(name, "Eltwise", layerParams); - } - else - { - if (type == "RealDiv") - CV_Error(Error::StsNotImplemented, "Division of non equal tensors"); - id = dstNet.addLayer(name, "Scale", layerParams); - } + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - layer_id[name] = id; + // Step back to add convolution + if (type == "FusedResizeAndPadConv2D") + { + tensorflow::NodeDef conv = layer_; + conv.clear_input(); + conv.add_input(name); + conv.add_input(convWeights); + conv.set_op("Conv2D"); + parseNode(conv); + } +} - for (int ii = 0; ii < num_inputs; ii++) - { - Pin inp = parsePin(layer.input(ii)); - if (layer_id.find(inp.name) == layer_id.end()) - CV_Error(Error::StsError, "Input layer not found: " + inp.name); - connect(layer_id, dstNet, inp, id, ii); - } - } - } - else if (type == "FusedBatchNorm" || type == "FusedBatchNormV3") - { - // op: "FusedBatchNorm" - // input: "input" - // input: "BatchNorm/gamma" - // input: "BatchNorm/beta" - // input: "BatchNorm/moving_mean" - // input: "BatchNorm/moving_variance" - CV_CheckEQ(num_inputs, 5, "Expected gamma, beta, mean and std"); - Pin inpId = parsePin(layer.input(0)); +void TFImporter::parseL2Normalize(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "L2Normalize" + // input: "input" + // input: "reduction_indices" (axis) - bool isTraining = hasLayerAttr(layer, "is_training") && getLayerAttr(layer, "is_training").b(); + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - layerParams.blobs.resize(2); + CV_CheckEQ(num_inputs, 2, ""); + Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(reductionIndices.type() == CV_32SC1); - const tensorflow::TensorProto& gammaTensor = getConstBlob(layer, value_id, 1); - if (!gammaTensor.tensor_content().empty()) - { - layerParams.blobs.resize(layerParams.blobs.size() + 1); - layerParams.set("has_weight", true); - 
blobFromTensor(gammaTensor, layerParams.blobs.back()); - } - else - layerParams.set("has_weight", false); - - const tensorflow::TensorProto& betaTensor = getConstBlob(layer, value_id, 2); - if (!betaTensor.tensor_content().empty()) - { - layerParams.blobs.resize(layerParams.blobs.size() + 1); - layerParams.set("has_bias", true); - blobFromTensor(betaTensor, layerParams.blobs.back()); - } - else - layerParams.set("has_bias", false); - - Mat mean, std; - if (isTraining) - { - if (layerParams.blobs.size() == 2) - CV_Error(Error::StsNotImplemented, "Cannot determine number " - "of parameters for batch normalization layer."); - mean = Mat::zeros(1, layerParams.blobs[2].total(), CV_32F); - std = Mat::ones(1, layerParams.blobs[2].total(), CV_32F); - - // Add an extra layer: Mean-Variance normalization - LayerParams mvnParams; - std::string mvnName = name + "/MVN"; - CV_Assert(layer_id.find(mvnName) == layer_id.end()); - int mvnId = dstNet.addLayer(mvnName, "MVN", mvnParams); - layer_id[mvnName] = mvnId; - connect(layer_id, dstNet, inpId, mvnId, 0); - inpId = Pin(mvnName); - } - else - { - blobFromTensor(getConstBlob(layer, value_id, 3), mean); - blobFromTensor(getConstBlob(layer, value_id, 4), std); - } - layerParams.blobs[0] = mean; - layerParams.blobs[1] = std; + const int numAxes = reductionIndices.total(); + if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) + for (int i = 0; i < numAxes; ++i) + reductionIndices.at(i) = toNCHW(reductionIndices.at(i)); - if (hasLayerAttr(layer, "epsilon")) - layerParams.set("eps", getLayerAttr(layer, "epsilon").f()); + cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING); + for (int i = 1; i < numAxes; ++i) + { + CV_Assert(reductionIndices.at(i) == reductionIndices.at(i - 1) + 1); + // Axes have the same sign. 
+ CV_Assert(reductionIndices.at(i) * reductionIndices.at(i - 1) >= 0); + } + layerParams.set("start_axis", reductionIndices.at(0)); + layerParams.set("end_axis", reductionIndices.at(numAxes - 1)); - int id = dstNet.addLayer(name, "BatchNorm", layerParams); - layer_id[name] = id; + int id = dstNet.addLayer(name, "Normalize", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); +} - // one input only - connect(layer_id, dstNet, inpId, id, 0); - } - else if (type == "Conv2DBackpropInput") +void TFImporter::parsePriorBox(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckEQ(num_inputs, 2, ""); + if (hasLayerAttr(layer, "min_size")) + layerParams.set("min_size", getLayerAttr(layer, "min_size").i()); + if (hasLayerAttr(layer, "max_size")) + layerParams.set("max_size", getLayerAttr(layer, "max_size").i()); + if (hasLayerAttr(layer, "flip")) + layerParams.set("flip", getLayerAttr(layer, "flip").b()); + if (hasLayerAttr(layer, "clip")) + layerParams.set("clip", getLayerAttr(layer, "clip").b()); + if (hasLayerAttr(layer, "offset")) + layerParams.set("offset", getLayerAttr(layer, "offset").f()); + if (hasLayerAttr(layer, "step")) + layerParams.set("step", getLayerAttr(layer, "step").f()); + + const std::string paramNames[] = {"variance", "aspect_ratio", "scales", + "width", "height"}; + for (int i = 0; i < 5; ++i) + { + if (hasLayerAttr(layer, paramNames[i])) { - // op: "Conv2DBackpropInput" - // input: "conv2d_transpose/output_shape" - // input: "weights" - // input: "input" - CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes"); + Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor()); + layerParams.set(paramNames[i], + DictValue::arrayReal((float*)values.data, values.total())); + } + } + int id = dstNet.addLayer(name, "PriorBox", layerParams); + 
layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); + data_layouts[name] = DATA_LAYOUT_UNKNOWN; +} - layerParams.set("bias_term", false); - layerParams.blobs.resize(1); +void TFImporter::parseSoftmax(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - StrIntVector next_layers = getNextLayers(net, name, "BiasAdd"); - if (next_layers.size() == 1) - { - layerParams.set("bias_term", true); - layerParams.blobs.resize(2); + CV_CheckGT(num_inputs, 0, ""); + if (hasLayerAttr(layer, "axis")) + layerParams.set("axis", getLayerAttr(layer, "axis").i()); - int weights_layer_index = next_layers[0].second; + int id = dstNet.addLayer(name, "Softmax", layerParams); + layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - blobFromTensor(getConstBlob(net.node(weights_layer_index), value_id), layerParams.blobs[1]); - ExcludeLayer(net, weights_layer_index, 0, false); - layers_to_ignore.insert(next_layers[0].first); - } +void TFImporter::parseCropAndResize(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "CropAndResize" + // input: "input" + // input: "boxes" + // input: "sizes" - kernelFromTensor(getConstBlob(layer, value_id, 1), layerParams.blobs[0]); - - const int* kshape = layerParams.blobs[0].size.p; - const int kernelH = kshape[2]; - const int kernelW = kshape[3]; - layerParams.set("kernel_h", kernelH); - layerParams.set("kernel_w", kernelW); - layerParams.set("num_output", kshape[1]); - - setStrides(layerParams, layer); - setPadding(layerParams, layer); - - // For convolution layer, output shape computes as - // o = 1 + (i - k + 2*p) / s - // i - input size, o - output size, k - kernel size, p - pad, s - stride - // In TensorFlow, p == 0 is padMode == 'VALID' or p 
== (k - 1) / 2 - // considering that k is odd. - // SAME: o = 1 + (i - 1) / s - // VALID: o = 1 + i / s - // Deconvolution's layer output shape computes as - // SAME: o = 1 + (i - 1)*s - // VALID: o = (i - 1)*s - // If output_shape differs from formulas above then adjust padding is applied. - - const int strideY = layerParams.get("stride_h"); - const int strideX = layerParams.get("stride_w"); - Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); - const int outH = outShape.at(1); - const int outW = outShape.at(2); - if (layerParams.get("pad_mode") == "SAME") - { - layerParams.set("adj_w", (outW - 1) % strideX); - layerParams.set("adj_h", (outH - 1) % strideY); - } - else if (layerParams.get("pad_mode") == "VALID") - { - layerParams.set("adj_w", (outW - kernelW) % strideX); - layerParams.set("adj_h", (outH - kernelH) % strideY); - } - int id = dstNet.addLayer(name, "Deconvolution", layerParams); - layer_id[name] = id; + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + CV_CheckEQ(num_inputs, 3, ""); - // one input only - connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); - } - else if (type == "BlockLSTM") - { - // op: "BlockLSTM" - // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps) - // input: "input" - // input: "lstm_block_wrapper/zeros" (ignore) - // input: "lstm_block_wrapper/zeros" (ignore) - // input: "lstm_block_wrapper/kernel" - // input: "lstm_block_wrapper/w_i_diag" - // input: "lstm_block_wrapper/w_f_diag" - // input: "lstm_block_wrapper/w_o_diag" - // input: "lstm_block_wrapper/bias" - CV_CheckEQ(num_inputs, 9, "Unexpected number of input nodes"); - - if (hasLayerAttr(layer, "forget_bias")) - layerParams.set("forget_bias", getLayerAttr(layer, "forget_bias").f()); - - if (hasLayerAttr(layer, "forget_bias")) - { - float cellClip = getLayerAttr(layer, "cell_clip").f(); - // Cell clip disabled if it's negative. 
- if (cellClip >= 0) - { - layerParams.set("use_cell_clip", true); - layerParams.set("cell_clip", cellClip); - } - } + Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, ""); - Mat W, Wh, Wx, b; - blobFromTensor(getConstBlob(layer, value_id, 4), W); - blobFromTensor(getConstBlob(layer, value_id, 8), b); - const int outSize = W.cols / 4; + layerParams.set("height", cropSize.at(0)); + layerParams.set("width", cropSize.at(1)); - // IGFO->IFOG - float* weightData = (float*)W.data; - for (int i = 0; i < W.rows; ++i) - for (int j = 0; j < outSize; ++j) - { - std::swap(weightData[i * W.cols + 1 * outSize + j], - weightData[i * W.cols + 2 * outSize + j]); - std::swap(weightData[i * W.cols + 2 * outSize + j], - weightData[i * W.cols + 3 * outSize + j]); - } - Wx = W.rowRange(0, W.rows - outSize).t(); - Wh = W.rowRange(W.rows - outSize, W.rows).t(); + int id = dstNet.addLayer(name, "CropAndResize", layerParams); + layer_id[name] = id; - layerParams.blobs.resize(3); - layerParams.blobs[0] = Wh; - layerParams.blobs[1] = Wx; - layerParams.blobs[2] = b; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); +} - if (hasLayerAttr(layer, "use_peephole")) - { - bool usePeephole = getLayerAttr(layer, "use_peephole").b(); - if (usePeephole) - { - layerParams.set("use_peephole", true); - layerParams.blobs.resize(6); - for (int i = 0; i < 3; ++i) - { - Mat w; - blobFromTensor(getConstBlob(layer, value_id, 5 + i), w); - w = w.reshape(1, w.total()); // Single column. - w = Mat::diag(w); // Make a diagonal matrix. - layerParams.blobs[3 + i] = w; - } - } - } +void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // Computes the mean of elements across dimensions of a tensor. 
+ // If keepdims is false (default) reduces input_tensor along the dimensions given in axis, + // else the reduced dimensions are retained with length 1. + // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1 + // if keepdims is false we use Flatten after Pooling: out_shape = NxC + // if indices = [0] we use a global pooling by indices. + // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input, + // if keepdims is false we use Flatten after Slice. + // Example: input_shape = NxCxHxW + // determine out shape: NxCxHxW --Slice--> 1xCxHxW + // out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW) + // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape + + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + + Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); + CV_Assert(indices.type() == CV_32SC1); + + // There are two attributes, "keepdims" and a deprecated "keep_dims". 
+ bool keepDims = false; + if (hasLayerAttr(layer, "keepdims")) + keepDims = getLayerAttr(layer, "keepdims").b(); + else if (hasLayerAttr(layer, "keep_dims")) + keepDims = getLayerAttr(layer, "keep_dims").b(); + + if (indices.total() == 1 && indices.at(0) == 0) + { + LayerParams flattenLp; + std::string flattenName = name + "/flatten"; + CV_Assert(layer_id.find(flattenName) == layer_id.end()); + int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); + layer_id[flattenName] = flattenId; + connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); + + LayerParams reshapeLp; + std::string reshapeName = name + "/reshape"; + CV_Assert(layer_id.find(reshapeName) == layer_id.end()); + reshapeLp.set("axis", 0); + reshapeLp.set("num_axes", 1); + int newShape[] = {1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); + + int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); + layer_id[reshapeName] = reshapeId; + connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0); + + LayerParams avgLp; + std::string avgName = name + "/avg"; + CV_Assert(layer_id.find(avgName) == layer_id.end()); + avgLp.set("pool", type == "Mean" ? 
"ave" : "sum"); + // pooling kernel H x 1 + avgLp.set("global_pooling_h", true); + avgLp.set("kernel_w", 1); + int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); + layer_id[avgName] = avgId; + connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); + + LayerParams sliceLp; + std::string layerShapeName = name + "/slice"; + CV_Assert(layer_id.find(layerShapeName) == layer_id.end()); + sliceLp.set("axis", 0); + int begin[] = {0}; + int size[] = {1}; + sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); + sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); + int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp); + layer_id[layerShapeName] = sliceId; + connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); + + if (!keepDims) + { + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", 0); + squeezeLp.set("end_axis", 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0); + layerShapeName = squeezeName; + } - int id = dstNet.addLayer(name, "LSTM", layerParams); + int id = dstNet.addLayer(name, "Reshape", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, Pin(avgName), id, 0); + connect(layer_id, dstNet, Pin(layerShapeName), id, 1); + } else if (indices.total() == 1) { + int axis = toNCHW(indices.at(0)); + if (axis == 2 || axis == 3) + { + layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); + layerParams.set(axis == 2 ? 
"global_pooling_h" : "global_pooling_w", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - // one input only - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 0); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + if (!keepDims) + { + // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC + LayerParams permLP; + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + std::string permName = name + "/nchw"; + Pin inpId = Pin(name); + addPermuteLayer(order, permName, inpId); + + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + squeezeLp.set("axis", indices.at(0)); + squeezeLp.set("end_axis", indices.at(0) + 1); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(permName), squeezeId, 0); + } } - else if (type == "ResizeNearestNeighbor" || type == "ResizeBilinear" || type == "FusedResizeAndPadConv2D") + else if (axis == 1) { - CV_CheckGT(num_inputs, 0, ""); - std::string convWeights = ""; - if (type == "FusedResizeAndPadConv2D") - { - // input: "mul_1" - // input: "decoder/ResizeBilinear/size" - // input: "decoder/decoder_conv0/Conv2D_dummy_paddings" - // input: "decoder/decoder_conv0/weights" - CV_CheckEQ(num_inputs, 4, "Number of input for FusedResizeAndPadConv2D"); - - Mat paddings = getTensorContent(getConstBlob(layer, value_id, 2)); - CV_CheckEQ(countNonZero(paddings), 0, "Unsupported mode"); + int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. + Pin inpId = parsePin(layer.input(0)); + addPermuteLayer(order, name + "/nhwc", inpId); - convWeights = layer.input(3); - layer.mutable_input()->DeleteSubrange(2, 2); // FIXIT do NOT modify input model - num_inputs = layer.input_size(); - name = name + "/resize"; + layerParams.set("pool", type == "Mean" ? 
"ave" : "sum"); + layerParams.set("kernel_h", 1); + layerParams.set("global_pooling_w", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, inpId, id, 0); - if (hasLayerAttr(layer, "resize_align_corners")) - { - // FIXIT do NOT modify input model - layer.mutable_attr()->insert( - ::google::protobuf::MapPair("align_corners", - getLayerAttr(layer, "resize_align_corners"))); - } - } - if (num_inputs == 2) - { - Mat outSize = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_CheckTypeEQ(outSize.type(), CV_32SC1, ""); CV_CheckEQ(outSize.total(), (size_t)2, ""); - layerParams.set("height", outSize.at(0, 0)); - layerParams.set("width", outSize.at(0, 1)); - } - else if (num_inputs == 3) + if (!keepDims) { - Mat factorHeight = getTensorContent(getConstBlob(layer, value_id, 1)); - Mat factorWidth = getTensorContent(getConstBlob(layer, value_id, 2)); - factorHeight.convertTo(factorHeight, CV_32F); - factorWidth.convertTo(factorWidth, CV_32F); - layerParams.set("zoom_factor_x", factorWidth.at(0)); - layerParams.set("zoom_factor_y", factorHeight.at(0)); + LayerParams squeezeLp; + std::string squeezeName = name + "/squeeze"; + CV_Assert(layer_id.find(squeezeName) == layer_id.end()); + int channel_id = 3; // TF NHWC layout + squeezeLp.set("axis", channel_id - 1); + squeezeLp.set("end_axis", channel_id); + int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); + layer_id[squeezeName] = squeezeId; + connect(layer_id, dstNet, Pin(name), squeezeId, 0); } else - CV_Check(num_inputs, num_inputs == 2 || num_inputs == 3, ""); - - if (type == "ResizeNearestNeighbor") - layerParams.set("interpolation", "nearest"); - else - layerParams.set("interpolation", "bilinear"); - - if (hasLayerAttr(layer, "align_corners")) - layerParams.set("align_corners", getLayerAttr(layer, "align_corners").b()); - - if (hasLayerAttr(layer, "half_pixel_centers")) - layerParams.set("half_pixel_centers", getLayerAttr(layer, 
"half_pixel_centers").b()); - - int id = dstNet.addLayer(name, "Resize", layerParams); - layer_id[name] = id; - - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - - // Step back to add convolution - if (type == "FusedResizeAndPadConv2D") { - tensorflow::NodeDef conv = layer_; - conv.clear_input(); - conv.add_input(name); - conv.add_input(convWeights); - conv.set_op("Conv2D"); - parseNode(conv); + int order[] = {0, 3, 1, 2}; // From NHWC to OpenCV's NCHW. + Pin inpId = parsePin(name); + addPermuteLayer(order, name + "/nchw", inpId); } } - else if (type == "L2Normalize") - { - // op: "L2Normalize" - // input: "input" - // input: "reduction_indices" (axis) - CV_CheckEQ(num_inputs, 2, ""); - Mat reductionIndices = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(reductionIndices.type() == CV_32SC1); - - const int numAxes = reductionIndices.total(); - if (getDataLayout(name, data_layouts) == DATA_LAYOUT_NHWC) - for (int i = 0; i < numAxes; ++i) - reductionIndices.at(i) = toNCHW(reductionIndices.at(i)); - - cv::sort(reductionIndices, reductionIndices, SORT_ASCENDING); - for (int i = 1; i < numAxes; ++i) - { - CV_Assert(reductionIndices.at(i) == reductionIndices.at(i - 1) + 1); - // Axes have the same sign. - CV_Assert(reductionIndices.at(i) * reductionIndices.at(i - 1) >= 0); - } - layerParams.set("start_axis", reductionIndices.at(0)); - layerParams.set("end_axis", reductionIndices.at(numAxes - 1)); + } else { + if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) + CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); - int id = dstNet.addLayer(name, "Normalize", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - } - else if (type == "PriorBox") + layerParams.set("pool", type == "Mean" ? 
"ave" : "sum"); + layerParams.set("global_pooling", true); + int id = dstNet.addLayer(name, "Pooling", layerParams); + layer_id[name] = id; + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + + if (!keepDims) { - CV_CheckEQ(num_inputs, 2, ""); - if (hasLayerAttr(layer, "min_size")) - layerParams.set("min_size", getLayerAttr(layer, "min_size").i()); - if (hasLayerAttr(layer, "max_size")) - layerParams.set("max_size", getLayerAttr(layer, "max_size").i()); - if (hasLayerAttr(layer, "flip")) - layerParams.set("flip", getLayerAttr(layer, "flip").b()); - if (hasLayerAttr(layer, "clip")) - layerParams.set("clip", getLayerAttr(layer, "clip").b()); - if (hasLayerAttr(layer, "offset")) - layerParams.set("offset", getLayerAttr(layer, "offset").f()); - if (hasLayerAttr(layer, "step")) - layerParams.set("step", getLayerAttr(layer, "step").f()); - - const std::string paramNames[] = {"variance", "aspect_ratio", "scales", - "width", "height"}; - for (int i = 0; i < 5; ++i) - { - if (hasLayerAttr(layer, paramNames[i])) - { - Mat values = getTensorContent(getLayerAttr(layer, paramNames[i]).tensor()); - layerParams.set(paramNames[i], - DictValue::arrayReal((float*)values.data, values.total())); - } - } - int id = dstNet.addLayer(name, "PriorBox", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); - data_layouts[name] = DATA_LAYOUT_UNKNOWN; + LayerParams flattenLp; + std::string flattenName = name + "/flatten"; + CV_Assert(layer_id.find(flattenName) == layer_id.end()); + int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); + layer_id[flattenName] = flattenId; + connect(layer_id, dstNet, Pin(name), flattenId, 0); } - else if (type == "Softmax") - { - CV_CheckGT(num_inputs, 0, ""); - if (hasLayerAttr(layer, "axis")) - layerParams.set("axis", getLayerAttr(layer, "axis").i()); + } +} - int id = dstNet.addLayer(name, "Softmax", layerParams); - 
layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "CropAndResize") - { - // op: "CropAndResize" - // input: "input" - // input: "boxes" - // input: "sizes" - CV_CheckEQ(num_inputs, 3, ""); +void TFImporter::parsePack(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: tf.stack(list of tensors, axis=0) + // Join a list of inputs along a new axis. + // The "axis" specifies the index of the new axis in the dimensions of the output. + // Example: given a list with "N" tensors of shape (C, H, W): + // if axis == 0 then the output tensor will have the shape (N, C, H, W), + // if axis == 1 then the output tensor will have the shape (C, N, H, W). + + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + CV_Assert(hasLayerAttr(layer, "axis")); + int dim = (int)getLayerAttr(layer, "axis").i(); + if (dim != 0) + CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation."); + + CV_Assert(hasLayerAttr(layer, "N")); + int num = (int)getLayerAttr(layer, "N").i(); + CV_CheckEQ(num_inputs, num, ""); + std::string base_name = name + "/reshape_"; + std::vector reshape_ids; + for (int i = 0; i < num; i++) { + std::ostringstream ss; + ss << i; + std::string reshape_name = base_name + ss.str(); + LayerParams reshapeLP; + reshapeLP.set("axis", dim); + reshapeLP.set("num_axes", 1); + int outShape[] = {1, -1}; + reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2)); + int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); + layer_id[reshape_name] = id; + reshape_ids.push_back(id); + connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0); + } - Mat cropSize = getTensorContent(getConstBlob(layer, value_id, 2)); - CV_CheckTypeEQ(cropSize.type(), CV_32SC1, ""); CV_CheckEQ(cropSize.total(), (size_t)2, ""); + layerParams.set("axis", dim); + int id = dstNet.addLayer(name, "Concat", 
layerParams); + layer_id[name] = id; - layerParams.set("height", cropSize.at(0)); - layerParams.set("width", cropSize.at(1)); + for (int li = 0; li < num; li++) + dstNet.connect(reshape_ids[li], 0, id, li); +} - int id = dstNet.addLayer(name, "CropAndResize", layerParams); - layer_id[name] = id; +void TFImporter::parseClipByValue(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // op: "ClipByValue" + // input: "input" + // input: "mix" + // input: "max" - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); - } - else if (type == "Mean" || type == "Sum") - { - // Computes the mean of elements across dimensions of a tensor. - // If keepdims is false (default) reduces input_tensor along the dimensions given in axis, - // else the reduced dimensions are retained with length 1. - // if indices = [1, 2] in NHWC layout we use global pooling: NxCxHxW --Pooling--> NxCx1x1 - // if keepdims is false we use Flatten after Pooling: out_shape = NxC - // if indices = [0] we use a global pooling by indices. - // To return correct shape, we use Reshape after Pooling. To determine input shape use Slice for input, - // if keepdims is false we use Flatten after Slice. - // Example: input_shape = NxCxHxW - // determine out shape: NxCxHxW --Slice--> 1xCxHxW - // out_shape = 1xCxHxW if keepDims else (1xCxHxW --Flatten--> CxHxW) - // global pool: NxCxHxW --Flatten--> Nx(C*H*W) --Reshape--> 1x1xNx(C*H*W) --Pooling--> 1x1x1x(C*H*W) --Reshape--> out_shape - CV_CheckGT(num_inputs, 0, ""); - - Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); - CV_Assert(indices.type() == CV_32SC1); - - // There are two attributes, "keepdims" and a deprecated "keep_dims". 
- bool keepDims = false; - if (hasLayerAttr(layer, "keepdims")) - keepDims = getLayerAttr(layer, "keepdims").b(); - else if (hasLayerAttr(layer, "keep_dims")) - keepDims = getLayerAttr(layer, "keep_dims").b(); - - if (indices.total() == 1 && indices.at(0) == 0) - { - LayerParams flattenLp; - std::string flattenName = name + "/flatten"; - CV_Assert(layer_id.find(flattenName) == layer_id.end()); - int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); - layer_id[flattenName] = flattenId; - connect(layer_id, dstNet, parsePin(layer.input(0)), flattenId, 0); - - LayerParams reshapeLp; - std::string reshapeName = name + "/reshape"; - CV_Assert(layer_id.find(reshapeName) == layer_id.end()); - reshapeLp.set("axis", 0); - reshapeLp.set("num_axes", 1); - int newShape[] = {1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 3)); - - int reshapeId = dstNet.addLayer(reshapeName, "Reshape", reshapeLp); - layer_id[reshapeName] = reshapeId; - connect(layer_id, dstNet, Pin(flattenName), reshapeId, 0); - - LayerParams avgLp; - std::string avgName = name + "/avg"; - CV_Assert(layer_id.find(avgName) == layer_id.end()); - avgLp.set("pool", type == "Mean" ? 
"ave" : "sum"); - // pooling kernel H x 1 - avgLp.set("global_pooling_h", true); - avgLp.set("kernel_w", 1); - int avgId = dstNet.addLayer(avgName, "Pooling", avgLp); - layer_id[avgName] = avgId; - connect(layer_id, dstNet, Pin(reshapeName), avgId, 0); - - LayerParams sliceLp; - std::string layerShapeName = name + "/slice"; - CV_Assert(layer_id.find(layerShapeName) == layer_id.end()); - sliceLp.set("axis", 0); - int begin[] = {0}; - int size[] = {1}; - sliceLp.set("begin", DictValue::arrayInt(&begin[0], 1)); - sliceLp.set("size", DictValue::arrayInt(&size[0], 1)); - int sliceId = dstNet.addLayer(layerShapeName, "Slice", sliceLp); - layer_id[layerShapeName] = sliceId; - connect(layer_id, dstNet, Pin(layer.input(0)), sliceId, 0); - - if (!keepDims) - { - LayerParams squeezeLp; - std::string squeezeName = name + "/squeeze"; - CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - squeezeLp.set("axis", 0); - squeezeLp.set("end_axis", 1); - int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); - layer_id[squeezeName] = squeezeId; - connect(layer_id, dstNet, Pin(layerShapeName), squeezeId, 0); - layerShapeName = squeezeName; - } + const std::string& name = layer.name(); + const int num_inputs = layer.input_size(); - int id = dstNet.addLayer(name, "Reshape", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, Pin(avgName), id, 0); - connect(layer_id, dstNet, Pin(layerShapeName), id, 1); - } else if (indices.total() == 1) { - int axis = toNCHW(indices.at(0)); - if (axis == 2 || axis == 3) - { - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); - layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); - layerParams.set(axis == 2 ? 
"global_pooling_h" : "global_pooling_w", true); - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - - if (!keepDims) - { - // To keep correct order after squeeze dims we first need to change layout from NCHW to NHWC - LayerParams permLP; - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. - std::string permName = name + "/nchw"; - Pin inpId = Pin(name); - addPermuteLayer(order, permName, inpId); - - LayerParams squeezeLp; - std::string squeezeName = name + "/squeeze"; - CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - squeezeLp.set("axis", indices.at(0)); - squeezeLp.set("end_axis", indices.at(0) + 1); - int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); - layer_id[squeezeName] = squeezeId; - connect(layer_id, dstNet, Pin(permName), squeezeId, 0); - } - } - else if (axis == 1) - { - int order[] = {0, 2, 3, 1}; // From OpenCV's NCHW to NHWC. - Pin inpId = parsePin(layer.input(0)); - addPermuteLayer(order, name + "/nhwc", inpId); - - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); - layerParams.set("kernel_h", 1); - layerParams.set("global_pooling_w", true); - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, inpId, id, 0); - - if (!keepDims) - { - LayerParams squeezeLp; - std::string squeezeName = name + "/squeeze"; - CV_Assert(layer_id.find(squeezeName) == layer_id.end()); - int channel_id = 3; // TF NHWC layout - squeezeLp.set("axis", channel_id - 1); - squeezeLp.set("end_axis", channel_id); - int squeezeId = dstNet.addLayer(squeezeName, "Flatten", squeezeLp); - layer_id[squeezeName] = squeezeId; - connect(layer_id, dstNet, Pin(name), squeezeId, 0); - } - else - { - int order[] = {0, 3, 1, 2}; // From NHWC to OpenCV's NCHW. 
- Pin inpId = parsePin(name); - addPermuteLayer(order, name + "/nchw", inpId); - } - } - } else { - if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) - CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); + CV_CheckEQ(num_inputs, 3, ""); - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); - layerParams.set("global_pooling", true); - int id = dstNet.addLayer(name, "Pooling", layerParams); - layer_id[name] = id; - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); + Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1)); + Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2)); + CV_CheckEQ(minValue.total(), (size_t)1, ""); CV_CheckTypeEQ(minValue.type(), CV_32FC1, ""); + CV_CheckEQ(maxValue.total(), (size_t)1, ""); CV_CheckTypeEQ(maxValue.type(), CV_32FC1, ""); - if (!keepDims) - { - LayerParams flattenLp; - std::string flattenName = name + "/flatten"; - CV_Assert(layer_id.find(flattenName) == layer_id.end()); - int flattenId = dstNet.addLayer(flattenName, "Flatten", flattenLp); - layer_id[flattenName] = flattenId; - connect(layer_id, dstNet, Pin(name), flattenId, 0); - } - } - } - else if (type == "Pack") - { - // op: tf.stack(list of tensors, axis=0) - // Join a list of inputs along a new axis. - // The "axis" specifies the index of the new axis in the dimensions of the output. - // Example: given a list with "N" tensors of shape (C, H, W): - // if axis == 0 then the output tensor will have the shape (N, C, H, W), - // if axis == 1 then the output tensor will have the shape (C, N, H, W). 
- CV_CheckGT(num_inputs, 0, ""); - CV_Assert(hasLayerAttr(layer, "axis")); - int dim = (int)getLayerAttr(layer, "axis").i(); - if (dim != 0) - CV_Error(Error::StsNotImplemented, "Unsupported mode of pack operation."); - - CV_Assert(hasLayerAttr(layer, "N")); - int num = (int)getLayerAttr(layer, "N").i(); - CV_CheckEQ(num_inputs, num, ""); - std::string base_name = name + "/reshape_"; - std::vector reshape_ids; - for (int i = 0; i < num; i++) { - std::ostringstream ss; - ss << i; - std::string reshape_name = base_name + ss.str(); - LayerParams reshapeLP; - reshapeLP.set("axis", dim); - reshapeLP.set("num_axes", 1); - int outShape[] = {1, -1}; - reshapeLP.set("dim", DictValue::arrayInt(&outShape[0], 2)); - int id = dstNet.addLayer(reshape_name, "Reshape", reshapeLP); - layer_id[reshape_name] = id; - reshape_ids.push_back(id); - connect(layer_id, dstNet, parsePin(layer.input(i)), id, 0); - } + layerParams.set("min_value", minValue.at(0)); + layerParams.set("max_value", maxValue.at(0)); - layerParams.set("axis", dim); - int id = dstNet.addLayer(name, "Concat", layerParams); - layer_id[name] = id; + int id = dstNet.addLayer(name, "ReLU6", layerParams); + layer_id[name] = id; - for (int li = 0; li < num; li++) - dstNet.connect(reshape_ids[li], 0, id, li); - } - else if (type == "ClipByValue") - { - // op: "ClipByValue" - // input: "input" - // input: "mix" - // input: "max" - CV_CheckEQ(num_inputs, 3, ""); + connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); +} - Mat minValue = getTensorContent(getConstBlob(layer, value_id, 1)); - Mat maxValue = getTensorContent(getConstBlob(layer, value_id, 2)); - CV_CheckEQ(minValue.total(), (size_t)1, ""); CV_CheckTypeEQ(minValue.type(), CV_32FC1, ""); - CV_CheckEQ(maxValue.total(), (size_t)1, ""); CV_CheckTypeEQ(maxValue.type(), CV_32FC1, ""); +void TFImporter::parseLeakyRelu(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const int 
num_inputs = layer.input_size(); - layerParams.set("min_value", minValue.at(0)); - layerParams.set("max_value", maxValue.at(0)); + CV_CheckGT(num_inputs, 0, ""); + CV_Assert(hasLayerAttr(layer, "alpha")); + layerParams.set("negative_slope", getLayerAttr(layer, "alpha").f()); - int id = dstNet.addLayer(name, "ReLU6", layerParams); - layer_id[name] = id; + int id = dstNet.addLayer(name, "ReLU", layerParams); + layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); - } - else if (type == "LeakyRelu") - { - CV_CheckGT(num_inputs, 0, ""); - CV_Assert(hasLayerAttr(layer, "alpha")); - layerParams.set("negative_slope", getLayerAttr(layer, "alpha").f()); +void TFImporter::parseActivation(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); + + CV_CheckGT(num_inputs, 0, ""); + std::string dnnType = type; + if (type == "Abs") dnnType = "AbsVal"; + else if (type == "Tanh") dnnType = "TanH"; + else if (type == "Relu") dnnType = "ReLU"; + else if (type == "Relu6") dnnType = "ReLU6"; + else if (type == "Elu") dnnType = "ELU"; + + int id = dstNet.addLayer(name, dnnType, layerParams); + layer_id[name] = id; + connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); +} - int id = dstNet.addLayer(name, "ReLU", layerParams); - layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); - } - else if (type == "Abs" || type == "Tanh" || type == "Sigmoid" || - type == "Relu" || type == "Elu" || type == "Exp" || - type == "Identity" || type == "Relu6") +void TFImporter::parseCustomLayer(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) +{ + // Importer does not know how to map this TensorFlow's operation onto OpenCV's 
layer. + // However we create a layer with the same type and rely that user defined a custom layer. + + const std::string& name = layer.name(); + const std::string& type = layer.op(); + const int num_inputs = layer.input_size(); + + // All the attributes are added to LayerParams. + google::protobuf::Map attr = layer.attr(); + for (google::protobuf::Map::const_iterator ai = attr.begin(); + ai != attr.end(); ++ai) + { + if (ai->second.value_case() == tensorflow::AttrValue::kS) // string + layerParams.set(ai->first, ai->second.s()); + if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64 + layerParams.set(ai->first, ai->second.i()); + if (ai->second.value_case() == tensorflow::AttrValue::kF) // float + layerParams.set(ai->first, ai->second.f()); + if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool + layerParams.set(ai->first, ai->second.b()); + } + + // All the Const input nodes are added to layer's blobs. + std::vector inputsNames; + for (int i = 0; i < num_inputs; ++i) + { + // Check if input is a Const node. + if (value_id.find(layer.input(i)) != value_id.end()) { - CV_CheckGT(num_inputs, 0, ""); - std::string dnnType = type; - if (type == "Abs") dnnType = "AbsVal"; - else if (type == "Tanh") dnnType = "TanH"; - else if (type == "Relu") dnnType = "ReLU"; - else if (type == "Relu6") dnnType = "ReLU6"; - else if (type == "Elu") dnnType = "ELU"; - - int id = dstNet.addLayer(name, dnnType, layerParams); - layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); + Mat blob = getTensorContent(getConstBlob(layer, value_id, i)); + layerParams.blobs.push_back(blob); } else - { - // Importer does not know how to map this TensorFlow's operation onto OpenCV's layer. - // However we create a layer with the same type and rely that user defined a custom layer. 
+ inputsNames.push_back(layer.input(i)); + } + int id = dstNet.addLayer(name, type, layerParams); + layer_id[name] = id; - // All the attributes are added to LayerParams. - google::protobuf::Map attr = layer.attr(); - for (google::protobuf::Map::const_iterator ai = attr.begin(); - ai != attr.end(); ++ai) - { - if (ai->second.value_case() == tensorflow::AttrValue::kS) // string - layerParams.set(ai->first, ai->second.s()); - if (ai->second.value_case() == tensorflow::AttrValue::kI) // int64 - layerParams.set(ai->first, ai->second.i()); - if (ai->second.value_case() == tensorflow::AttrValue::kF) // float - layerParams.set(ai->first, ai->second.f()); - if (ai->second.value_case() == tensorflow::AttrValue::kB) // bool - layerParams.set(ai->first, ai->second.b()); - } + for (int i = 0; i < inputsNames.size(); ++i) + { + connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i); + } +} - // All the Const input nodes are added to layer's blobs. - std::vector inputsNames; - for (int i = 0; i < num_inputs; ++i) - { - // Check if input is a Const node. 
- if (value_id.find(layer.input(i)) != value_id.end()) - { - Mat blob = getTensorContent(getConstBlob(layer, value_id, i)); - layerParams.blobs.push_back(blob); - } - else - inputsNames.push_back(layer.input(i)); - } - int id = dstNet.addLayer(name, type, layerParams); - layer_id[name] = id; +TFImporter::TFImporter(Net& net, const char *model, const char *config) + : dstNet(net), dispatch(buildDispatchMap()) +{ + if (model && model[0]) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from file: " << model); + ReadTFNetParamsFromBinaryFileOrDie(model, &netBin); + } + if (config && config[0]) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from file: " << config); + ReadTFNetParamsFromTextFileOrDie(config, &netTxt); + } - for (int i = 0; i < inputsNames.size(); ++i) - { - connect(layer_id, dstNet, parsePin(inputsNames[i]), id, i); - } + populateNet(); +} + +TFImporter::TFImporter( + Net& net, + const char *dataModel, size_t lenModel, + const char *dataConfig, size_t lenConfig +) + : dstNet(net), dispatch(buildDispatchMap()) +{ + if (dataModel != NULL && lenModel > 0) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow model from memory (" << lenModel << " bytes)"); + ReadTFNetParamsFromBinaryBufferOrDie(dataModel, lenModel, &netBin); + } + if (dataConfig != NULL && lenConfig > 0) + { + CV_LOG_DEBUG(NULL, "DNN/TF: processing TensorFlow config from memory (" << lenConfig << " bytes)"); + ReadTFNetParamsFromTextBufferOrDie(dataConfig, lenConfig, &netTxt); + } + populateNet(); +} + +void TFImporter::kernelFromTensor(const tensorflow::TensorProto &tensor, Mat &dstBlob) +{ + MatShape shape; + blobShapeFromTensor(tensor, shape); + int dims = (int)shape.size(); + + // TODO: other blob types + CV_Assert(tensor.dtype() == tensorflow::DT_FLOAT || + tensor.dtype() == tensorflow::DT_HALF); + CV_Assert(dims == 4 || dims == 5); + + int out_c, input_c, depth, height, width; + if (dims == 4) + { + // REORDER kernel HWIO to OIHW + swap(shape[0], 
shape[2]); // IWHO + swap(shape[1], shape[3]); // IOHW + swap(shape[0], shape[1]); // OIHW + depth = 1; height = shape[2]; width = shape[3]; + } + else + { + // REORDER kernel DHWIO to OIDHW + swap(shape[0], shape[4]); // OHWID + swap(shape[1], shape[3]); // OIWHD + swap(shape[2], shape[4]); // OIDHW + depth = shape[2]; height = shape[3]; width = shape[4]; + } + out_c = shape[0]; input_c = shape[1]; + + dstBlob.create(shape, CV_32F); + + Mat tensorContent = getTensorContent(tensor, /*no copy*/false); + int size = tensorContent.total(); + CV_Assert(size == (int)dstBlob.total()); + + float *dstData = dstBlob.ptr(); + const float *data = reinterpret_cast(tensorContent.data); + + int total = out_c * input_c * depth * height * width; + for (int i_oc = 0; i_oc < out_c; i_oc++) { + for (int i_ic = 0; i_ic < input_c; i_ic++) { + for (int i_d = 0; i_d < depth; i_d++) { + for (int i_h = 0; i_h < height; i_h++) { + for (int i_w = 0; i_w < width; i_w++) { + int dst_i = input_c * depth * height * width * i_oc + + depth * height * width * i_ic + height * width * i_d + width * i_h + i_w; + int src_i = out_c * input_c * width * height * i_d + + out_c * input_c * width * i_h + out_c * input_c * i_w + out_c * i_ic + i_oc; + CV_Assert(dst_i < total); + CV_Assert(src_i < total); + dstData[dst_i] = data[src_i]; + } + } + } + } + } +} + +void TFImporter::connect(const std::map& layers_name_id_map, Net& network, const Pin& outPin, + const int input_layer_id, const int input_blob_id) +{ + std::map::const_iterator it = layers_name_id_map.find(outPin.name); + if (it == layers_name_id_map.end()) + CV_Error(Error::StsError, "Input layer not found: " + outPin.name); + + std::vector::iterator inpNameIt = std::find(netInputsNames.begin(), netInputsNames.end(), outPin.name); + int blobIndex; + if (inpNameIt == netInputsNames.end()) + blobIndex = outPin.blobIndex; + else + blobIndex = inpNameIt - netInputsNames.begin(); + network.connect(it->second, blobIndex, input_layer_id, input_blob_id); +} + 
+void TFImporter::connectToAllBlobs(const std::map& layer_id, Net& network, const Pin& outPin, + const int input_layer_id, const int input_blobs_count) +{ + for (int input_blob_id = 0; input_blob_id < input_blobs_count; input_blob_id++) + connect(layer_id, network, outPin, input_layer_id, input_blob_id); +} + +const tensorflow::TensorProto& TFImporter::getConstBlob(const tensorflow::NodeDef &layer, std::map const_layers, + int input_blob_index, int* actual_inp_blob_idx) { + if (input_blob_index == -1) { + for(int i = 0; i < layer.input_size(); i++) { + Pin input = parsePin(layer.input(i)); + if (const_layers.find(input.name) != const_layers.end()) { + if (input_blob_index != -1) + CV_Error(Error::StsError, "More than one input is Const op"); + + input_blob_index = i; + } + } + } + + if (input_blob_index == -1) + CV_Error(Error::StsError, "Const input blob for weights not found"); + + Pin kernel_inp = parsePin(layer.input(input_blob_index)); + if (const_layers.find(kernel_inp.name) == const_layers.end()) + CV_Error(Error::StsError, "Input [" + layer.input(input_blob_index) + + "] for node [" + layer.name() + "] not found"); + if (kernel_inp.blobIndex != 0) + CV_Error(Error::StsError, "Unsupported kernel input"); + + if(actual_inp_blob_idx) { + *actual_inp_blob_idx = input_blob_index; + } + + int nodeIdx = const_layers.at(kernel_inp.name); + if (nodeIdx < netBin.node_size() && netBin.node(nodeIdx).name() == kernel_inp.name) + { + return netBin.node(nodeIdx).attr().at("value").tensor(); + } + else + { + CV_Assert_N(nodeIdx < netTxt.node_size(), + netTxt.node(nodeIdx).name() == kernel_inp.name); + return netTxt.node(nodeIdx).attr().at("value").tensor(); + } +} + +static void addConstNodes(tensorflow::GraphDef& net, std::map& const_layers, + std::set& layers_to_ignore) +{ + CV_LOG_DEBUG(NULL, "DNN/TF: addConstNodes(): handling " << net.node_size() << " nodes..."); + for (int li = 0; li < net.node_size(); li++) + { + const tensorflow::NodeDef &layer = net.node(li); + 
String name = layer.name(); + String type = layer.op(); + + //CV_LOG_DEBUG(NULL, "DNN/TF: layer_id=" << li << " - '" << name << "' @ " << type); + + try + { + if (type == "Dequantize") + { + // Example of Dequantize node: + // name: "conv2d_1/bias" + // op: "Dequantize" + // input: "conv2d_1/bias_quantized_const" (tensor of dtype DT_QUINT8) + // input: "conv2d_1/bias_quantized_min" + // input: "conv2d_1/bias_quantized_max" + // attr { key: "T" value { type: DT_QUINT8 } } (quantized type) + // attr { key: "mode" value { s: "MIN_FIRST" } } (quantization technique) + CV_CheckEQ(layer.input_size(), 3, "Dequantize: 3 inputs is supported only"); + for (int i = 0; i < 3; ++i) + CV_Assert(const_layers.find(layer.input(i)) != const_layers.end()); + CV_Assert(hasLayerAttr(layer, "mode") && + getLayerAttr(layer, "mode").s() == "MIN_FIRST"); + + int tensorId = const_layers[layer.input(0)]; + int minId = const_layers[layer.input(1)]; + int maxId = const_layers[layer.input(2)]; + + tensorflow::TensorProto* tensor = net.mutable_node(tensorId) + ->mutable_attr()->at("value") + .mutable_tensor(); + CV_CheckEQ((int)tensor->dtype(), (int)tensorflow::DT_QUINT8, ""); + + Mat qMin = getTensorContent(net.node(minId).attr().at("value").tensor()); + Mat qMax = getTensorContent(net.node(maxId).attr().at("value").tensor()); + CV_CheckEQ(qMin.total(), (size_t)1, ""); + CV_CheckTypeEQ(qMin.type(), CV_32FC1, ""); + CV_CheckEQ(qMax.total(), (size_t)1, ""); + CV_CheckTypeEQ(qMax.type(), CV_32FC1, ""); + + Mat content = getTensorContent(*tensor); + + float minVal = qMin.at(0); + float rangeScale = (qMax.at(0) - minVal) / 255; + CV_Assert(rangeScale >= 0); + content.convertTo(content, CV_32FC1, rangeScale, + rangeScale * cvRound(minVal / rangeScale)); + + tensor->set_dtype(tensorflow::DT_FLOAT); + tensor->set_tensor_content(content.data, content.total() * content.elemSize1()); + + net.mutable_node(tensorId)->set_name(name); + CV_Assert(const_layers.insert(std::make_pair(name, tensorId)).second); + 
layers_to_ignore.insert(name); + continue; + } + else if (type != "Const") + continue; // only Const parameters are supported + + if (layer.attr().find("value") != layer.attr().end()) + { + CV_Assert(const_layers.insert(std::make_pair(name, li)).second); + } + layers_to_ignore.insert(name); + } + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't handle node='" << name << "'. Exception: " << e.what()); + throw; + } + } + CV_LOG_DEBUG(NULL, "DNN/TF: layers_to_ignore.size() = " << layers_to_ignore.size()); +} + +// If all inputs of specific layer have the same data layout we can say that +// this layer's output has this data layout too. Returns DATA_LAYOUT_UNKNOWN otherwise. +DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) +{ + DataLayout layout = getDataLayout(layer); + if (layout != DATA_LAYOUT_UNKNOWN) + { + CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from attrs)"); + return layout; + } + + // Determine layout by layer's inputs + for (int i = 0, n = layer.input_size(); i < n; ++i) + { + std::map::const_iterator it = data_layouts.find(getNodeName(layer.input(i))); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second != layout && it->second != DATA_LAYOUT_UNKNOWN) + return DATA_LAYOUT_UNKNOWN; + } + else + layout = it->second; + } + } + + if (layout != DATA_LAYOUT_UNKNOWN) + { + CV_LOG_DEBUG(NULL, "DNN/TF: predictOutputDataLayout(" << layer.name() << " @ " << layer.op() << ") => " << (int)layout << " (from inputs)"); + return layout; + } + + // Determine layout by layer's consumers recursively. + std::map::const_iterator it = data_layouts.find(layer.name()); + CV_Assert(it != data_layouts.end()); + return it->second; +} + +void TFImporter::populateNet() +{ + CV_Assert(netBin.ByteSize() || netTxt.ByteSize()); + + CV_LOG_INFO(NULL, "DNN/TF: parsing model" + << (netBin.has_versions() ? 
cv::format(" produced by TF v%d (min_consumer=%d)", (int)netBin.versions().producer(), (int)netBin.versions().min_consumer()) : cv::String(" (N/A version info)")) + << ". Number of nodes = " << netBin.node_size() + ); + + if (netTxt.ByteSize()) + { + CV_LOG_INFO(NULL, "DNN/TF: parsing config" + << (netTxt.has_versions() ? cv::format(" produced by TF v%d (min_consumer=%d)", (int)netTxt.versions().producer(), (int)netTxt.versions().min_consumer()) : cv::String(" (N/A version info)")) + << ". Number of nodes = " << netTxt.node_size() + ); + + RemoveIdentityOps(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + RemoveIdentityOps(netTxt); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(config) => " << netTxt.node_size() << " nodes"); + + sortByExecutionOrder(netTxt); + CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(config) => " << netTxt.node_size() << " nodes"); + } + else + { + removePhaseSwitches(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: removePhaseSwitches(model) => " << netBin.node_size() << " nodes"); + + RemoveIdentityOps(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: RemoveIdentityOps(model) => " << netBin.node_size() << " nodes"); + + simplifySubgraphs(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: simplifySubgraphs(model) => " << netBin.node_size() << " nodes"); + sortByExecutionOrder(netBin); + CV_LOG_DEBUG(NULL, "DNN/TF: sortByExecutionOrder(model) => " << netBin.node_size() << " nodes"); + } + + tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? netTxt : netBin; + + int layersSize = net.node_size(); + + // Pre-fill data layouts where they are set explicitly. 
+ // Assuming that nodes are in topological order + for (int i = layersSize - 1; i >= 0; --i) + { + const tensorflow::NodeDef& layer = net.node(i); + std::string name = layer.name(); + + CV_LOG_DEBUG(NULL, "DNN/TF: node(" << i << " - '" << name << "') propagating layout..."); + + try + { + DataLayout layout = getDataLayout(layer); + std::map::iterator it = data_layouts.find(name); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second == DATA_LAYOUT_UNKNOWN) + it->second = layout; + else if (it->second != layout) + { + it->second = DATA_LAYOUT_UNKNOWN; + layout = DATA_LAYOUT_UNKNOWN; + } + } + else + layout = it->second; + } + else + data_layouts[name] = layout; + + // Specify input layers to have the same data layout. + for (int j = 0; j < layer.input_size(); ++j) + { + name = getNodeName(layer.input(j)); + it = data_layouts.find(name); + if (it != data_layouts.end()) + { + if (layout != DATA_LAYOUT_UNKNOWN) + { + if (it->second == DATA_LAYOUT_UNKNOWN) + it->second = layout; + else if (it->second != layout) + it->second = DATA_LAYOUT_UNKNOWN; + } + } + else + data_layouts[name] = layout; + } + } + catch (const std::exception& e) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't propagate layout for node='" << name << "'. 
Exception: " << e.what()); + throw; + } + } + + addConstNodes(netBin, value_id, layers_to_ignore); + addConstNodes(netTxt, value_id, layers_to_ignore); + + + for (int li = 0; li < layersSize; li++) + { + const tensorflow::NodeDef& layer = net.node(li); + + const std::string name = layer.name(); + const std::string type = layer.op(); + const int ninputs = layer.input_size(); + CV_LOG_DEBUG(NULL, "DNN/TF: (" << li << "/" << layersSize << ") Parse layer " << name << " @ " << type << " with " << ninputs << " inputs"); + + parseNode(layer); + } + + for (size_t i = 0; i < netInputsNames.size(); i++) + { + CV_LOG_DEBUG(NULL, "DNN/TF: Model input: " << i << " - '" << netInputsNames[i] << "'"); + CV_Assert(!netInputsNames[i].empty()); + } + dstNet.setInputsNames(netInputsNames); + CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed ====================="); +} + +void TFImporter::addPermuteLayer(const int* order, const std::string& permName, Pin& inpId) +{ + LayerParams permLP; + permLP.set("order", DictValue::arrayInt(order, 4)); + CV_Assert(layer_id.find(permName) == layer_id.end()); + int permId = dstNet.addLayer(permName, "Permute", permLP); + layer_id[permName] = permId; + connect(layer_id, dstNet, inpId, permId, 0); + inpId = Pin(permName); +} + +void TFImporter::parseNode(const tensorflow::NodeDef& layer) +{ + tensorflow::GraphDef& net = netTxt.ByteSize() != 0 ? 
netTxt : netBin; + + const std::string& name = layer.name(); + const std::string& type = layer.op(); + + try + { + LayerParams layerParams; + + if (layers_to_ignore.find(name) != layers_to_ignore.end()) + { + CV_LOG_DEBUG(NULL, "DNN/TF: ignored"); + return; + } + + DataLayout predictedLayout = predictOutputDataLayout(layer); + data_layouts[name] = predictedLayout; + + DispatchMap::const_iterator iter = dispatch.find(type); + if (iter != dispatch.end()) + { + ((*this).*(iter->second))(net, layer, layerParams); + } + else + { + parseCustomLayer(net, layer, layerParams); } } catch (const std::exception& e) From 3cf43753876e3c2c2676283d4fd10a6018b5a264 Mon Sep 17 00:00:00 2001 From: Tiago De Gaspari Date: Sat, 12 Jun 2021 17:28:54 -0300 Subject: [PATCH 004/128] Merge pull request #19842 from gasparitiago:3.4 Update rotatedRectangleIntersection function to calculate near to origin * Change type used in points function from RotatedRect In the function that sets the points of a RotatedRect, the types should be double in order to keep the precision when dealing with RotatedRects that are defined far from the origin. This commit solves the problem in some assertions from rotatedRectangleIntersection when dealing with rectangles far from origin. * added proper type casts * Update rotatedRectangleIntersection function to calculate near to origin This commit changes the rotatedRectangleIntersection function in order to calculate the intersection of two rectangles considering that they are shifted near the coordinates origin (0, 0). This commit solves the problem in some assertions from rotatedRectangleIntersection when dealing with rectangles far from origin. 
* Revert type changes in types.cpp and adequate code to c++98 * Revert unnecessary casts on types.cpp Co-authored-by: Vadim Pisarevsky --- modules/imgproc/src/intersection.cpp | 57 +++++++++++++++++----- modules/imgproc/test/test_intersection.cpp | 17 +++++++ 2 files changed, 62 insertions(+), 12 deletions(-) diff --git a/modules/imgproc/src/intersection.cpp b/modules/imgproc/src/intersection.cpp index 3f749896a42c..47d3f3f457b5 100644 --- a/modules/imgproc/src/intersection.cpp +++ b/modules/imgproc/src/intersection.cpp @@ -47,24 +47,16 @@ namespace cv { -int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& rect2, OutputArray intersectingRegion ) +static int _rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& rect2, std::vector &intersection ) { CV_INSTRUMENT_REGION(); // L2 metric const float samePointEps = std::max(1e-16f, 1e-6f * (float)std::max(rect1.size.area(), rect2.size.area())); - if (rect1.size.empty() || rect2.size.empty()) - { - intersectingRegion.release(); - return INTERSECT_NONE; - } - Point2f vec1[4], vec2[4]; Point2f pts1[4], pts2[4]; - std::vector intersection; intersection.reserve(24); - rect1.points(pts1); rect2.points(pts2); @@ -92,8 +84,6 @@ int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& r intersection[i] = pts1[i]; } - Mat(intersection).copyTo(intersectingRegion); - return INTERSECT_FULL; } } @@ -300,7 +290,50 @@ int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& r } intersection.resize(N); - Mat(intersection).copyTo(intersectingRegion); + + return ret; +} + +int rotatedRectangleIntersection( const RotatedRect& rect1, const RotatedRect& rect2, OutputArray intersectingRegion ) +{ + CV_INSTRUMENT_REGION(); + + if (rect1.size.empty() || rect2.size.empty()) + { + intersectingRegion.release(); + return INTERSECT_NONE; + } + + // Shift rectangles closer to origin (0, 0) to improve the calculation of the intersection region + // To do that, 
the average center of the rectangles is moved to the origin + const Point2f averageCenter = (rect1.center + rect2.center) / 2.0f; + + RotatedRect shiftedRect1(rect1); + RotatedRect shiftedRect2(rect2); + + // Move rectangles closer to origin + shiftedRect1.center -= averageCenter; + shiftedRect2.center -= averageCenter; + + std::vector intersection; intersection.reserve(24); + + const int ret = _rotatedRectangleIntersection(shiftedRect1, shiftedRect2, intersection); + + // If return is not None, the intersection Points are shifted back to the original position + // and copied to the intersectingRegion + if (ret != INTERSECT_NONE) + { + for (size_t i = 0; i < intersection.size(); ++i) + { + intersection[i] += averageCenter; + } + + Mat(intersection).copyTo(intersectingRegion); + } + else + { + intersectingRegion.release(); + } return ret; } diff --git a/modules/imgproc/test/test_intersection.cpp b/modules/imgproc/test/test_intersection.cpp index 7527dd9a22cc..c455c439fce1 100644 --- a/modules/imgproc/test/test_intersection.cpp +++ b/modules/imgproc/test/test_intersection.cpp @@ -391,4 +391,21 @@ TEST(Imgproc_RotatedRectangleIntersection, regression_18520) } } +TEST(Imgproc_RotatedRectangleIntersection, regression_19824) +{ + RotatedRect r1( + Point2f(246805.033f, 4002326.94f), + Size2f(26.40587f, 6.20026f), + -62.10156f); + RotatedRect r2( + Point2f(246805.122f, 4002326.59f), + Size2f(27.4821f, 8.5361f), + -56.33761f); + + std::vector intersections; + int interType = cv::rotatedRectangleIntersection(r1, r2, intersections); + EXPECT_EQ(INTERSECT_PARTIAL, interType); + EXPECT_LE(intersections.size(), (size_t)7); +} + }} // namespace From c8268e65fd6c3a8fba7a5a1f4b830222b9bc9d66 Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Fri, 11 Jun 2021 22:03:33 +0200 Subject: [PATCH 005/128] Fix potential NaN in cv::norm. There can be an int overflow. 
cv::norm( InputArray _src, int normType, InputArray _mask ) is fine, not cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ). --- modules/core/src/norm.cpp | 2 +- modules/core/test/test_arithm.cpp | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/core/src/norm.cpp b/modules/core/src/norm.cpp index 601082783e3c..fad641554dac 100644 --- a/modules/core/src/norm.cpp +++ b/modules/core/src/norm.cpp @@ -1171,7 +1171,7 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask // special case to handle "integer" overflow in accumulator const size_t esz = src1.elemSize(); const int total = (int)it.size; - const int intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15); + const int intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? (1 << 23) : (1 << 15))/cn; const int blockSize = std::min(total, intSumBlockSize); int isum = 0; int count = 0; diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index 2746feb2f245..74bf39fbc7a2 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -2117,6 +2117,15 @@ TEST(Core_Norm, IPP_regression_NORM_L1_16UC3_small) EXPECT_EQ((double)20*cn, cv::norm(a, b, NORM_L1, mask)); } +TEST(Core_Norm, NORM_L2_8UC4) +{ + // Tests there is no integer overflow in norm computation for multiple channels. + const int kSide = 100; + cv::Mat4b a(kSide, kSide, cv::Scalar(255, 255, 255, 255)); + cv::Mat4b b = cv::Mat4b::zeros(kSide, kSide); + const double kNorm = 2.*kSide*255.; + EXPECT_EQ(kNorm, cv::norm(a, b, NORM_L2)); +} TEST(Core_ConvertTo, regression_12121) { From 464441d8c3943126a5238b34ee34633dcfbf399e Mon Sep 17 00:00:00 2001 From: Ian Maquignaz <9im14@queensu.ca> Date: Thu, 10 Jun 2021 20:13:06 -0400 Subject: [PATCH 006/128] Added new unit test for initInverseRectificationMap() Function is validated. Included an update to DISABLED_Calib3d_InitInverseRectificationMap. 
Includes updates per input from @alalek and unit test regression # to reflect PR # --- modules/calib3d/test/test_undistort.cpp | 82 +++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 4 deletions(-) diff --git a/modules/calib3d/test/test_undistort.cpp b/modules/calib3d/test/test_undistort.cpp index 9663d36b7862..ea1a95207954 100644 --- a/modules/calib3d/test/test_undistort.cpp +++ b/modules/calib3d/test/test_undistort.cpp @@ -897,7 +897,7 @@ void CV_InitInverseRectificationMapTest::prepare_to_validation(int/* test_case_i Mat _new_cam0 = zero_new_cam ? test_mat[INPUT][0] : test_mat[INPUT][3]; Mat _mapx(img_size, CV_32F), _mapy(img_size, CV_32F); - double a[9], d[5]={0,0,0,0,0}, R[9]={1, 0, 0, 0, 1, 0, 0, 0, 1}, a1[9]; + double a[9], d[5]={0., 0., 0., 0. , 0.}, R[9]={1., 0., 0., 0., 1., 0., 0., 0., 1.}, a1[9]; Mat _a(3, 3, CV_64F, a), _a1(3, 3, CV_64F, a1); Mat _d(_d0.rows,_d0.cols, CV_MAKETYPE(CV_64F,_d0.channels()),d); Mat _R(3, 3, CV_64F, R); @@ -951,9 +951,9 @@ void CV_InitInverseRectificationMapTest::prepare_to_validation(int/* test_case_i // Undistort double x2 = x*x, y2 = y*y; double r2 = x2 + y2; - double cdist = 1./(1 + (d[0] + (d[1] + d[4]*r2)*r2)*r2); // (1 + (d[5] + (d[6] + d[7]*r2)*r2)*r2) == 1 as d[5-7]=0; - double x_ = x*cdist - d[2]*2*x*y + d[3]*(r2 + 2*x2); - double y_ = y*cdist - d[3]*2*x*y + d[2]*(r2 + 2*y2); + double cdist = 1./(1. + (d[0] + (d[1] + d[4]*r2)*r2)*r2); // (1. 
+ (d[5] + (d[6] + d[7]*r2)*r2)*r2) == 1 as d[5-7]=0; + double x_ = (x - (d[2]*2.*x*y + d[3]*(r2 + 2.*x2)))*cdist; + double y_ = (y - (d[3]*2.*x*y + d[2]*(r2 + 2.*y2)))*cdist; // Rectify double X = R[0]*x_ + R[1]*y_ + R[2]; @@ -1807,4 +1807,78 @@ TEST(Calib3d_initUndistortRectifyMap, regression_14467) EXPECT_LE(cvtest::norm(dst, mesh_uv, NORM_INF), 1e-3); } +TEST(Calib3d_initInverseRectificationMap, regression_20165) +{ + Size size_w_h(1280, 800); + Mat dst(size_w_h, CV_32FC2); // Reference for validation + Mat mapxy; // Output of initInverseRectificationMap() + + // Camera Matrix + double k[9]={ + 1.5393951443032472e+03, 0., 6.7491727003047140e+02, + 0., 1.5400748240626747e+03, 5.1226968329123963e+02, + 0., 0., 1. + }; + Mat _K(3, 3, CV_64F, k); + + // Distortion + // double d[5]={0,0,0,0,0}; // Zero Distortion + double d[5]={ // Non-zero distortion + -3.4134571357400023e-03, 2.9733267766101856e-03, // K1, K2 + 3.6653586399031184e-03, -3.1960714017365702e-03, // P1, P2 + 0. // K3 + }; + Mat _d(1, 5, CV_64F, d); + + // Rotation + //double R[9]={1., 0., 0., 0., 1., 0., 0., 0., 1.}; // Identity transform (none) + double R[9]={ // Random transform + 9.6625486010428052e-01, 1.6055789378989216e-02, 2.5708706103628531e-01, + -8.0300261706161002e-03, 9.9944797497929860e-01, -3.2237617614807819e-02, + -2.5746274294459848e-01, 2.9085338870243265e-02, 9.6585039165403186e-01 + }; + Mat _R(3, 3, CV_64F, R); + + // --- Validation --- // + initInverseRectificationMap(_K, _d, _R, _K, size_w_h, CV_32FC2, mapxy, noArray()); + + // Copy camera matrix + double fx, fy, cx, cy, ifx, ify, cxn, cyn; + fx = k[0]; fy = k[4]; cx = k[2]; cy = k[5]; + + // Copy new camera matrix + ifx = k[0]; ify = k[4]; cxn = k[2]; cyn = k[5]; + + // Distort Points + for( int v = 0; v < size_w_h.height; v++ ) + { + for( int u = 0; u < size_w_h.width; u++ ) + { + // Convert from image to pin-hole coordinates + double x = (u - cx)/fx; + double y = (v - cy)/fy; + + // Undistort + double x2 = x*x, y2 = y*y; + 
double r2 = x2 + y2; + double cdist = 1./(1. + (d[0] + (d[1] + d[4]*r2)*r2)*r2); // (1. + (d[5] + (d[6] + d[7]*r2)*r2)*r2) == 1 as d[5-7]=0; + double x_ = (x - (d[2]*2.*x*y + d[3]*(r2 + 2.*x2)))*cdist; + double y_ = (y - (d[3]*2.*x*y + d[2]*(r2 + 2.*y2)))*cdist; + + // Rectify + double X = R[0]*x_ + R[1]*y_ + R[2]; + double Y = R[3]*x_ + R[4]*y_ + R[5]; + double Z = R[6]*x_ + R[7]*y_ + R[8]; + double x__ = X/Z; + double y__ = Y/Z; + + // Convert from pin-hole to image coordinates + dst.at<Vec2f>(v, u) = Vec2f((float)(x__*ifx + cxn), (float)(y__*ify + cyn)); + } + } + + // Check Result + EXPECT_LE(cvtest::norm(dst, mapxy, NORM_INF), 2e-1); +} + }} // namespace From 9557b9f70f18785d44e3f21155f4f9f8859e7c0f Mon Sep 17 00:00:00 2001 From: Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> Date: Thu, 17 Jun 2021 10:14:48 -0700 Subject: [PATCH 007/128] Improve SIFT for arm64/Apple silicon - Reduce branch density by collapsing compares. - Fix windows build errors - Use OpenCV universal intrinsics - Use v_check_any and v_signmask as requested --- modules/features2d/src/sift.simd.hpp | 195 ++++++++++++++++++++++++--- 1 file changed, 174 insertions(+), 21 deletions(-) diff --git a/modules/features2d/src/sift.simd.hpp b/modules/features2d/src/sift.simd.hpp index b5033459b957..60129b1535b5 100644 --- a/modules/features2d/src/sift.simd.hpp +++ b/modules/features2d/src/sift.simd.hpp @@ -450,31 +450,184 @@ class findScaleSpaceExtremaT const sift_wt* currptr = img.ptr<sift_wt>(r); const sift_wt* prevptr = prev.ptr<sift_wt>(r); const sift_wt* nextptr = next.ptr<sift_wt>(r); + int c = SIFT_IMG_BORDER; - for( int c = SIFT_IMG_BORDER; c < cols-SIFT_IMG_BORDER; c++) +#if CV_SIMD && !(DoG_TYPE_SHORT) + const int vecsize = v_float32::nlanes; + for( ; c <= cols-SIFT_IMG_BORDER - vecsize; c += vecsize) + { + v_float32 val = vx_load(&currptr[c]); + v_float32 _00,_01,_02; + v_float32 _10, _12; + v_float32 _20,_21,_22; + + v_float32 vmin,vmax; + + + v_float32 cond = v_abs(val)
> vx_setall_f32((float)threshold); + if (!v_check_any(cond)) + { + continue; + } + + _00 = vx_load(&currptr[c-step-1]); _01 = vx_load(&currptr[c-step]); _02 = vx_load(&currptr[c-step+1]); + _10 = vx_load(&currptr[c -1]); _12 = vx_load(&currptr[c +1]); + _20 = vx_load(&currptr[c+step-1]); _21 = vx_load(&currptr[c+step]); _22 = vx_load(&currptr[c+step+1]); + + vmax = v_max(v_max(v_max(_00,_01),v_max(_02,_10)),v_max(v_max(_12,_20),v_max(_21,_22))); + vmin = v_min(v_min(v_min(_00,_01),v_min(_02,_10)),v_min(v_min(_12,_20),v_min(_21,_22))); + + v_float32 condp = cond & (val > vx_setall_f32(0)) & (val >= vmax); + v_float32 condm = cond & (val < vx_setall_f32(0)) & (val <= vmin); + + cond = condp | condm; + if (!v_check_any(cond)) + { + continue; + } + + _00 = vx_load(&prevptr[c-step-1]); _01 = vx_load(&prevptr[c-step]); _02 = vx_load(&prevptr[c-step+1]); + _10 = vx_load(&prevptr[c -1]); _12 = vx_load(&prevptr[c +1]); + _20 = vx_load(&prevptr[c+step-1]); _21 = vx_load(&prevptr[c+step]); _22 = vx_load(&prevptr[c+step+1]); + + vmax = v_max(v_max(v_max(_00,_01),v_max(_02,_10)),v_max(v_max(_12,_20),v_max(_21,_22))); + vmin = v_min(v_min(v_min(_00,_01),v_min(_02,_10)),v_min(v_min(_12,_20),v_min(_21,_22))); + + condp &= (val >= vmax); + condm &= (val <= vmin); + + cond = condp | condm; + if (!v_check_any(cond)) + { + continue; + } + + v_float32 _11p = vx_load(&prevptr[c]); + v_float32 _11n = vx_load(&nextptr[c]); + + v_float32 max_middle = v_max(_11n,_11p); + v_float32 min_middle = v_min(_11n,_11p); + + _00 = vx_load(&nextptr[c-step-1]); _01 = vx_load(&nextptr[c-step]); _02 = vx_load(&nextptr[c-step+1]); + _10 = vx_load(&nextptr[c -1]); _12 = vx_load(&nextptr[c +1]); + _20 = vx_load(&nextptr[c+step-1]); _21 = vx_load(&nextptr[c+step]); _22 = vx_load(&nextptr[c+step+1]); + + vmax = v_max(v_max(v_max(_00,_01),v_max(_02,_10)),v_max(v_max(_12,_20),v_max(_21,_22))); + vmin = v_min(v_min(v_min(_00,_01),v_min(_02,_10)),v_min(v_min(_12,_20),v_min(_21,_22))); + + condp &= (val >= 
v_max(vmax,max_middle)); + condm &= (val <= v_min(vmin,min_middle)); + + cond = condp | condm; + if (!v_check_any(cond)) + { + continue; + } + + int mask = v_signmask(cond); + for (int k = 0; k 0 ? j - 1 : n - 1; + int r2 = j < n-1 ? j + 1 : 0; + + if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr ) + { + float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]); + bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin; + kpt.angle = 360.f - (float)((360.f/n) * bin); + if(std::abs(kpt.angle - 360.f) < FLT_EPSILON) + kpt.angle = 0.f; + + kpts_.push_back(kpt); + } + } + } + } + +#endif //CV_SIMD && !(DoG_TYPE_SHORT) + + // vector loop remainder, better predictability and less branch density + for( ; c < cols-SIFT_IMG_BORDER; c++) { sift_wt val = currptr[c]; + if (std::abs(val) <= threshold) + continue; + + sift_wt _00,_01,_02; + sift_wt _10, _12; + sift_wt _20,_21,_22; + _00 = currptr[c-step-1]; _01 = currptr[c-step]; _02 = currptr[c-step+1]; + _10 = currptr[c -1]; _12 = currptr[c +1]; + _20 = currptr[c+step-1]; _21 = currptr[c+step]; _22 = currptr[c+step+1]; + + bool calculate = false; + if (val > 0) + { + sift_wt vmax = std::max(std::max(std::max(_00,_01),std::max(_02,_10)),std::max(std::max(_12,_20),std::max(_21,_22))); + if (val >= vmax) + { + _00 = prevptr[c-step-1]; _01 = prevptr[c-step]; _02 = prevptr[c-step+1]; + _10 = prevptr[c -1]; _12 = prevptr[c +1]; + _20 = prevptr[c+step-1]; _21 = prevptr[c+step]; _22 = prevptr[c+step+1]; + vmax = std::max(std::max(std::max(_00,_01),std::max(_02,_10)),std::max(std::max(_12,_20),std::max(_21,_22))); + if (val >= vmax) + { + _00 = nextptr[c-step-1]; _01 = nextptr[c-step]; _02 = nextptr[c-step+1]; + _10 = nextptr[c -1]; _12 = nextptr[c +1]; + _20 = nextptr[c+step-1]; _21 = nextptr[c+step]; _22 = nextptr[c+step+1]; + vmax = std::max(std::max(std::max(_00,_01),std::max(_02,_10)),std::max(std::max(_12,_20),std::max(_21,_22))); + if (val >= vmax) + { + sift_wt _11p = prevptr[c], _11n = nextptr[c]; 
+ calculate = (val >= std::max(_11p,_11n)); + } + } + } + + } else { // val cant be zero here (first abs took care of zero), must be negative + sift_wt vmin = std::min(std::min(std::min(_00,_01),std::min(_02,_10)),std::min(std::min(_12,_20),std::min(_21,_22))); + if (val <= vmin) + { + _00 = prevptr[c-step-1]; _01 = prevptr[c-step]; _02 = prevptr[c-step+1]; + _10 = prevptr[c -1]; _12 = prevptr[c +1]; + _20 = prevptr[c+step-1]; _21 = prevptr[c+step]; _22 = prevptr[c+step+1]; + vmin = std::min(std::min(std::min(_00,_01),std::min(_02,_10)),std::min(std::min(_12,_20),std::min(_21,_22))); + if (val <= vmin) + { + _00 = nextptr[c-step-1]; _01 = nextptr[c-step]; _02 = nextptr[c-step+1]; + _10 = nextptr[c -1]; _12 = nextptr[c +1]; + _20 = nextptr[c+step-1]; _21 = nextptr[c+step]; _22 = nextptr[c+step+1]; + vmin = std::min(std::min(std::min(_00,_01),std::min(_02,_10)),std::min(std::min(_12,_20),std::min(_21,_22))); + if (val <= vmin) + { + sift_wt _11p = prevptr[c], _11n = nextptr[c]; + calculate = (val <= std::min(_11p,_11n)); + } + } + } + } - // find local extrema with pixel accuracy - if( std::abs(val) > threshold && - ((val > 0 && val >= currptr[c-1] && val >= currptr[c+1] && - val >= currptr[c-step-1] && val >= currptr[c-step] && val >= currptr[c-step+1] && - val >= currptr[c+step-1] && val >= currptr[c+step] && val >= currptr[c+step+1] && - val >= nextptr[c] && val >= nextptr[c-1] && val >= nextptr[c+1] && - val >= nextptr[c-step-1] && val >= nextptr[c-step] && val >= nextptr[c-step+1] && - val >= nextptr[c+step-1] && val >= nextptr[c+step] && val >= nextptr[c+step+1] && - val >= prevptr[c] && val >= prevptr[c-1] && val >= prevptr[c+1] && - val >= prevptr[c-step-1] && val >= prevptr[c-step] && val >= prevptr[c-step+1] && - val >= prevptr[c+step-1] && val >= prevptr[c+step] && val >= prevptr[c+step+1]) || - (val < 0 && val <= currptr[c-1] && val <= currptr[c+1] && - val <= currptr[c-step-1] && val <= currptr[c-step] && val <= currptr[c-step+1] && - val <= 
currptr[c+step-1] && val <= currptr[c+step] && val <= currptr[c+step+1] && - val <= nextptr[c] && val <= nextptr[c-1] && val <= nextptr[c+1] && - val <= nextptr[c-step-1] && val <= nextptr[c-step] && val <= nextptr[c-step+1] && - val <= nextptr[c+step-1] && val <= nextptr[c+step] && val <= nextptr[c+step+1] && - val <= prevptr[c] && val <= prevptr[c-1] && val <= prevptr[c+1] && - val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] && - val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1]))) + if (calculate) { CV_TRACE_REGION("pixel_candidate"); From 3a15a3821ac147e2715c2a9364bc02c6d789177d Mon Sep 17 00:00:00 2001 From: Zhang Yin Date: Thu, 10 Jun 2021 08:08:20 +0000 Subject: [PATCH 008/128] Update RISC-V back-end to RVV 0.10 --- cmake/checks/cpu_rvv.cpp | 2 +- .../include/opencv2/core/hal/intrin_rvv.hpp | 1292 ++++++++--------- platforms/linux/riscv64-gcc.toolchain.cmake | 4 +- 3 files changed, 582 insertions(+), 716 deletions(-) diff --git a/cmake/checks/cpu_rvv.cpp b/cmake/checks/cpu_rvv.cpp index a3eab2abc44e..684b2ecbebfa 100644 --- a/cmake/checks/cpu_rvv.cpp +++ b/cmake/checks/cpu_rvv.cpp @@ -9,7 +9,7 @@ int test() { const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - vfloat32m1_t val = vle32_v_f32m1((const float*)(src)); + vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4); return (int)vfmv_f_s_f32m1_f32(val); } #else diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index cb2140df585b..4a3455b07385 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -151,12 +151,14 @@ struct vint8mf4_t }; #define OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(_Tpvec, _Tp, suffix, width, n) \ -inline _Tpvec vle##width##_v_##suffix##mf2(const _Tp* ptr) \ +inline _Tpvec vle##width##_v_##suffix##mf2(const _Tp* ptr, size_t vl) \ { \ + CV_UNUSED(vl); \ return _Tpvec(ptr); \ } \ -inline 
void vse##width##_v_##suffix##mf2(_Tp* ptr, _Tpvec v) \ +inline void vse##width##_v_##suffix##mf2(_Tp* ptr, _Tpvec v, size_t vl) \ { \ + CV_UNUSED(vl); \ for (int i = 0; i < n; ++i) \ { \ ptr[i] = v.val[i]; \ @@ -176,15 +178,14 @@ OPENCV_HAL_IMPL_RVV_NATIVE_LOADSTORE_MF2(vfloat64mf2_t, float64_t, f64, 64, 1) #define OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(_Tpwvec, _Tpvec, _wTp, wcvt, suffix, width, n) \ -inline _Tpwvec wcvt (_Tpvec v) \ +inline _Tpwvec wcvt (_Tpvec v, size_t vl) \ { \ _wTp tmp[n]; \ for (int i = 0; i < n; ++i) \ { \ tmp[i] = (_wTp)v.val[i]; \ } \ - vsetvlmax_e##width##m1(); \ - return vle##width##_v_##suffix##m1(tmp); \ + return vle##width##_v_##suffix##m1(tmp, vl); \ } OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint16m1_t, vuint8mf2_t, ushort, vwcvtu_x_x_v_u16m1, u16, 16, 8) @@ -194,32 +195,34 @@ OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint32m1_t, vint16mf2_t, int, vwcvt_x_x_v_i32m1, OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vuint64m1_t, vuint32mf2_t, uint64, vwcvtu_x_x_v_u64m1, u64, 64, 2) OPENCV_HAL_IMPL_RVV_NATIVE_WCVT(vint64m1_t, vint32mf2_t, int64, vwcvt_x_x_v_i64m1, i64, 64, 2) -inline vuint8mf4_t vle8_v_u8mf4 (const uint8_t *base) +inline vuint8mf4_t vle8_v_u8mf4 (const uint8_t *base, size_t vl) { + CV_UNUSED(vl); return vuint8mf4_t(base); } -inline vint8mf4_t vle8_v_i8mf4 (const int8_t *base) +inline vint8mf4_t vle8_v_i8mf4 (const int8_t *base, size_t vl) { + CV_UNUSED(vl); return vint8mf4_t(base); } -inline vuint16mf2_t vwcvtu_x_x_v_u16mf2 (vuint8mf4_t src) +inline vuint16mf2_t vwcvtu_x_x_v_u16mf2 (vuint8mf4_t src, size_t vl) { ushort tmp[4]; for (int i = 0; i < 4; ++i) { tmp[i] = (ushort)src.val[i]; } - return vle16_v_u16mf2(tmp); + return vle16_v_u16mf2(tmp, vl); } -inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src) +inline vint16mf2_t vwcvt_x_x_v_i16mf2 (vint8mf4_t src, size_t vl) { short tmp[4]; for (int i = 0; i < 4; ++i) { tmp[i] = (short)src.val[i]; } - return vle16_v_i16mf2(tmp); + return vle16_v_i16mf2(tmp, vl); } //////////// Types //////////// @@ -232,8 
+235,7 @@ struct v_uint8x16 v_uint8x16() {} explicit v_uint8x16(vuint8m1_t v) { - vsetvlmax_e8m1(); - vse8_v_u8m1(val, v); + vse8_v_u8m1(val, v, nlanes); } v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) @@ -246,8 +248,7 @@ struct v_uint8x16 } operator vuint8m1_t() const { - vsetvlmax_e8m1(); - return vle8_v_u8m1(val); + return vle8_v_u8m1(val, nlanes); } uchar get0() const { @@ -265,8 +266,7 @@ struct v_int8x16 v_int8x16() {} explicit v_int8x16(vint8m1_t v) { - vsetvlmax_e8m1(); - vse8_v_i8m1(val, v); + vse8_v_i8m1(val, v, nlanes); } v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) @@ -279,8 +279,7 @@ struct v_int8x16 } operator vint8m1_t() const { - vsetvlmax_e8m1(); - return vle8_v_i8m1(val); + return vle8_v_i8m1(val, nlanes); } schar get0() const { @@ -298,8 +297,7 @@ struct v_uint16x8 v_uint16x8() {} explicit v_uint16x8(vuint16m1_t v) { - vsetvlmax_e16m1(); - vse16_v_u16m1(val, v); + vse16_v_u16m1(val, v, nlanes); } v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) { @@ -311,8 +309,7 @@ struct v_uint16x8 } operator vuint16m1_t() const { - vsetvlmax_e16m1(); - return vle16_v_u16m1(val); + return vle16_v_u16m1(val, nlanes); } ushort get0() const { @@ -330,8 +327,7 @@ struct v_int16x8 v_int16x8() {} explicit v_int16x8(vint16m1_t v) { - vsetvlmax_e16m1(); - vse16_v_i16m1(val, v); + vse16_v_i16m1(val, v, nlanes); } v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) { @@ -343,8 +339,7 @@ struct v_int16x8 } operator vint16m1_t() const { - vsetvlmax_e16m1(); - return vle16_v_i16m1(val); + return vle16_v_i16m1(val, nlanes); } short get0() const { @@ -362,8 +357,7 @@ struct v_uint32x4 v_uint32x4() {} explicit v_uint32x4(vuint32m1_t 
v) { - vsetvlmax_e32m1(); - vse32_v_u32m1(val, v); + vse32_v_u32m1(val, v, nlanes); } v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) { @@ -375,8 +369,7 @@ struct v_uint32x4 } operator vuint32m1_t() const { - vsetvlmax_e32m1(); - return vle32_v_u32m1(val); + return vle32_v_u32m1(val, nlanes); } unsigned get0() const { @@ -394,8 +387,7 @@ struct v_int32x4 v_int32x4() {} explicit v_int32x4(vint32m1_t v) { - vsetvlmax_e32m1(); - vse32_v_i32m1(val, v); + vse32_v_i32m1(val, v, nlanes); } v_int32x4(int v0, int v1, int v2, int v3) { @@ -407,8 +399,7 @@ struct v_int32x4 } operator vint32m1_t() const { - vsetvlmax_e32m1(); - return vle32_v_i32m1(val); + return vle32_v_i32m1(val, nlanes); } int get0() const { @@ -425,8 +416,7 @@ struct v_float32x4 v_float32x4() {} explicit v_float32x4(vfloat32m1_t v) { - vsetvlmax_e32m1(); - vse32_v_f32m1(val, v); + vse32_v_f32m1(val, v, nlanes); } v_float32x4(float v0, float v1, float v2, float v3) { @@ -438,8 +428,7 @@ struct v_float32x4 } operator vfloat32m1_t() const { - vsetvlmax_e32m1(); - return vle32_v_f32m1(val); + return vle32_v_f32m1(val, nlanes); } float get0() const { @@ -456,8 +445,7 @@ struct v_uint64x2 v_uint64x2() {} explicit v_uint64x2(vuint64m1_t v) { - vsetvlmax_e64m1(); - vse64_v_u64m1(val, v); + vse64_v_u64m1(val, v, nlanes); } v_uint64x2(uint64 v0, uint64 v1) { @@ -469,8 +457,7 @@ struct v_uint64x2 } operator vuint64m1_t() const { - vsetvlmax_e64m1(); - return vle64_v_u64m1(val); + return vle64_v_u64m1(val, nlanes); } uint64 get0() const { @@ -488,8 +475,7 @@ struct v_int64x2 v_int64x2() {} explicit v_int64x2(vint64m1_t v) { - vsetvlmax_e64m1(); - vse64_v_i64m1(val, v); + vse64_v_i64m1(val, v, nlanes); } v_int64x2(int64 v0, int64 v1) { @@ -501,8 +487,7 @@ struct v_int64x2 } operator vint64m1_t() const { - vsetvlmax_e64m1(); - return vle64_v_i64m1(val); + return vle64_v_i64m1(val, nlanes); } int64 get0() const { @@ -521,8 +506,7 @@ struct v_float64x2 v_float64x2() {} explicit v_float64x2(vfloat64m1_t v) { 
- vsetvlmax_e64m1(); - vse64_v_f64m1(val, v); + vse64_v_f64m1(val, v, nlanes); } v_float64x2(double v0, double v1) { @@ -534,8 +518,7 @@ struct v_float64x2 } operator vfloat64m1_t() const { - vsetvlmax_e64m1(); - return vle64_v_f64m1(val); + return vle64_v_f64m1(val, nlanes); } double get0() const { @@ -549,42 +532,38 @@ struct v_float64x2 //////////// Initial //////////// -#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, width, suffix1, suffix2) \ +#define OPENCV_HAL_IMPL_RVV_INIT_INTEGER(_Tpvec, _Tp, suffix1, suffix2, vl) \ inline v_##_Tpvec v_setzero_##suffix1() \ { \ - vsetvlmax_e##width##m1(); \ - return v_##_Tpvec(vzero_##suffix2##m1()); \ + return v_##_Tpvec(vmv_v_x_##suffix2##m1(0, vl)); \ } \ inline v_##_Tpvec v_setall_##suffix1(_Tp v) \ { \ - vsetvlmax_e##width##m1(); \ - return v_##_Tpvec(vmv_v_x_##suffix2##m1(v)); \ + return v_##_Tpvec(vmv_v_x_##suffix2##m1(v, vl)); \ } -OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8x16, uchar, 8, u8, u8) -OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8x16, schar, 8, s8, i8) -OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16x8, ushort, 16, u16, u16) -OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16x8, short, 16, s16, i16) -OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32x4, unsigned, 32, u32, u32) -OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32x4, int, 32, s32, i32) -OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64x2, uint64, 64, u64, u64) -OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64x2, int64, 64, s64, i64) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint8x16, uchar, u8, u8, 16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int8x16, schar, s8, i8, 16) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint16x8, ushort, u16, u16, 8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int16x8, short, s16, i16, 8) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint32x4, unsigned, u32, u32, 4) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int32x4, int, s32, i32, 4) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(uint64x2, uint64, u64, u64, 2) +OPENCV_HAL_IMPL_RVV_INIT_INTEGER(int64x2, int64, s64, i64, 2) -#define OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, width, suffix) \ +#define 
OPENCV_HAL_IMPL_RVV_INIT_FP(_Tpv, _Tp, suffix, vl) \ inline v_##_Tpv v_setzero_##suffix() \ { \ - vsetvlmax_e##width##m1(); \ - return v_##_Tpv(vzero_##suffix##m1()); \ + return v_##_Tpv(vfmv_v_f_##suffix##m1(0, vl)); \ } \ inline v_##_Tpv v_setall_##suffix(_Tp v) \ { \ - vsetvlmax_e##width##m1(); \ - return v_##_Tpv(vfmv_v_f_##suffix##m1(v)); \ + return v_##_Tpv(vfmv_v_f_##suffix##m1(v, vl)); \ } -OPENCV_HAL_IMPL_RVV_INIT_FP(float32x4, float, 32, f32) +OPENCV_HAL_IMPL_RVV_INIT_FP(float32x4, float, f32, 4) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_INIT_FP(float64x2, double, 64, f64) +OPENCV_HAL_IMPL_RVV_INIT_FP(float64x2, double, f64, 2) #endif //////////// Reinterpret //////////// @@ -605,167 +584,155 @@ OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(int64x2, s64) OPENCV_HAL_IMPL_RVV_SELF_REINTERPRET(float64x2, f64) #endif -#define OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(_Tpvec1, _Tpvec2, _nTpvec1, _nTpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2) \ +#define OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(_Tpvec1, _Tpvec2, _nTpvec1, _nTpvec2, suffix1, suffix2, nsuffix1, nsuffix2, width1, width2, vl1, vl2) \ inline v_##_Tpvec1 v_reinterpret_as_##suffix1(const v_##_Tpvec2& v) \ { \ - vsetvlmax_e##width2##m1(); \ - return v_##_Tpvec1((_nTpvec1)vle##width2##_v_##nsuffix2##m1(v.val)); \ + return v_##_Tpvec1((_nTpvec1)vle##width2##_v_##nsuffix2##m1(v.val, vl2)); \ } \ inline v_##_Tpvec2 v_reinterpret_as_##suffix2(const v_##_Tpvec1& v) \ { \ - vsetvlmax_e##width1##m1(); \ - return v_##_Tpvec2((_nTpvec2)vle##width1##_v_##nsuffix1##m1(v.val)); \ -} - -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int8x16, vuint8m1_t, vint8m1_t, u8, s8, u8, i8, 8, 8) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int16x8, vuint16m1_t, vint16m1_t, u16, s16, u16, i16, 16, 16) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int32x4, vuint32m1_t, vint32m1_t, u32, s32, u32, i32, 32, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float32x4, vuint32m1_t, vfloat32m1_t, u32, f32, 
u32, f32, 32, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float32x4, vint32m1_t, vfloat32m1_t, s32, f32, i32, f32, 32, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int64x2, vuint64m1_t, vint64m1_t, u64, s64, u64, i64, 64, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint16x8, vuint8m1_t, vuint16m1_t, u8, u16, u8, u16, 8, 16) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint32x4, vuint8m1_t, vuint32m1_t, u8, u32, u8, u32, 8, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint64x2, vuint8m1_t, vuint64m1_t, u8, u64, u8, u64, 8, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint32x4, vuint16m1_t, vuint32m1_t, u16, u32, u16, u32, 16, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint64x2, vuint16m1_t, vuint64m1_t, u16, u64, u16, u64, 16, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, uint64x2, vuint32m1_t, vuint64m1_t, u32, u64, u32, u64, 32, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int16x8, vint8m1_t, vint16m1_t, s8, s16, i8, i16, 8, 16) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int32x4, vint8m1_t, vint32m1_t, s8, s32, i8, i32, 8, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int64x2, vint8m1_t, vint64m1_t, s8, s64, i8, i64, 8, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int32x4, vint16m1_t, vint32m1_t, s16, s32, i16, i32, 16, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int64x2, vint16m1_t, vint64m1_t, s16, s64, i16, i64, 16, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, int64x2, vint32m1_t, vint64m1_t, s32, s64, i32, i64, 32, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int16x8, vuint8m1_t, vint16m1_t, u8, s16, u8, i16, 8, 16) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int32x4, vuint8m1_t, vint32m1_t, u8, s32, u8, i32, 8, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int64x2, vuint8m1_t, vint64m1_t, u8, s64, u8, i64, 8, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int8x16, vuint16m1_t, 
vint8m1_t, u16, s8, u16, i8, 16, 8) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int32x4, vuint16m1_t, vint32m1_t, u16, s32, u16, i32, 16, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int64x2, vuint16m1_t, vint64m1_t, u16, s64, u16, i64, 16, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int8x16, vuint32m1_t, vint8m1_t, u32, s8, u32, i8, 32, 8) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int16x8, vuint32m1_t, vint16m1_t, u32, s16, u32, i16, 32, 16) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int64x2, vuint32m1_t, vint64m1_t, u32, s64, u32, i64, 32, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int8x16, vuint64m1_t, vint8m1_t, u64, s8, u64, i8, 64, 8) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int16x8, vuint64m1_t, vint16m1_t, u64, s16, u64, i16, 64, 16) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int32x4, vuint64m1_t, vint32m1_t, u64, s32, u64, i32, 64, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float32x4, vuint8m1_t, vfloat32m1_t, u8, f32, u8, f32, 8, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float32x4, vuint16m1_t, vfloat32m1_t, u16, f32, u16, f32, 16, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float32x4, vuint64m1_t, vfloat32m1_t, u64, f32, u64, f32, 64, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float32x4, vint8m1_t, vfloat32m1_t, s8, f32, i8, f32, 8, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float32x4, vint16m1_t, vfloat32m1_t, s16, f32, i16, f32, 16, 32) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float32x4, vint64m1_t, vfloat32m1_t, s64, f32, i64, f32, 64, 32) + return v_##_Tpvec2((_nTpvec2)vle##width1##_v_##nsuffix1##m1(v.val, vl1)); \ +} + +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int8x16, vuint8m1_t, vint8m1_t, u8, s8, u8, i8, 8, 8, 16, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int16x8, vuint16m1_t, vint16m1_t, u16, s16, u16, i16, 16, 16, 8, 8) 
+OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int32x4, vuint32m1_t, vint32m1_t, u32, s32, u32, i32, 32, 32, 4, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float32x4, vuint32m1_t, vfloat32m1_t, u32, f32, u32, f32, 32, 32, 4, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float32x4, vint32m1_t, vfloat32m1_t, s32, f32, i32, f32, 32, 32, 4, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int64x2, vuint64m1_t, vint64m1_t, u64, s64, u64, i64, 64, 64, 2, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint16x8, vuint8m1_t, vuint16m1_t, u8, u16, u8, u16, 8, 16, 16, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint32x4, vuint8m1_t, vuint32m1_t, u8, u32, u8, u32, 8, 32, 16, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, uint64x2, vuint8m1_t, vuint64m1_t, u8, u64, u8, u64, 8, 64, 16, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint32x4, vuint16m1_t, vuint32m1_t, u16, u32, u16, u32, 16, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, uint64x2, vuint16m1_t, vuint64m1_t, u16, u64, u16, u64, 16, 64, 8, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, uint64x2, vuint32m1_t, vuint64m1_t, u32, u64, u32, u64, 32, 64, 4, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int16x8, vint8m1_t, vint16m1_t, s8, s16, i8, i16, 8, 16, 16, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int32x4, vint8m1_t, vint32m1_t, s8, s32, i8, i32, 8, 32, 16, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, int64x2, vint8m1_t, vint64m1_t, s8, s64, i8, i64, 8, 64, 16, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int32x4, vint16m1_t, vint32m1_t, s16, s32, i16, i32, 16, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, int64x2, vint16m1_t, vint64m1_t, s16, s64, i16, i64, 16, 64, 8, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, int64x2, vint32m1_t, vint64m1_t, s32, s64, i32, i64, 32, 64, 4, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int16x8, vuint8m1_t, vint16m1_t, u8, 
s16, u8, i16, 8, 16, 16, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int32x4, vuint8m1_t, vint32m1_t, u8, s32, u8, i32, 8, 32, 16, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, int64x2, vuint8m1_t, vint64m1_t, u8, s64, u8, i64, 8, 64, 16, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int8x16, vuint16m1_t, vint8m1_t, u16, s8, u16, i8, 16, 8, 8, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int32x4, vuint16m1_t, vint32m1_t, u16, s32, u16, i32, 16, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, int64x2, vuint16m1_t, vint64m1_t, u16, s64, u16, i64, 16, 64, 8, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int8x16, vuint32m1_t, vint8m1_t, u32, s8, u32, i8, 32, 8, 4, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int16x8, vuint32m1_t, vint16m1_t, u32, s16, u32, i16, 32, 16, 4, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, int64x2, vuint32m1_t, vint64m1_t, u32, s64, u32, i64, 32, 64, 4, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int8x16, vuint64m1_t, vint8m1_t, u64, s8, u64, i8, 64, 8, 2, 16) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int16x8, vuint64m1_t, vint16m1_t, u64, s16, u64, i16, 64, 16, 2, 8) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, int32x4, vuint64m1_t, vint32m1_t, u64, s32, u64, i32, 64, 32, 2, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float32x4, vuint8m1_t, vfloat32m1_t, u8, f32, u8, f32, 8, 32, 16, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float32x4, vuint16m1_t, vfloat32m1_t, u16, f32, u16, f32, 16, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float32x4, vuint64m1_t, vfloat32m1_t, u64, f32, u64, f32, 64, 32, 2, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float32x4, vint8m1_t, vfloat32m1_t, s8, f32, i8, f32, 8, 32, 16, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float32x4, vint16m1_t, vfloat32m1_t, s16, f32, i16, f32, 16, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, 
float32x4, vint64m1_t, vfloat32m1_t, s64, f32, i64, f32, 64, 32, 2, 4) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float64x2, vuint64m1_t, vfloat64m1_t, u64, f64, u64, f64, 64, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float64x2, vint64m1_t, vfloat64m1_t, s64, f64, i64, f64, 64, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float64x2, vuint8m1_t, vfloat64m1_t, u8, f64, u8, f64, 8, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float64x2, vuint16m1_t, vfloat64m1_t, u16, f64, u16, f64, 16, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float64x2, vuint32m1_t, vfloat64m1_t, u32, f64, u32, f64, 32, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float64x2, vint8m1_t, vfloat64m1_t, s8, f64, i8, f64, 8, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float64x2, vint16m1_t, vfloat64m1_t, s16, f64, i16, f64, 16, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float64x2, vint32m1_t, vfloat64m1_t, s32, f64, i32, f64, 32, 64) -OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(float32x4, float64x2, vfloat32m1_t, vfloat64m1_t, f32, f64, f32, f64, 32, 64) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint64x2, float64x2, vuint64m1_t, vfloat64m1_t, u64, f64, u64, f64, 64, 64, 2, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int64x2, float64x2, vint64m1_t, vfloat64m1_t, s64, f64, i64, f64, 64, 64, 2, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint8x16, float64x2, vuint8m1_t, vfloat64m1_t, u8, f64, u8, f64, 8, 64, 16, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint16x8, float64x2, vuint16m1_t, vfloat64m1_t, u16, f64, u16, f64, 16, 64, 8, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(uint32x4, float64x2, vuint32m1_t, vfloat64m1_t, u32, f64, u32, f64, 32, 64, 4, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int8x16, float64x2, vint8m1_t, vfloat64m1_t, s8, f64, i8, f64, 8, 64, 16, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int16x8, float64x2, vint16m1_t, vfloat64m1_t, s16, f64, i16, f64, 16, 64, 8, 2) 
+OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(int32x4, float64x2, vint32m1_t, vfloat64m1_t, s32, f64, i32, f64, 32, 64, 4, 2) +OPENCV_HAL_IMPL_RVV_ONE_TIME_REINTERPRET(float32x4, float64x2, vfloat32m1_t, vfloat64m1_t, f32, f64, f32, f64, 32, 64, 4, 2) #endif ////////////// Extract ////////////// -#define OPENCV_HAL_IMPL_RVV_EXTRACT(_Tpvec, _Tp, suffix, width, vmv) \ +#define OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(_Tpvec, _Tp, suffix, vmv, vl) \ template <int s> \ inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, s), b, _Tpvec::nlanes - s)); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), a, s, vl), b, _Tpvec::nlanes - s, vl)); \ } \ template<int i> inline _Tp v_extract_n(_Tpvec v) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tp(vmv(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), v, i))); \ + return _Tp(vmv(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), v, i, vl))); \ } -OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint8x16, uchar, u8, 8, vmv_x_s_u8m1_u8) -OPENCV_HAL_IMPL_RVV_EXTRACT(v_int8x16, schar, i8, 8, vmv_x_s_i8m1_i8) -OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint16x8, ushort, u16, 16, vmv_x_s_u16m1_u16) -OPENCV_HAL_IMPL_RVV_EXTRACT(v_int16x8, short, i16, 16, vmv_x_s_i16m1_i16) -OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint32x4, uint, u32, 32, vmv_x_s_u32m1_u32) -OPENCV_HAL_IMPL_RVV_EXTRACT(v_int32x4, int, i32, 32, vmv_x_s_i32m1_i32) -OPENCV_HAL_IMPL_RVV_EXTRACT(v_uint64x2, uint64, u64, 64, vmv_x_s_u64m1_u64) -OPENCV_HAL_IMPL_RVV_EXTRACT(v_int64x2, int64, i64, 64, vmv_x_s_i64m1_i64) -OPENCV_HAL_IMPL_RVV_EXTRACT(v_float32x4, float, f32, 32, vfmv_f_s_f32m1_f32) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint8x16, uchar, u8, vmv_x_s_u8m1_u8, 16) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int8x16, schar, i8, vmv_x_s_i8m1_i8, 16) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint16x8, ushort, u16, vmv_x_s_u16m1_u16, 8) 
+OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int16x8, short, i16, vmv_x_s_i16m1_i16, 8) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint32x4, uint, u32, vmv_x_s_u32m1_u32, 4) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int32x4, int, i32, vmv_x_s_i32m1_i32, 4) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_uint64x2, uint64, u64, vmv_x_s_u64m1_u64, 2) +OPENCV_HAL_IMPL_RVV_EXTRACT_INTEGER(v_int64x2, int64, i64, vmv_x_s_i64m1_i64, 2) + +#define OPENCV_HAL_IMPL_RVV_EXTRACT_FP(_Tpvec, _Tp, suffix, vmv, vl) \ +template <int s> \ +inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), a, s, vl), b, _Tpvec::nlanes - s, vl)); \ +} \ +template<int i> inline _Tp v_extract_n(_Tpvec v) \ +{ \ + return _Tp(vmv(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), v, i, vl))); \ +} + +OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float32x4, float, f32, vfmv_f_s_f32m1_f32, 4) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_EXTRACT(v_float64x2, double, f64, 64, vfmv_f_s_f64m1_f64) +OPENCV_HAL_IMPL_RVV_EXTRACT_FP(v_float64x2, double, f64, vfmv_f_s_f64m1_f64, 2) #endif ////////////// Load/Store ////////////// -#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, width, suffix) \ +#define OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(_Tpvec, _nTpvec, _Tp, hvl, vl, width, suffix, vmv) \ inline _Tpvec v_load(const _Tp* ptr) \ { \ - vsetvlmax_e8m1(); \ - return _Tpvec((_nTpvec)vle8_v_u8m1((uchar*)ptr)); \ + return _Tpvec((_nTpvec)vle8_v_u8m1((uchar*)ptr, 16)); \ } \ inline _Tpvec v_load_aligned(const _Tp* ptr) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vle##width##_v_##suffix##m1(ptr)); \ + return _Tpvec(vle##width##_v_##suffix##m1(ptr, vl)); \ } \ inline _Tpvec v_load_low(const _Tp* ptr) \ { \ - vsetvl_e##width##m1(hvl); \ - _Tpvec res = _Tpvec(vle##width##_v_##suffix##m1(ptr)); \ - vsetvlmax_e##width##m1(); \ + _Tpvec res = _Tpvec(vle##width##_v_##suffix##m1(ptr, hvl)); \ return res; \ } \ inline void v_store(_Tp* ptr, const 
_Tpvec& a) \ { \ - vsetvlmax_e8m1(); \ - vse8_v_u8m1((uchar*)ptr, vle8_v_u8m1((uchar*)a.val)); \ + vse8_v_u8m1((uchar*)ptr, vle8_v_u8m1((uchar*)a.val, 16), 16); \ } \ inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - vse##width##_v_##suffix##m1(ptr, a); \ + vse##width##_v_##suffix##m1(ptr, a, vl); \ } \ inline void v_store_aligned_nocache(_Tp* ptr, const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - vse##width##_v_##suffix##m1(ptr, a); \ + vse##width##_v_##suffix##m1(ptr, a, vl); \ } \ inline void v_store(_Tp* ptr, const _Tpvec& a, hal::StoreMode /*mode*/) \ { \ - vsetvlmax_e##width##m1(); \ - vse##width##_v_##suffix##m1(ptr, a); \ + vse##width##_v_##suffix##m1(ptr, a, vl); \ } \ inline void v_store_low(_Tp* ptr, const _Tpvec& a) \ { \ - _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \ - vsetvlmax_e##width##m1(); \ - vse##width##_v_##suffix##m1(tmp_ptr, a); \ - for(int i = 0; i < _Tpvec::nlanes/2; ++i) \ - { \ - ptr[i] = tmp_ptr[i]; \ - } \ + vse##width##_v_##suffix##m1(ptr, a, hvl); \ } \ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ { \ - _Tp CV_DECL_ALIGNED(32) tmp_ptr[_Tpvec::nlanes] = {0}; \ - vsetvlmax_e##width##m1(); \ - vse##width##_v_##suffix##m1(tmp_ptr, a); \ - for(int i = 0; i < _Tpvec::nlanes/2; ++i) \ - { \ - ptr[i] = tmp_ptr[i+_Tpvec::nlanes/2]; \ - } \ + vse##width##_v_##suffix##m1(ptr, vslidedown_vx_##suffix##m1(vmv(0, vl), a, hvl, vl), hvl); \ } -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8x16, vuint8m1_t, uchar, 8, 8, u8) -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8x16, vint8m1_t, schar, 8, 8, i8) -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16x8, vuint16m1_t, ushort, 4, 16, u16) -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16x8, vint16m1_t, short, 4, 16, i16) -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32x4, vuint32m1_t, unsigned, 2, 32, u32) -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32x4, vint32m1_t, int, 2, 32, i32) -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64x2, vuint64m1_t, uint64, 1, 64, u64) 
-OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64x2, vint64m1_t, int64, 1, 64, i64) -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32x4, vfloat32m1_t, float, 2, 32, f32) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint8x16, vuint8m1_t, uchar, 8, 16, 8, u8, vmv_v_x_u8m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int8x16, vint8m1_t, schar, 8, 16, 8, i8, vmv_v_x_i8m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint16x8, vuint16m1_t, ushort, 4, 8, 16, u16, vmv_v_x_u16m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int16x8, vint16m1_t, short, 4, 8, 16, i16, vmv_v_x_i16m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint32x4, vuint32m1_t, unsigned, 2, 4, 32, u32, vmv_v_x_u32m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int32x4, vint32m1_t, int, 2, 4, 32, i32, vmv_v_x_i32m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_uint64x2, vuint64m1_t, uint64, 1, 2, 64, u64, vmv_v_x_u64m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_int64x2, vint64m1_t, int64, 1, 2, 64, i64, vmv_v_x_i64m1) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float32x4, vfloat32m1_t, float, 2, 4, 32, f32, vfmv_v_f_f32m1) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64x2, vfloat64m1_t, double, 1, 64, f64) +OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64x2, vfloat64m1_t, double, 1, 2, 64, f64, vfmv_v_f_f64m1) #endif inline v_int8x16 v_load_halves(const schar* ptr0, const schar* ptr1) @@ -775,8 +742,7 @@ inline v_int8x16 v_load_halves(const schar* ptr0, const schar* ptr1) ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr0[4], ptr0[5], ptr0[6], ptr0[7], ptr1[0], ptr1[1], ptr1[2], ptr1[3], ptr1[4], ptr1[5], ptr1[6], ptr1[7] }; - vsetvlmax_e8m1(); - return v_int8x16(vle8_v_i8m1(elems)); + return v_int8x16(vle8_v_i8m1(elems, 16)); } inline v_uint8x16 v_load_halves(const uchar* ptr0, const uchar* ptr1) { return v_reinterpret_as_u8(v_load_halves((schar*)ptr0, (schar*)ptr1)); } @@ -786,8 +752,7 @@ inline v_int16x8 v_load_halves(const short* ptr0, const short* ptr1) { ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr1[0], ptr1[1], ptr1[2], ptr1[3] }; - vsetvlmax_e16m1(); - return 
v_int16x8(vle16_v_i16m1(elems)); + return v_int16x8(vle16_v_i16m1(elems, 8)); } inline v_uint16x8 v_load_halves(const ushort* ptr0, const ushort* ptr1) { return v_reinterpret_as_u16(v_load_halves((short*)ptr0, (short*)ptr1)); } @@ -797,8 +762,7 @@ inline v_int32x4 v_load_halves(const int* ptr0, const int* ptr1) { ptr0[0], ptr0[1], ptr1[0], ptr1[1] }; - vsetvlmax_e32m1(); - return v_int32x4(vle32_v_i32m1(elems)); + return v_int32x4(vle32_v_i32m1(elems, 4)); } inline v_float32x4 v_load_halves(const float* ptr0, const float* ptr1) { @@ -806,8 +770,7 @@ inline v_float32x4 v_load_halves(const float* ptr0, const float* ptr1) { ptr0[0], ptr0[1], ptr1[0], ptr1[1] }; - vsetvlmax_e32m1(); - return v_float32x4(vle32_v_f32m1(elems)); + return v_float32x4(vle32_v_f32m1(elems, 4)); } inline v_uint32x4 v_load_halves(const unsigned* ptr0, const unsigned* ptr1) { return v_reinterpret_as_u32(v_load_halves((int*)ptr0, (int*)ptr1)); } @@ -817,8 +780,7 @@ inline v_int64x2 v_load_halves(const int64* ptr0, const int64* ptr1) { ptr0[0], ptr1[0] }; - vsetvlmax_e64m1(); - return v_int64x2(vle64_v_i64m1(elems)); + return v_int64x2(vle64_v_i64m1(elems, 2)); } inline v_uint64x2 v_load_halves(const uint64* ptr0, const uint64* ptr1) { return v_reinterpret_as_u64(v_load_halves((int64*)ptr0, (int64*)ptr1)); } @@ -829,8 +791,7 @@ inline v_float64x2 v_load_halves(const double* ptr0, const double* ptr1) { ptr0[0], ptr1[0] }; - vsetvlmax_e64m1(); - return v_float64x2(vle64_v_f64m1(elems)); + return v_float64x2(vle64_v_f64m1(elems, 2)); } #endif @@ -858,8 +819,7 @@ inline v_int8x16 v_lut(const schar* tab, const int* idx) tab[idx[14]], tab[idx[15]] }; - vsetvlmax_e8m1(); - return v_int8x16(vle8_v_i8m1(elems)); + return v_int8x16(vle8_v_i8m1(elems, 16)); } inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) { @@ -882,8 +842,7 @@ inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) tab[idx[7]], tab[idx[7] + 1] }; - vsetvlmax_e8m1(); - return v_int8x16(vle8_v_i8m1(elems)); + return 
v_int8x16(vle8_v_i8m1(elems, 16)); } inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) { @@ -906,8 +865,7 @@ inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) tab[idx[3] + 2], tab[idx[3] + 3] }; - vsetvlmax_e8m1(); - return v_int8x16(vle8_v_i8m1(elems)); + return v_int8x16(vle8_v_i8m1(elems, 16)); } inline v_uint8x16 v_lut(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut((schar*)tab, idx)); } inline v_uint8x16 v_lut_pairs(const uchar* tab, const int* idx) { return v_reinterpret_as_u8(v_lut_pairs((schar*)tab, idx)); } @@ -926,8 +884,7 @@ inline v_int16x8 v_lut(const short* tab, const int* idx) tab[idx[6]], tab[idx[7]] }; - vsetvlmax_e16m1(); - return v_int16x8(vle16_v_i16m1(elems)); + return v_int16x8(vle16_v_i16m1(elems, 8)); } inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) { @@ -942,8 +899,7 @@ inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) tab[idx[3]], tab[idx[3] + 1] }; - vsetvlmax_e16m1(); - return v_int16x8(vle16_v_i16m1(elems)); + return v_int16x8(vle16_v_i16m1(elems, 8)); } inline v_int16x8 v_lut_quads(const short* tab, const int* idx) { @@ -958,8 +914,7 @@ inline v_int16x8 v_lut_quads(const short* tab, const int* idx) tab[idx[1] + 2], tab[idx[1] + 3] }; - vsetvlmax_e16m1(); - return v_int16x8(vle16_v_i16m1(elems)); + return v_int16x8(vle16_v_i16m1(elems, 8)); } inline v_uint16x8 v_lut(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut((short*)tab, idx)); } inline v_uint16x8 v_lut_pairs(const ushort* tab, const int* idx) { return v_reinterpret_as_u16(v_lut_pairs((short*)tab, idx)); } @@ -974,8 +929,7 @@ inline v_int32x4 v_lut(const int* tab, const int* idx) tab[idx[2]], tab[idx[3]] }; - vsetvlmax_e32m1(); - return v_int32x4(vle32_v_i32m1(elems)); + return v_int32x4(vle32_v_i32m1(elems, 4)); } inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) { @@ -986,13 +940,11 @@ inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) tab[idx[1]], tab[idx[1] + 
1] }; - vsetvlmax_e32m1(); - return v_int32x4(vle32_v_i32m1(elems)); + return v_int32x4(vle32_v_i32m1(elems, 4)); } inline v_int32x4 v_lut_quads(const int* tab, const int* idx) { - vsetvlmax_e32m1(); - return v_int32x4(vle32_v_i32m1(tab + idx[0])); + return v_int32x4(vle32_v_i32m1(tab + idx[0], 4)); } inline v_uint32x4 v_lut(const unsigned* tab, const int* idx) { return v_reinterpret_as_u32(v_lut((int*)tab, idx)); } @@ -1006,13 +958,11 @@ inline v_int64x2 v_lut(const int64_t* tab, const int* idx) tab[idx[0]], tab[idx[1]] }; - vsetvlmax_e64m1(); - return v_int64x2(vle64_v_i64m1(elems)); + return v_int64x2(vle64_v_i64m1(elems, 2)); } inline v_int64x2 v_lut_pairs(const int64* tab, const int* idx) { - vsetvlmax_e64m1(); - return v_int64x2(vle64_v_i64m1(tab + idx[0])); + return v_int64x2(vle64_v_i64m1(tab + idx[0], 2)); } inline v_uint64x2 v_lut(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut((const int64_t *)tab, idx)); } inline v_uint64x2 v_lut_pairs(const uint64* tab, const int* idx) { return v_reinterpret_as_u64(v_lut_pairs((const int64_t *)tab, idx)); } @@ -1026,8 +976,7 @@ inline v_float32x4 v_lut(const float* tab, const int* idx) tab[idx[2]], tab[idx[3]] }; - vsetvlmax_e32m1(); - return v_float32x4(vle32_v_f32m1(elems)); + return v_float32x4(vle32_v_f32m1(elems, 4)); } inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { @@ -1038,13 +987,11 @@ inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) tab[idx[1]], tab[idx[1] + 1] }; - vsetvlmax_e32m1(); - return v_float32x4(vle32_v_f32m1(elems)); + return v_float32x4(vle32_v_f32m1(elems, 4)); } inline v_float32x4 v_lut_quads(const float* tab, const int* idx) { - vsetvlmax_e32m1(); - return v_float32x4(vle32_v_f32m1(tab + idx[0])); + return v_float32x4(vle32_v_f32m1(tab + idx[0], 4)); } inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) @@ -1056,8 +1003,7 @@ inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) tab[v_extract_n<2>(idxvec)], 
tab[v_extract_n<3>(idxvec)] }; - vsetvlmax_e32m1(); - return v_int32x4(vle32_v_i32m1(elems)); + return v_int32x4(vle32_v_i32m1(elems, 4)); } inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) @@ -1069,8 +1015,7 @@ inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) tab[v_extract_n<2>(idxvec)], tab[v_extract_n<3>(idxvec)] }; - vsetvlmax_e32m1(); - return v_uint32x4(vle32_v_u32m1(elems)); + return v_uint32x4(vle32_v_u32m1(elems, 4)); } inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) @@ -1082,8 +1027,7 @@ inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) tab[v_extract_n<2>(idxvec)], tab[v_extract_n<3>(idxvec)] }; - vsetvlmax_e32m1(); - return v_float32x4(vle32_v_f32m1(elems)); + return v_float32x4(vle32_v_f32m1(elems, 4)); } inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) @@ -1103,14 +1047,12 @@ inline v_float64x2 v_lut(const double* tab, const int* idx) tab[idx[0]], tab[idx[1]] }; - vsetvlmax_e64m1(); - return v_float64x2(vle64_v_f64m1(elems)); + return v_float64x2(vle64_v_f64m1(elems, 2)); } inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) { - vsetvlmax_e64m1(); - return v_float64x2(vle64_v_f64m1(tab + idx[0])); + return v_float64x2(vle64_v_f64m1(tab + idx[0], 2)); } inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) @@ -1120,8 +1062,7 @@ inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) tab[v_extract_n<0>(idxvec)], tab[v_extract_n<1>(idxvec)] }; - vsetvlmax_e64m1(); - return v_float64x2(vle64_v_f64m1(elems)); + return v_float64x2(vle64_v_f64m1(elems, 2)); } inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) @@ -1141,8 +1082,7 @@ inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) ushort CV_DECL_ALIGNED(32) ptr[16] = {0}; v_store(ptr, a); v_store(ptr + 8, b); - vsetvlmax_e8m1(); - return 
v_uint8x16(vnsrl_wx_u8m1(vle16_v_u16m2(ptr), 0)); + return v_uint8x16(vnsrl_wx_u8m1(vle16_v_u16m2(ptr, 16), 0, 16)); } inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, @@ -1153,8 +1093,7 @@ inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, v_store(ptr + 4, b); v_store(ptr + 8, c); v_store(ptr + 12, d); - vsetvlmax_e8m1(); - return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vle32_v_u32m4(ptr), 0), 0)); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vle32_v_u32m4(ptr, 16), 0, 16), 0, 16)); } inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uint64x2& c, @@ -1170,95 +1109,89 @@ inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uin v_store(ptr + 10, f); v_store(ptr + 12, g); v_store(ptr + 14, h); - vsetvlmax_e8m1(); - return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vnsrl_wx_u32m4(vle64_v_u64m8(ptr), 0), 0), 0)); + return v_uint8x16(vnsrl_wx_u8m1(vnsrl_wx_u16m2(vnsrl_wx_u32m4(vle64_v_u64m8(ptr, 16), 0, 16), 0, 16), 0, 16)); } ////////////// Arithmetics ////////////// -#define OPENCV_HAL_IMPL_RVV_BIN_OP(bin_op, _Tpvec, intrin, width) \ +#define OPENCV_HAL_IMPL_RVV_BIN_OP(bin_op, _Tpvec, intrin, vl) \ inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(intrin(a, b)); \ + return _Tpvec(intrin(a, b, vl)); \ } \ inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - a = _Tpvec(intrin(a, b)); \ + a = _Tpvec(intrin(a, b, vl)); \ return a; \ } -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint8x16, vsaddu_vv_u8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint8x16, vssubu_vv_u8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint8x16, vdivu_vv_u8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int8x16, vsadd_vv_i8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int8x16, vssub_vv_i8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int8x16, vdiv_vv_i8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint16x8, vsaddu_vv_u16m1, 16) 
-OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint16x8, vssubu_vv_u16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint16x8, vdivu_vv_u16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int16x8, vsadd_vv_i16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int16x8, vssub_vv_i16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int16x8, vdiv_vv_i16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint32x4, vadd_vv_u32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint32x4, vsub_vv_u32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint32x4, vmul_vv_u32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint32x4, vdivu_vv_u32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int32x4, vadd_vv_i32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int32x4, vsub_vv_i32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int32x4, vmul_vv_i32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int32x4, vdiv_vv_i32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float32x4, vfadd_vv_f32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float32x4, vfsub_vv_f32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float32x4, vfmul_vv_f32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float32x4, vfdiv_vv_f32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint64x2, vadd_vv_u64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint64x2, vsub_vv_u64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint64x2, vmul_vv_u64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint64x2, vdivu_vv_u64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int64x2, vadd_vv_i64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int64x2, vsub_vv_i64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int64x2, vmul_vv_i64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int64x2, vdiv_vv_i64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint8x16, vsaddu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint8x16, vssubu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint8x16, vdivu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int8x16, vsadd_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int8x16, vssub_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int8x16, vdiv_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint16x8, vsaddu_vv_u16m1, 8) 
+OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint16x8, vssubu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint16x8, vdivu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int16x8, vsadd_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int16x8, vssub_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int16x8, vdiv_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint32x4, vadd_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint32x4, vsub_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint32x4, vmul_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint32x4, vdivu_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int32x4, vadd_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int32x4, vsub_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int32x4, vmul_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int32x4, vdiv_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float32x4, vfadd_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float32x4, vfsub_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float32x4, vfmul_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float32x4, vfdiv_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_uint64x2, vadd_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_uint64x2, vsub_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_uint64x2, vmul_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_uint64x2, vdivu_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_int64x2, vadd_vv_i64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_int64x2, vsub_vv_i64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_int64x2, vmul_vv_i64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_int64x2, vdiv_vv_i64m1, 2) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float64x2, vfadd_vv_f64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float64x2, vfsub_vv_f64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float64x2, vfmul_vv_f64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float64x2, vfdiv_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_OP(+, v_float64x2, vfadd_vv_f64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(-, v_float64x2, vfsub_vv_f64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_OP(*, v_float64x2, vfmul_vv_f64m1, 2) 
+OPENCV_HAL_IMPL_RVV_BIN_OP(/, v_float64x2, vfdiv_vv_f64m1, 2) #endif ////////////// Bitwise logic ////////////// -#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, suffix, width) \ -OPENCV_HAL_IMPL_RVV_BIN_OP(&, _Tpvec, vand_vv_##suffix##m1, width) \ -OPENCV_HAL_IMPL_RVV_BIN_OP(|, _Tpvec, vor_vv_##suffix##m1, width) \ -OPENCV_HAL_IMPL_RVV_BIN_OP(^, _Tpvec, vxor_vv_##suffix##m1, width) \ +#define OPENCV_HAL_IMPL_RVV_LOGIC_OP(_Tpvec, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(&, _Tpvec, vand_vv_##suffix##m1, vl) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(|, _Tpvec, vor_vv_##suffix##m1, vl) \ +OPENCV_HAL_IMPL_RVV_BIN_OP(^, _Tpvec, vxor_vv_##suffix##m1, vl) \ inline _Tpvec operator ~ (const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vnot_v_##suffix##m1(a)); \ + return _Tpvec(vnot_v_##suffix##m1(a, vl)); \ } -OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8x16, u8, 8) -OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8x16, i8, 8) -OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16x8, u16, 16) -OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16x8, i16, 16) -OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32x4, u32, 32) -OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32x4, i32, 32) -OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64x2, u64, 64) -OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64x2, i64, 64) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint8x16, u8, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int8x16, i8, 16) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint16x8, u16, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int16x8, i16, 8) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint32x4, u32, 4) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int32x4, i32, 4) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_uint64x2, u64, 2) +OPENCV_HAL_IMPL_RVV_LOGIC_OP(v_int64x2, i64, 2) #define OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(bin_op, intrin) \ inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \ { \ - vsetvlmax_e32m1(); \ - return v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \ + return v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), 
vreinterpret_v_f32m1_i32m1(b), 4))); \ } \ inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \ { \ - vsetvlmax_e32m1(); \ - a = v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b)))); \ + a = v_float32x4(vreinterpret_v_i32m1_f32m1(intrin(vreinterpret_v_f32m1_i32m1(a), vreinterpret_v_f32m1_i32m1(b), 4))); \ return a; \ } @@ -1268,21 +1201,18 @@ OPENCV_HAL_IMPL_RVV_FLT_BIT_OP(^, vxor_vv_i32m1) inline v_float32x4 operator ~ (const v_float32x4& a) { - vsetvlmax_e32m1(); - return v_float32x4(vreinterpret_v_i32m1_f32m1(vnot_v_i32m1(vreinterpret_v_f32m1_i32m1(a)))); + return v_float32x4(vreinterpret_v_i32m1_f32m1(vnot_v_i32m1(vreinterpret_v_f32m1_i32m1(a), 4))); } #if CV_SIMD128_64F #define OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(bin_op, intrin) \ inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \ { \ - vsetvlmax_e64m1(); \ - return v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \ + return v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b), 2))); \ } \ inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \ { \ - vsetvlmax_e64m1(); \ - a = v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b)))); \ + a = v_float64x2(vreinterpret_v_i64m1_f64m1(intrin(vreinterpret_v_f64m1_i64m1(a), vreinterpret_v_f64m1_i64m1(b), 2))); \ return a; \ } @@ -1292,119 +1222,108 @@ OPENCV_HAL_IMPL_RVV_FLT64_BIT_OP(^, vxor_vv_i64m1) inline v_float64x2 operator ~ (const v_float64x2& a) { - vsetvlmax_e64m1(); - return v_float64x2(vreinterpret_v_i64m1_f64m1(vnot_v_i64m1(vreinterpret_v_f64m1_i64m1(a)))); + return v_float64x2(vreinterpret_v_i64m1_f64m1(vnot_v_i64m1(vreinterpret_v_f64m1_i64m1(a), 2))); } #endif ////////////// Bitwise shifts ////////////// -#define 
OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, suffix, width) \ +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(_Tpvec, suffix, vl) \ inline _Tpvec operator << (const _Tpvec& a, int n) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n), vl)); \ } \ inline _Tpvec operator >> (const _Tpvec& a, int n) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n), vl)); \ } \ template inline _Tpvec v_shl(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n), vl)); \ } \ template inline _Tpvec v_shr(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n))); \ + return _Tpvec(vsrl_vx_##suffix##m1(a, uint8_t(n), vl)); \ } -#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, suffix, width) \ +#define OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(_Tpvec, suffix, vl) \ inline _Tpvec operator << (const _Tpvec& a, int n) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n), vl)); \ } \ inline _Tpvec operator >> (const _Tpvec& a, int n) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n), vl)); \ } \ template inline _Tpvec v_shl(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n))); \ + return _Tpvec(vsll_vx_##suffix##m1(a, uint8_t(n), vl)); \ } \ template inline _Tpvec v_shr(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n))); \ + return _Tpvec(vsra_vx_##suffix##m1(a, uint8_t(n), vl)); \ } -OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint8x16, u8, 8) 
-OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16x8, u16, 16) -OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32x4, u32, 32) -OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64x2, u64, 64) -OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int8x16, i8, 8) -OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16x8, i16, 16) -OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32x4, i32, 32) -OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64x2, i64, 64) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint8x16, u8, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint16x8, u16, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint32x4, u32, 4) +OPENCV_HAL_IMPL_RVV_UNSIGNED_SHIFT_OP(v_uint64x2, u64, 2) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int8x16, i8, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int16x8, i16, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int32x4, i32, 4) +OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64x2, i64, 2) ////////////// Comparison ////////////// -#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix, width) \ +#define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix, vl) \ inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \ + return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b, vl), vmv_v_x_##suffix##m1(0, vl), 1, vl)); \ } -#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix, width) \ +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix, vl) \ inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b), vzero_##suffix##m1(), 1)); \ -} - -#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, 
vmsltu_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgtu_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsleu_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsgeu_vv_##suffix##m1_b##width, suffix, width) - -#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmslt_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgt_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsle_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsge_vv_##suffix##m1_b##width, suffix, width) - -#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix, width) \ -OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ==, vmfeq_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, !=, vmfne_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <, vmflt_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >, vmfgt_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <=, vmfle_vv_##suffix##m1_b##width, suffix, width) \ -OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >=, vmfge_vv_##suffix##m1_b##width, suffix, width) - - -OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8x16, u8, 8) -OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16x8, u16, 16) -OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32x4, u32, 32) -OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64x2, u64, 64) -OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8x16, i8, 8) -OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16x8, i16, 16) -OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32x4, i32, 32) 
-OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64x2, i64, 64) -OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32x4, f32, 32) + return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b, vl), vfmv_v_f_##suffix##m1(0, vl), 1, vl)); \ +} + +#define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix, width, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmsltu_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgtu_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsleu_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsgeu_vv_##suffix##m1_b##width, suffix, vl) + +#define OPENCV_HAL_IMPL_RVV_SIGNED_CMP(_Tpvec, suffix, width, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, ==, vmseq_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, !=, vmsne_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <, vmslt_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >, vmsgt_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, <=, vmsle_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, >=, vmsge_vv_##suffix##m1_b##width, suffix, vl) + +#define OPENCV_HAL_IMPL_RVV_FLOAT_CMP(_Tpvec, suffix, width, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, ==, vmfeq_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, !=, vmfne_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <, vmflt_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, >, vmfgt_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, <=, vmfle_vv_##suffix##m1_b##width, suffix, vl) \ +OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, 
>=, vmfge_vv_##suffix##m1_b##width, suffix, vl) + + +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint8x16, u8, 8, 16) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint16x8, u16, 16, 8) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint32x4, u32, 32, 4) +OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(v_uint64x2, u64, 64, 2) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int8x16, i8, 8, 16) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int16x8, i16, 16, 8) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int32x4, i32, 32, 4) +OPENCV_HAL_IMPL_RVV_SIGNED_CMP(v_int64x2, i64, 64, 2) +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float32x4, f32, 32, 4) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64x2, f64, 64) +OPENCV_HAL_IMPL_RVV_FLOAT_CMP(v_float64x2, f64, 64, 2) #endif inline v_float32x4 v_not_nan(const v_float32x4& a) @@ -1417,99 +1336,106 @@ inline v_float64x2 v_not_nan(const v_float64x2& a) ////////////// Min/Max ////////////// -#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, width) \ +#define OPENCV_HAL_IMPL_RVV_BIN_FUNC(_Tpvec, func, intrin, vl) \ inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(intrin(a, b)); \ -} - -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_min, vminu_vv_u8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 32) 
-OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 32) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_min, vminu_vv_u64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_max, vmaxu_vv_u64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_min, vmin_vv_i64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_max, vmax_vv_i64m1, 64) + return _Tpvec(intrin(a, b, vl)); \ +} + +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_min, vminu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_max, vmaxu_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_min, vmin_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_max, vmax_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_min, vminu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_max, vmaxu_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_min, vmin_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_max, vmax_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_min, vminu_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint32x4, v_max, vmaxu_vv_u32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_min, vmin_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int32x4, v_max, vmax_vv_i32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_min, vfmin_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float32x4, v_max, vfmax_vv_f32m1, 4) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_min, vminu_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint64x2, v_max, vmaxu_vv_u64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_min, vmin_vv_i64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int64x2, v_max, vmax_vv_i64m1, 2) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 64) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 64) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_min, vfmin_vv_f64m1, 2) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_float64x2, v_max, vfmax_vv_f64m1, 2) #endif ////////////// Arithmetics wrap ////////////// -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_add_wrap, 
vadd_vv_u8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_sub_wrap, vsub_vv_i16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 8) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 16) -OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_add_wrap, vadd_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_add_wrap, vadd_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_add_wrap, vadd_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_add_wrap, vadd_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_sub_wrap, vsub_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_sub_wrap, vsub_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_sub_wrap, vsub_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_sub_wrap, vsub_vv_i16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint8x16, v_mul_wrap, vmul_vv_u8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int8x16, v_mul_wrap, vmul_vv_i8m1, 16) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_uint16x8, v_mul_wrap, vmul_vv_u16m1, 8) +OPENCV_HAL_IMPL_RVV_BIN_FUNC(v_int16x8, v_mul_wrap, vmul_vv_i16m1, 8) ////////////// Reduce ////////////// -#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, wwidth, red) \ +#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, vl, red) \ inline scalartype v_reduce_sum(const _Tpvec& a) \ { \ - vsetvlmax_e##wwidth##m1(); 
\ - _nwTpvec zero = vzero_##wsuffix##m1(); \ - _nwTpvec res = vzero_##wsuffix##m1(); \ - res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero); \ + _nwTpvec zero = vmv_v_x_##wsuffix##m1(0, vl); \ + _nwTpvec res = vmv_v_x_##wsuffix##m1(0, vl); \ + res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero, vl); \ return (scalartype)(_wTpvec(res).get0()); \ } OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint8x16, v_uint16x8, vuint16m1_t, unsigned, u8, u16, 16, wredsumu) OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int8x16, v_int16x8, vint16m1_t, int, i8, i16, 16, wredsum) -OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16x8, v_uint32x4, vuint32m1_t, unsigned, u16, u32, 32, wredsumu) -OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16x8, v_int32x4, vint32m1_t, int, i16, i32, 32, wredsum) -OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32x4, v_uint64x2, vuint64m1_t, unsigned, u32, u64, 64, wredsumu) -OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32x4, v_int64x2, vint64m1_t, int, i32, i64, 64, wredsum) -OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_float32x4, v_float32x4, vfloat32m1_t, float, f32, f32, 32, fredsum) -OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64x2, v_uint64x2, vuint64m1_t, uint64, u64, u64, 64, redsum) -OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64x2, v_int64x2, vint64m1_t, int64, i64, i64, 64, redsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint16x8, v_uint32x4, vuint32m1_t, unsigned, u16, u32, 8, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int16x8, v_int32x4, vint32m1_t, int, i16, i32, 8, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint32x4, v_uint64x2, vuint64m1_t, unsigned, u32, u64, 4, wredsumu) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int32x4, v_int64x2, vint64m1_t, int, i32, i64, 4, wredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_uint64x2, v_uint64x2, vuint64m1_t, uint64, u64, u64, 4, redsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_int64x2, v_int64x2, vint64m1_t, int64, i64, i64, 4, redsum) + +#define OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(_Tpvec, _wTpvec, _nwTpvec, scalartype, suffix, wsuffix, vl, red) \ +inline scalartype v_reduce_sum(const _Tpvec& a) \ +{ \ + 
_nwTpvec zero = vfmv_v_f_##wsuffix##m1(0, vl); \ + _nwTpvec res = vfmv_v_f_##wsuffix##m1(0, vl); \ + res = v##red##_vs_##suffix##m1_##wsuffix##m1(res, a, zero, vl); \ + return (scalartype)(_wTpvec(res).get0()); \ +} + +OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float32x4, v_float32x4, vfloat32m1_t, float, f32, f32, 8, fredsum) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_REDUCE_SUM(v_float64x2, v_float64x2, vfloat64m1_t, double, f64, f64, 64, fredsum) +OPENCV_HAL_IMPL_RVV_REDUCE_SUM_FP(v_float64x2, v_float64x2, vfloat64m1_t, double, f64, f64, 4, fredsum) #endif -#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, width, red) \ +#define OPENCV_HAL_IMPL_RVV_REDUCE(_Tpvec, func, scalartype, suffix, vl, red) \ inline scalartype v_reduce_##func(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - _Tpvec res = _Tpvec(v##red##_vs_##suffix##m1_##suffix##m1(a, a, a)); \ + _Tpvec res = _Tpvec(v##red##_vs_##suffix##m1_##suffix##m1(a, a, a, vl)); \ return scalartype(res.get0()); \ } -OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, min, uchar, u8, 8, redminu) -OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, min, schar, i8, 8, redmin) -OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, min, ushort, u16, 16, redminu) -OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, min, short, i16, 16, redmin) -OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, min, unsigned, u32, 32, redminu) -OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, min, int, i32, 32, redmin) -OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, min, float, f32, 32, fredmin) -OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, max, uchar, u8, 8, redmaxu) -OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, max, schar, i8, 8, redmax) -OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, max, ushort, u16, 16, redmaxu) -OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, max, short, i16, 16, redmax) -OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, max, unsigned, u32, 32, redmaxu) -OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, max, int, i32, 32, redmax) -OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, max, float, f32, 32, fredmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, 
min, uchar, u8, 16, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, min, schar, i8, 16, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, min, ushort, u16, 8, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, min, short, i16, 8, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, min, unsigned, u32, 4, redminu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, min, int, i32, 4, redmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, min, float, f32, 4, fredmin) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint8x16, max, uchar, u8, 16, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int8x16, max, schar, i8, 16, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint16x8, max, ushort, u16, 8, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int16x8, max, short, i16, 8, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_uint32x4, max, unsigned, u32, 4, redmaxu) +OPENCV_HAL_IMPL_RVV_REDUCE(v_int32x4, max, int, i32, 4, redmax) +OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, max, float, f32, 4, fredmax) inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, @@ -1522,16 +1448,14 @@ inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, v_reduce_sum(c), v_reduce_sum(d) }; - vsetvlmax_e32m1(); - return v_float32x4(vle32_v_f32m1(elems)); + return v_float32x4(vle32_v_f32m1(elems, 4)); } ////////////// Square-Root ////////////// inline v_float32x4 v_sqrt(const v_float32x4& x) { - vsetvlmax_e32m1(); - return v_float32x4(vfsqrt_v_f32m1(x)); + return v_float32x4(vfsqrt_v_f32m1(x, 4)); } inline v_float32x4 v_invsqrt(const v_float32x4& x) @@ -1543,8 +1467,7 @@ inline v_float32x4 v_invsqrt(const v_float32x4& x) #if CV_SIMD128_64F inline v_float64x2 v_sqrt(const v_float64x2& x) { - vsetvlmax_e64m1(); - return v_float64x2(vfsqrt_v_f64m1(x)); + return v_float64x2(vfsqrt_v_f64m1(x, 4)); } inline v_float64x2 v_invsqrt(const v_float64x2& x) @@ -1556,29 +1479,25 @@ inline v_float64x2 v_invsqrt(const v_float64x2& x) inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b) { - vsetvlmax_e32m1(); - v_float32x4 
x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b)); + v_float32x4 x(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a, 4), b, b, 4)); return v_sqrt(x); } inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b) { - vsetvlmax_e32m1(); - return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a), b, b)); + return v_float32x4(vfmacc_vv_f32m1(vfmul_vv_f32m1(a, a, 4), b, b, 4)); } #if CV_SIMD128_64F inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b) { - vsetvlmax_e64m1(); - v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b)); + v_float64x2 x(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a, 2), b, b, 2)); return v_sqrt(x); } inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) { - vsetvlmax_e64m1(); - return v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a), b, b)); + return v_float64x2(vfmacc_vv_f64m1(vfmul_vv_f64m1(a, a, 2), b, b, 2)); } #endif @@ -1586,13 +1505,11 @@ inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b) inline v_float32x4 v_fma(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) { - vsetvlmax_e32m1(); - return v_float32x4(vfmacc_vv_f32m1(c, a, b)); + return v_float32x4(vfmacc_vv_f32m1(c, a, b, 4)); } inline v_int32x4 v_fma(const v_int32x4& a, const v_int32x4& b, const v_int32x4& c) { - vsetvlmax_e32m1(); - return v_int32x4(vmacc_vv_i32m1(c, a, b)); + return v_int32x4(vmacc_vv_i32m1(c, a, b, 4)); } inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c) @@ -1608,8 +1525,7 @@ inline v_int32x4 v_muladd(const v_int32x4& a, const v_int32x4& b, const v_int32x #if CV_SIMD128_64F inline v_float64x2 v_fma(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) { - vsetvlmax_e64m1(); - return v_float64x2(vfmacc_vv_f64m1(c, a, b)); + return v_float64x2(vfmacc_vv_f64m1(c, a, b, 2)); } inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c) @@ -1620,24 +1536,22 @@ inline v_float64x2 v_muladd(const 
v_float64x2& a, const v_float64x2& b, const v_ ////////////// Check all/any ////////////// -#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, width) \ +#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \ inline bool v_check_all(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a), shift)); \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl)); \ return (v.val[0] | v.val[1]) == 0; \ } \ inline bool v_check_any(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift)); \ + v_uint64x2 v = v_uint64x2((vuint64m1_t)vsrl_vx_##suffix##m1(a, shift, vl)); \ return (v.val[0] | v.val[1]) != 0; \ } -OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 8) -OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 16) -OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 32) -OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 64) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint8x16, u8, 7, 16) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint16x8, u16, 15, 8) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint32x4, u32, 31, 4) +OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_uint64x2, u64, 63, 2) inline bool v_check_all(const v_int8x16& a) @@ -1690,16 +1604,15 @@ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64x2, absdiff) OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int8x16, absdiffs) OPENCV_HAL_IMPL_RVV_ABSDIFF(v_int16x8, absdiffs) -#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width) \ +#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, vl) \ inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b)), 0)); \ + return _rTpvec(rshr((_nwTpvec)sub(v_max(a, b), v_min(a, b), vl), 0, vl)); \ } -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, 
vwsub_vv_i16m2, vnclipu_wx_u8m1, 8) -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 16) -OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 32) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int8x16, v_uint8x16, vuint16m2_t, vwsub_vv_i16m2, vnclipu_wx_u8m1, 16) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int16x8, v_uint16x8, vuint32m2_t, vwsub_vv_i32m2, vnclipu_wx_u16m1, 8) +OPENCV_HAL_IMPL_RVV_ABSDIFF_S(v_int32x4, v_uint32x4, vuint64m2_t, vwsub_vv_i64m2, vnclipu_wx_u32m1, 4) #define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \ inline _Tprvec v_abs(const _Tpvec& a) \ @@ -1732,149 +1645,152 @@ OPENCV_HAL_IMPL_RVV_REDUCE_SAD(v_float32x4, float) ////////////// Select ////////////// -#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, merge, ne, width) \ +#define OPENCV_HAL_IMPL_RVV_SELECT(_Tpvec, merge, ne, vl) \ inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(merge(ne(mask, 0), b, a)); \ + return _Tpvec(merge(ne(mask, 0, vl), b, a, vl)); \ } -OPENCV_HAL_IMPL_RVV_SELECT(v_uint8x16, vmerge_vvm_u8m1, vmsne_vx_u8m1_b8, 8) -OPENCV_HAL_IMPL_RVV_SELECT(v_int8x16, vmerge_vvm_i8m1, vmsne_vx_i8m1_b8, 8) -OPENCV_HAL_IMPL_RVV_SELECT(v_uint16x8, vmerge_vvm_u16m1, vmsne_vx_u16m1_b16, 16) -OPENCV_HAL_IMPL_RVV_SELECT(v_int16x8, vmerge_vvm_i16m1, vmsne_vx_i16m1_b16, 16) -OPENCV_HAL_IMPL_RVV_SELECT(v_uint32x4, vmerge_vvm_u32m1, vmsne_vx_u32m1_b32, 32) -OPENCV_HAL_IMPL_RVV_SELECT(v_int32x4, vmerge_vvm_i32m1, vmsne_vx_i32m1_b32, 32) -OPENCV_HAL_IMPL_RVV_SELECT(v_float32x4, vmerge_vvm_f32m1, vmfne_vf_f32m1_b32, 32) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint8x16, vmerge_vvm_u8m1, vmsne_vx_u8m1_b8, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_int8x16, vmerge_vvm_i8m1, vmsne_vx_i8m1_b8, 16) +OPENCV_HAL_IMPL_RVV_SELECT(v_uint16x8, vmerge_vvm_u16m1, vmsne_vx_u16m1_b16, 8) +OPENCV_HAL_IMPL_RVV_SELECT(v_int16x8, vmerge_vvm_i16m1, vmsne_vx_i16m1_b16, 8) 
+OPENCV_HAL_IMPL_RVV_SELECT(v_uint32x4, vmerge_vvm_u32m1, vmsne_vx_u32m1_b32, 4) +OPENCV_HAL_IMPL_RVV_SELECT(v_int32x4, vmerge_vvm_i32m1, vmsne_vx_i32m1_b32, 4) +OPENCV_HAL_IMPL_RVV_SELECT(v_float32x4, vmerge_vvm_f32m1, vmfne_vf_f32m1_b32, 4) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_SELECT(v_float64x2, vmerge_vvm_f64m1, vmfne_vf_f64m1_b64, 64) +OPENCV_HAL_IMPL_RVV_SELECT(v_float64x2, vmerge_vvm_f64m1, vmfne_vf_f64m1_b64, 2) #endif ////////////// Rotate shift ////////////// -#define OPENCV_HAL_IMPL_RVV_ROTATE_OP(_Tpvec, suffix, width) \ +#define OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(_Tpvec, suffix, vl) \ template inline _Tpvec v_rotate_right(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \ + return _Tpvec(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), a, n, vl)); \ } \ template inline _Tpvec v_rotate_left(const _Tpvec& a) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vslideup_vx_##suffix##m1(vzero_##suffix##m1(), a, n)); \ + return _Tpvec(vslideup_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), a, n, vl)); \ } \ template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ { return a; } \ template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), a, n), b, _Tpvec::nlanes - n)); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), a, n, vl), b, _Tpvec::nlanes - n, vl)); \ } \ template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ { \ - vsetvlmax_e##width##m1(); \ - return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vzero_##suffix##m1(), b, _Tpvec::nlanes - n), a, n)); \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vmv_v_x_##suffix##m1(0, vl), b, _Tpvec::nlanes - n, vl), a, n, vl)); \ } \ template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, 
const _Tpvec& b) \ { CV_UNUSED(b); return a; } +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint8x16, u8, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int8x16, i8, 16) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint16x8, u16, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int16x8, i16, 8) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint32x4, u32, 4) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int32x4, i32, 4) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_uint64x2, u64, 2) +OPENCV_HAL_IMPL_RVV_ROTATE_INTEGER(v_int64x2, i64, 2) + +#define OPENCV_HAL_IMPL_RVV_ROTATE_FP(_Tpvec, suffix, vl) \ +template inline _Tpvec v_rotate_right(const _Tpvec& a) \ +{ \ + return _Tpvec(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), a, n, vl)); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), a, n, vl)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a) \ +{ return a; } \ +template inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), a, n, vl), b, _Tpvec::nlanes - n, vl)); \ +} \ +template inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b) \ +{ \ + return _Tpvec(vslideup_vx_##suffix##m1(vslidedown_vx_##suffix##m1(vfmv_v_f_##suffix##m1(0, vl), b, _Tpvec::nlanes - n, vl), a, n, vl)); \ +} \ +template<> inline _Tpvec v_rotate_left<0>(const _Tpvec& a, const _Tpvec& b) \ +{ CV_UNUSED(b); return a; } -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint8x16, u8, 8) -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int8x16, i8, 8) -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint16x8, u16, 16) -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int16x8, i16, 16) -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint32x4, u32, 32) -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int32x4, i32, 32) -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float32x4, f32, 32) -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_uint64x2, u64, 64) -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_int64x2, i64, 64) 
+OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float32x4, f32, 4) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_ROTATE_OP(v_float64x2, f64, 64) +OPENCV_HAL_IMPL_RVV_ROTATE_FP(v_float64x2, f64, 2) #endif ////////////// Convert to float ////////////// inline v_float32x4 v_cvt_f32(const v_int32x4& a) { - vsetvlmax_e32m1(); - return v_float32x4(vfcvt_f_x_v_f32m1(a)); + return v_float32x4(vfcvt_f_x_v_f32m1(a, 4)); } #if CV_SIMD128_64F inline v_float32x4 v_cvt_f32(const v_float64x2& a) { double arr[4] = {a.val[0], a.val[1], 0, 0}; - vsetvlmax_e64m2(); - vfloat64m2_t tmp = vle64_v_f64m2(arr); - vsetvlmax_e32m1(); - return v_float32x4(vfncvt_f_f_w_f32m1(tmp)); + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_float32x4(vfncvt_f_f_w_f32m1(tmp, 4)); } inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) { double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]}; - vsetvlmax_e64m2(); - vfloat64m2_t tmp = vle64_v_f64m2(arr); - vsetvlmax_e32m1(); - return v_float32x4(vfncvt_f_f_w_f32m1(tmp)); + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_float32x4(vfncvt_f_f_w_f32m1(tmp, 4)); } inline v_float64x2 v_cvt_f64(const v_int32x4& a) { double CV_DECL_ALIGNED(32) ptr[4] = {0}; - vsetvlmax_e64m2(); - vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a)); + vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a, 4), 4); double CV_DECL_ALIGNED(32) elems[2] = { ptr[0], ptr[1] }; - vsetvlmax_e64m1(); - return v_float64x2(vle64_v_f64m1(elems)); + return v_float64x2(vle64_v_f64m1(elems, 2)); } inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) { double CV_DECL_ALIGNED(32) ptr[4] = {0}; - vsetvlmax_e64m2(); - vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a)); + vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a, 4), 4); double CV_DECL_ALIGNED(32) elems[2] = { ptr[2], ptr[3] }; - vsetvlmax_e64m1(); - return v_float64x2(vle64_v_f64m1(elems)); + return v_float64x2(vle64_v_f64m1(elems, 2)); } inline v_float64x2 v_cvt_f64(const v_float32x4& a) { double CV_DECL_ALIGNED(32) ptr[4] = {0}; - vsetvlmax_e64m2(); - vse64_v_f64m2(ptr, 
vfwcvt_f_f_v_f64m2(a)); + vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a, 4), 4); double CV_DECL_ALIGNED(32) elems[2] = { ptr[0], ptr[1] }; - vsetvlmax_e64m1(); - return v_float64x2(vle64_v_f64m1(elems)); + return v_float64x2(vle64_v_f64m1(elems, 2)); } inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) { double CV_DECL_ALIGNED(32) ptr[4] = {0}; - vsetvlmax_e64m2(); - vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a)); + vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a, 4), 4); double CV_DECL_ALIGNED(32) elems[2] = { ptr[2], ptr[3] }; - vsetvlmax_e64m1(); - return v_float64x2(vle64_v_f64m1(elems)); + return v_float64x2(vle64_v_f64m1(elems, 2)); } inline v_float64x2 v_cvt_f64(const v_int64x2& a) { - vsetvlmax_e64m1(); - return v_float64x2(vfcvt_f_x_v_f64m1(a)); + return v_float64x2(vfcvt_f_x_v_f64m1(a, 2)); } #endif @@ -1947,7 +1863,7 @@ OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(float32x4, float, f32) ////////////// Reverse ////////////// -#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, _Tp, width, suffix) \ +#define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, _Tp, suffix) \ inline _Tpvec v_reverse(const _Tpvec& a) \ { \ _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ @@ -1960,84 +1876,80 @@ inline _Tpvec v_reverse(const _Tpvec& a) \ return v_load(ptr); \ } -OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8x16, uchar, 8, u8) -OPENCV_HAL_IMPL_RVV_REVERSE(v_int8x16, schar, 8, i8) -OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16x8, ushort, 16, u16) -OPENCV_HAL_IMPL_RVV_REVERSE(v_int16x8, short, 16, i16) -OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32x4, unsigned, 32, u32) -OPENCV_HAL_IMPL_RVV_REVERSE(v_int32x4, int, 32, i32) -OPENCV_HAL_IMPL_RVV_REVERSE(v_float32x4, float, 32, f32) -OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64x2, uint64, 64, u64) -OPENCV_HAL_IMPL_RVV_REVERSE(v_int64x2, int64, 64, i64) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint8x16, uchar, u8) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int8x16, schar, i8) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint16x8, ushort, u16) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int16x8, short, i16) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint32x4, 
unsigned, u32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int32x4, int, i32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_float32x4, float, f32) +OPENCV_HAL_IMPL_RVV_REVERSE(v_uint64x2, uint64, u64) +OPENCV_HAL_IMPL_RVV_REVERSE(v_int64x2, int64, i64) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_REVERSE(v_float64x2, double, 64, f64) +OPENCV_HAL_IMPL_RVV_REVERSE(v_float64x2, double, f64) #endif //////////// Value reordering //////////// -#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tpwvec, _Tp, _Tpvec, width, suffix, wcvt) \ +#define OPENCV_HAL_IMPL_RVV_EXPAND(_Tpwvec, _Tp, _Tpvec, width, suffix, wcvt, vl) \ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ { \ _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \ _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \ v_store_low(lptr, a); \ v_store_high(hptr, a); \ - b0 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \ - b1 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \ + b0 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr, vl), vl)); \ + b1 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr, vl), vl)); \ } \ inline _Tpwvec v_expand_low(const _Tpvec& a) \ { \ _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \ v_store_low(lptr, a); \ - return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr))); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr, vl), vl)); \ } \ inline _Tpwvec v_expand_high(const _Tpvec& a) \ { \ _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \ v_store_high(hptr, a); \ - return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr))); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr, vl), vl)); \ } \ inline _Tpwvec v_load_expand(const _Tp* ptr) \ { \ - return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(ptr))); \ + return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(ptr, vl), vl)); \ } -OPENCV_HAL_IMPL_RVV_EXPAND(v_uint16x8, uchar, v_uint8x16, 8, u8, vwcvtu_x_x_v_u16m1) -OPENCV_HAL_IMPL_RVV_EXPAND(v_int16x8, schar, v_int8x16, 8, i8, vwcvt_x_x_v_i16m1) -OPENCV_HAL_IMPL_RVV_EXPAND(v_uint32x4, 
ushort, v_uint16x8, 16, u16, vwcvtu_x_x_v_u32m1) -OPENCV_HAL_IMPL_RVV_EXPAND(v_int32x4, short, v_int16x8, 16, i16, vwcvt_x_x_v_i32m1) -OPENCV_HAL_IMPL_RVV_EXPAND(v_uint64x2, uint, v_uint32x4, 32, u32, vwcvtu_x_x_v_u64m1) -OPENCV_HAL_IMPL_RVV_EXPAND(v_int64x2, int, v_int32x4, 32, i32, vwcvt_x_x_v_i64m1) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint16x8, uchar, v_uint8x16, 8, u8, vwcvtu_x_x_v_u16m1, 8) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int16x8, schar, v_int8x16, 8, i8, vwcvt_x_x_v_i16m1, 8) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint32x4, ushort, v_uint16x8, 16, u16, vwcvtu_x_x_v_u32m1, 4) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int32x4, short, v_int16x8, 16, i16, vwcvt_x_x_v_i32m1, 4) +OPENCV_HAL_IMPL_RVV_EXPAND(v_uint64x2, uint, v_uint32x4, 32, u32, vwcvtu_x_x_v_u64m1, 2) +OPENCV_HAL_IMPL_RVV_EXPAND(v_int64x2, int, v_int32x4, 32, i32, vwcvt_x_x_v_i64m1, 2) inline v_uint32x4 v_load_expand_q(const uchar* ptr) { - vsetvlmax_e32m1(); - return v_uint32x4(vwcvtu_x_x_v_u32m1(vwcvtu_x_x_v_u16mf2(vle8_v_u8mf4(ptr)))); + return v_uint32x4(vwcvtu_x_x_v_u32m1(vwcvtu_x_x_v_u16mf2(vle8_v_u8mf4(ptr, 4), 4), 4)); } inline v_int32x4 v_load_expand_q(const schar* ptr) { - vsetvlmax_e32m1(); - return v_int32x4(vwcvt_x_x_v_i32m1(vwcvt_x_x_v_i16mf2(vle8_v_i8mf4(ptr)))); + return v_int32x4(vwcvt_x_x_v_i32m1(vwcvt_x_x_v_i16mf2(vle8_v_i8mf4(ptr, 4), 4), 4)); } -#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, shr) \ +#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, shr, hvl, vl) \ inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \ { \ _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, b); \ - vsetvlmax_e##width##m2(); \ - return _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0)); \ + return _Tpvec(shr(vle##width##_v_##suffix##m2(arr, vl), 0, vl)); \ } \ inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \ { \ _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ - v_store(arr + 
_wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ - vsetvlmax_e##width##m2(); \ - v_store(ptr, _Tpvec(shr(vle##width##_v_##suffix##m2(arr), 0))); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ + v_store(ptr, _Tpvec(shr(vle##width##_v_##suffix##m2(arr, vl), 0, vl))); \ } \ template inline \ _Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b) \ @@ -2045,43 +1957,39 @@ _Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b) \ _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, b); \ - vsetvlmax_e##width##m2(); \ - return _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n)); \ + return _Tpvec(rshr(vle##width##_v_##suffix##m2(arr, vl), n, vl)); \ } \ template inline \ void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a) \ { \ _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ - v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ - vsetvlmax_e##width##m2(); \ - v_store(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr), n))); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ + v_store(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr, vl), n, vl))); \ } -OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 16, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1) -OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, schar, v_int16x8, short, 16, i16, vnclip_wx_i8m1, vnclip_wx_i8m1) -OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 32, u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1) -OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 32, i32, vnclip_wx_i16m1, vnclip_wx_i16m1) -OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 64, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1) -OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 64, i64, vnclip_wx_i32m1, vnsra_wx_i32m1) +OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 16, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1, 8, 16) +OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, 
schar, v_int16x8, short, 16, i16, vnclip_wx_i8m1, vnclip_wx_i8m1, 8, 16) +OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 32, u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1, 4, 8) +OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 32, i32, vnclip_wx_i16m1, vnclip_wx_i16m1, 4, 8) +OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 64, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1, 2, 4) +OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 64, i64, vnclip_wx_i32m1, vnsra_wx_i32m1, 2, 4) -#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, cast) \ +#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, cast, vl) \ inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \ { \ _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, b); \ - vsetvlmax_e##width##m2(); \ - return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0)); \ + return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), 0, vl)); \ } \ inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \ { \ _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ - v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ - vsetvlmax_e##width##m2(); \ - v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), 0))); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, vl))); \ + v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), 0, vl))); \ } \ template inline \ _Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b) \ @@ -2089,24 +1997,22 @@ _Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b) \ _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, b); \ - vsetvlmax_e##width##m2(); \ - return 
_Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n)); \ + return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), n, vl)); \ } \ template inline \ void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a) \ { \ _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ - v_store(arr + _wTpvec::nlanes, _wTpvec(vzero_##suffix##m1())); \ - vsetvlmax_e##width##m2(); \ - v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr), 0)), n))); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, vl))); \ + v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), n, vl))); \ } -OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 16, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2) -OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 32, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2) +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 16, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2, 16) +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 32, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2, 8) -#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, _Tp, width, suffix) \ +#define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, _Tp, suffix) \ inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ { \ _Tp CV_DECL_ALIGNED(32) ptra0[v_##_Tpvec::nlanes] = {0}; \ @@ -2151,19 +2057,19 @@ inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, d = v_combine_high(a, b); \ } -OPENCV_HAL_IMPL_RVV_UNPACKS(uint8x16, uchar, 8, u8) -OPENCV_HAL_IMPL_RVV_UNPACKS(int8x16, schar, 8, i8) -OPENCV_HAL_IMPL_RVV_UNPACKS(uint16x8, ushort, 16, u16) -OPENCV_HAL_IMPL_RVV_UNPACKS(int16x8, short, 16, i16) -OPENCV_HAL_IMPL_RVV_UNPACKS(uint32x4, unsigned, 32, u32) -OPENCV_HAL_IMPL_RVV_UNPACKS(int32x4, int, 32, i32) -OPENCV_HAL_IMPL_RVV_UNPACKS(float32x4, 
float, 32, f32) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint8x16, uchar, u8) +OPENCV_HAL_IMPL_RVV_UNPACKS(int8x16, schar, i8) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint16x8, ushort, u16) +OPENCV_HAL_IMPL_RVV_UNPACKS(int16x8, short, i16) +OPENCV_HAL_IMPL_RVV_UNPACKS(uint32x4, unsigned, u32) +OPENCV_HAL_IMPL_RVV_UNPACKS(int32x4, int, i32) +OPENCV_HAL_IMPL_RVV_UNPACKS(float32x4, float, f32) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_UNPACKS(float64x2, double, 64, f64) +OPENCV_HAL_IMPL_RVV_UNPACKS(float64x2, double, f64) #endif -#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp, suffix, width) \ +#define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp) \ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ { \ _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ @@ -2298,17 +2204,17 @@ inline v_##_Tpvec v_interleave_quads(const v_##_Tpvec& vec) \ return v_load(ptr); \ } -OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8x16, uchar, u8, 8) -OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8x16, schar, i8, 8) -OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16x8, ushort, u16, 16) -OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16x8, short, i16, 16) -OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32x4, unsigned, u32, 32) -OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32x4, int, i32, 32) -OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32x4, float, f32, 32) -OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64x2, uint64, u64, 64) -OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64x2, int64, i64, 64) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint8x16, uchar) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int8x16, schar) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint16x8, ushort) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int16x8, short) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint32x4, unsigned) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int32x4, int) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(float32x4, float) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(uint64x2, uint64) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(int64x2, int64) #if CV_SIMD128_64F -OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64x2, double, f64, 64) +OPENCV_HAL_IMPL_RVV_INTERLEAVED(float64x2, double) 
#endif //////////// PopCount //////////// @@ -2356,21 +2262,20 @@ OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(v_uint64x2, v_int64x2, uint64, int64, u64) //////////// SignMask //////////// -#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, width, shift) \ +#define OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(_Tpvec, _Tp, suffix, vl, shift) \ inline int v_signmask(const _Tpvec& a) \ { \ int mask = 0; \ - vsetvlmax_e##width##m1(); \ - _Tpvec tmp = _Tpvec(vsrl_vx_##suffix##m1(a, shift)); \ + _Tpvec tmp = _Tpvec(vsrl_vx_##suffix##m1(a, shift, vl)); \ for( int i = 0; i < _Tpvec::nlanes; i++ ) \ mask |= (int)(tmp.val[i]) << i; \ return mask; \ } -OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint8x16, uchar, u8, 8, 7) -OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint16x8, ushort, u16, 16, 15) -OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint32x4, unsigned, u32, 32, 31) -OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint64x2, uint64, u64, 64, 63) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint8x16, uchar, u8, 16, 7) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint16x8, ushort, u16, 8, 15) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint32x4, unsigned, u32, 4, 31) +OPENCV_HAL_IMPL_RVV_SIGNMASK_OP(v_uint64x2, uint64, u64, 2, 63) inline int v_signmask(const v_int8x16& a) { return v_signmask(v_reinterpret_as_u8(a)); } @@ -2445,12 +2350,12 @@ OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32x4, float) #if CV_FP16 inline v_float32x4 v_load_expand(const float16_t* ptr) { - return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr))); + return v_float32x4(vfwcvt_f_f_v_f32m1(vle16_v_f16mf2(ptr, 4), 4)); } inline void v_pack_store(float16_t* ptr, const v_float32x4& v) { - vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v)); + vse16_v_f16mf2(ptr, vfncvt_f_f_w_f16mf2(v, 4), 4); } #else inline v_float32x4 v_load_expand(const float16_t* ptr) @@ -2474,70 +2379,61 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v) inline v_int32x4 v_round(const v_float32x4& a) { - vsetvlmax_e32m1(); - return v_int32x4(vfcvt_x_f_v_i32m1(a)); + return v_int32x4(vfcvt_x_f_v_i32m1(a, 4)); } 
inline v_int32x4 v_floor(const v_float32x4& a) { v_float32x4 ZP5 = v_setall_f32(0.5f); v_float32x4 t = a - ZP5; - vsetvlmax_e32m1(); - return v_int32x4(vfcvt_x_f_v_i32m1(t)); + return v_int32x4(vfcvt_x_f_v_i32m1(t, 4)); } inline v_int32x4 v_ceil(const v_float32x4& a) { v_float32x4 ZP5 = v_setall_f32(0.5f); v_float32x4 t = a + ZP5; - vsetvlmax_e32m1(); - return v_int32x4(vfcvt_x_f_v_i32m1(t)); + return v_int32x4(vfcvt_x_f_v_i32m1(t, 4)); } inline v_int32x4 v_trunc(const v_float32x4& a) { - vsetvlmax_e32m1(); - return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a)); + return v_int32x4(vfcvt_rtz_x_f_v_i32m1(a, 4)); } #if CV_SIMD128_64F inline v_int32x4 v_round(const v_float64x2& a) { double arr[4] = {a.val[0], a.val[1], 0, 0}; - vsetvlmax_e64m2(); - vfloat64m2_t tmp = vle64_v_f64m2(arr); - return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp, 4)); } inline v_int32x4 v_round(const v_float64x2& a, const v_float64x2& b) { double arr[4] = {a.val[0], a.val[1], b.val[0], b.val[1]}; - vsetvlmax_e64m2(); - vfloat64m2_t tmp = vle64_v_f64m2(arr); - return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp, 4)); } inline v_int32x4 v_floor(const v_float64x2& a) { double arr[4] = {a.val[0]-0.5f, a.val[1]-0.5f, 0, 0}; - vsetvlmax_e64m2(); - vfloat64m2_t tmp = vle64_v_f64m2(arr); - return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp, 4)); } inline v_int32x4 v_ceil(const v_float64x2& a) { double arr[4] = {a.val[0]+0.5f, a.val[1]+0.5f, 0, 0}; - vsetvlmax_e64m2(); - vfloat64m2_t tmp = vle64_v_f64m2(arr); - return v_int32x4(vfncvt_x_f_w_i32m1(tmp)); + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_x_f_w_i32m1(tmp, 4)); } inline v_int32x4 v_trunc(const v_float64x2& a) { double arr[4] = {a.val[0], a.val[1], 0, 0}; - vsetvlmax_e64m2(); - vfloat64m2_t tmp = 
vle64_v_f64m2(arr); - return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp)); + vfloat64m2_t tmp = vle64_v_f64m2(arr, 4); + return v_int32x4(vfncvt_rtz_x_f_w_i32m1(tmp, 4)); } #endif @@ -2549,8 +2445,7 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) { int CV_DECL_ALIGNED(32) ptr[8] = {0}; v_int32x4 t1, t2; - vsetvlmax_e32m2(); - vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); v_load_deinterleave(ptr, t1, t2); return t1 + t2; } @@ -2558,8 +2453,7 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32 { int CV_DECL_ALIGNED(32) ptr[8] = {0}; v_int32x4 t1, t2; - vsetvlmax_e32m2(); - vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); v_load_deinterleave(ptr, t1, t2); return t1 + t2 + c; } @@ -2569,8 +2463,7 @@ inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) { int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; v_int64x2 t1, t2; - vsetvlmax_e64m2(); - vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); v_load_deinterleave(ptr, t1, t2); return t1 + t2; } @@ -2578,8 +2471,7 @@ inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64 { int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; v_int64x2 t1, t2; - vsetvlmax_e64m2(); - vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); v_load_deinterleave(ptr, t1, t2); return t1 + t2 + c; } @@ -2589,8 +2481,7 @@ inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) { unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; v_uint32x4 t1, t2, t3, t4; - vsetvlmax_e32m4(); - vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); v_load_deinterleave(ptr, t1, t2, t3, t4); return t1 + t2 + t3 + t4; } @@ -2599,8 +2490,7 @@ inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, { unsigned 
CV_DECL_ALIGNED(32) ptr[16] = {0}; v_uint32x4 t1, t2, t3, t4; - vsetvlmax_e32m4(); - vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); v_load_deinterleave(ptr, t1, t2, t3, t4); return t1 + t2 + t3 + t4 + c; } @@ -2609,8 +2499,7 @@ inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) { int CV_DECL_ALIGNED(32) ptr[16] = {0}; v_int32x4 t1, t2, t3, t4; - vsetvlmax_e32m4(); - vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); v_load_deinterleave(ptr, t1, t2, t3, t4); return t1 + t2 + t3 + t4; } @@ -2619,8 +2508,7 @@ inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, { int CV_DECL_ALIGNED(32) ptr[16] = {0}; v_int32x4 t1, t2, t3, t4; - vsetvlmax_e32m4(); - vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); v_load_deinterleave(ptr, t1, t2, t3, t4); return t1 + t2 + t3 + t4 + c; } @@ -2630,8 +2518,7 @@ inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) { uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; v_uint64x2 t1, t2, t3, t4; - vsetvlmax_e64m4(); - vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); v_load_deinterleave(ptr, t1, t2, t3, t4); return t1 + t2 + t3 + t4; } @@ -2639,8 +2526,7 @@ inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, con { uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; v_uint64x2 t1, t2, t3, t4; - vsetvlmax_e64m4(); - vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); v_load_deinterleave(ptr, t1, t2, t3, t4); return t1 + t2 + t3 + t4 + c; } @@ -2649,8 +2535,7 @@ inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) { int64 CV_DECL_ALIGNED(32) 
ptr[8] = {0}; v_int64x2 t1, t2, t3, t4; - vsetvlmax_e64m4(); - vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); v_load_deinterleave(ptr, t1, t2, t3, t4); return t1 + t2 + t3 + t4; } @@ -2659,8 +2544,7 @@ inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, { int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; v_int64x2 t1, t2, t3, t4; - vsetvlmax_e64m4(); - vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); v_load_deinterleave(ptr, t1, t2, t3, t4); return t1 + t2 + t3 + t4 + c; } @@ -2680,8 +2564,7 @@ inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) { int CV_DECL_ALIGNED(32) ptr[8] = {0}; - vsetvlmax_e32m2(); - vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); v_int32x4 t1 = v_load(ptr); v_int32x4 t2 = v_load(ptr+4); return t1 + t2; @@ -2689,8 +2572,7 @@ inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) { int CV_DECL_ALIGNED(32) ptr[8] = {0}; - vsetvlmax_e32m2(); - vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b)); + vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); v_int32x4 t1 = v_load(ptr); v_int32x4 t2 = v_load(ptr+4); return t1 + t2 + c; @@ -2700,8 +2582,7 @@ inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_ inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) { int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; - vsetvlmax_e64m2(); - vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); v_int64x2 t1 = v_load(ptr); v_int64x2 t2 = v_load(ptr+2); return t1 + t2; @@ -2709,8 +2590,7 @@ inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) inline v_int64x2 
v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) { int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; - vsetvlmax_e64m2(); - vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b)); + vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); v_int64x2 t1 = v_load(ptr); v_int64x2 t2 = v_load(ptr+2); return t1 + t2 + c; @@ -2721,8 +2601,7 @@ inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_ inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) { unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; - vsetvlmax_e32m4(); - vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); v_uint32x4 t1 = v_load(ptr); v_uint32x4 t2 = v_load(ptr+4); v_uint32x4 t3 = v_load(ptr+8); @@ -2732,8 +2611,7 @@ inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) { unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; - vsetvlmax_e32m4(); - vse32_v_u32m4(ptr, vqmaccu_vv_u32m4(vzero_u32m4(), a, b)); + vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); v_uint32x4 t1 = v_load(ptr); v_uint32x4 t2 = v_load(ptr+4); v_uint32x4 t3 = v_load(ptr+8); @@ -2743,8 +2621,7 @@ inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) { int CV_DECL_ALIGNED(32) ptr[16] = {0}; - vsetvlmax_e32m4(); - vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); v_int32x4 t1 = v_load(ptr); v_int32x4 t2 = v_load(ptr+4); v_int32x4 t3 = v_load(ptr+8); @@ -2754,8 +2631,7 @@ inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) { int CV_DECL_ALIGNED(32) ptr[16] = 
{0}; - vsetvlmax_e32m4(); - vse32_v_i32m4(ptr, vqmacc_vv_i32m4(vzero_i32m4(), a, b)); + vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); v_int32x4 t1 = v_load(ptr); v_int32x4 t2 = v_load(ptr+4); v_int32x4 t3 = v_load(ptr+8); @@ -2767,8 +2643,7 @@ inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, c inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) { uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; - vsetvlmax_e64m4(); - vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); v_uint64x2 t1 = v_load(ptr); v_uint64x2 t2 = v_load(ptr+2); v_uint64x2 t3 = v_load(ptr+4); @@ -2778,8 +2653,7 @@ inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) { uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; - vsetvlmax_e64m4(); - vse64_v_u64m4(ptr, vqmaccu_vv_u64m4(vzero_u64m4(), a, b)); + vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); v_uint64x2 t1 = v_load(ptr); v_uint64x2 t2 = v_load(ptr+2); v_uint64x2 t3 = v_load(ptr+4); @@ -2789,8 +2663,7 @@ inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) { int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; - vsetvlmax_e64m4(); - vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), a, b)); + vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); v_int64x2 t1 = v_load(ptr); v_int64x2 t2 = v_load(ptr+2); v_int64x2 t3 = v_load(ptr+4); @@ -2800,8 +2673,7 @@ inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) { int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; - vsetvlmax_e64m4(); - vse64_v_i64m4(ptr, vqmacc_vv_i64m4(vzero_i64m4(), 
a, b)); + vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); v_int64x2 t1 = v_load(ptr); v_int64x2 t2 = v_load(ptr+2); v_int64x2 t3 = v_load(ptr+4); @@ -2822,11 +2694,10 @@ inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, const v_float32x4& m1, const v_float32x4& m2, const v_float32x4& m3) { - vsetvlmax_e32m1(); - vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v)); - res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1); - res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2); - res = vfmacc_vf_f32m1(res, v_extract_n<3>(v), m3); + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v), 4); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1, 4); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2, 4); + res = vfmacc_vf_f32m1(res, v_extract_n<3>(v), m3, 4); return v_float32x4(res); } @@ -2834,40 +2705,35 @@ inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, const v_float32x4& m1, const v_float32x4& m2, const v_float32x4& a) { - vsetvlmax_e32m1(); - vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v)); - res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1); - res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2); + vfloat32m1_t res = vfmul_vf_f32m1(m0, v_extract_n<0>(v), 4); + res = vfmacc_vf_f32m1(res, v_extract_n<1>(v), m1, 4); + res = vfmacc_vf_f32m1(res, v_extract_n<2>(v), m2, 4); return v_float32x4(res) + a; } -#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _Tpw, suffix, wmul, width) \ +#define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _Tpw, suffix, wmul, width, vl, hvl) \ inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \ { \ _Tpw CV_DECL_ALIGNED(32) ptr[_Tpwvec::nlanes*2] = {0}; \ - vsetvlmax_e##width##m2(); \ - vse##width##_v_##suffix##m2(ptr, wmul(a, b)); \ - vsetvlmax_e##width##m1(); \ - c = _Tpwvec(vle##width##_v_##suffix##m1(ptr)); \ - d = _Tpwvec(vle##width##_v_##suffix##m1(ptr+_Tpwvec::nlanes)); \ + vse##width##_v_##suffix##m2(ptr, wmul(a, b, 
vl), vl); \ + c = _Tpwvec(vle##width##_v_##suffix##m1(ptr, hvl)); \ + d = _Tpwvec(vle##width##_v_##suffix##m1(ptr+_Tpwvec::nlanes, hvl)); \ } -OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8x16, v_uint16x8, ushort, u16, vwmulu_vv_u16m2, 16) -OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8x16, v_int16x8, short, i16, vwmul_vv_i16m2, 16) -OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16x8, v_uint32x4, unsigned, u32, vwmulu_vv_u32m2, 32) -OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16x8, v_int32x4, int, i32, vwmul_vv_i32m2, 32) -OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32x4, v_uint64x2, uint64, u64, vwmulu_vv_u64m2, 64) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint8x16, v_uint16x8, ushort, u16, vwmulu_vv_u16m2, 16, 16, 8) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int8x16, v_int16x8, short, i16, vwmul_vv_i16m2, 16, 16, 8) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint16x8, v_uint32x4, unsigned, u32, vwmulu_vv_u32m2, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_int16x8, v_int32x4, int, i32, vwmul_vv_i32m2, 32, 8, 4) +OPENCV_HAL_IMPL_RVV_MUL_EXPAND(v_uint32x4, v_uint64x2, uint64, u64, vwmulu_vv_u64m2, 64, 4, 2) inline v_int16x8 v_mul_hi(const v_int16x8& a, const v_int16x8& b) { - vsetvlmax_e16m1(); - return v_int16x8(vnsra_wx_i16m1(vwmul_vv_i32m2(a, b), 16)); + return v_int16x8(vnsra_wx_i16m1(vwmul_vv_i32m2(a, b, 8), 16, 8)); } inline v_uint16x8 v_mul_hi(const v_uint16x8& a, const v_uint16x8& b) { - vsetvlmax_e16m1(); - return v_uint16x8(vnsrl_wx_u16m1(vwmulu_vv_u32m2(a, b), 16)); + return v_uint16x8(vnsrl_wx_u16m1(vwmulu_vv_u32m2(a, b, 8), 16, 8)); } diff --git a/platforms/linux/riscv64-gcc.toolchain.cmake b/platforms/linux/riscv64-gcc.toolchain.cmake index c46d62a360d3..675879f86b9f 100644 --- a/platforms/linux/riscv64-gcc.toolchain.cmake +++ b/platforms/linux/riscv64-gcc.toolchain.cmake @@ -10,8 +10,8 @@ set(CMAKE_CXX_COMPILER ${RISCV_GCC_INSTALL_ROOT}/bin/riscv64-unknown-linux-gnu-g # Don't run the linker on compiler check set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) -set(CMAKE_C_FLAGS "-march=rv64gcv_zvqmac 
${CMAKE_C_FLAGS}") -set(CMAKE_CXX_FLAGS "-march=rv64gcv_zvqmac ${CXX_FLAGS}") +set(CMAKE_C_FLAGS "-march=rv64gcv_zfh ${CMAKE_C_FLAGS}") +set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh ${CXX_FLAGS}") set(CMAKE_FIND_ROOT_PATH ${CMAKE_SYSROOT}) set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) From 53eca2ff5b24a6088647632cb598d732fc582ce6 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 18 Jun 2021 20:16:07 +0300 Subject: [PATCH 009/128] Merge pull request #20196 from TolyaTalamanov:at/support-vaargs-compile-args G-API: Support vaargs for cv.compile_args * Support cv.compile_args to work with variadic number of inputs * Disable python2.x G-API * Move compile_args to gapi pkg --- .../gapi/misc/python/package/gapi/__init__.py | 5 + modules/gapi/misc/python/shadow_gapi.hpp | 9 +- .../gapi/misc/python/test/test_gapi_core.py | 332 +++++----- .../misc/python/test/test_gapi_imgproc.py | 163 ++--- .../gapi/misc/python/test/test_gapi_infer.py | 580 +++++++++--------- .../python/test/test_gapi_sample_pipelines.py | 36 +- .../misc/python/test/test_gapi_streaming.py | 314 +++++----- .../gapi/misc/python/test/test_gapi_types.py | 52 +- 8 files changed, 802 insertions(+), 689 deletions(-) diff --git a/modules/gapi/misc/python/package/gapi/__init__.py b/modules/gapi/misc/python/package/gapi/__init__.py index 733c980010af..23f5f41846f3 100644 --- a/modules/gapi/misc/python/package/gapi/__init__.py +++ b/modules/gapi/misc/python/package/gapi/__init__.py @@ -11,6 +11,11 @@ def parameterized(func): return parameterized +@register('cv2.gapi') +def compile_args(*args): + return list(map(cv.GCompileArg, args)) + + @register('cv2') class GOpaque(): # NB: Inheritance from c++ class cause segfault. 
diff --git a/modules/gapi/misc/python/shadow_gapi.hpp b/modules/gapi/misc/python/shadow_gapi.hpp index 40dab4158141..941250c2fb45 100644 --- a/modules/gapi/misc/python/shadow_gapi.hpp +++ b/modules/gapi/misc/python/shadow_gapi.hpp @@ -3,11 +3,10 @@ namespace cv { - struct GAPI_EXPORTS_W_SIMPLE GCompileArg { }; - - GAPI_EXPORTS_W GCompileArgs compile_args(gapi::GKernelPackage pkg); - GAPI_EXPORTS_W GCompileArgs compile_args(gapi::GNetPackage pkg); - GAPI_EXPORTS_W GCompileArgs compile_args(gapi::GKernelPackage kernels, gapi::GNetPackage nets); + struct GAPI_EXPORTS_W_SIMPLE GCompileArg { + GAPI_WRAP GCompileArg(gapi::GKernelPackage pkg); + GAPI_WRAP GCompileArg(gapi::GNetPackage pkg); + }; // NB: This classes doesn't exist in *.so // HACK: Mark them as a class to force python wrapper generate code for this entities diff --git a/modules/gapi/misc/python/test/test_gapi_core.py b/modules/gapi/misc/python/test/test_gapi_core.py index 814d05d7cde4..780558d98b1a 100644 --- a/modules/gapi/misc/python/test/test_gapi_core.py +++ b/modules/gapi/misc/python/test/test_gapi_core.py @@ -3,187 +3,209 @@ import numpy as np import cv2 as cv import os +import sys +import unittest from tests_common import NewOpenCVTests -# Plaidml is an optional backend -pkgs = [ - ('ocl' , cv.gapi.core.ocl.kernels()), - ('cpu' , cv.gapi.core.cpu.kernels()), - ('fluid' , cv.gapi.core.fluid.kernels()) - # ('plaidml', cv.gapi.core.plaidml.kernels()) - ] +try: + if sys.version_info[:2] < (3, 0): + raise unittest.SkipTest('Python 2.x is not supported') -class gapi_core_test(NewOpenCVTests): + # Plaidml is an optional backend + pkgs = [ + ('ocl' , cv.gapi.core.ocl.kernels()), + ('cpu' , cv.gapi.core.cpu.kernels()), + ('fluid' , cv.gapi.core.fluid.kernels()) + # ('plaidml', cv.gapi.core.plaidml.kernels()) + ] - def test_add(self): - # TODO: Extend to use any type and size here - sz = (720, 1280) - in1 = np.full(sz, 100) - in2 = np.full(sz, 50) - # OpenCV - expected = cv.add(in1, in2) + class 
gapi_core_test(NewOpenCVTests): - # G-API - g_in1 = cv.GMat() - g_in2 = cv.GMat() - g_out = cv.gapi.add(g_in1, g_in2) - comp = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out)) + def test_add(self): + # TODO: Extend to use any type and size here + sz = (720, 1280) + in1 = np.full(sz, 100) + in2 = np.full(sz, 50) - for pkg_name, pkg in pkgs: - actual = comp.apply(cv.gin(in1, in2), args=cv.compile_args(pkg)) - # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), - 'Failed on ' + pkg_name + ' backend') - self.assertEqual(expected.dtype, actual.dtype, 'Failed on ' + pkg_name + ' backend') + # OpenCV + expected = cv.add(in1, in2) + # G-API + g_in1 = cv.GMat() + g_in2 = cv.GMat() + g_out = cv.gapi.add(g_in1, g_in2) + comp = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out)) + + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(in1, in2), args=cv.gapi.compile_args(pkg)) + # Comparison + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(expected.dtype, actual.dtype, 'Failed on ' + pkg_name + ' backend') + + + def test_add_uint8(self): + sz = (720, 1280) + in1 = np.full(sz, 100, dtype=np.uint8) + in2 = np.full(sz, 50 , dtype=np.uint8) + + # OpenCV + expected = cv.add(in1, in2) + + # G-API + g_in1 = cv.GMat() + g_in2 = cv.GMat() + g_out = cv.gapi.add(g_in1, g_in2) + comp = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out)) + + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(in1, in2), args=cv.gapi.compile_args(pkg)) + # Comparison + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(expected.dtype, actual.dtype, 'Failed on ' + pkg_name + ' backend') - def test_add_uint8(self): - sz = (720, 1280) - in1 = np.full(sz, 100, dtype=np.uint8) - in2 = np.full(sz, 50 , dtype=np.uint8) - # OpenCV - expected = cv.add(in1, in2) + def test_mean(self): + img_path = self.find_file('cv/face/david2.jpg', 
[os.environ.get('OPENCV_TEST_DATA_PATH')]) + in_mat = cv.imread(img_path) - # G-API - g_in1 = cv.GMat() - g_in2 = cv.GMat() - g_out = cv.gapi.add(g_in1, g_in2) - comp = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out)) + # OpenCV + expected = cv.mean(in_mat) - for pkg_name, pkg in pkgs: - actual = comp.apply(cv.gin(in1, in2), args=cv.compile_args(pkg)) - # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), - 'Failed on ' + pkg_name + ' backend') - self.assertEqual(expected.dtype, actual.dtype, 'Failed on ' + pkg_name + ' backend') + # G-API + g_in = cv.GMat() + g_out = cv.gapi.mean(g_in) + comp = cv.GComputation(g_in, g_out) + + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(in_mat), args=cv.gapi.compile_args(pkg)) + # Comparison + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') - def test_mean(self): - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - in_mat = cv.imread(img_path) + def test_split3(self): + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + in_mat = cv.imread(img_path) - # OpenCV - expected = cv.mean(in_mat) + # OpenCV + expected = cv.split(in_mat) - # G-API - g_in = cv.GMat() - g_out = cv.gapi.mean(g_in) - comp = cv.GComputation(g_in, g_out) + # G-API + g_in = cv.GMat() + b, g, r = cv.gapi.split3(g_in) + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(b, g, r)) - for pkg_name, pkg in pkgs: - actual = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) - # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), - 'Failed on ' + pkg_name + ' backend') + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(in_mat), args=cv.gapi.compile_args(pkg)) + # Comparison + for e, a in zip(expected, actual): + self.assertEqual(0.0, cv.norm(e, a, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(e.dtype, a.dtype, 'Failed on ' + pkg_name + ' 
backend') - def test_split3(self): - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - in_mat = cv.imread(img_path) + def test_threshold(self): + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + in_mat = cv.cvtColor(cv.imread(img_path), cv.COLOR_RGB2GRAY) + maxv = (30, 30) - # OpenCV - expected = cv.split(in_mat) + # OpenCV + expected_thresh, expected_mat = cv.threshold(in_mat, maxv[0], maxv[0], cv.THRESH_TRIANGLE) - # G-API - g_in = cv.GMat() - b, g, r = cv.gapi.split3(g_in) - comp = cv.GComputation(cv.GIn(g_in), cv.GOut(b, g, r)) + # G-API + g_in = cv.GMat() + g_sc = cv.GScalar() + mat, threshold = cv.gapi.threshold(g_in, g_sc, cv.THRESH_TRIANGLE) + comp = cv.GComputation(cv.GIn(g_in, g_sc), cv.GOut(mat, threshold)) - for pkg_name, pkg in pkgs: - actual = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) - # Comparison - for e, a in zip(expected, actual): - self.assertEqual(0.0, cv.norm(e, a, cv.NORM_INF), + for pkg_name, pkg in pkgs: + actual_mat, actual_thresh = comp.apply(cv.gin(in_mat, maxv), args=cv.gapi.compile_args(pkg)) + # Comparison + self.assertEqual(0.0, cv.norm(expected_mat, actual_mat, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(expected_mat.dtype, actual_mat.dtype, + 'Failed on ' + pkg_name + ' backend') + self.assertEqual(expected_thresh, actual_thresh[0], 'Failed on ' + pkg_name + ' backend') - self.assertEqual(e.dtype, a.dtype, 'Failed on ' + pkg_name + ' backend') - - - def test_threshold(self): - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - in_mat = cv.cvtColor(cv.imread(img_path), cv.COLOR_RGB2GRAY) - maxv = (30, 30) - - # OpenCV - expected_thresh, expected_mat = cv.threshold(in_mat, maxv[0], maxv[0], cv.THRESH_TRIANGLE) - - # G-API - g_in = cv.GMat() - g_sc = cv.GScalar() - mat, threshold = cv.gapi.threshold(g_in, g_sc, cv.THRESH_TRIANGLE) - comp = cv.GComputation(cv.GIn(g_in, 
g_sc), cv.GOut(mat, threshold)) - - for pkg_name, pkg in pkgs: - actual_mat, actual_thresh = comp.apply(cv.gin(in_mat, maxv), args=cv.compile_args(pkg)) - # Comparison - self.assertEqual(0.0, cv.norm(expected_mat, actual_mat, cv.NORM_INF), - 'Failed on ' + pkg_name + ' backend') - self.assertEqual(expected_mat.dtype, actual_mat.dtype, - 'Failed on ' + pkg_name + ' backend') - self.assertEqual(expected_thresh, actual_thresh[0], - 'Failed on ' + pkg_name + ' backend') - - def test_kmeans(self): - # K-means params - count = 100 - sz = (count, 2) - in_mat = np.random.random(sz).astype(np.float32) - K = 5 - flags = cv.KMEANS_RANDOM_CENTERS - attempts = 1; - criteria = (cv.TERM_CRITERIA_MAX_ITER + cv.TERM_CRITERIA_EPS, 30, 0) - - # G-API - g_in = cv.GMat() - compactness, out_labels, centers = cv.gapi.kmeans(g_in, K, criteria, attempts, flags) - comp = cv.GComputation(cv.GIn(g_in), cv.GOut(compactness, out_labels, centers)) - - compact, labels, centers = comp.apply(cv.gin(in_mat)) - - # Assert - self.assertTrue(compact >= 0) - self.assertEqual(sz[0], labels.shape[0]) - self.assertEqual(1, labels.shape[1]) - self.assertTrue(labels.size != 0) - self.assertEqual(centers.shape[1], sz[1]); - self.assertEqual(centers.shape[0], K); - self.assertTrue(centers.size != 0); - - - def generate_random_points(self, sz): - arr = np.random.random(sz).astype(np.float32).T - return list(zip(arr[0], arr[1])) - - - def test_kmeans_2d(self): - # K-means 2D params - count = 100 - sz = (count, 2) - amount = sz[0] - K = 5 - flags = cv.KMEANS_RANDOM_CENTERS - attempts = 1; - criteria = (cv.TERM_CRITERIA_MAX_ITER + cv.TERM_CRITERIA_EPS, 30, 0); - in_vector = self.generate_random_points(sz) - in_labels = [] - - # G-API - data = cv.GArrayT(cv.gapi.CV_POINT2F) - best_labels = cv.GArrayT(cv.gapi.CV_INT) - - compactness, out_labels, centers = cv.gapi.kmeans(data, K, best_labels, criteria, attempts, flags); - comp = cv.GComputation(cv.GIn(data, best_labels), cv.GOut(compactness, out_labels, centers)); - 
- compact, labels, centers = comp.apply(cv.gin(in_vector, in_labels)); - - # Assert - self.assertTrue(compact >= 0) - self.assertEqual(amount, len(labels)) - self.assertEqual(K, len(centers)) + + + def test_kmeans(self): + # K-means params + count = 100 + sz = (count, 2) + in_mat = np.random.random(sz).astype(np.float32) + K = 5 + flags = cv.KMEANS_RANDOM_CENTERS + attempts = 1 + criteria = (cv.TERM_CRITERIA_MAX_ITER + cv.TERM_CRITERIA_EPS, 30, 0) + + # G-API + g_in = cv.GMat() + compactness, out_labels, centers = cv.gapi.kmeans(g_in, K, criteria, attempts, flags) + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(compactness, out_labels, centers)) + + compact, labels, centers = comp.apply(cv.gin(in_mat)) + + # Assert + self.assertTrue(compact >= 0) + self.assertEqual(sz[0], labels.shape[0]) + self.assertEqual(1, labels.shape[1]) + self.assertTrue(labels.size != 0) + self.assertEqual(centers.shape[1], sz[1]) + self.assertEqual(centers.shape[0], K) + self.assertTrue(centers.size != 0) + + + def generate_random_points(self, sz): + arr = np.random.random(sz).astype(np.float32).T + return list(zip(arr[0], arr[1])) + + + def test_kmeans_2d(self): + # K-means 2D params + count = 100 + sz = (count, 2) + amount = sz[0] + K = 5 + flags = cv.KMEANS_RANDOM_CENTERS + attempts = 1 + criteria = (cv.TERM_CRITERIA_MAX_ITER + cv.TERM_CRITERIA_EPS, 30, 0) + in_vector = self.generate_random_points(sz) + in_labels = [] + + # G-API + data = cv.GArrayT(cv.gapi.CV_POINT2F) + best_labels = cv.GArrayT(cv.gapi.CV_INT) + + compactness, out_labels, centers = cv.gapi.kmeans(data, K, best_labels, criteria, attempts, flags) + comp = cv.GComputation(cv.GIn(data, best_labels), cv.GOut(compactness, out_labels, centers)) + + compact, labels, centers = comp.apply(cv.gin(in_vector, in_labels)) + + # Assert + self.assertTrue(compact >= 0) + self.assertEqual(amount, len(labels)) + self.assertEqual(K, len(centers)) + + +except unittest.SkipTest as e: + + message = str(e) + + class 
TestSkip(unittest.TestCase): + def setUp(self): + self.skipTest('Skip tests: ' + message) + + def test_skip(): + pass + + pass if __name__ == '__main__': diff --git a/modules/gapi/misc/python/test/test_gapi_imgproc.py b/modules/gapi/misc/python/test/test_gapi_imgproc.py index ed6f883fe55f..365a5a8cca74 100644 --- a/modules/gapi/misc/python/test/test_gapi_imgproc.py +++ b/modules/gapi/misc/python/test/test_gapi_imgproc.py @@ -3,103 +3,124 @@ import numpy as np import cv2 as cv import os +import sys +import unittest from tests_common import NewOpenCVTests -# Plaidml is an optional backend -pkgs = [ - ('ocl' , cv.gapi.core.ocl.kernels()), - ('cpu' , cv.gapi.core.cpu.kernels()), - ('fluid' , cv.gapi.core.fluid.kernels()) - # ('plaidml', cv.gapi.core.plaidml.kernels()) - ] +try: + if sys.version_info[:2] < (3, 0): + raise unittest.SkipTest('Python 2.x is not supported') -class gapi_imgproc_test(NewOpenCVTests): + # Plaidml is an optional backend + pkgs = [ + ('ocl' , cv.gapi.core.ocl.kernels()), + ('cpu' , cv.gapi.core.cpu.kernels()), + ('fluid' , cv.gapi.core.fluid.kernels()) + # ('plaidml', cv.gapi.core.plaidml.kernels()) + ] - def test_good_features_to_track(self): - # TODO: Extend to use any type and size here - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - in1 = cv.cvtColor(cv.imread(img_path), cv.COLOR_RGB2GRAY) - # NB: goodFeaturesToTrack configuration - max_corners = 50 - quality_lvl = 0.01 - min_distance = 10 - block_sz = 3 - use_harris_detector = True - k = 0.04 - mask = None + class gapi_imgproc_test(NewOpenCVTests): - # OpenCV - expected = cv.goodFeaturesToTrack(in1, max_corners, quality_lvl, - min_distance, mask=mask, - blockSize=block_sz, useHarrisDetector=use_harris_detector, k=k) + def test_good_features_to_track(self): + # TODO: Extend to use any type and size here + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + in1 = cv.cvtColor(cv.imread(img_path), 
cv.COLOR_RGB2GRAY) - # G-API - g_in = cv.GMat() - g_out = cv.gapi.goodFeaturesToTrack(g_in, max_corners, quality_lvl, - min_distance, mask, block_sz, use_harris_detector, k) + # NB: goodFeaturesToTrack configuration + max_corners = 50 + quality_lvl = 0.01 + min_distance = 10 + block_sz = 3 + use_harris_detector = True + k = 0.04 + mask = None - comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + # OpenCV + expected = cv.goodFeaturesToTrack(in1, max_corners, quality_lvl, + min_distance, mask=mask, + blockSize=block_sz, useHarrisDetector=use_harris_detector, k=k) - for pkg_name, pkg in pkgs: - actual = comp.apply(cv.gin(in1), args=cv.compile_args(pkg)) - # NB: OpenCV & G-API have different output shapes: - # OpenCV - (num_points, 1, 2) - # G-API - (num_points, 2) - # Comparison - self.assertEqual(0.0, cv.norm(expected.flatten(), - np.array(actual, dtype=np.float32).flatten(), - cv.NORM_INF), - 'Failed on ' + pkg_name + ' backend') + # G-API + g_in = cv.GMat() + g_out = cv.gapi.goodFeaturesToTrack(g_in, max_corners, quality_lvl, + min_distance, mask, block_sz, use_harris_detector, k) + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) - def test_rgb2gray(self): - # TODO: Extend to use any type and size here - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - in1 = cv.imread(img_path) + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(in1), args=cv.gapi.compile_args(pkg)) + # NB: OpenCV & G-API have different output shapes: + # OpenCV - (num_points, 1, 2) + # G-API - (num_points, 2) + # Comparison + self.assertEqual(0.0, cv.norm(expected.flatten(), + np.array(actual, dtype=np.float32).flatten(), + cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') - # OpenCV - expected = cv.cvtColor(in1, cv.COLOR_RGB2GRAY) - # G-API - g_in = cv.GMat() - g_out = cv.gapi.RGB2Gray(g_in) + def test_rgb2gray(self): + # TODO: Extend to use any type and size here + img_path = self.find_file('cv/face/david2.jpg', 
[os.environ.get('OPENCV_TEST_DATA_PATH')]) + in1 = cv.imread(img_path) - comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + # OpenCV + expected = cv.cvtColor(in1, cv.COLOR_RGB2GRAY) - for pkg_name, pkg in pkgs: - actual = comp.apply(cv.gin(in1), args=cv.compile_args(pkg)) - # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), - 'Failed on ' + pkg_name + ' backend') + # G-API + g_in = cv.GMat() + g_out = cv.gapi.RGB2Gray(g_in) + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) - def test_bounding_rect(self): - sz = 1280 - fscale = 256 + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(in1), args=cv.gapi.compile_args(pkg)) + # Comparison + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') - def sample_value(fscale): - return np.random.uniform(0, 255 * fscale) / fscale - points = np.array([(sample_value(fscale), sample_value(fscale)) for _ in range(1280)], np.float32) + def test_bounding_rect(self): + sz = 1280 + fscale = 256 - # OpenCV - expected = cv.boundingRect(points) + def sample_value(fscale): + return np.random.uniform(0, 255 * fscale) / fscale - # G-API - g_in = cv.GMat() - g_out = cv.gapi.boundingRect(g_in) + points = np.array([(sample_value(fscale), sample_value(fscale)) for _ in range(1280)], np.float32) - comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + # OpenCV + expected = cv.boundingRect(points) - for pkg_name, pkg in pkgs: - actual = comp.apply(cv.gin(points), args=cv.compile_args(pkg)) - # Comparison - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), - 'Failed on ' + pkg_name + ' backend') + # G-API + g_in = cv.GMat() + g_out = cv.gapi.boundingRect(g_in) + + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + + for pkg_name, pkg in pkgs: + actual = comp.apply(cv.gin(points), args=cv.gapi.compile_args(pkg)) + # Comparison + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF), + 'Failed on ' + pkg_name + ' backend') + + +except 
unittest.SkipTest as e: + + message = str(e) + + class TestSkip(unittest.TestCase): + def setUp(self): + self.skipTest('Skip tests: ' + message) + + def test_skip(): + pass + + pass if __name__ == '__main__': diff --git a/modules/gapi/misc/python/test/test_gapi_infer.py b/modules/gapi/misc/python/test/test_gapi_infer.py index db048f57866c..8ecc957e416d 100644 --- a/modules/gapi/misc/python/test/test_gapi_infer.py +++ b/modules/gapi/misc/python/test/test_gapi_infer.py @@ -3,318 +3,338 @@ import numpy as np import cv2 as cv import os +import sys +import unittest from tests_common import NewOpenCVTests -class test_gapi_infer(NewOpenCVTests): +try: - def infer_reference_network(self, model_path, weights_path, img): - net = cv.dnn.readNetFromModelOptimizer(model_path, weights_path) - net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) - net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) + if sys.version_info[:2] < (3, 0): + raise unittest.SkipTest('Python 2.x is not supported') - blob = cv.dnn.blobFromImage(img) - net.setInput(blob) - return net.forward(net.getUnconnectedOutLayersNames()) + class test_gapi_infer(NewOpenCVTests): + def infer_reference_network(self, model_path, weights_path, img): + net = cv.dnn.readNetFromModelOptimizer(model_path, weights_path) + net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) + net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) - def make_roi(self, img, roi): - return img[roi[1]:roi[1] + roi[3], roi[0]:roi[0] + roi[2], ...] 
+ blob = cv.dnn.blobFromImage(img) + net.setInput(blob) + return net.forward(net.getUnconnectedOutLayersNames()) - def test_age_gender_infer(self): - # NB: Check IE - if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): - return - root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - device_id = 'CPU' + def make_roi(self, img, roi): + return img[roi[1]:roi[1] + roi[3], roi[0]:roi[0] + roi[2], ...] - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - img = cv.resize(cv.imread(img_path), (62,62)) - # OpenCV DNN - dnn_age, dnn_gender = self.infer_reference_network(model_path, weights_path, img) + def test_age_gender_infer(self): + # NB: Check IE + if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): + return - # OpenCV G-API - g_in = cv.GMat() - inputs = cv.GInferInputs() - inputs.setInput('data', g_in) + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + device_id = 'CPU' - outputs = cv.gapi.infer("net", inputs) - age_g = outputs.at("age_conv3") - gender_g = outputs.at("prob") + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img = cv.resize(cv.imread(img_path), (62,62)) - comp = cv.GComputation(cv.GIn(g_in), cv.GOut(age_g, gender_g)) - pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) + # OpenCV DNN + dnn_age, dnn_gender = self.infer_reference_network(model_path, 
weights_path, img) - gapi_age, gapi_gender = comp.apply(cv.gin(img), args=cv.compile_args(cv.gapi.networks(pp))) + # OpenCV G-API + g_in = cv.GMat() + inputs = cv.GInferInputs() + inputs.setInput('data', g_in) - # Check - self.assertEqual(0.0, cv.norm(dnn_gender, gapi_gender, cv.NORM_INF)) - self.assertEqual(0.0, cv.norm(dnn_age, gapi_age, cv.NORM_INF)) + outputs = cv.gapi.infer("net", inputs) + age_g = outputs.at("age_conv3") + gender_g = outputs.at("prob") + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(age_g, gender_g)) + pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) - def test_age_gender_infer_roi(self): - # NB: Check IE - if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): - return + gapi_age, gapi_gender = comp.apply(cv.gin(img), args=cv.gapi.compile_args(cv.gapi.networks(pp))) - root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - device_id = 'CPU' + # Check + self.assertEqual(0.0, cv.norm(dnn_gender, gapi_gender, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(dnn_age, gapi_age, cv.NORM_INF)) - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - img = cv.imread(img_path) - roi = (10, 10, 62, 62) - # OpenCV DNN - dnn_age, dnn_gender = self.infer_reference_network(model_path, - weights_path, - self.make_roi(img, roi)) + def test_age_gender_infer_roi(self): + # NB: Check IE + if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): + return - # OpenCV G-API - g_in = cv.GMat() - g_roi = cv.GOpaqueT(cv.gapi.CV_RECT) - inputs = cv.GInferInputs() - inputs.setInput('data', g_in) - - outputs = cv.gapi.infer("net", g_roi, inputs) - age_g = outputs.at("age_conv3") - 
gender_g = outputs.at("prob") - - comp = cv.GComputation(cv.GIn(g_in, g_roi), cv.GOut(age_g, gender_g)) - pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) - - gapi_age, gapi_gender = comp.apply(cv.gin(img, roi), args=cv.compile_args(cv.gapi.networks(pp))) - - # Check - self.assertEqual(0.0, cv.norm(dnn_gender, gapi_gender, cv.NORM_INF)) - self.assertEqual(0.0, cv.norm(dnn_age, gapi_age, cv.NORM_INF)) - - - def test_age_gender_infer_roi_list(self): - # NB: Check IE - if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): - return - - root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - device_id = 'CPU' - - rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - img = cv.imread(img_path) - - # OpenCV DNN - dnn_age_list = [] - dnn_gender_list = [] - for roi in rois: - age, gender = self.infer_reference_network(model_path, - weights_path, - self.make_roi(img, roi)) - dnn_age_list.append(age) - dnn_gender_list.append(gender) - - # OpenCV G-API - g_in = cv.GMat() - g_rois = cv.GArrayT(cv.gapi.CV_RECT) - inputs = cv.GInferInputs() - inputs.setInput('data', g_in) - - outputs = cv.gapi.infer("net", g_rois, inputs) - age_g = outputs.at("age_conv3") - gender_g = outputs.at("prob") - - comp = cv.GComputation(cv.GIn(g_in, g_rois), cv.GOut(age_g, gender_g)) - pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) - - gapi_age_list, gapi_gender_list = comp.apply(cv.gin(img, rois), - args=cv.compile_args(cv.gapi.networks(pp))) - - # Check - for gapi_age, gapi_gender, dnn_age, dnn_gender in zip(gapi_age_list, - gapi_gender_list, - 
dnn_age_list, - dnn_gender_list): - self.assertEqual(0.0, cv.norm(dnn_gender, gapi_gender, cv.NORM_INF)) - self.assertEqual(0.0, cv.norm(dnn_age, gapi_age, cv.NORM_INF)) + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + device_id = 'CPU' + + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img = cv.imread(img_path) + roi = (10, 10, 62, 62) + + # OpenCV DNN + dnn_age, dnn_gender = self.infer_reference_network(model_path, + weights_path, + self.make_roi(img, roi)) + + # OpenCV G-API + g_in = cv.GMat() + g_roi = cv.GOpaqueT(cv.gapi.CV_RECT) + inputs = cv.GInferInputs() + inputs.setInput('data', g_in) + + outputs = cv.gapi.infer("net", g_roi, inputs) + age_g = outputs.at("age_conv3") + gender_g = outputs.at("prob") + + comp = cv.GComputation(cv.GIn(g_in, g_roi), cv.GOut(age_g, gender_g)) + pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) + gapi_age, gapi_gender = comp.apply(cv.gin(img, roi), args=cv.gapi.compile_args(cv.gapi.networks(pp))) - def test_age_gender_infer2_roi(self): - # NB: Check IE - if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): - return - - root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - device_id = 'CPU' - - rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] - img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - img = cv.imread(img_path) - - # OpenCV DNN - 
dnn_age_list = [] - dnn_gender_list = [] - for roi in rois: - age, gender = self.infer_reference_network(model_path, - weights_path, - self.make_roi(img, roi)) - dnn_age_list.append(age) - dnn_gender_list.append(gender) - - # OpenCV G-API - g_in = cv.GMat() - g_rois = cv.GArrayT(cv.gapi.CV_RECT) - inputs = cv.GInferListInputs() - inputs.setInput('data', g_rois) - - outputs = cv.gapi.infer2("net", g_in, inputs) - age_g = outputs.at("age_conv3") - gender_g = outputs.at("prob") - - comp = cv.GComputation(cv.GIn(g_in, g_rois), cv.GOut(age_g, gender_g)) - pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) - - gapi_age_list, gapi_gender_list = comp.apply(cv.gin(img, rois), - args=cv.compile_args(cv.gapi.networks(pp))) - - # Check - for gapi_age, gapi_gender, dnn_age, dnn_gender in zip(gapi_age_list, - gapi_gender_list, - dnn_age_list, - dnn_gender_list): + # Check self.assertEqual(0.0, cv.norm(dnn_gender, gapi_gender, cv.NORM_INF)) self.assertEqual(0.0, cv.norm(dnn_age, gapi_age, cv.NORM_INF)) + def test_age_gender_infer_roi_list(self): + # NB: Check IE + if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): + return + + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + device_id = 'CPU' + + rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img = cv.imread(img_path) + + # OpenCV DNN + dnn_age_list = [] + dnn_gender_list = [] + for roi in rois: + age, gender = self.infer_reference_network(model_path, + weights_path, + self.make_roi(img, roi)) + dnn_age_list.append(age) + dnn_gender_list.append(gender) + + # OpenCV G-API + g_in = 
cv.GMat() + g_rois = cv.GArrayT(cv.gapi.CV_RECT) + inputs = cv.GInferInputs() + inputs.setInput('data', g_in) + + outputs = cv.gapi.infer("net", g_rois, inputs) + age_g = outputs.at("age_conv3") + gender_g = outputs.at("prob") + + comp = cv.GComputation(cv.GIn(g_in, g_rois), cv.GOut(age_g, gender_g)) + pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) + + gapi_age_list, gapi_gender_list = comp.apply(cv.gin(img, rois), + args=cv.gapi.compile_args(cv.gapi.networks(pp))) + + # Check + for gapi_age, gapi_gender, dnn_age, dnn_gender in zip(gapi_age_list, + gapi_gender_list, + dnn_age_list, + dnn_gender_list): + self.assertEqual(0.0, cv.norm(dnn_gender, gapi_gender, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(dnn_age, gapi_age, cv.NORM_INF)) + + + def test_age_gender_infer2_roi(self): + # NB: Check IE + if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): + return + + root_path = '/omz_intel_models/intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + device_id = 'CPU' + + rois = [(10, 15, 62, 62), (23, 50, 62, 62), (14, 100, 62, 62), (80, 50, 62, 62)] + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + img = cv.imread(img_path) + + # OpenCV DNN + dnn_age_list = [] + dnn_gender_list = [] + for roi in rois: + age, gender = self.infer_reference_network(model_path, + weights_path, + self.make_roi(img, roi)) + dnn_age_list.append(age) + dnn_gender_list.append(gender) + + # OpenCV G-API + g_in = cv.GMat() + g_rois = cv.GArrayT(cv.gapi.CV_RECT) + inputs = cv.GInferListInputs() + inputs.setInput('data', g_rois) + + outputs = cv.gapi.infer2("net", g_in, inputs) + age_g = outputs.at("age_conv3") + gender_g = outputs.at("prob") + + comp = 
cv.GComputation(cv.GIn(g_in, g_rois), cv.GOut(age_g, gender_g)) + pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) + + gapi_age_list, gapi_gender_list = comp.apply(cv.gin(img, rois), + args=cv.gapi.compile_args(cv.gapi.networks(pp))) + + # Check + for gapi_age, gapi_gender, dnn_age, dnn_gender in zip(gapi_age_list, + gapi_gender_list, + dnn_age_list, + dnn_gender_list): + self.assertEqual(0.0, cv.norm(dnn_gender, gapi_gender, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(dnn_age, gapi_age, cv.NORM_INF)) + + + + def test_person_detection_retail_0013(self): + # NB: Check IE + if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): + return + + root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + img_path = self.find_file('gpu/lbpcascade/er.png', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + device_id = 'CPU' + img = cv.resize(cv.imread(img_path), (544, 320)) + + # OpenCV DNN + net = cv.dnn.readNetFromModelOptimizer(model_path, weights_path) + net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) + net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) + + blob = cv.dnn.blobFromImage(img) + + def parseSSD(detections, size): + h, w = size + bboxes = [] + detections = detections.reshape(-1, 7) + for sample_id, class_id, confidence, xmin, ymin, xmax, ymax in detections: + if confidence >= 0.5: + x = int(xmin * w) + y = int(ymin * h) + width = int(xmax * w - x) + height = int(ymax * h - y) + bboxes.append((x, y, width, height)) + + return bboxes + + net.setInput(blob) + dnn_detections = net.forward() + dnn_boxes = parseSSD(np.array(dnn_detections), img.shape[:2]) + + # OpenCV G-API + g_in = cv.GMat() + inputs = cv.GInferInputs() + inputs.setInput('data', g_in) + + g_sz = 
cv.gapi.streaming.size(g_in) + outputs = cv.gapi.infer("net", inputs) + detections = outputs.at("detection_out") + bboxes = cv.gapi.parseSSD(detections, g_sz, 0.5, False, False) + + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(bboxes)) + pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) + + gapi_boxes = comp.apply(cv.gin(img.astype(np.float32)), + args=cv.gapi.compile_args(cv.gapi.networks(pp))) + + # Comparison + self.assertEqual(0.0, cv.norm(np.array(dnn_boxes).flatten(), + np.array(gapi_boxes).flatten(), + cv.NORM_INF)) + + + def test_person_detection_retail_0013(self): + # NB: Check IE + if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): + return + + root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' + model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) + img_path = self.find_file('gpu/lbpcascade/er.png', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + device_id = 'CPU' + img = cv.resize(cv.imread(img_path), (544, 320)) + + # OpenCV DNN + net = cv.dnn.readNetFromModelOptimizer(model_path, weights_path) + net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) + net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) + + blob = cv.dnn.blobFromImage(img) + + def parseSSD(detections, size): + h, w = size + bboxes = [] + detections = detections.reshape(-1, 7) + for sample_id, class_id, confidence, xmin, ymin, xmax, ymax in detections: + if confidence >= 0.5: + x = int(xmin * w) + y = int(ymin * h) + width = int(xmax * w - x) + height = int(ymax * h - y) + bboxes.append((x, y, width, height)) + + return bboxes + + net.setInput(blob) + dnn_detections = net.forward() + dnn_boxes = parseSSD(np.array(dnn_detections), img.shape[:2]) + + # OpenCV G-API + g_in = cv.GMat() + inputs = cv.GInferInputs() + inputs.setInput('data', 
g_in) + + g_sz = cv.gapi.streaming.size(g_in) + outputs = cv.gapi.infer("net", inputs) + detections = outputs.at("detection_out") + bboxes = cv.gapi.parseSSD(detections, g_sz, 0.5, False, False) + + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(bboxes)) + pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) + + gapi_boxes = comp.apply(cv.gin(img.astype(np.float32)), + args=cv.gapi.compile_args(cv.gapi.networks(pp))) + + # Comparison + self.assertEqual(0.0, cv.norm(np.array(dnn_boxes).flatten(), + np.array(gapi_boxes).flatten(), + cv.NORM_INF)) + + +except unittest.SkipTest as e: + + message = str(e) + + class TestSkip(unittest.TestCase): + def setUp(self): + self.skipTest('Skip tests: ' + message) + + def test_skip(): + pass - def test_person_detection_retail_0013(self): - # NB: Check IE - if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): - return - - root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - img_path = self.find_file('gpu/lbpcascade/er.png', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - device_id = 'CPU' - img = cv.resize(cv.imread(img_path), (544, 320)) - - # OpenCV DNN - net = cv.dnn.readNetFromModelOptimizer(model_path, weights_path) - net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) - net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) - - blob = cv.dnn.blobFromImage(img) - - def parseSSD(detections, size): - h, w = size - bboxes = [] - detections = detections.reshape(-1, 7) - for sample_id, class_id, confidence, xmin, ymin, xmax, ymax in detections: - if confidence >= 0.5: - x = int(xmin * w) - y = int(ymin * h) - width = int(xmax * w - x) - height = int(ymax * h - y) - bboxes.append((x, y, width, height)) - - return bboxes - - 
net.setInput(blob) - dnn_detections = net.forward() - dnn_boxes = parseSSD(np.array(dnn_detections), img.shape[:2]) - - # OpenCV G-API - g_in = cv.GMat() - inputs = cv.GInferInputs() - inputs.setInput('data', g_in) - - g_sz = cv.gapi.streaming.size(g_in) - outputs = cv.gapi.infer("net", inputs) - detections = outputs.at("detection_out") - bboxes = cv.gapi.parseSSD(detections, g_sz, 0.5, False, False) - - comp = cv.GComputation(cv.GIn(g_in), cv.GOut(bboxes)) - pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) - - gapi_age, gapi_gender = comp.apply(cv.gin(img), args=cv.compile_args(cv.gapi.networks(pp))) - - gapi_boxes = comp.apply(cv.gin(img.astype(np.float32)), - args=cv.compile_args(cv.gapi.networks(pp))) - - # Comparison - self.assertEqual(0.0, cv.norm(np.array(dnn_boxes).flatten(), - np.array(gapi_boxes).flatten(), - cv.NORM_INF)) - - - def test_person_detection_retail_0013(self): - # NB: Check IE - if not cv.dnn.DNN_TARGET_CPU in cv.dnn.getAvailableTargets(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE): - return - - root_path = '/omz_intel_models/intel/person-detection-retail-0013/FP32/person-detection-retail-0013' - model_path = self.find_file(root_path + '.xml', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - weights_path = self.find_file(root_path + '.bin', [os.environ.get('OPENCV_DNN_TEST_DATA_PATH')]) - img_path = self.find_file('gpu/lbpcascade/er.png', [os.environ.get('OPENCV_TEST_DATA_PATH')]) - device_id = 'CPU' - img = cv.resize(cv.imread(img_path), (544, 320)) - - # OpenCV DNN - net = cv.dnn.readNetFromModelOptimizer(model_path, weights_path) - net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE) - net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU) - - blob = cv.dnn.blobFromImage(img) - - def parseSSD(detections, size): - h, w = size - bboxes = [] - detections = detections.reshape(-1, 7) - for sample_id, class_id, confidence, xmin, ymin, xmax, ymax in detections: - if confidence >= 0.5: - x = int(xmin * w) - y = int(ymin * h) - width = 
int(xmax * w - x) - height = int(ymax * h - y) - bboxes.append((x, y, width, height)) - - return bboxes - - net.setInput(blob) - dnn_detections = net.forward() - dnn_boxes = parseSSD(np.array(dnn_detections), img.shape[:2]) - - # OpenCV G-API - g_in = cv.GMat() - inputs = cv.GInferInputs() - inputs.setInput('data', g_in) - - g_sz = cv.gapi.streaming.size(g_in) - outputs = cv.gapi.infer("net", inputs) - detections = outputs.at("detection_out") - bboxes = cv.gapi.parseSSD(detections, g_sz, 0.5, False, False) - - comp = cv.GComputation(cv.GIn(g_in), cv.GOut(bboxes)) - pp = cv.gapi.ie.params("net", model_path, weights_path, device_id) - - gapi_boxes = comp.apply(cv.gin(img.astype(np.float32)), - args=cv.compile_args(cv.gapi.networks(pp))) - - # Comparison - self.assertEqual(0.0, cv.norm(np.array(dnn_boxes).flatten(), - np.array(gapi_boxes).flatten(), - cv.NORM_INF)) + pass if __name__ == '__main__': diff --git a/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py b/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py index 2f921901db7a..a10d63f09ef2 100644 --- a/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py +++ b/modules/gapi/misc/python/test/test_gapi_sample_pipelines.py @@ -225,7 +225,7 @@ def test_custom_op_add(self): comp = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(g_out)) pkg = cv.gapi.kernels(GAddImpl) - actual = comp.apply(cv.gin(in_mat1, in_mat2), args=cv.compile_args(pkg)) + actual = comp.apply(cv.gin(in_mat1, in_mat2), args=cv.gapi.compile_args(pkg)) self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) @@ -245,7 +245,7 @@ def test_custom_op_split3(self): comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_ch1, g_ch2, g_ch3)) pkg = cv.gapi.kernels(GSplit3Impl) - ch1, ch2, ch3 = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) + ch1, ch2, ch3 = comp.apply(cv.gin(in_mat), args=cv.gapi.compile_args(pkg)) self.assertEqual(0.0, cv.norm(in_ch1, ch1, cv.NORM_INF)) self.assertEqual(0.0, cv.norm(in_ch2, ch2, cv.NORM_INF)) @@ 
-266,7 +266,7 @@ def test_custom_op_mean(self): comp = cv.GComputation(g_in, g_out) pkg = cv.gapi.kernels(GMeanImpl) - actual = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) + actual = comp.apply(cv.gin(in_mat), args=cv.gapi.compile_args(pkg)) # Comparison self.assertEqual(expected, actual) @@ -287,7 +287,7 @@ def test_custom_op_addC(self): comp = cv.GComputation(cv.GIn(g_in, g_sc), cv.GOut(g_out)) pkg = cv.gapi.kernels(GAddCImpl) - actual = comp.apply(cv.gin(in_mat, sc), args=cv.compile_args(pkg)) + actual = comp.apply(cv.gin(in_mat, sc), args=cv.gapi.compile_args(pkg)) self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) @@ -305,7 +305,7 @@ def test_custom_op_size(self): comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_sz)) pkg = cv.gapi.kernels(GSizeImpl) - actual = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) + actual = comp.apply(cv.gin(in_mat), args=cv.gapi.compile_args(pkg)) self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) @@ -322,7 +322,7 @@ def test_custom_op_sizeR(self): comp = cv.GComputation(cv.GIn(g_r), cv.GOut(g_sz)) pkg = cv.gapi.kernels(GSizeRImpl) - actual = comp.apply(cv.gin(roi), args=cv.compile_args(pkg)) + actual = comp.apply(cv.gin(roi), args=cv.gapi.compile_args(pkg)) # cv.norm works with tuples ? self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) @@ -340,7 +340,7 @@ def test_custom_op_boundingRect(self): comp = cv.GComputation(cv.GIn(g_pts), cv.GOut(g_br)) pkg = cv.gapi.kernels(GBoundingRectImpl) - actual = comp.apply(cv.gin(points), args=cv.compile_args(pkg)) + actual = comp.apply(cv.gin(points), args=cv.gapi.compile_args(pkg)) # cv.norm works with tuples ? 
self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) @@ -371,7 +371,7 @@ def test_custom_op_goodFeaturesToTrack(self): comp = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) pkg = cv.gapi.kernels(GGoodFeaturesImpl) - actual = comp.apply(cv.gin(in_mat), args=cv.compile_args(pkg)) + actual = comp.apply(cv.gin(in_mat), args=cv.gapi.compile_args(pkg)) # NB: OpenCV & G-API have different output types. # OpenCV - numpy array with shape (num_points, 1, 2) @@ -453,10 +453,10 @@ def run(arr): g_in = cv.GArray.Int() comp = cv.GComputation(cv.GIn(g_in), cv.GOut(GSum.on(g_in))) - s = comp.apply(cv.gin([1, 2, 3, 4]), args=cv.compile_args(cv.gapi.kernels(GSumImpl))) + s = comp.apply(cv.gin([1, 2, 3, 4]), args=cv.gapi.compile_args(cv.gapi.kernels(GSumImpl))) self.assertEqual(10, s) - s = comp.apply(cv.gin([1, 2, 8, 7]), args=cv.compile_args(cv.gapi.kernels(GSumImpl))) + s = comp.apply(cv.gin([1, 2, 8, 7]), args=cv.gapi.compile_args(cv.gapi.kernels(GSumImpl))) self.assertEqual(18, s) self.assertEqual(18, GSumImpl.last_result) @@ -488,13 +488,13 @@ def run(table, key): 'tuple': (42, 42) } - out = comp.apply(cv.gin(table, 'int'), args=cv.compile_args(cv.gapi.kernels(GLookUpImpl))) + out = comp.apply(cv.gin(table, 'int'), args=cv.gapi.compile_args(cv.gapi.kernels(GLookUpImpl))) self.assertEqual(42, out) - out = comp.apply(cv.gin(table, 'str'), args=cv.compile_args(cv.gapi.kernels(GLookUpImpl))) + out = comp.apply(cv.gin(table, 'str'), args=cv.gapi.compile_args(cv.gapi.kernels(GLookUpImpl))) self.assertEqual('hello, world!', out) - out = comp.apply(cv.gin(table, 'tuple'), args=cv.compile_args(cv.gapi.kernels(GLookUpImpl))) + out = comp.apply(cv.gin(table, 'tuple'), args=cv.gapi.compile_args(cv.gapi.kernels(GLookUpImpl))) self.assertEqual((42, 42), out) @@ -521,7 +521,7 @@ def run(arr0, arr1): arr1 = [3, 'str'] out = comp.apply(cv.gin(arr0, arr1), - args=cv.compile_args(cv.gapi.kernels(GConcatImpl))) + args=cv.gapi.compile_args(cv.gapi.kernels(GConcatImpl))) 
self.assertEqual(arr0 + arr1, out) @@ -550,7 +550,7 @@ def run(img0, img1): img1 = np.array([1, 2, 3]) with self.assertRaises(Exception): comp.apply(cv.gin(img0, img1), - args=cv.compile_args( + args=cv.gapi.compile_args( cv.gapi.kernels(GAddImpl))) @@ -577,7 +577,7 @@ def run(img0, img1): img1 = np.array([1, 2, 3]) with self.assertRaises(Exception): comp.apply(cv.gin(img0, img1), - args=cv.compile_args( + args=cv.gapi.compile_args( cv.gapi.kernels(GAddImpl))) @@ -607,7 +607,7 @@ def run(img0, img1): # FIXME: Cause Bad variant access. # Need to provide more descriptive error messsage. with self.assertRaises(Exception): comp.apply(cv.gin(img0, img1), - args=cv.compile_args( + args=cv.gapi.compile_args( cv.gapi.kernels(GAddImpl))) def test_pipeline_with_custom_kernels(self): @@ -657,7 +657,7 @@ def run(img, order): g_mean = cv.gapi.mean(g_transposed) comp = cv.GComputation(cv.GIn(g_bgr), cv.GOut(g_mean)) - actual = comp.apply(cv.gin(img), args=cv.compile_args( + actual = comp.apply(cv.gin(img), args=cv.gapi.compile_args( cv.gapi.kernels(GResizeImpl, GTransposeImpl))) self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) diff --git a/modules/gapi/misc/python/test/test_gapi_streaming.py b/modules/gapi/misc/python/test/test_gapi_streaming.py index 5356abc76afd..f1cce4fb72fc 100644 --- a/modules/gapi/misc/python/test/test_gapi_streaming.py +++ b/modules/gapi/misc/python/test/test_gapi_streaming.py @@ -3,201 +3,225 @@ import numpy as np import cv2 as cv import os +import sys +import unittest from tests_common import NewOpenCVTests -class test_gapi_streaming(NewOpenCVTests): - def test_image_input(self): - sz = (1280, 720) - in_mat = np.random.randint(0, 100, sz).astype(np.uint8) +try: - # OpenCV - expected = cv.medianBlur(in_mat, 3) + if sys.version_info[:2] < (3, 0): + raise unittest.SkipTest('Python 2.x is not supported') - # G-API - g_in = cv.GMat() - g_out = cv.gapi.medianBlur(g_in, 3) - c = cv.GComputation(g_in, g_out) - ccomp = 
c.compileStreaming(cv.descr_of(in_mat)) - ccomp.setSource(cv.gin(in_mat)) - ccomp.start() - _, actual = ccomp.pull() + class test_gapi_streaming(NewOpenCVTests): - # Assert - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) + def test_image_input(self): + sz = (1280, 720) + in_mat = np.random.randint(0, 100, sz).astype(np.uint8) + # OpenCV + expected = cv.medianBlur(in_mat, 3) + + # G-API + g_in = cv.GMat() + g_out = cv.gapi.medianBlur(g_in, 3) + c = cv.GComputation(g_in, g_out) + ccomp = c.compileStreaming(cv.descr_of(in_mat)) + ccomp.setSource(cv.gin(in_mat)) + ccomp.start() - def test_video_input(self): - ksize = 3 - path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + _, actual = ccomp.pull() - # OpenCV - cap = cv.VideoCapture(path) + # Assert + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) - # G-API - g_in = cv.GMat() - g_out = cv.gapi.medianBlur(g_in, ksize) - c = cv.GComputation(g_in, g_out) - ccomp = c.compileStreaming() - source = cv.gapi.wip.make_capture_src(path) - ccomp.setSource(source) - ccomp.start() + def test_video_input(self): + ksize = 3 + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + + # OpenCV + cap = cv.VideoCapture(path) - # Assert - max_num_frames = 10 - proc_num_frames = 0 - while cap.isOpened(): - has_expected, expected = cap.read() - has_actual, actual = ccomp.pull() + # G-API + g_in = cv.GMat() + g_out = cv.gapi.medianBlur(g_in, ksize) + c = cv.GComputation(g_in, g_out) - self.assertEqual(has_expected, has_actual) + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(source) + ccomp.start() - if not has_actual: - break + # Assert + max_num_frames = 10 + proc_num_frames = 0 + while cap.isOpened(): + has_expected, expected = cap.read() + has_actual, actual = ccomp.pull() - self.assertEqual(0.0, cv.norm(cv.medianBlur(expected, ksize), actual, cv.NORM_INF)) + self.assertEqual(has_expected, 
has_actual) - proc_num_frames += 1 - if proc_num_frames == max_num_frames: - break; + if not has_actual: + break + self.assertEqual(0.0, cv.norm(cv.medianBlur(expected, ksize), actual, cv.NORM_INF)) - def test_video_split3(self): - path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + proc_num_frames += 1 + if proc_num_frames == max_num_frames: + break - # OpenCV - cap = cv.VideoCapture(path) - # G-API - g_in = cv.GMat() - b, g, r = cv.gapi.split3(g_in) - c = cv.GComputation(cv.GIn(g_in), cv.GOut(b, g, r)) + def test_video_split3(self): + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) - ccomp = c.compileStreaming() - source = cv.gapi.wip.make_capture_src(path) - ccomp.setSource(source) - ccomp.start() + # OpenCV + cap = cv.VideoCapture(path) - # Assert - max_num_frames = 10 - proc_num_frames = 0 - while cap.isOpened(): - has_expected, frame = cap.read() - has_actual, actual = ccomp.pull() + # G-API + g_in = cv.GMat() + b, g, r = cv.gapi.split3(g_in) + c = cv.GComputation(cv.GIn(g_in), cv.GOut(b, g, r)) - self.assertEqual(has_expected, has_actual) + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(source) + ccomp.start() - if not has_actual: - break + # Assert + max_num_frames = 10 + proc_num_frames = 0 + while cap.isOpened(): + has_expected, frame = cap.read() + has_actual, actual = ccomp.pull() - expected = cv.split(frame) - for e, a in zip(expected, actual): - self.assertEqual(0.0, cv.norm(e, a, cv.NORM_INF)) + self.assertEqual(has_expected, has_actual) - proc_num_frames += 1 - if proc_num_frames == max_num_frames: - break; + if not has_actual: + break + expected = cv.split(frame) + for e, a in zip(expected, actual): + self.assertEqual(0.0, cv.norm(e, a, cv.NORM_INF)) - def test_video_add(self): - sz = (576, 768, 3) - in_mat = np.random.randint(0, 100, sz).astype(np.uint8) + proc_num_frames += 1 + if proc_num_frames == max_num_frames: + break - path 
= self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) - # OpenCV - cap = cv.VideoCapture(path) + def test_video_add(self): + sz = (576, 768, 3) + in_mat = np.random.randint(0, 100, sz).astype(np.uint8) - # G-API - g_in1 = cv.GMat() - g_in2 = cv.GMat() - out = cv.gapi.add(g_in1, g_in2) - c = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(out)) + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) - ccomp = c.compileStreaming() - source = cv.gapi.wip.make_capture_src(path) - ccomp.setSource(cv.gin(source, in_mat)) - ccomp.start() + # OpenCV + cap = cv.VideoCapture(path) - # Assert - max_num_frames = 10 - proc_num_frames = 0 - while cap.isOpened(): - has_expected, frame = cap.read() - has_actual, actual = ccomp.pull() + # G-API + g_in1 = cv.GMat() + g_in2 = cv.GMat() + out = cv.gapi.add(g_in1, g_in2) + c = cv.GComputation(cv.GIn(g_in1, g_in2), cv.GOut(out)) - self.assertEqual(has_expected, has_actual) + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(cv.gin(source, in_mat)) + ccomp.start() - if not has_actual: - break + # Assert + max_num_frames = 10 + proc_num_frames = 0 + while cap.isOpened(): + has_expected, frame = cap.read() + has_actual, actual = ccomp.pull() - expected = cv.add(frame, in_mat) - self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) + self.assertEqual(has_expected, has_actual) - proc_num_frames += 1 - if proc_num_frames == max_num_frames: - break; + if not has_actual: + break + expected = cv.add(frame, in_mat) + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) - def test_video_good_features_to_track(self): - path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + proc_num_frames += 1 + if proc_num_frames == max_num_frames: + break; - # NB: goodFeaturesToTrack configuration - max_corners = 50 - quality_lvl = 0.01 - min_distance = 10 - block_sz = 3 - use_harris_detector = True - k = 0.04 - mask = 
None - # OpenCV - cap = cv.VideoCapture(path) + def test_video_good_features_to_track(self): + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) - # G-API - g_in = cv.GMat() - g_gray = cv.gapi.RGB2Gray(g_in) - g_out = cv.gapi.goodFeaturesToTrack(g_gray, max_corners, quality_lvl, - min_distance, mask, block_sz, use_harris_detector, k) + # NB: goodFeaturesToTrack configuration + max_corners = 50 + quality_lvl = 0.01 + min_distance = 10 + block_sz = 3 + use_harris_detector = True + k = 0.04 + mask = None - c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) + # OpenCV + cap = cv.VideoCapture(path) - ccomp = c.compileStreaming() - source = cv.gapi.wip.make_capture_src(path) - ccomp.setSource(source) - ccomp.start() + # G-API + g_in = cv.GMat() + g_gray = cv.gapi.RGB2Gray(g_in) + g_out = cv.gapi.goodFeaturesToTrack(g_gray, max_corners, quality_lvl, + min_distance, mask, block_sz, use_harris_detector, k) - # Assert - max_num_frames = 10 - proc_num_frames = 0 - while cap.isOpened(): - has_expected, frame = cap.read() - has_actual, actual = ccomp.pull() + c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out)) - self.assertEqual(has_expected, has_actual) + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(source) + ccomp.start() - if not has_actual: - break + # Assert + max_num_frames = 10 + proc_num_frames = 0 + while cap.isOpened(): + has_expected, frame = cap.read() + has_actual, actual = ccomp.pull() + + self.assertEqual(has_expected, has_actual) + + if not has_actual: + break + + # OpenCV + frame = cv.cvtColor(frame, cv.COLOR_RGB2GRAY) + expected = cv.goodFeaturesToTrack(frame, max_corners, quality_lvl, + min_distance, mask=mask, + blockSize=block_sz, useHarrisDetector=use_harris_detector, k=k) + for e, a in zip(expected, actual): + # NB: OpenCV & G-API have different output shapes: + # OpenCV - (num_points, 1, 2) + # G-API - (num_points, 2) + self.assertEqual(0.0, cv.norm(e.flatten(), + np.array(a, 
np.float32).flatten(), + cv.NORM_INF)) + + proc_num_frames += 1 + if proc_num_frames == max_num_frames: + break + + +except unittest.SkipTest as e: + + message = str(e) + + class TestSkip(unittest.TestCase): + def setUp(self): + self.skipTest('Skip tests: ' + message) + + def test_skip(): + pass + + pass - # OpenCV - frame = cv.cvtColor(frame, cv.COLOR_RGB2GRAY) - expected = cv.goodFeaturesToTrack(frame, max_corners, quality_lvl, - min_distance, mask=mask, - blockSize=block_sz, useHarrisDetector=use_harris_detector, k=k) - for e, a in zip(expected, actual): - # NB: OpenCV & G-API have different output shapes: - # OpenCV - (num_points, 1, 2) - # G-API - (num_points, 2) - self.assertEqual(0.0, cv.norm(e.flatten(), - np.array(a, np.float32).flatten(), - cv.NORM_INF)) - - proc_num_frames += 1 - if proc_num_frames == max_num_frames: - break; if __name__ == '__main__': NewOpenCVTests.bootstrap() diff --git a/modules/gapi/misc/python/test/test_gapi_types.py b/modules/gapi/misc/python/test/test_gapi_types.py index 0f3b194a2f97..dde554f5e10a 100644 --- a/modules/gapi/misc/python/test/test_gapi_types.py +++ b/modules/gapi/misc/python/test/test_gapi_types.py @@ -3,29 +3,51 @@ import numpy as np import cv2 as cv import os +import sys +import unittest from tests_common import NewOpenCVTests -class gapi_types_test(NewOpenCVTests): - def test_garray_type(self): - types = [cv.gapi.CV_BOOL , cv.gapi.CV_INT , cv.gapi.CV_DOUBLE , cv.gapi.CV_FLOAT, - cv.gapi.CV_STRING, cv.gapi.CV_POINT , cv.gapi.CV_POINT2F, cv.gapi.CV_SIZE , - cv.gapi.CV_RECT , cv.gapi.CV_SCALAR, cv.gapi.CV_MAT , cv.gapi.CV_GMAT] +try: - for t in types: - g_array = cv.GArrayT(t) - self.assertEqual(t, g_array.type()) + if sys.version_info[:2] < (3, 0): + raise unittest.SkipTest('Python 2.x is not supported') + class gapi_types_test(NewOpenCVTests): - def test_gopaque_type(self): - types = [cv.gapi.CV_BOOL , cv.gapi.CV_INT , cv.gapi.CV_DOUBLE , cv.gapi.CV_FLOAT, - cv.gapi.CV_STRING, cv.gapi.CV_POINT , 
cv.gapi.CV_POINT2F, cv.gapi.CV_SIZE , - cv.gapi.CV_RECT] + def test_garray_type(self): + types = [cv.gapi.CV_BOOL , cv.gapi.CV_INT , cv.gapi.CV_DOUBLE , cv.gapi.CV_FLOAT, + cv.gapi.CV_STRING, cv.gapi.CV_POINT , cv.gapi.CV_POINT2F, cv.gapi.CV_SIZE , + cv.gapi.CV_RECT , cv.gapi.CV_SCALAR, cv.gapi.CV_MAT , cv.gapi.CV_GMAT] - for t in types: - g_opaque = cv.GOpaqueT(t) - self.assertEqual(t, g_opaque.type()) + for t in types: + g_array = cv.GArrayT(t) + self.assertEqual(t, g_array.type()) + + + def test_gopaque_type(self): + types = [cv.gapi.CV_BOOL , cv.gapi.CV_INT , cv.gapi.CV_DOUBLE , cv.gapi.CV_FLOAT, + cv.gapi.CV_STRING, cv.gapi.CV_POINT , cv.gapi.CV_POINT2F, cv.gapi.CV_SIZE , + cv.gapi.CV_RECT] + + for t in types: + g_opaque = cv.GOpaqueT(t) + self.assertEqual(t, g_opaque.type()) + + +except unittest.SkipTest as e: + + message = str(e) + + class TestSkip(unittest.TestCase): + def setUp(self): + self.skipTest('Skip tests: ' + message) + + def test_skip(): + pass + + pass if __name__ == '__main__': From ef2b400c61ee61211ffaa4233dd44b65974ae2aa Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 19 Jun 2021 09:16:23 +0000 Subject: [PATCH 010/128] highgui: win32ui plugin --- cmake/OpenCVFindLibsGUI.cmake | 9 - cmake/templates/cvconfig.h.in | 3 - modules/highgui/CMakeLists.txt | 16 +- modules/highgui/cmake/detect_win32ui.cmake | 17 + modules/highgui/cmake/init.cmake | 3 +- modules/highgui/src/backend.hpp | 4 + modules/highgui/src/precomp.hpp | 8 +- modules/highgui/src/registry.impl.hpp | 8 + modules/highgui/src/window.cpp | 65 +- modules/highgui/src/window_QT.cpp | 3 +- modules/highgui/src/window_cocoa.mm | 6 +- modules/highgui/src/window_gtk.cpp | 4 +- modules/highgui/src/window_w32.cpp | 2331 ++++++++++++-------- 13 files changed, 1500 insertions(+), 977 deletions(-) create mode 100644 modules/highgui/cmake/detect_win32ui.cmake diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake index 8030e8b0c0fc..c8ec55b58864 100644 --- 
a/cmake/OpenCVFindLibsGUI.cmake +++ b/cmake/OpenCVFindLibsGUI.cmake @@ -2,15 +2,6 @@ # Detect 3rd-party GUI libraries # ---------------------------------------------------------------------------- -#--- Win32 UI --- -ocv_clear_vars(HAVE_WIN32UI) -if(WITH_WIN32UI) - try_compile(HAVE_WIN32UI - "${OpenCV_BINARY_DIR}" - "${OpenCV_SOURCE_DIR}/cmake/checks/win32uitest.cpp" - CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=user32;gdi32") -endif() - # --- QT4/5 --- ocv_clear_vars(HAVE_QT HAVE_QT5) if(WITH_QT) diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index e79e1ec0a1bc..6439d8b43f06 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -121,9 +121,6 @@ /* TIFF codec */ #cmakedefine HAVE_TIFF -/* Win32 UI */ -#cmakedefine HAVE_WIN32UI - /* Define if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ #cmakedefine WORDS_BIGENDIAN diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index b4d4b9f50384..5eb9f5ab5e6b 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -131,12 +131,6 @@ elseif(WINRT) message(STATUS " ${name}: Removing 'comctl32.lib, gdi32.lib, ole32.lib, setupapi.lib'") message(STATUS " ${name}: Leaving '${HIGHGUI_LIBRARIES}'") endif() -elseif(HAVE_WIN32UI) - set(OPENCV_HIGHGUI_BUILTIN_BACKEND "WIN32UI") - list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_w32.cpp) - if(OpenCV_ARCH STREQUAL "ARM64") - list(APPEND HIGHGUI_LIBRARIES "comdlg32" "advapi32") - endif() elseif(HAVE_COCOA) set(OPENCV_HIGHGUI_BUILTIN_BACKEND "COCOA") add_definitions(-DHAVE_COCOA) @@ -144,6 +138,16 @@ elseif(HAVE_COCOA) list(APPEND HIGHGUI_LIBRARIES "-framework Cocoa") endif() +if(TARGET ocv.3rdparty.win32ui) + if("win32ui" IN_LIST HIGHGUI_PLUGIN_LIST OR HIGHGUI_PLUGIN_LIST STREQUAL "all") + ocv_create_builtin_highgui_plugin(opencv_highgui_win32 ocv.3rdparty.win32ui "window_w32.cpp") + else() + 
set(OPENCV_HIGHGUI_BUILTIN_BACKEND "WIN32UI") + list(APPEND highgui_srcs ${CMAKE_CURRENT_LIST_DIR}/src/window_w32.cpp) + list(APPEND tgts ocv.3rdparty.win32ui) + endif() +endif() + if(TARGET ocv.3rdparty.gtk3 OR TARGET ocv.3rdparty.gtk2) if(TARGET ocv.3rdparty.gtk3 AND NOT WITH_GTK_2_X) set(__gtk_dependency "ocv.3rdparty.gtk3") diff --git a/modules/highgui/cmake/detect_win32ui.cmake b/modules/highgui/cmake/detect_win32ui.cmake new file mode 100644 index 000000000000..1d2fdc5d4654 --- /dev/null +++ b/modules/highgui/cmake/detect_win32ui.cmake @@ -0,0 +1,17 @@ +#--- Win32 UI --- +ocv_clear_vars(HAVE_WIN32UI) +if(WITH_WIN32UI) + try_compile(HAVE_WIN32UI + "${CMAKE_CURRENT_BINARY_DIR}" + "${OpenCV_SOURCE_DIR}/cmake/checks/win32uitest.cpp" + CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=user32;gdi32") + if(HAVE_WIN32UI) + set(__libs "user32" "gdi32") + if(OpenCV_ARCH STREQUAL "ARM64") + list(APPEND __libs "comdlg32" "advapi32") + endif() + ocv_add_external_target(win32ui "" "${__libs}" "HAVE_WIN32UI") + endif() +endif() + +set(HAVE_WIN32UI "${HAVE_WIN32UI}" PARENT_SCOPE) # informational diff --git a/modules/highgui/cmake/init.cmake b/modules/highgui/cmake/init.cmake index 3b766b3758c0..1626d254daf9 100644 --- a/modules/highgui/cmake/init.cmake +++ b/modules/highgui/cmake/init.cmake @@ -43,8 +43,7 @@ else() endif() add_backend("gtk" WITH_GTK) - -# TODO win32 +add_backend("win32ui" WITH_WIN32UI) # TODO cocoa # TODO qt # TODO opengl diff --git a/modules/highgui/src/backend.hpp b/modules/highgui/src/backend.hpp index 14c88b238761..7c32846ce4a3 100644 --- a/modules/highgui/src/backend.hpp +++ b/modules/highgui/src/backend.hpp @@ -114,6 +114,10 @@ bool setUIBackend(const std::string& backendName); #ifndef BUILD_PLUGIN +#ifdef HAVE_WIN32UI +std::shared_ptr createUIBackendWin32UI(); +#endif + #ifdef HAVE_GTK std::shared_ptr createUIBackendGTK(); #endif diff --git a/modules/highgui/src/precomp.hpp b/modules/highgui/src/precomp.hpp index 6ad5bce8b465..0d26b957ad71 100644 --- 
a/modules/highgui/src/precomp.hpp +++ b/modules/highgui/src/precomp.hpp @@ -67,7 +67,6 @@ #include #include #include -#include #if defined _WIN32 || defined WINCE #include @@ -127,6 +126,13 @@ void cvSetPropTopmost_COCOA(const char* name, const bool topmost); double cvGetPropVsync_W32(const char* name); void cvSetPropVsync_W32(const char* name, const bool enabled); +void setWindowTitle_W32(const cv::String& name, const cv::String& title); +void setWindowTitle_GTK(const cv::String& name, const cv::String& title); +void setWindowTitle_QT(const cv::String& name, const cv::String& title); +void setWindowTitle_COCOA(const cv::String& name, const cv::String& title); + +int pollKey_W32(); + //for QT #if defined (HAVE_QT) CvRect cvGetWindowRect_QT(const char* name); diff --git a/modules/highgui/src/registry.impl.hpp b/modules/highgui/src/registry.impl.hpp index ccf81f928002..66693f1b07e0 100644 --- a/modules/highgui/src/registry.impl.hpp +++ b/modules/highgui/src/registry.impl.hpp @@ -50,6 +50,14 @@ std::vector& getBuiltinBackendsInfo() #elif defined(ENABLE_PLUGINS) DECLARE_DYNAMIC_BACKEND("QT") #endif +#endif + +#ifdef _WIN32 +#ifdef HAVE_WIN32UI + DECLARE_STATIC_BACKEND("WIN32", createUIBackendWin32UI) +#elif defined(ENABLE_PLUGINS) + DECLARE_DYNAMIC_BACKEND("WIN32") +#endif #endif }; return g_backends; diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index 56c1456a5d95..d1ccd1dbc3a9 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -586,6 +586,46 @@ void cv::moveWindow( const String& winname, int x, int y ) #endif } +void cv::setWindowTitle(const String& winname, const String& title) +{ + CV_TRACE_FUNCTION(); + + { + cv::AutoLock lock(cv::getWindowMutex()); + auto window = findWindow_(winname); + if (window) + { + return window->setTitle(title); + } + } + +#if defined(OPENCV_HIGHGUI_WITHOUT_BUILTIN_BACKEND) && defined(ENABLE_PLUGINS) + auto backend = getCurrentUIBackend(); + if (backend) + { + 
CV_LOG_WARNING(NULL, "Can't find window with name: '" << winname << "'. Do nothing"); + CV_NOT_FOUND_DEPRECATION; + } + else + { + CV_LOG_WARNING(NULL, "No UI backends available. Use OPENCV_LOG_LEVEL=DEBUG for investigation"); + } + return; +#elif defined(HAVE_WIN32UI) + return setWindowTitle_W32(winname, title); +#elif defined (HAVE_GTK) + return setWindowTitle_GTK(winname, title); +#elif defined (HAVE_QT) + return setWindowTitle_QT(winname, title); +#elif defined (HAVE_COCOA) + return setWindowTitle_COCOA(winname, title); +#else + CV_Error(Error::StsNotImplemented, "The function is not implemented. " + "Rebuild the library with Windows, GTK+ 2.x or Cocoa support. " + "If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script"); +#endif +} + void cv::setWindowProperty(const String& winname, int prop_id, double prop_value) { CV_TRACE_FUNCTION(); @@ -630,9 +670,9 @@ int cv::waitKey(int delay) return (code != -1) ? (code & 0xff) : -1; } -#if defined(HAVE_QT) || (defined (WINRT) && !defined (WINRT_8_0)) || \ - !defined(HAVE_WIN32UI) && (defined(HAVE_GTK) || defined(HAVE_COCOA)) -// pollKey() fallback implementation +/* + * process until queue is empty but don't wait. + */ int cv::pollKey() { CV_TRACE_FUNCTION(); @@ -646,12 +686,13 @@ int cv::pollKey() } } +#if defined(HAVE_WIN32UI) + return pollKey_W32(); +#else // fallback. please implement a proper polling function return cvWaitKey(1); -} -#elif defined(HAVE_WIN32UI) -// pollKey() implemented in window_w32.cpp #endif +} int cv::createTrackbar(const String& trackbarName, const String& winName, int* value, int count, TrackbarCallback callback, @@ -1203,13 +1244,6 @@ int cv::createButton(const String&, ButtonCallback, void*, int , bool ) // version with a more capable one without a need to recompile dependent // applications or libraries. -void cv::setWindowTitle(const String&, const String&) -{ - CV_Error(Error::StsNotImplemented, "The function is not implemented. 
" - "Rebuild the library with Windows, GTK+ 2.x or Cocoa support. " - "If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script"); -} - #define CV_NO_GUI_ERROR(funcname) \ cv::error(cv::Error::StsError, \ "The function is not implemented. " \ @@ -1360,11 +1394,6 @@ CV_IMPL int cvCreateButton(const char*, void (*)(int, void*), void*, int, int) CV_NO_GUI_ERROR("cvCreateButton"); } -int cv::pollKey() -{ - CV_NO_GUI_ERROR("cv::pollKey()"); -} - #endif /* End of file. */ diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp index 60d7d69a5979..9899dfdcf0f1 100644 --- a/modules/highgui/src/window_QT.cpp +++ b/modules/highgui/src/window_QT.cpp @@ -63,6 +63,7 @@ #endif #endif +using namespace cv; //Static and global first static GuiReceiver *guiMainThread = NULL; @@ -197,7 +198,7 @@ void cvSetPropWindow_QT(const char* name,double prop_value) Q_ARG(double, prop_value)); } -void cv::setWindowTitle(const String& winname, const String& title) +void setWindowTitle_QT(const String& winname, const String& title) { if (!guiMainThread) CV_Error(Error::StsNullPtr, "NULL guiReceiver (please create a window)"); diff --git a/modules/highgui/src/window_cocoa.mm b/modules/highgui/src/window_cocoa.mm index 29a0278c982e..e8e903440675 100644 --- a/modules/highgui/src/window_cocoa.mm +++ b/modules/highgui/src/window_cocoa.mm @@ -795,18 +795,18 @@ void cvSetPropTopmost_COCOA( const char* name, const bool topmost ) __END__; } -void cv::setWindowTitle(const String& winname, const String& title) +void setWindowTitle_COCOA(const cv::String& winname, const cv::String& title) { CVWindow *window = cvGetWindow(winname.c_str()); if (window == NULL) { - namedWindow(winname); + cv::namedWindow(winname); window = cvGetWindow(winname.c_str()); } if (window == NULL) - CV_Error(Error::StsNullPtr, "NULL window"); + CV_Error(cv::Error::StsNullPtr, "NULL window"); NSAutoreleasePool* localpool = [[NSAutoreleasePool alloc] init]; 
diff --git a/modules/highgui/src/window_gtk.cpp b/modules/highgui/src/window_gtk.cpp index efa3fbd96f56..8eaf98fb3612 100644 --- a/modules/highgui/src/window_gtk.cpp +++ b/modules/highgui/src/window_gtk.cpp @@ -364,7 +364,7 @@ static void cvImageWidget_set_size(GtkWidget * widget, int max_width, int max_he } - assert( image_widget->scaled_image ); + CV_Assert(image_widget->scaled_image); } static void @@ -849,7 +849,7 @@ static bool setModeWindow_(const std::shared_ptr& window, int mode) return false; } -void cv::setWindowTitle(const String& winname, const String& title) +void setWindowTitle_GTK(const String& winname, const String& title) { CV_LOCK_MUTEX(); diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index c4f2ddd2a603..d9a9d732227a 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -41,12 +41,17 @@ #include "precomp.hpp" +#ifdef HAVE_WIN32UI + +#include +#include + +#include "backend.hpp" + using namespace cv; #include // required for GET_X_LPARAM() and GET_Y_LPARAM() macros -#if defined _WIN32 - #ifdef __GNUC__ # pragma GCC diagnostic ignored "-Wmissing-declarations" #endif @@ -60,14 +65,12 @@ using namespace cv; #include #include #include -#include #ifdef HAVE_OPENGL #include #include #include #include -#include "opencv2/highgui.hpp" #include #include "opencv2/core/opengl.hpp" #endif @@ -78,7 +81,7 @@ static const char* trackbar_text = #if defined _M_X64 || defined __x86_64 || defined _M_ARM64 #define icvGetWindowLongPtr GetWindowLongPtr -#define icvSetWindowLongPtr( hwnd, id, ptr ) SetWindowLongPtr( hwnd, id, (LONG_PTR)(ptr) ) +#define icvSetWindowLongPtr(hwnd, id, ptr) SetWindowLongPtr(hwnd, id, (LONG_PTR)(ptr)) #define icvGetClassLongPtr GetClassLongPtr #define CV_USERDATA GWLP_USERDATA @@ -89,7 +92,7 @@ static const char* trackbar_text = #else #define icvGetWindowLongPtr GetWindowLong -#define icvSetWindowLongPtr( hwnd, id, ptr ) SetWindowLong( hwnd, id, (size_t)ptr ) +#define 
icvSetWindowLongPtr(hwnd, id, ptr) SetWindowLong(hwnd, id, (size_t)ptr) #define icvGetClassLongPtr GetClassLong #define CV_USERDATA GWL_USERDATA @@ -116,13 +119,13 @@ static inline void mingw_strcat_s(char *dest, size_t destsz, const char *src){ #define strcat_s mingw_strcat_s #endif -static void FillBitmapInfo( BITMAPINFO* bmi, int width, int height, int bpp, int origin ) +static void FillBitmapInfo(BITMAPINFO* bmi, int width, int height, int bpp, int origin) { - assert( bmi && width >= 0 && height >= 0 && (bpp == 8 || bpp == 24 || bpp == 32)); + CV_Assert(bmi && width >= 0 && height >= 0 && (bpp == 8 || bpp == 24 || bpp == 32)); BITMAPINFOHEADER* bmih = &(bmi->bmiHeader); - memset( bmih, 0, sizeof(*bmih)); + memset(bmih, 0, sizeof(*bmih)); bmih->biSize = sizeof(BITMAPINFOHEADER); bmih->biWidth = width; bmih->biHeight = origin ? abs(height) : -abs(height); @@ -130,11 +133,11 @@ static void FillBitmapInfo( BITMAPINFO* bmi, int width, int height, int bpp, int bmih->biBitCount = (unsigned short)bpp; bmih->biCompression = BI_RGB; - if( bpp == 8 ) + if (bpp == 8) { RGBQUAD* palette = bmi->bmiColors; int i; - for( i = 0; i < 256; i++ ) + for (i = 0; i < 256; i++) { palette[i].rgbBlue = palette[i].rgbGreen = palette[i].rgbRed = (BYTE)i; palette[i].rgbReserved = 0; @@ -144,68 +147,91 @@ static void FillBitmapInfo( BITMAPINFO* bmi, int width, int height, int bpp, int struct CvWindow; -typedef struct CvTrackbar +struct CvTrackbar : public std::enable_shared_from_this { + CvTrackbar(CvWindow& window, const std::string& name_) + : signature(CV_TRACKBAR_MAGIC_VAL) + , name(name_) + , parent(&window) + { + // nothing + } + ~CvTrackbar() + { + signature = -1; + } + int signature; - HWND hwnd; - char* name; - CvTrackbar* next; - CvWindow* parent; - HWND buddy; - int* data; - int pos; - int maxval; - int minval; - void (*notify)(int); - void (*notify2)(int, void*); - void* userdata; - int id; -} -CvTrackbar; + HWND hwnd = 0; + std::string name; + CvWindow* parent; // TODO weak_ptr 
+ HWND buddy = 0; + int* data = nullptr; + int pos = 0; + int maxval = 0; + int minval = 0; + void (*notify)(int) = nullptr; // deprecated + void (*notify2)(int, void*) = nullptr; // deprecated + TrackbarCallback onChangeCallback = nullptr; + void* userdata = nullptr; + int id = -1; +}; -typedef struct CvWindow +struct CvWindow : public std::enable_shared_from_this { + CvWindow(const std::string& name_) + : signature(CV_WINDOW_MAGIC_VAL) + , name(name_) + { + // nothing + } + + ~CvWindow() + { + signature = -1; + } + + void destroy(); + int signature; - HWND hwnd; - char* name; - CvWindow* prev; - CvWindow* next; - HWND frame; + cv::Mutex mutex; + HWND hwnd = 0; + std::string name; + HWND frame = 0; - HDC dc; - HGDIOBJ image; - int last_key; - int flags; - int status;//0 normal, 1 fullscreen (YV) + HDC dc = 0; + HGDIOBJ image = 0; + int last_key = 0; + int flags = 0; + int status = 0;//0 normal, 1 fullscreen (YV) - CvMouseCallback on_mouse; - void* on_mouse_param; + CvMouseCallback on_mouse = nullptr; + void* on_mouse_param = nullptr; struct { - HWND toolbar; - int pos; - int rows; - WNDPROC toolBarProc; - CvTrackbar* first; + HWND toolbar = 0; + int pos = 0; + int rows = 0; + WNDPROC toolBarProc = nullptr; + std::vector< std::shared_ptr > trackbars; } toolbar; - int width; - int height; + int width = -1; + int height = -1; // OpenGL support #ifdef HAVE_OPENGL - bool useGl; - HGLRC hGLRC; + bool useGl = false; + HGLRC hGLRC = 0; - CvOpenGlDrawCallback glDrawCallback; - void* glDrawData; + CvOpenGlDrawCallback glDrawCallback = nullptr; + void* glDrawData = nullptr; #endif -} -CvWindow; +}; #define HG_BUDDY_WIDTH 130 @@ -221,19 +247,50 @@ CvWindow; #define TBM_GETTOOLTIPS (WM_USER + 30) #endif -static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam); -static LRESULT CALLBACK WindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam); -static LRESULT CALLBACK MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam); 
-static void icvUpdateWindowPos( CvWindow* window ); +static +std::vector< std::shared_ptr >& getWindowsList() +{ + static std::vector< std::shared_ptr > g_windows; + return g_windows; +} -static CvWindow* hg_windows = 0; + +// Mutex must be locked +static +std::shared_ptr icvFindWindowByName(const std::string& name) +{ + auto& g_windows = getWindowsList(); + for (auto it = g_windows.begin(); it != g_windows.end(); ++it) + { + auto window = *it; + if (!window) + continue; + if (window->name == name) + return window; + } + return std::shared_ptr(); +} + +static inline +std::shared_ptr icvFindWindowByName(const char* name) +{ + CV_Assert(name); + return icvFindWindowByName(std::string(name)); +} + + + +static LRESULT CALLBACK HighGUIProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam); +static LRESULT CALLBACK WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam); +static LRESULT CALLBACK MainWindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam); +static void icvUpdateWindowPos(CvWindow& window); typedef int (CV_CDECL * CvWin32WindowCallback)(HWND, UINT, WPARAM, LPARAM, int*); static CvWin32WindowCallback hg_on_preprocess = 0, hg_on_postprocess = 0; static HINSTANCE hg_hinstance = 0; -static const char* highGUIclassName = "HighGUI class"; -static const char* mainHighGUIclassName = "Main HighGUI class"; +static const char* const highGUIclassName = "HighGUI class"; +static const char* const mainHighGUIclassName = "Main HighGUI class"; static void icvCleanupHighgui() { @@ -242,15 +299,15 @@ static void icvCleanupHighgui() UnregisterClass(mainHighGUIclassName, hg_hinstance); } -CV_IMPL int cvInitSystem( int, char** ) +CV_IMPL int cvInitSystem(int, char**) { static int wasInitialized = 0; // check initialization status - if( !wasInitialized ) + if (!wasInitialized) { - // Initialize the storage - hg_windows = 0; + (void)getWindowMutex(); // force mutex initialization + (void)getWindowsList(); // Initialize the storage // Register the class WNDCLASS 
wndc; @@ -262,7 +319,7 @@ CV_IMPL int cvInitSystem( int, char** ) wndc.lpszClassName = highGUIclassName; wndc.lpszMenuName = highGUIclassName; wndc.hIcon = LoadIcon(0, IDI_APPLICATION); - wndc.hCursor = (HCURSOR)LoadCursor(0, (LPSTR)(size_t)IDC_CROSS ); + wndc.hCursor = (HCURSOR)LoadCursor(0, (LPSTR)(size_t)IDC_CROSS); wndc.hbrBackground = (HBRUSH)GetStockObject(DKGRAY_BRUSH); RegisterClass(&wndc); @@ -273,12 +330,12 @@ CV_IMPL int cvInitSystem( int, char** ) wndc.lpfnWndProc = MainWindowProc; RegisterClass(&wndc); - atexit( icvCleanupHighgui ); + atexit(icvCleanupHighgui); wasInitialized = 1; } - setlocale(LC_NUMERIC,"C"); + setlocale(LC_NUMERIC,"C"); // FIXIT must be removed return 0; } @@ -287,50 +344,58 @@ CV_IMPL int cvStartWindowThread(){ return 0; } -static CvWindow* icvFindWindowByName( const char* name ) -{ - CvWindow* window = hg_windows; - - for( ; window != 0 && strcmp( name, window->name) != 0; window = window->next ) - ; - return window; -} - - -static CvWindow* icvWindowByHWND( HWND hwnd ) +static std::shared_ptr icvWindowByHWND(HWND hwnd) { - CvWindow* window = (CvWindow*)icvGetWindowLongPtr( hwnd, CV_USERDATA ); - return window != 0 && hg_windows != 0 && + AutoLock lock(getWindowMutex()); + CvWindow* window = (CvWindow*)icvGetWindowLongPtr(hwnd, CV_USERDATA); + window = window != 0 && window->signature == CV_WINDOW_MAGIC_VAL ? window : 0; + if (window) + { + return window->shared_from_this(); + } + else + { + return std::shared_ptr(); + } } -static CvTrackbar* icvTrackbarByHWND( HWND hwnd ) +static std::shared_ptr icvTrackbarByHWND(HWND hwnd) { - CvTrackbar* trackbar = (CvTrackbar*)icvGetWindowLongPtr( hwnd, CV_USERDATA ); - return trackbar != 0 && trackbar->signature == CV_TRACKBAR_MAGIC_VAL && + AutoLock lock(getWindowMutex()); + CvTrackbar* trackbar = (CvTrackbar*)icvGetWindowLongPtr(hwnd, CV_USERDATA); + trackbar = trackbar != 0 && trackbar->signature == CV_TRACKBAR_MAGIC_VAL && trackbar->hwnd == hwnd ? 
trackbar : 0; + if (trackbar) + { + return trackbar->shared_from_this(); + } + else + { + return std::shared_ptr(); + } } -static const char* icvWindowPosRootKey = "Software\\OpenCV\\HighGUI\\Windows\\"; +static const char* const icvWindowPosRootKey = "Software\\OpenCV\\HighGUI\\Windows\\"; // Window positions saving/loading added by Philip Gruebele. //pgruebele@cox.net // Restores the window position from the registry saved position. static void -icvLoadWindowPos( const char* name, CvRect& rect ) +icvLoadWindowPos(const char* name, CvRect& rect) { HKEY hkey; char szKey[1024]; - strcpy_s( szKey, 1024, icvWindowPosRootKey ); - strcat_s( szKey, 1024, name ); + strcpy_s(szKey, 1024, icvWindowPosRootKey); + strcat_s(szKey, 1024, name); rect.x = rect.y = CW_USEDEFAULT; rect.width = rect.height = 320; - if( RegOpenKeyEx(HKEY_CURRENT_USER,szKey,0,KEY_QUERY_VALUE,&hkey) == ERROR_SUCCESS ) + if (RegOpenKeyEx(HKEY_CURRENT_USER,szKey,0,KEY_QUERY_VALUE,&hkey) == ERROR_SUCCESS) { // Yes we are installed. DWORD dwType = 0; @@ -379,16 +444,16 @@ icvLoadWindowPos( const char* name, CvRect& rect ) //pgruebele@cox.net // philipg. 
Saves the window position in the registry static void -icvSaveWindowPos( const char* name, CvRect rect ) +icvSaveWindowPos(const char* name, CvRect rect) { static const DWORD MAX_RECORD_COUNT = 100; HKEY hkey; char szKey[1024]; char rootKey[1024]; - strcpy_s( szKey, 1024, icvWindowPosRootKey ); - strcat_s( szKey, 1024, name ); + strcpy_s(szKey, 1024, icvWindowPosRootKey); + strcat_s(szKey, 1024, name); - if( RegOpenKeyEx( HKEY_CURRENT_USER,szKey,0,KEY_READ,&hkey) != ERROR_SUCCESS ) + if (RegOpenKeyEx(HKEY_CURRENT_USER,szKey,0,KEY_READ,&hkey) != ERROR_SUCCESS) { HKEY hroot; DWORD count = 0; @@ -396,40 +461,40 @@ icvSaveWindowPos( const char* name, CvRect rect ) char oldestKey[1024]; char currentKey[1024]; - strcpy_s( rootKey, 1024, icvWindowPosRootKey ); + strcpy_s(rootKey, 1024, icvWindowPosRootKey); rootKey[strlen(rootKey)-1] = '\0'; - if( RegCreateKeyEx(HKEY_CURRENT_USER, rootKey, 0, NULL, REG_OPTION_NON_VOLATILE, KEY_READ+KEY_WRITE, 0, &hroot, NULL) != ERROR_SUCCESS ) - //RegOpenKeyEx( HKEY_CURRENT_USER,rootKey,0,KEY_READ,&hroot) != ERROR_SUCCESS ) + if (RegCreateKeyEx(HKEY_CURRENT_USER, rootKey, 0, NULL, REG_OPTION_NON_VOLATILE, KEY_READ+KEY_WRITE, 0, &hroot, NULL) != ERROR_SUCCESS) + //RegOpenKeyEx(HKEY_CURRENT_USER,rootKey,0,KEY_READ,&hroot) != ERROR_SUCCESS) return; for(;;) { DWORD csize = sizeof(currentKey); FILETIME accesstime = { 0, 0 }; - LONG code = RegEnumKeyEx( hroot, count, currentKey, &csize, NULL, NULL, NULL, &accesstime ); - if( code != ERROR_SUCCESS && code != ERROR_MORE_DATA ) + LONG code = RegEnumKeyEx(hroot, count, currentKey, &csize, NULL, NULL, NULL, &accesstime); + if (code != ERROR_SUCCESS && code != ERROR_MORE_DATA) break; count++; - if( oldestTime.dwHighDateTime > accesstime.dwHighDateTime || + if (oldestTime.dwHighDateTime > accesstime.dwHighDateTime || (oldestTime.dwHighDateTime == accesstime.dwHighDateTime && - oldestTime.dwLowDateTime > accesstime.dwLowDateTime) ) + oldestTime.dwLowDateTime > accesstime.dwLowDateTime)) { oldestTime = 
accesstime; - strcpy_s( oldestKey, 1024, currentKey ); + strcpy_s(oldestKey, 1024, currentKey); } } - if( count >= MAX_RECORD_COUNT ) - RegDeleteKey( hroot, oldestKey ); - RegCloseKey( hroot ); + if (count >= MAX_RECORD_COUNT) + RegDeleteKey(hroot, oldestKey); + RegCloseKey(hroot); - if( RegCreateKeyEx(HKEY_CURRENT_USER,szKey,0,NULL,REG_OPTION_NON_VOLATILE, KEY_WRITE, 0, &hkey, NULL) != ERROR_SUCCESS ) + if (RegCreateKeyEx(HKEY_CURRENT_USER,szKey,0,NULL,REG_OPTION_NON_VOLATILE, KEY_WRITE, 0, &hkey, NULL) != ERROR_SUCCESS) return; } else { - RegCloseKey( hkey ); - if( RegOpenKeyEx( HKEY_CURRENT_USER,szKey,0,KEY_WRITE,&hkey) != ERROR_SUCCESS ) + RegCloseKey(hkey); + if (RegOpenKeyEx(HKEY_CURRENT_USER,szKey,0,KEY_WRITE,&hkey) != ERROR_SUCCESS) return; } @@ -440,96 +505,101 @@ icvSaveWindowPos( const char* name, CvRect rect ) RegCloseKey(hkey); } +static Rect getImageRect_(CvWindow& window); + CvRect cvGetWindowRect_W32(const char* name) { - RECT rect = { 0 }; - CvRect result = cvRect(-1, -1, -1, -1); - - CV_FUNCNAME( "cvGetWindowRect_W32" ); + CV_FUNCNAME("cvGetWindowRect_W32"); - __BEGIN__; - - CvWindow* window; + AutoLock lock(getWindowMutex()); if (!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); - window = icvFindWindowByName( name ); + CV_Error(Error::StsNullPtr, "NULL name string"); + + auto window = icvFindWindowByName(name); if (!window) - EXIT; // keep silence here + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); - GetClientRect(window->hwnd, &rect); - { + Rect r = getImageRect_(*window); + + CvRect result = cvRect(r.x, r.y, r.width, r.height); + return result; +} + +static Rect getImageRect_(CvWindow& window) +{ + RECT rect = { 0 }; + GetClientRect(window.hwnd, &rect); POINT pt = {rect.left, rect.top}; - ClientToScreen(window->hwnd, &pt); - result = cvRect(pt.x, pt.y, rect.right - rect.left, rect.bottom - rect.top); - } - __END__; + ClientToScreen(window.hwnd, &pt); + Rect result(pt.x, pt.y, rect.right - rect.left, rect.bottom - rect.top); 
return result; } double cvGetModeWindow_W32(const char* name)//YV { - double result = -1; - - CV_FUNCNAME( "cvGetModeWindow_W32" ); - - __BEGIN__; + CV_FUNCNAME("cvGetModeWindow_W32"); - CvWindow* window; + AutoLock lock(getWindowMutex()); if (!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + CV_Error(Error::StsNullPtr, "NULL name string"); - window = icvFindWindowByName( name ); + auto window = icvFindWindowByName(name); if (!window) - EXIT; // keep silence here + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); - result = window->status; - - __END__; - return result; + return window->status; } -void cvSetModeWindow_W32( const char* name, double prop_value)//Yannick Verdie +static bool setModeWindow_(CvWindow& window, int mode); + +void cvSetModeWindow_W32(const char* name, double prop_value)//Yannick Verdie { - CV_FUNCNAME( "cvSetModeWindow_W32" ); + CV_FUNCNAME("cvSetModeWindow_W32"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - CvWindow* window; + if (!name) + CV_Error(Error::StsNullPtr, "NULL name string"); - if(!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + auto window = icvFindWindowByName(name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); - window = icvFindWindowByName( name ); - if( !window ) - CV_ERROR( CV_StsNullPtr, "NULL window" ); + (void)setModeWindow_(*window, (int)prop_value); +} - if(window->flags & CV_WINDOW_AUTOSIZE)//if the flag CV_WINDOW_AUTOSIZE is set - EXIT; +static bool setModeWindow_(CvWindow& window, int mode) +{ + if (window.flags & CV_WINDOW_AUTOSIZE)//if the flag CV_WINDOW_AUTOSIZE is set + return false; + + if (window.status == mode) + return true; { - DWORD dwStyle = (DWORD)GetWindowLongPtr(window->frame, GWL_STYLE); + DWORD dwStyle = (DWORD)GetWindowLongPtr(window.frame, GWL_STYLE); CvRect position; - if (window->status==CV_WINDOW_FULLSCREEN && prop_value==CV_WINDOW_NORMAL) + if (window.status == CV_WINDOW_FULLSCREEN && mode == CV_WINDOW_NORMAL) { - 
icvLoadWindowPos(window->name,position ); - SetWindowLongPtr(window->frame, GWL_STYLE, dwStyle | WS_CAPTION | WS_THICKFRAME); + icvLoadWindowPos(window.name.c_str(), position); + SetWindowLongPtr(window.frame, GWL_STYLE, dwStyle | WS_CAPTION | WS_THICKFRAME); - SetWindowPos(window->frame, HWND_TOP, position.x, position.y , position.width,position.height, SWP_NOZORDER | SWP_FRAMECHANGED); - window->status=CV_WINDOW_NORMAL; + SetWindowPos(window.frame, HWND_TOP, position.x, position.y , position.width,position.height, SWP_NOZORDER | SWP_FRAMECHANGED); + window.status=CV_WINDOW_NORMAL; - EXIT; + return true; } - if (window->status==CV_WINDOW_NORMAL && prop_value==CV_WINDOW_FULLSCREEN) + if (window.status == CV_WINDOW_NORMAL && mode == CV_WINDOW_FULLSCREEN) { //save dimension RECT rect = { 0 }; - GetWindowRect(window->frame, &rect); - CvRect RectCV = cvRect(rect.left, rect.top,rect.right - rect.left, rect.bottom - rect.top); - icvSaveWindowPos(window->name,RectCV ); + GetWindowRect(window.frame, &rect); + CvRect rectCV = cvRect(rect.left, rect.top,rect.right - rect.left, rect.bottom - rect.top); + icvSaveWindowPos(window.name.c_str(), rectCV); //Look at coordinate for fullscreen HMONITOR hMonitor; @@ -542,60 +612,75 @@ void cvSetModeWindow_W32( const char* name, double prop_value)//Yannick Verdie //fullscreen position.x=mi.rcMonitor.left;position.y=mi.rcMonitor.top; position.width=mi.rcMonitor.right - mi.rcMonitor.left;position.height=mi.rcMonitor.bottom - mi.rcMonitor.top; - SetWindowLongPtr(window->frame, GWL_STYLE, dwStyle & ~WS_CAPTION & ~WS_THICKFRAME); + SetWindowLongPtr(window.frame, GWL_STYLE, dwStyle & ~WS_CAPTION & ~WS_THICKFRAME); - SetWindowPos(window->frame, HWND_TOP, position.x, position.y , position.width,position.height, SWP_NOZORDER | SWP_FRAMECHANGED); - window->status=CV_WINDOW_FULLSCREEN; + SetWindowPos(window.frame, HWND_TOP, position.x, position.y , position.width,position.height, SWP_NOZORDER | SWP_FRAMECHANGED); + 
window.status=CV_WINDOW_FULLSCREEN; - EXIT; + return true; } } - __END__; + return false; } +static double getPropTopmost_(CvWindow& window); + double cvGetPropTopmost_W32(const char* name) { - double result = -1; - CV_Assert(name); - CvWindow* window = icvFindWindowByName(name); + auto window = icvFindWindowByName(name); if (!window) CV_Error(Error::StsNullPtr, "NULL window"); - LONG style = GetWindowLongA(window->frame, GWL_EXSTYLE); // -20 + return getPropTopmost_(*window); +} + +static double getPropTopmost_(CvWindow& window) +{ + LONG style = GetWindowLongA(window.frame, GWL_EXSTYLE); // -20 if (!style) { std::ostringstream errorMsg; - errorMsg << "window(" << name << "): failed to retrieve extended window style using GetWindowLongA(); error code: " << GetLastError(); - CV_Error(Error::StsError, errorMsg.str().c_str()); + errorMsg << "window(" << window.name << "): failed to retrieve extended window style using GetWindowLongA(); error code: " << GetLastError(); + CV_Error(Error::StsError, errorMsg.str()); } - result = (style & WS_EX_TOPMOST) == WS_EX_TOPMOST; - - return result; + bool result = (style & WS_EX_TOPMOST) == WS_EX_TOPMOST; + return result ? 1.0 : 0.0; } +static bool setPropTopmost_(CvWindow& window, bool topmost); + void cvSetPropTopmost_W32(const char* name, const bool topmost) { CV_Assert(name); - CvWindow* window = icvFindWindowByName(name); + auto window = icvFindWindowByName(name); if (!window) CV_Error(Error::StsNullPtr, "NULL window"); + (void)setPropTopmost_(*window, topmost); +} + +static bool setPropTopmost_(CvWindow& window, bool topmost) +{ HWND flag = topmost ? HWND_TOPMOST : HWND_TOP; - BOOL success = SetWindowPos(window->frame, flag, 0, 0, 0, 0, SWP_NOMOVE | SWP_NOSIZE); + BOOL success = SetWindowPos(window.frame, flag, 0, 0, 0, 0, SWP_NOMOVE | SWP_NOSIZE); if (!success) { std::ostringstream errorMsg; - errorMsg << "window(" << name << "): error reported by SetWindowPos(" << (topmost ? 
"HWND_TOPMOST" : "HWND_TOP") << "), error code: " << GetLastError(); - CV_Error(Error::StsError, errorMsg.str().c_str()); + errorMsg << "window(" << window.name << "): error reported by SetWindowPos(" << (topmost ? "HWND_TOPMOST" : "HWND_TOP") << "), error code: " << GetLastError(); + CV_Error(Error::StsError, errorMsg.str()); + return false; } + return true; } +static double getPropVsync_(CvWindow& window); + double cvGetPropVsync_W32(const char* name) { #ifndef HAVE_OPENGL @@ -605,40 +690,53 @@ double cvGetPropVsync_W32(const char* name) if (!name) CV_Error(Error::StsNullPtr, "'name' argument must not be NULL"); - CvWindow* window = icvFindWindowByName(name); + auto window = icvFindWindowByName(name); if (!window) CV_Error_(Error::StsBadArg, ("there is no window named '%s'", name)); + double result = getPropVsync_(*window); + return cvIsNaN(result) ? -1.0 : result; +#endif +} + +static double getPropVsync_(CvWindow& window) +{ +#ifndef HAVE_OPENGL + CV_UNUSED(window); + CV_Error(Error::OpenGlNotSupported, "Library was built without OpenGL support"); +#else // https://www.khronos.org/opengl/wiki/Swap_Interval // https://www.khronos.org/registry/OpenGL/extensions/EXT/WGL_EXT_extensions_string.txt // https://www.khronos.org/registry/OpenGL/extensions/EXT/WGL_EXT_swap_control.txt - if (!wglMakeCurrent(window->dc, window->hGLRC)) + if (!wglMakeCurrent(window.dc, window.hGLRC)) CV_Error(Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context"); typedef const char* (APIENTRY* PFNWGLGETEXTENSIONSSTRINGEXTPROC)(void); PFNWGLGETEXTENSIONSSTRINGEXTPROC wglGetExtensionsString = NULL; wglGetExtensionsString = (PFNWGLGETEXTENSIONSSTRINGEXTPROC)wglGetProcAddress("wglGetExtensionsStringEXT"); if (wglGetExtensionsString == NULL) - return -1; // wglGetProcAddress failed to get wglGetExtensionsStringEXT + return std::numeric_limits::quiet_NaN(); // wglGetProcAddress failed to get wglGetExtensionsStringEXT const char* wgl_extensions = wglGetExtensionsString(); if 
(wgl_extensions == NULL) - return -1; // Can't get WGL extensions string + return std::numeric_limits::quiet_NaN(); // Can't get WGL extensions string if (strstr(wgl_extensions, "WGL_EXT_swap_control") == NULL) - return -1; // WGL extensions don't contain WGL_EXT_swap_control + return std::numeric_limits::quiet_NaN(); // WGL extensions don't contain WGL_EXT_swap_control typedef int (APIENTRY* PFNWGLGETSWAPINTERVALPROC)(void); PFNWGLGETSWAPINTERVALPROC wglGetSwapInterval = 0; wglGetSwapInterval = (PFNWGLGETSWAPINTERVALPROC)wglGetProcAddress("wglGetSwapIntervalEXT"); if (wglGetSwapInterval == NULL) - return -1; // wglGetProcAddress failed to get wglGetSwapIntervalEXT + return std::numeric_limits::quiet_NaN(); // wglGetProcAddress failed to get wglGetSwapIntervalEXT return wglGetSwapInterval(); #endif } +static bool setPropVsync_(CvWindow& window, bool enable_vsync); + void cvSetPropVsync_W32(const char* name, const bool enable_vsync) { #ifndef HAVE_OPENGL @@ -649,11 +747,22 @@ void cvSetPropVsync_W32(const char* name, const bool enable_vsync) if (!name) CV_Error(Error::StsNullPtr, "'name' argument must not be NULL"); - CvWindow* window = icvFindWindowByName(name); + auto window = icvFindWindowByName(name); if (!window) CV_Error_(Error::StsBadArg, ("there is no window named '%s'", name)); - if (!wglMakeCurrent(window->dc, window->hGLRC)) + (void)setPropVsync_(*window, enable_vsync); +#endif +} + +static bool setPropVsync_(CvWindow& window, bool enable_vsync) +{ +#ifndef HAVE_OPENGL + CV_UNUSED(window); + CV_UNUSED(enable_vsync); + CV_Error(Error::OpenGlNotSupported, "Library was built without OpenGL support"); +#else + if (!wglMakeCurrent(window.dc, window.hGLRC)) CV_Error(Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context"); typedef const char* (APIENTRY* PFNWGLGETEXTENSIONSSTRINGEXTPROC)(void); @@ -676,47 +785,44 @@ void cvSetPropVsync_W32(const char* name, const bool enable_vsync) CV_Error(Error::OpenGlApiCallError, "wglGetProcAddress failed to get 
wglSwapIntervalEXT"); wglSwapInterval(enable_vsync); + return true; #endif } -void cv::setWindowTitle(const String& winname, const String& title) +void setWindowTitle_W32(const std::string& name, const std::string& title) { - CvWindow* window = icvFindWindowByName(winname.c_str()); + auto window = icvFindWindowByName(name); if (!window) { - namedWindow(winname); - window = icvFindWindowByName(winname.c_str()); + namedWindow(name); + window = icvFindWindowByName(name); } if (!window) CV_Error(Error::StsNullPtr, "NULL window"); if (!SetWindowText(window->frame, title.c_str())) - CV_Error_(Error::StsError, ("Failed to set \"%s\" window title to \"%s\"", winname.c_str(), title.c_str())); + CV_Error_(Error::StsError, ("Failed to set \"%s\" window title to \"%s\"", name.c_str(), title.c_str())); } double cvGetPropWindowAutoSize_W32(const char* name) { double result = -1; - CV_FUNCNAME( "cvSetCloseCallback" ); + CV_FUNCNAME("cvSetCloseCallback"); - __BEGIN__; - - CvWindow* window; + AutoLock lock(getWindowMutex()); if (!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + CV_Error(Error::StsNullPtr, "NULL name string"); - window = icvFindWindowByName( name ); + auto window = icvFindWindowByName(name); if (!window) - EXIT; // keep silence here + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); result = window->flags & CV_WINDOW_AUTOSIZE; - __END__; - return result; } @@ -724,23 +830,19 @@ double cvGetRatioWindow_W32(const char* name) { double result = -1; - CV_FUNCNAME( "cvGetRatioWindow_W32" ); + CV_FUNCNAME("cvGetRatioWindow_W32"); - __BEGIN__; - - CvWindow* window; + AutoLock lock(getWindowMutex()); if (!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + CV_Error(Error::StsNullPtr, "NULL name string"); - window = icvFindWindowByName( name ); + auto window = icvFindWindowByName(name); if (!window) - EXIT; // keep silence here + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); result = static_cast(window->width) / window->height; - __END__; - 
return result; } @@ -749,23 +851,20 @@ double cvGetOpenGlProp_W32(const char* name) double result = -1; #ifdef HAVE_OPENGL - CV_FUNCNAME( "cvGetOpenGlProp_W32" ); + CV_FUNCNAME("cvGetOpenGlProp_W32"); - __BEGIN__; - - CvWindow* window; + AutoLock lock(getWindowMutex()); if (!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + CV_Error(Error::StsNullPtr, "NULL name string"); - window = icvFindWindowByName( name ); + auto window = icvFindWindowByName(name); if (!window) - EXIT; // keep silence here + return -1; result = window->useGl; - - __END__; #endif + CV_UNUSED(name); return result; @@ -775,16 +874,15 @@ double cvGetPropVisible_W32(const char* name) { double result = -1; - CV_FUNCNAME( "cvGetPropVisible_W32" ); + CV_FUNCNAME("cvGetPropVisible_W32"); - __BEGIN__; + AutoLock lock(getWindowMutex()); if (!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); - - result = (icvFindWindowByName( name ) != NULL); + CV_Error(Error::StsNullPtr, "NULL name string"); - __END__; + auto window = icvFindWindowByName(name); + result = (bool)window ? 
1.0 : 0.0; return result; } @@ -798,9 +896,9 @@ namespace { void createGlContext(HWND hWnd, HDC& hGLDC, HGLRC& hGLRC, bool& useGl) { - CV_FUNCNAME( "createGlContext" ); + CV_FUNCNAME("createGlContext"); - __BEGIN__; + AutoLock lock(getWindowMutex()); useGl = false; @@ -830,120 +928,119 @@ namespace hGLDC = GetDC(hWnd); if (!hGLDC) - CV_ERROR( CV_OpenGlApiCallError, "Can't Create A GL Device Context" ); + CV_Error(Error::OpenGlApiCallError, "Can't Create A GL Device Context"); PixelFormat = ChoosePixelFormat(hGLDC, &pfd); if (!PixelFormat) - CV_ERROR( CV_OpenGlApiCallError, "Can't Find A Suitable PixelFormat" ); + CV_Error(Error::OpenGlApiCallError, "Can't Find A Suitable PixelFormat"); if (!SetPixelFormat(hGLDC, PixelFormat, &pfd)) - CV_ERROR( CV_OpenGlApiCallError, "Can't Set The PixelFormat" ); + CV_Error(Error::OpenGlApiCallError, "Can't Set The PixelFormat"); hGLRC = wglCreateContext(hGLDC); if (!hGLRC) - CV_ERROR( CV_OpenGlApiCallError, "Can't Create A GL Rendering Context" ); + CV_Error(Error::OpenGlApiCallError, "Can't Create A GL Rendering Context"); if (!wglMakeCurrent(hGLDC, hGLRC)) - CV_ERROR( CV_OpenGlApiCallError, "Can't Activate The GL Rendering Context" ); + CV_Error(Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context"); useGl = true; - - __END__; } - void releaseGlContext(CvWindow* window) + void releaseGlContext(CvWindow& window) { - //CV_FUNCNAME( "releaseGlContext" ); + //CV_FUNCNAME("releaseGlContext"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - if (window->hGLRC) + if (window.hGLRC) { - wglDeleteContext(window->hGLRC); - window->hGLRC = NULL; + wglDeleteContext(window.hGLRC); + window.hGLRC = NULL; } - if (window->dc) + if (window.dc) { - ReleaseDC(window->hwnd, window->dc); - window->dc = NULL; + ReleaseDC(window.hwnd, window.dc); + window.dc = NULL; } - window->useGl = false; - - __END__; + window.useGl = false; } - void drawGl(CvWindow* window) + void drawGl(CvWindow& window) { - CV_FUNCNAME( "drawGl" ); + 
CV_FUNCNAME("drawGl"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - if (!wglMakeCurrent(window->dc, window->hGLRC)) - CV_ERROR( CV_OpenGlApiCallError, "Can't Activate The GL Rendering Context" ); + if (!wglMakeCurrent(window.dc, window.hGLRC)) + CV_Error(Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context"); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - if (window->glDrawCallback) - window->glDrawCallback(window->glDrawData); + if (window.glDrawCallback) + window.glDrawCallback(window.glDrawData); - if (!SwapBuffers(window->dc)) - CV_ERROR( CV_OpenGlApiCallError, "Can't swap OpenGL buffers" ); - - __END__; + if (!SwapBuffers(window.dc)) + CV_Error(Error::OpenGlApiCallError, "Can't swap OpenGL buffers"); } - void resizeGl(CvWindow* window) + void resizeGl(CvWindow& window) { - CV_FUNCNAME( "resizeGl" ); - - __BEGIN__; + CV_FUNCNAME("resizeGl"); - if (!wglMakeCurrent(window->dc, window->hGLRC)) - CV_ERROR( CV_OpenGlApiCallError, "Can't Activate The GL Rendering Context" ); + AutoLock lock(getWindowMutex()); - glViewport(0, 0, window->width, window->height); + if (!wglMakeCurrent(window.dc, window.hGLRC)) + CV_Error(Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context"); - __END__; + glViewport(0, 0, window.width, window.height); } } #endif // HAVE_OPENGL +static std::shared_ptr namedWindow_(const std::string& name, int flags); + +CV_IMPL int cvNamedWindow(const char* name, int flags) +{ + CV_FUNCNAME("cvNamedWindow"); + + AutoLock lock(getWindowMutex()); + + if (!name) + CV_Error(Error::StsNullPtr, "NULL name string"); + + // Check the name in the storage + auto window = icvFindWindowByName(name); + if (window) + { + return 1; + } -CV_IMPL int cvNamedWindow( const char* name, int flags ) + window = namedWindow_(name, flags); + return (bool)window; +} + +static std::shared_ptr namedWindow_(const std::string& name, int flags) { - int result = 0; - CV_FUNCNAME( "cvNamedWindow" ); + AutoLock lock(getWindowMutex()); - __BEGIN__; 
+ cvInitSystem(0,0); HWND hWnd, mainhWnd; - CvWindow* window; DWORD defStyle = WS_VISIBLE | WS_MINIMIZEBOX | WS_MAXIMIZEBOX | WS_SYSMENU; - int len; - CvRect rect; #ifdef HAVE_OPENGL bool useGl; HDC hGLDC; HGLRC hGLRC; #endif - cvInitSystem(0,0); - - if( !name ) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); - - // Check the name in the storage - window = icvFindWindowByName( name ); - if (window != 0) - { - result = 1; - EXIT; - } + CvRect rect; + icvLoadWindowPos(name.c_str(), rect); - if( !(flags & CV_WINDOW_AUTOSIZE))//YV add border in order to resize the window + if (!(flags & CV_WINDOW_AUTOSIZE))//YV add border in order to resize the window defStyle |= WS_SIZEBOX; #ifdef HAVE_OPENGL @@ -951,23 +1048,21 @@ CV_IMPL int cvNamedWindow( const char* name, int flags ) defStyle |= WS_CLIPCHILDREN | WS_CLIPSIBLINGS; #endif - icvLoadWindowPos( name, rect ); - - mainhWnd = CreateWindow( "Main HighGUI class", name, defStyle | WS_OVERLAPPED, - rect.x, rect.y, rect.width, rect.height, 0, 0, hg_hinstance, 0 ); - if( !mainhWnd ) - CV_ERROR( CV_StsError, "Frame window can not be created" ); + mainhWnd = CreateWindow(mainHighGUIclassName, name.c_str(), defStyle | WS_OVERLAPPED, + rect.x, rect.y, rect.width, rect.height, 0, 0, hg_hinstance, 0); + if (!mainhWnd) + CV_Error_(Error::StsError, ("Frame window can not be created: '%s'", name.c_str())); ShowWindow(mainhWnd, SW_SHOW); //YV- remove one border by changing the style - hWnd = CreateWindow("HighGUI class", "", (defStyle & ~WS_SIZEBOX) | WS_CHILD, CW_USEDEFAULT, 0, rect.width, rect.height, mainhWnd, 0, hg_hinstance, 0); - if( !hWnd ) - CV_ERROR( CV_StsError, "Frame window can not be created" ); + hWnd = CreateWindow(highGUIclassName, "", (defStyle & ~WS_SIZEBOX) | WS_CHILD, CW_USEDEFAULT, 0, rect.width, rect.height, mainhWnd, 0, hg_hinstance, 0); + if (!hWnd) + CV_Error(Error::StsError, "Frame window can not be created"); #ifndef HAVE_OPENGL if (flags & CV_WINDOW_OPENGL) - CV_ERROR( CV_OpenGlNotSupported, "Library was 
built without OpenGL support" ); + CV_Error(Error::OpenGlNotSupported, "Library was built without OpenGL support"); #else useGl = false; hGLDC = 0; @@ -979,14 +1074,10 @@ CV_IMPL int cvNamedWindow( const char* name, int flags ) ShowWindow(hWnd, SW_SHOW); - len = (int)strlen(name); - CV_CALL( window = (CvWindow*)cvAlloc(sizeof(CvWindow) + len + 1)); + auto window = std::make_shared(name); - window->signature = CV_WINDOW_MAGIC_VAL; window->hwnd = hWnd; window->frame = mainhWnd; - window->name = (char*)(window + 1); - memcpy( window->name, name, len + 1 ); window->flags = flags; window->image = 0; @@ -1016,200 +1107,175 @@ CV_IMPL int cvNamedWindow( const char* name, int flags ) window->on_mouse = 0; window->on_mouse_param = 0; - memset( &window->toolbar, 0, sizeof(window->toolbar)); + icvSetWindowLongPtr(hWnd, CV_USERDATA, window.get()); + icvSetWindowLongPtr(mainhWnd, CV_USERDATA, window.get()); - window->next = hg_windows; - window->prev = 0; - if( hg_windows ) - hg_windows->prev = window; - hg_windows = window; - icvSetWindowLongPtr( hWnd, CV_USERDATA, window ); - icvSetWindowLongPtr( mainhWnd, CV_USERDATA, window ); + auto& g_windows = getWindowsList(); + g_windows.push_back(window); // Recalculate window pos - icvUpdateWindowPos( window ); + icvUpdateWindowPos(*window); - result = 1; - __END__; - - return result; + return window; } #ifdef HAVE_OPENGL CV_IMPL void cvSetOpenGlContext(const char* name) { - CV_FUNCNAME( "cvSetOpenGlContext" ); - - __BEGIN__; + CV_FUNCNAME("cvSetOpenGlContext"); - CvWindow* window; + AutoLock lock(getWindowMutex()); - if(!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + if (!name) + CV_Error(Error::StsNullPtr, "NULL name string"); - window = icvFindWindowByName( name ); + auto window = icvFindWindowByName(name); if (!window) - CV_ERROR( CV_StsNullPtr, "NULL window" ); + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); if (!window->useGl) - CV_ERROR( CV_OpenGlNotSupported, "Window doesn't support OpenGL" ); + 
CV_Error(Error::OpenGlNotSupported, "Window doesn't support OpenGL"); if (!wglMakeCurrent(window->dc, window->hGLRC)) - CV_ERROR( CV_OpenGlApiCallError, "Can't Activate The GL Rendering Context" ); - - __END__; + CV_Error(Error::OpenGlApiCallError, "Can't Activate The GL Rendering Context"); } CV_IMPL void cvUpdateWindow(const char* name) { - CV_FUNCNAME( "cvUpdateWindow" ); + CV_FUNCNAME("cvUpdateWindow"); - __BEGIN__; - - CvWindow* window; + AutoLock lock(getWindowMutex()); if (!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + CV_Error(Error::StsNullPtr, "NULL name string"); - window = icvFindWindowByName( name ); + auto window = icvFindWindowByName(name); if (!window) - EXIT; + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); InvalidateRect(window->hwnd, 0, 0); - - __END__; } CV_IMPL void cvSetOpenGlDrawCallback(const char* name, CvOpenGlDrawCallback callback, void* userdata) { - CV_FUNCNAME( "cvCreateOpenGLCallback" ); + CV_FUNCNAME("cvCreateOpenGLCallback"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - CvWindow* window; - - if(!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + if (!name) + CV_Error(Error::StsNullPtr, "NULL name string"); - window = icvFindWindowByName( name ); - if( !window ) - EXIT; + auto window = icvFindWindowByName(name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); if (!window->useGl) - CV_ERROR( CV_OpenGlNotSupported, "Window was created without OpenGL context" ); + CV_Error(Error::OpenGlNotSupported, "Window was created without OpenGL context"); window->glDrawCallback = callback; window->glDrawData = userdata; - - __END__; } #endif // HAVE_OPENGL -static void icvRemoveWindow( CvWindow* window ) +static void icvRemoveWindow(const std::shared_ptr& window_) { - CvTrackbar* trackbar = NULL; + CV_Assert(window_); + AutoLock lock(getWindowMutex()); + CvWindow& window = *window_; + RECT wrect={0,0,0,0}; + auto& g_windows = getWindowsList(); + for (auto it = g_windows.begin(); it != 
g_windows.end(); ++it) + { + const std::shared_ptr& w = *it; + if (w.get() == &window) + { + g_windows.erase(it); + break; + } + } + #ifdef HAVE_OPENGL - if (window->useGl) + if (window.useGl) releaseGlContext(window); #endif - if( window->frame ) - GetWindowRect( window->frame, &wrect ); - if( window->name ) - icvSaveWindowPos( window->name, cvRect(wrect.left, wrect.top, - wrect.right-wrect.left, wrect.bottom-wrect.top) ); - - if( window->hwnd ) - icvSetWindowLongPtr( window->hwnd, CV_USERDATA, 0 ); - if( window->frame ) - icvSetWindowLongPtr( window->frame, CV_USERDATA, 0 ); + if (window.frame) + GetWindowRect(window.frame, &wrect); + icvSaveWindowPos(window.name.c_str(), cvRect(wrect.left, wrect.top, wrect.right-wrect.left, wrect.bottom-wrect.top)); - if( window->toolbar.toolbar ) - icvSetWindowLongPtr(window->toolbar.toolbar, CV_USERDATA, 0); + if (window.hwnd) + icvSetWindowLongPtr(window.hwnd, CV_USERDATA, 0); + if (window.frame) + icvSetWindowLongPtr(window.frame, CV_USERDATA, 0); - if( window->prev ) - window->prev->next = window->next; - else - hg_windows = window->next; - - if( window->next ) - window->next->prev = window->prev; + if (window.toolbar.toolbar) + icvSetWindowLongPtr(window.toolbar.toolbar, CV_USERDATA, 0); - window->prev = window->next = 0; + if (window.dc && window.image) + DeleteObject(SelectObject(window.dc, window.image)); - if( window->dc && window->image ) - DeleteObject(SelectObject(window->dc,window->image)); + if (window.dc) + DeleteDC(window.dc); - if( window->dc ) - DeleteDC(window->dc); - - for( trackbar = window->toolbar.first; trackbar != 0; ) + for (auto it = window.toolbar.trackbars.begin(); it != window.toolbar.trackbars.end(); ++it) { - CvTrackbar* next = trackbar->next; - if( trackbar->hwnd ) + auto trackbar = (*it).get(); + if (trackbar && trackbar->hwnd) { - icvSetWindowLongPtr( trackbar->hwnd, CV_USERDATA, 0 ); - cvFree( &trackbar ); + icvSetWindowLongPtr(trackbar->hwnd, CV_USERDATA, 0); } - trackbar = next; } - - 
cvFree( &window ); } -CV_IMPL void cvDestroyWindow( const char* name ) +CV_IMPL void cvDestroyWindow(const char* name) { - CV_FUNCNAME( "cvDestroyWindow" ); + CV_FUNCNAME("cvDestroyWindow"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - CvWindow* window; - HWND mainhWnd; + if (!name) + CV_Error(Error::StsNullPtr, "NULL name string"); - if(!name) - CV_ERROR( CV_StsNullPtr, "NULL name string" ); + auto window = icvFindWindowByName(name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); - window = icvFindWindowByName( name ); - if( !window ) - EXIT; + window->destroy(); +} - mainhWnd = window->frame; - SendMessage(window->hwnd, WM_CLOSE, 0, 0); - SendMessage( mainhWnd, WM_CLOSE, 0, 0); +void CvWindow::destroy() +{ + SendMessage(hwnd, WM_CLOSE, 0, 0); + SendMessage(frame, WM_CLOSE, 0, 0); // Do NOT call _remove_window -- CvWindow list will be updated automatically ... - - __END__; } - -static void icvScreenToClient( HWND hwnd, RECT* rect ) +static void icvScreenToClient(HWND hwnd, RECT* rect) { POINT p; p.x = rect->left; p.y = rect->top; ScreenToClient(hwnd, &p); - OffsetRect( rect, p.x - rect->left, p.y - rect->top ); + OffsetRect(rect, p.x - rect->left, p.y - rect->top); } /* Calculatess the window coordinates relative to the upper left corner of the mainhWnd window */ -static RECT icvCalcWindowRect( CvWindow* window ) +static RECT icvCalcWindowRect(CvWindow& window) { RECT crect = { 0 }, trect = { 0 }, rect = { 0 }; - assert(window); - - GetClientRect(window->frame, &crect); - if (window->toolbar.toolbar) + GetClientRect(window.frame, &crect); + if (window.toolbar.toolbar) { - GetWindowRect(window->toolbar.toolbar, &trect); - icvScreenToClient(window->frame, &trect); + GetWindowRect(window.toolbar.toolbar, &trect); + icvScreenToClient(window.frame, &trect); SubtractRect(&rect, &crect, &trect); } else @@ -1217,138 +1283,153 @@ static RECT icvCalcWindowRect( CvWindow* window ) return rect; } +static inline RECT 
icvCalcWindowRect(CvWindow* window) { CV_Assert(window); return icvCalcWindowRect(*window); } + -// returns TRUE if there is a problem such as ERROR_IO_PENDING. -static bool icvGetBitmapData( CvWindow* window, SIZE* size, int* channels, void** data ) +// returns FALSE if there is a problem such as ERROR_IO_PENDING. +static bool icvGetBitmapData(CvWindow& window, SIZE& size, int& channels, void*& data) { - BITMAP bmp; GdiFlush(); - HGDIOBJ h = GetCurrentObject( window->dc, OBJ_BITMAP ); - if( size ) - size->cx = size->cy = 0; - if( data ) - *data = 0; + + HGDIOBJ h = GetCurrentObject(window.dc, OBJ_BITMAP); + size.cx = size.cy = 0; + data = 0; if (h == NULL) - return true; + return false; + + BITMAP bmp = {}; if (GetObject(h, sizeof(bmp), &bmp) == 0) - return true; + return false; - if( size ) - { - size->cx = abs(bmp.bmWidth); - size->cy = abs(bmp.bmHeight); - } + size.cx = abs(bmp.bmWidth); + size.cy = abs(bmp.bmHeight); - if( channels ) - *channels = bmp.bmBitsPixel/8; + channels = bmp.bmBitsPixel/8; - if( data ) - *data = bmp.bmBits; + data = bmp.bmBits; - return false; + return true; +} +static bool icvGetBitmapData(CvWindow& window, SIZE& size) +{ + int channels = 0; + void* data = nullptr; + return icvGetBitmapData(window, size, channels, data); } -static void icvUpdateWindowPos( CvWindow* window ) +static void icvUpdateWindowPos(CvWindow& window) { RECT rect = { 0 }; - assert(window); - if( (window->flags & CV_WINDOW_AUTOSIZE) && window->image ) + if ((window.flags & CV_WINDOW_AUTOSIZE) && window.image) { int i; SIZE size = {0,0}; - icvGetBitmapData( window, &size, 0, 0 ); + icvGetBitmapData(window, size); // TODO check return value? // Repeat two times because after the first resizing of the mainhWnd window // toolbar may resize too - for(i = 0; i < (window->toolbar.toolbar ? 2 : 1); i++) + for(i = 0; i < (window.toolbar.toolbar ? 
2 : 1); i++) { - RECT rmw = { 0 }, rw = icvCalcWindowRect(window ); - MoveWindow(window->hwnd, rw.left, rw.top, + RECT rmw = { 0 }, rw = icvCalcWindowRect(&window); + MoveWindow(window.hwnd, rw.left, rw.top, rw.right - rw.left, rw.bottom - rw.top, FALSE); - GetClientRect(window->hwnd, &rw); - GetWindowRect(window->frame, &rmw); + GetClientRect(window.hwnd, &rw); + GetWindowRect(window.frame, &rmw); // Resize the mainhWnd window in order to make the bitmap fit into the child window - MoveWindow(window->frame, rmw.left, rmw.top, + MoveWindow(window.frame, rmw.left, rmw.top, size.cx + (rmw.right - rmw.left) - (rw.right - rw.left), - size.cy + (rmw.bottom - rmw.top) - (rw.bottom - rw.top), TRUE ); + size.cy + (rmw.bottom - rmw.top) - (rw.bottom - rw.top), TRUE); } } rect = icvCalcWindowRect(window); - MoveWindow(window->hwnd, rect.left, rect.top, + MoveWindow(window.hwnd, rect.left, rect.top, rect.right - rect.left, - rect.bottom - rect.top, TRUE ); + rect.bottom - rect.top, TRUE); } +static void showImage_(CvWindow& window, const Mat& image); + CV_IMPL void -cvShowImage( const char* name, const CvArr* arr ) +cvShowImage(const char* name, const CvArr* arr) { - CV_FUNCNAME( "cvShowImage" ); - - __BEGIN__; - - CvWindow* window; - SIZE size = { 0, 0 }; - int channels = 0; - void* dst_ptr = 0; - const int channels0 = 3; - CvMat stub, *image; - bool changed_size = false; // philipg + CV_FUNCNAME("cvShowImage"); - if( !name ) - CV_ERROR( CV_StsNullPtr, "NULL name" ); + if (!name) + CV_Error(Error::StsNullPtr, "NULL name"); - window = icvFindWindowByName(name); - if(!window) + std::shared_ptr window; { - cvNamedWindow(name, CV_WINDOW_AUTOSIZE); + AutoLock lock(getWindowMutex()); + window = icvFindWindowByName(name); + if (!window) + { + cvNamedWindow(name, CV_WINDOW_AUTOSIZE); + window = icvFindWindowByName(name); + } } - if( !window || !arr ) - EXIT; // keep silence here. - - CV_CALL( image = cvGetMat( arr, &stub )); + if (!window || !arr) + return; // keep silence here. 
+ CvMat stub = {}; + CvMat* image_c = cvGetMat(arr, &stub); + Mat image = cv::cvarrToMat(image_c); #ifdef HAVE_OPENGL if (window->useGl) { - cv::imshow(name, cv::cvarrToMat(image)); + cv::imshow(name, image); return; } #endif + return showImage_(*window, image); +} + +static void showImage_(CvWindow& window, const Mat& image) +{ + AutoLock lock(window.mutex); - if (window->image) + SIZE size = { 0, 0 }; + int channels = 0; + void* dst_ptr = 0; + const int channels0 = 3; + bool changed_size = false; // philipg + + if (window.image) + { // if there is something wrong with these system calls, we cannot display image... - if (icvGetBitmapData( window, &size, &channels, &dst_ptr )) + if (!icvGetBitmapData(window, size, channels, dst_ptr)) return; + } - if( size.cx != image->width || size.cy != image->height || channels != channels0 ) + if (size.cx != image.cols || size.cy != image.rows || channels != channels0) { changed_size = true; uchar buffer[sizeof(BITMAPINFO) + 255*sizeof(RGBQUAD)]; BITMAPINFO* binfo = (BITMAPINFO*)buffer; - DeleteObject( SelectObject( window->dc, window->image )); - window->image = 0; + DeleteObject(SelectObject(window.dc, window.image)); + window.image = 0; - size.cx = image->width; - size.cy = image->height; + size.cx = image.cols; + size.cy = image.rows; channels = channels0; - FillBitmapInfo( binfo, size.cx, size.cy, channels*8, 1 ); + FillBitmapInfo(binfo, size.cx, size.cy, channels*8, 1); - window->image = SelectObject( window->dc, CreateDIBSection(window->dc, binfo, - DIB_RGB_COLORS, &dst_ptr, 0, 0)); + window.image = SelectObject(window.dc, + CreateDIBSection(window.dc, binfo, DIB_RGB_COLORS, &dst_ptr, 0, 0) + ); } { cv::Mat dst(size.cy, size.cx, CV_8UC3, dst_ptr, (size.cx * channels + 3) & -4); - convertToShow(cv::cvarrToMat(image), dst, false); + convertToShow(image, dst, false); CV_Assert(dst.data == (uchar*)dst_ptr); cv::flip(dst, dst, 0); } @@ -1356,98 +1437,103 @@ cvShowImage( const char* name, const CvArr* arr ) // only resize 
window if needed if (changed_size) icvUpdateWindowPos(window); - InvalidateRect(window->hwnd, 0, 0); + InvalidateRect(window.hwnd, 0, 0); // philipg: this is not needed and just slows things down // UpdateWindow(window->hwnd); - - __END__; } -CV_IMPL void cvResizeWindow(const char* name, int width, int height ) +static void resizeWindow_(CvWindow& window, const Size& size); + +CV_IMPL void cvResizeWindow(const char* name, int width, int height) { - CV_FUNCNAME( "cvResizeWindow" ); + CV_FUNCNAME("cvResizeWindow"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - int i; - CvWindow* window; - RECT rmw = { 0 }, rw = { 0 }, rect = { 0 }; + if (!name) + CV_Error(Error::StsNullPtr, "NULL name"); - if( !name ) - CV_ERROR( CV_StsNullPtr, "NULL name" ); + auto window = icvFindWindowByName(name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); - window = icvFindWindowByName(name); - if(!window) - EXIT; + return resizeWindow_(*window, Size(width, height)); +} + +static void resizeWindow_(CvWindow& window, const Size& size) +{ + RECT rmw = { 0 }, rw = { 0 }, rect = { 0 }; // Repeat two times because after the first resizing of the mainhWnd window // toolbar may resize too - for(i = 0; i < (window->toolbar.toolbar ? 2 : 1); i++) + for (int i = 0; i < (window.toolbar.toolbar ? 
2 : 1); i++) { rw = icvCalcWindowRect(window); - MoveWindow(window->hwnd, rw.left, rw.top, + MoveWindow(window.hwnd, rw.left, rw.top, rw.right - rw.left, rw.bottom - rw.top, FALSE); - GetClientRect(window->hwnd, &rw); - GetWindowRect(window->frame, &rmw); + GetClientRect(window.hwnd, &rw); + GetWindowRect(window.frame, &rmw); // Resize the mainhWnd window in order to make the bitmap fit into the child window - MoveWindow(window->frame, rmw.left, rmw.top, - width + (rmw.right - rmw.left) - (rw.right - rw.left), - height + (rmw.bottom - rmw.top) - (rw.bottom - rw.top), TRUE); + MoveWindow(window.frame, rmw.left, rmw.top, + size.width + (rmw.right - rmw.left) - (rw.right - rw.left), + size.height + (rmw.bottom - rmw.top) - (rw.bottom - rw.top), TRUE); } rect = icvCalcWindowRect(window); - MoveWindow(window->hwnd, rect.left, rect.top, + MoveWindow(window.hwnd, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, TRUE); - - __END__; } +static void moveWindow_(CvWindow& window, const Point& pt); -CV_IMPL void cvMoveWindow( const char* name, int x, int y ) +CV_IMPL void cvMoveWindow(const char* name, int x, int y) { - CV_FUNCNAME( "cvMoveWindow" ); + CV_FUNCNAME("cvMoveWindow"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - CvWindow* window; - RECT rect = { 0 }; - - if( !name ) - CV_ERROR( CV_StsNullPtr, "NULL name" ); + if (!name) + CV_Error(Error::StsNullPtr, "NULL name"); - window = icvFindWindowByName(name); - if(!window) - EXIT; + auto window = icvFindWindowByName(name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); - GetWindowRect( window->frame, &rect ); - MoveWindow( window->frame, x, y, rect.right - rect.left, rect.bottom - rect.top, TRUE); + (void)moveWindow_(*window, Point(x, y)); +} - __END__; +static void moveWindow_(CvWindow& window, const Point& pt) +{ + RECT rect = { 0 }; + GetWindowRect(window.frame, &rect); // TODO check return value + MoveWindow(window.frame, pt.x, pt.y, rect.right - rect.left, 
rect.bottom - rect.top, TRUE); } static LRESULT CALLBACK -MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) +MainWindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam) { - CvWindow* window = icvWindowByHWND( hwnd ); - if( !window ) + auto window_ = icvWindowByHWND(hwnd); + if (!window_) return DefWindowProc(hwnd, uMsg, wParam, lParam); + CvWindow& window = *window_; + switch(uMsg) { case WM_COPY: - ::SendMessage(window->hwnd, uMsg, wParam, lParam); + ::SendMessage(window.hwnd, uMsg, wParam, lParam); break; case WM_DESTROY: - icvRemoveWindow(window); + icvRemoveWindow(window_); // Do nothing!!! //PostQuitMessage(0); break; case WM_GETMINMAXINFO: - if( !(window->flags & CV_WINDOW_AUTOSIZE) ) + if (!(window.flags & CV_WINDOW_AUTOSIZE)) { MINMAXINFO* minmax = (MINMAXINFO*)lParam; RECT rect = { 0 }; @@ -1456,10 +1542,10 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) minmax->ptMinTrackSize.y = 100; minmax->ptMinTrackSize.x = 100; - if( window->toolbar.first ) + if (!window.toolbar.trackbars.empty()) { - GetWindowRect( window->toolbar.first->hwnd, &rect ); - minmax->ptMinTrackSize.y += window->toolbar.rows*(rect.bottom - rect.top); + GetWindowRect(window.toolbar.trackbars[0]->hwnd, &rect); + minmax->ptMinTrackSize.y += window.toolbar.rows*(rect.bottom - rect.top); minmax->ptMinTrackSize.x = MAX(rect.right - rect.left + HG_BUDDY_WIDTH, HG_BUDDY_WIDTH*2); } return retval; @@ -1471,14 +1557,14 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) WINDOWPOS* pos = (WINDOWPOS*)lParam; // Update the toolbar pos/size - if(window->toolbar.toolbar) + if (window.toolbar.toolbar) { RECT rect = { 0 }; - GetWindowRect(window->toolbar.toolbar, &rect); - MoveWindow(window->toolbar.toolbar, 0, 0, pos->cx, rect.bottom - rect.top, TRUE); + GetWindowRect(window.toolbar.toolbar, &rect); + MoveWindow(window.toolbar.toolbar, 0, 0, pos->cx, rect.bottom - rect.top, TRUE); } - if(!(window->flags & CV_WINDOW_AUTOSIZE)) + if 
(!(window.flags & CV_WINDOW_AUTOSIZE)) icvUpdateWindowPos(window); break; @@ -1490,7 +1576,7 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) LPWINDOWPOS pos = (LPWINDOWPOS)lParam; RECT rect = { 0 }; - GetWindowRect(window->frame, &rect); + GetWindowRect(window.frame, &rect); HMONITOR hMonitor; hMonitor = MonitorFromRect(&rect, MONITOR_DEFAULTTONEAREST); @@ -1515,13 +1601,13 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) } case WM_ACTIVATE: - if(LOWORD(wParam) == WA_ACTIVE || LOWORD(wParam) == WA_CLICKACTIVE) - SetFocus(window->hwnd); + if (LOWORD(wParam) == WA_ACTIVE || LOWORD(wParam) == WA_CLICKACTIVE) + SetFocus(window.hwnd); break; case WM_MOUSEWHEEL: case WM_MOUSEHWHEEL: - if( window->on_mouse ) + if (window.on_mouse) { int flags = (wParam & MK_LBUTTON ? CV_EVENT_FLAG_LBUTTON : 0)| (wParam & MK_RBUTTON ? CV_EVENT_FLAG_RBUTTON : 0)| @@ -1536,32 +1622,32 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) flags |= (delta << 16); POINT pt; - pt.x = GET_X_LPARAM( lParam ); - pt.y = GET_Y_LPARAM( lParam ); + pt.x = GET_X_LPARAM(lParam); + pt.y = GET_Y_LPARAM(lParam); ::ScreenToClient(hwnd, &pt); // Convert screen coordinates to client coordinates. 
RECT rect = { 0 }; - GetClientRect( window->hwnd, &rect ); + GetClientRect(window.hwnd, &rect); SIZE size = {0,0}; #ifdef HAVE_OPENGL - if (window->useGl) + if (window.useGl) { - cv::ogl::Texture2D* texObj = static_cast(window->glDrawData); + cv::ogl::Texture2D* texObj = static_cast(window.glDrawData); size.cx = texObj->cols(); size.cy = texObj->rows(); } else { - icvGetBitmapData(window, &size, 0, 0); + icvGetBitmapData(window, size); } #else - icvGetBitmapData(window, &size, 0, 0); + icvGetBitmapData(window, size); #endif - window->on_mouse( event, pt.x*size.cx/MAX(rect.right - rect.left,1), - pt.y*size.cy/MAX(rect.bottom - rect.top,1), flags, - window->on_mouse_param ); + int x = cvRound((float)pt.x*size.cx/MAX(rect.right - rect.left,1)); + int y = cvRound((float)pt.y*size.cy/MAX(rect.bottom - rect.top,1)); + window.on_mouse(event, x, y, flags, window.on_mouse_param); } break; @@ -1571,17 +1657,17 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) HRGN rgn, rgn1, rgn2; int ret; HDC hdc = (HDC)wParam; - GetWindowRect(window->hwnd, &cr); - icvScreenToClient(window->frame, &cr); - if(window->toolbar.toolbar) + GetWindowRect(window.hwnd, &cr); + icvScreenToClient(window.frame, &cr); + if (window.toolbar.toolbar) { - GetWindowRect(window->toolbar.toolbar, &tr); - icvScreenToClient(window->frame, &tr); + GetWindowRect(window.toolbar.toolbar, &tr); + icvScreenToClient(window.frame, &tr); } else tr.left = tr.top = tr.right = tr.bottom = 0; - GetClientRect(window->frame, &wrc); + GetClientRect(window.frame, &wrc); rgn = CreateRectRgn(0, 0, wrc.right, wrc.bottom); rgn1 = CreateRectRgn(cr.left, cr.top, cr.right, cr.bottom); @@ -1591,7 +1677,7 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) ret = CombineRgn(rgn, rgn, rgn1, RGN_DIFF); ret = CombineRgn(rgn, rgn, rgn2, RGN_DIFF); - if(ret != NULLREGION && ret != ERROR) + if (ret != NULLREGION && ret != ERROR) FillRgn(hdc, rgn, (HBRUSH)icvGetClassLongPtr(hwnd, CV_HBRBACKGROUND)); 
DeleteObject(rgn); @@ -1605,20 +1691,24 @@ MainWindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) } -static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) +static LRESULT CALLBACK HighGUIProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam) { - CvWindow* window = icvWindowByHWND(hwnd); - if( !window ) + auto window_ = icvWindowByHWND(hwnd); + if (!window_) + { // This window is not mentioned in HighGUI storage // Actually, this should be error except for the case of calls to CreateWindow return DefWindowProc(hwnd, uMsg, wParam, lParam); + } + + CvWindow& window = *window_; // Process the message switch(uMsg) { case WM_COPY: { - if (!::OpenClipboard(hwnd) ) + if (!::OpenClipboard(hwnd)) break; HDC hDC = 0; @@ -1632,7 +1722,7 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM if (!::EmptyClipboard()) break; - if(!window->image) + if (!window.image) break; // Get window device context @@ -1640,19 +1730,20 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM break; // Create another DC compatible with hDC - if (0 == (memDC = ::CreateCompatibleDC( hDC ))) + if (0 == (memDC = ::CreateCompatibleDC(hDC))) break; // Determine the bitmap's dimensions - int nchannels = 3; SIZE size = {0,0}; - icvGetBitmapData( window, &size, &nchannels, 0 ); + int nchannels = 3; + void* data = NULL; // unused + icvGetBitmapData(window, size, nchannels, data); // Create bitmap to draw on and it in the new DC - if (0 == (memBM = ::CreateCompatibleBitmap ( hDC, size.cx, size.cy))) + if (0 == (memBM = ::CreateCompatibleBitmap(hDC, size.cx, size.cy))) break; - if (!::SelectObject( memDC, memBM )) + if (!::SelectObject(memDC, memBM)) break; // Begin drawing to DC @@ -1660,7 +1751,7 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM break; RGBQUAD table[256]; - if( 1 == nchannels ) + if (1 == nchannels) { for(int i = 0; i < 256; ++i) { @@ -1668,14 
+1759,14 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM table[i].rgbGreen = (unsigned char)i; table[i].rgbRed = (unsigned char)i; } - if (!::SetDIBColorTable(window->dc, 0, 255, table)) + if (!::SetDIBColorTable(window.dc, 0, 255, table)) break; } // The image copied to the clipboard will be in its original size, regardless if the window itself was resized. // Render the image to the dc/bitmap (at original size). - if (!::BitBlt( memDC, 0, 0, size.cx, size.cy, window->dc, 0, 0, SRCCOPY )) + if (!::BitBlt(memDC, 0, 0, size.cx, size.cy, window.dc, 0, 0, SRCCOPY)) break; // Finally, set bitmap to clipboard @@ -1712,7 +1803,7 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM case WM_RBUTTONUP: case WM_MBUTTONUP: case WM_MOUSEMOVE: - if( window->on_mouse ) + if (window.on_mouse) { POINT pt; @@ -1732,50 +1823,50 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM uMsg == WM_RBUTTONDBLCLK ? CV_EVENT_RBUTTONDBLCLK : uMsg == WM_MBUTTONDBLCLK ? CV_EVENT_MBUTTONDBLCLK : CV_EVENT_MOUSEMOVE; - if( uMsg == WM_LBUTTONDOWN || uMsg == WM_RBUTTONDOWN || uMsg == WM_MBUTTONDOWN ) - SetCapture( hwnd ); - if( uMsg == WM_LBUTTONUP || uMsg == WM_RBUTTONUP || uMsg == WM_MBUTTONUP ) + if (uMsg == WM_LBUTTONDOWN || uMsg == WM_RBUTTONDOWN || uMsg == WM_MBUTTONDOWN) + SetCapture(hwnd); + if (uMsg == WM_LBUTTONUP || uMsg == WM_RBUTTONUP || uMsg == WM_MBUTTONUP) ReleaseCapture(); - pt.x = GET_X_LPARAM( lParam ); - pt.y = GET_Y_LPARAM( lParam ); + pt.x = GET_X_LPARAM(lParam); + pt.y = GET_Y_LPARAM(lParam); - if (window->flags & CV_WINDOW_AUTOSIZE) + if (window.flags & CV_WINDOW_AUTOSIZE) { // As user can't change window size, do not scale window coordinates. Underlying windowing system // may prevent full window from being displayed and in this case coordinates should not be scaled. 
- window->on_mouse( event, pt.x, pt.y, flags, window->on_mouse_param ); + window.on_mouse(event, pt.x, pt.y, flags, window.on_mouse_param); } else { // Full window is displayed using different size. Scale coordinates to match underlying positions. RECT rect = { 0 }; SIZE size = {0, 0}; - GetClientRect( window->hwnd, &rect ); + GetClientRect(window.hwnd, &rect); #ifdef HAVE_OPENGL - if (window->useGl) + if (window.useGl) { - cv::ogl::Texture2D* texObj = static_cast(window->glDrawData); + cv::ogl::Texture2D* texObj = static_cast(window.glDrawData); size.cx = texObj->cols(); size.cy = texObj->rows(); } else { - icvGetBitmapData(window, &size, 0, 0); + icvGetBitmapData(window, size); } #else - icvGetBitmapData( window, &size, 0, 0 ); + icvGetBitmapData(window, size); #endif - window->on_mouse( event, pt.x*size.cx/MAX(rect.right - rect.left,1), - pt.y*size.cy/MAX(rect.bottom - rect.top,1), flags, - window->on_mouse_param ); + int x = cvRound((float)pt.x*size.cx/MAX(rect.right - rect.left,1)); + int y = cvRound((float)pt.y*size.cy/MAX(rect.bottom - rect.top,1)); + window.on_mouse(event, x, y, flags, window.on_mouse_param); } } break; case WM_PAINT: - if(window->image != 0) + if (window.image != 0) { int nchannels = 3; SIZE size = {0,0}; @@ -1784,12 +1875,13 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM RGBQUAD table[256]; // Determine the bitmap's dimensions - icvGetBitmapData( window, &size, &nchannels, 0 ); + void* data = 0; // unused + icvGetBitmapData(window, size, nchannels, data); hdc = BeginPaint(hwnd, &paint); SetStretchBltMode(hdc, COLORONCOLOR); - if( nchannels == 1 ) + if (nchannels == 1) { int i; for(i = 0; i < 256; i++) @@ -1798,25 +1890,25 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM table[i].rgbGreen = (unsigned char)i; table[i].rgbRed = (unsigned char)i; } - SetDIBColorTable(window->dc, 0, 255, table); + SetDIBColorTable(window.dc, 0, 255, table); } - if(window->flags & 
CV_WINDOW_AUTOSIZE) + if (window.flags & CV_WINDOW_AUTOSIZE) { - BitBlt( hdc, 0, 0, size.cx, size.cy, window->dc, 0, 0, SRCCOPY ); + BitBlt(hdc, 0, 0, size.cx, size.cy, window.dc, 0, 0, SRCCOPY); } else { RECT rect = { 0 }; - GetClientRect(window->hwnd, &rect); - StretchBlt( hdc, 0, 0, rect.right - rect.left, rect.bottom - rect.top, - window->dc, 0, 0, size.cx, size.cy, SRCCOPY ); + GetClientRect(window.hwnd, &rect); + StretchBlt(hdc, 0, 0, rect.right - rect.left, rect.bottom - rect.top, + window.dc, 0, 0, size.cx, size.cy, SRCCOPY); } //DeleteDC(hdc); EndPaint(hwnd, &paint); } #ifdef HAVE_OPENGL - else if(window->useGl) + else if (window.useGl) { drawGl(window); return DefWindowProc(hwnd, uMsg, wParam, lParam); @@ -1829,13 +1921,13 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM return 0; case WM_ERASEBKGND: - if(window->image) + if (window.image) return 0; break; case WM_DESTROY: - icvRemoveWindow(window); + icvRemoveWindow(window_); // Do nothing!!! 
//PostQuitMessage(0); break; @@ -1845,15 +1937,15 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM return 0; case WM_KEYDOWN: - window->last_key = (int)wParam; + window.last_key = (int)wParam; return 0; case WM_SIZE: - window->width = LOWORD(lParam); - window->height = HIWORD(lParam); + window.width = LOWORD(lParam); + window.height = HIWORD(lParam); #ifdef HAVE_OPENGL - if (window->useGl) + if (window.useGl) resizeGl(window); #endif } @@ -1862,24 +1954,24 @@ static LRESULT CALLBACK HighGUIProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM } -static LRESULT CALLBACK WindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) +static LRESULT CALLBACK WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam) { LRESULT ret; - if( hg_on_preprocess ) + if (hg_on_preprocess) { int was_processed = 0; int rethg = hg_on_preprocess(hwnd, uMsg, wParam, lParam, &was_processed); - if( was_processed ) + if (was_processed) return rethg; } ret = HighGUIProc(hwnd, uMsg, wParam, lParam); - if(hg_on_postprocess) + if (hg_on_postprocess) { int was_processed = 0; int rethg = hg_on_postprocess(hwnd, uMsg, wParam, lParam, &was_processed); - if( was_processed ) + if (was_processed) return rethg; } @@ -1887,51 +1979,56 @@ static LRESULT CALLBACK WindowProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM } -static void icvUpdateTrackbar( CvTrackbar* trackbar, int pos ) +static void icvUpdateTrackbar(CvTrackbar& trackbar, int pos) { const int max_name_len = 10; const char* suffix = ""; char pos_text[32]; int name_len; - if( trackbar->data ) - *trackbar->data = pos; + if (trackbar.data) + *trackbar.data = pos; - if( trackbar->pos != pos ) + if (trackbar.pos != pos) { - trackbar->pos = pos; - if( trackbar->notify2 ) - trackbar->notify2(pos, trackbar->userdata); - if( trackbar->notify ) - trackbar->notify(pos); - - name_len = (int)strlen(trackbar->name); - - if( name_len > max_name_len ) + trackbar.pos = pos; + if (trackbar.onChangeCallback) + 
trackbar.onChangeCallback(pos, trackbar.userdata); + if (trackbar.notify2) + trackbar.notify2(pos, trackbar.userdata); + if (trackbar.notify) + trackbar.notify(pos); + + name_len = (int)trackbar.name.size(); + + // TODO replace C strings manipulation + if (name_len > max_name_len) { int start_len = max_name_len*2/3; int end_len = max_name_len - start_len - 2; - memcpy( pos_text, trackbar->name, start_len ); - memcpy( pos_text + start_len, "...", 3 ); - memcpy( pos_text + start_len + 3, trackbar->name + name_len - end_len, end_len + 1 ); + memcpy(pos_text, trackbar.name.c_str(), start_len); + memcpy(pos_text + start_len, "...", 3); + memcpy(pos_text + start_len + 3, trackbar.name.c_str() + name_len - end_len, end_len + 1); } else { - memcpy( pos_text, trackbar->name, name_len + 1); + memcpy(pos_text, trackbar.name.c_str(), name_len + 1); } - sprintf( pos_text + strlen(pos_text), "%s: %d\n", suffix, pos ); - SetWindowText( trackbar->buddy, pos_text ); + sprintf(pos_text + strlen(pos_text), "%s: %d\n", suffix, pos); + SetWindowText(trackbar.buddy, pos_text); } } -static LRESULT CALLBACK HGToolbarProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam ) +static LRESULT CALLBACK HGToolbarProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam) { - CvWindow* window = icvWindowByHWND( hwnd ); - if(!window) + auto window_ = icvWindowByHWND(hwnd); + if (!window_) return DefWindowProc(hwnd, uMsg, wParam, lParam); + CvWindow& window = *window_; + // Control messages processing switch(uMsg) { @@ -1940,32 +2037,34 @@ static LRESULT CALLBACK HGToolbarProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPAR { HWND slider = (HWND)lParam; int pos = (int)SendMessage(slider, TBM_GETPOS, 0, 0); - CvTrackbar* trackbar = icvTrackbarByHWND( slider ); + auto trackbar = icvTrackbarByHWND(slider); - if( trackbar ) + if (trackbar) { - if( trackbar->pos != pos ) - icvUpdateTrackbar( trackbar, pos ); + if (trackbar->pos != pos) + icvUpdateTrackbar(*trackbar, pos); } - SetFocus( window->hwnd ); + 
SetFocus(window.hwnd); return 0; } case WM_NCCALCSIZE: { - LRESULT ret = CallWindowProc(window->toolbar.toolBarProc, hwnd, uMsg, wParam, lParam); + LRESULT ret = CallWindowProc(window.toolbar.toolBarProc, hwnd, uMsg, wParam, lParam); int rows = (int)SendMessage(hwnd, TB_GETROWS, 0, 0); - if(window->toolbar.rows != rows) + if (window.toolbar.rows != rows) { - SendMessage(window->toolbar.toolbar, TB_BUTTONCOUNT, 0, 0); - CvTrackbar* trackbar = window->toolbar.first; + SendMessage(window.toolbar.toolbar, TB_BUTTONCOUNT, 0, 0); + auto& trakbars = window.toolbar.trackbars; - for( ; trackbar != 0; trackbar = trackbar->next ) + for (auto it = trakbars.begin(); it != trakbars.end(); ++it) { + auto trackbar = *it; + CV_Assert(trackbar); RECT rect = { 0 }; - SendMessage(window->toolbar.toolbar, TB_GETITEMRECT, + SendMessage(window.toolbar.toolbar, TB_GETITEMRECT, (WPARAM)trackbar->id, (LPARAM)&rect); MoveWindow(trackbar->hwnd, rect.left + HG_BUDDY_WIDTH, rect.top, rect.right - rect.left - HG_BUDDY_WIDTH, @@ -1973,46 +2072,63 @@ static LRESULT CALLBACK HGToolbarProc( HWND hwnd, UINT uMsg, WPARAM wParam, LPAR MoveWindow(trackbar->buddy, rect.left, rect.top, HG_BUDDY_WIDTH, rect.bottom - rect.top, FALSE); } - window->toolbar.rows = rows; + window.toolbar.rows = rows; } return ret; } } - return CallWindowProc(window->toolbar.toolBarProc, hwnd, uMsg, wParam, lParam); + return CallWindowProc(window.toolbar.toolBarProc, hwnd, uMsg, wParam, lParam); } CV_IMPL void cvDestroyAllWindows(void) { - CvWindow* window = hg_windows; - - while( window ) + std::vector< std::shared_ptr > g_windows; { - HWND mainhWnd = window->frame; - HWND hwnd = window->hwnd; - window = window->next; + AutoLock lock(getWindowMutex()); + g_windows = getWindowsList(); // copy + } + for (auto it = g_windows.begin(); it != g_windows.end(); ++it) + { + auto window_ = *it; + if (!window_) + continue; + + { + CvWindow& window = *window_; + + HWND mainhWnd = window.frame; + HWND hwnd = window.hwnd; + + 
SendMessage(hwnd, WM_CLOSE, 0, 0); + SendMessage(mainhWnd, WM_CLOSE, 0, 0); + } - SendMessage( hwnd, WM_CLOSE, 0, 0 ); - SendMessage( mainhWnd, WM_CLOSE, 0, 0 ); + window_.reset(); + } + // TODO needed? + { + AutoLock lock(getWindowMutex()); + getWindowsList().clear(); } } -static void showSaveDialog(CvWindow* window) +static void showSaveDialog(CvWindow& window) { - if (!window || !window->image) + if (!window.image) return; SIZE sz; int channels; void* data; - if (icvGetBitmapData(window, &sz, &channels, &data)) + if (icvGetBitmapData(window, sz, channels, data)) return; // nothing to save char szFileName[MAX_PATH] = ""; // try to use window title as file name - GetWindowText(window->frame, szFileName, MAX_PATH); + GetWindowText(window.frame, szFileName, MAX_PATH); OPENFILENAME ofn; ZeroMemory(&ofn, sizeof(ofn)); @@ -2022,7 +2138,7 @@ static void showSaveDialog(CvWindow* window) #else ofn.lStructSize = sizeof(ofn); #endif - ofn.hwndOwner = window->hwnd; + ofn.hwndOwner = window.hwnd; ofn.lpstrFilter = #ifdef HAVE_PNG "Portable Network Graphics files (*.png)\0*.png\0" @@ -2075,9 +2191,15 @@ static bool handleMessage(MSG& message, int& keyCode) // otherwise the message was handled specifically bool is_processed = false; - for (CvWindow* window = hg_windows; window != 0 && is_processed == 0; window = window->next) + AutoLock lock(getWindowMutex()); + auto& g_windows = getWindowsList(); + for (auto it = g_windows.begin(); it != g_windows.end() && !is_processed; ++it) { - if (!(window->hwnd == message.hwnd || window->frame == message.hwnd)) + auto window_ = *it; + if (!window_) + continue; + CvWindow& window = *window_; + if (!(window.hwnd == message.hwnd || window.frame == message.hwnd)) continue; is_processed = true; @@ -2140,7 +2262,7 @@ static bool handleMessage(MSG& message, int& keyCode) /* * process until queue is empty but don't wait. 
*/ -int cv::pollKey() +int pollKey_W32() { CV_TRACE_FUNCTION(); for(;;) @@ -2156,7 +2278,7 @@ int cv::pollKey() } CV_IMPL int -cvWaitKey( int delay ) +cvWaitKey(int delay) { int64 time0 = cv::getTickCount(); int64 timeEnd = time0 + (int64)(delay * 0.001f * cv::getTickFrequency()); @@ -2165,9 +2287,9 @@ cvWaitKey( int delay ) { MSG message; - if( (delay <= 0) && hg_windows) + if ((delay <= 0) && !getWindowsList().empty()) GetMessage(&message, 0, 0, 0); - else if( PeekMessage(&message, 0, 0, 0, PM_REMOVE) == FALSE ) + else if (PeekMessage(&message, 0, 0, 0, PM_REMOVE) == FALSE) { int64 t = cv::getTickCount(); if (t - timeEnd >= 0) @@ -2183,110 +2305,135 @@ cvWaitKey( int delay ) } -static CvTrackbar* -icvFindTrackbarByName( const CvWindow* window, const char* name ) +static +std::shared_ptr icvFindTrackbarByName(CvWindow& window, const std::string& name) { - CvTrackbar* trackbar = window->toolbar.first; - - for( ; trackbar != 0 && strcmp( trackbar->name, name ) != 0; trackbar = trackbar->next ) - ; - - return trackbar; + auto trackbars = window.toolbar.trackbars; + for (auto it = trackbars.begin(); it != trackbars.end(); ++it) + { + auto& trackbar = *it; + CV_Assert(trackbar); + if (trackbar->name == name) + return trackbar; + } + return std::shared_ptr(); +} +static inline +std::shared_ptr icvFindTrackbarByName(const std::shared_ptr& window, const std::string& name) +{ + CV_Assert(window); + return icvFindTrackbarByName(window, name); } +static +std::shared_ptr createTrackbar_(CvWindow& window, const std::string& trackbar_name, + int count, + TrackbarCallback onChange, void* userdata); static int -icvCreateTrackbar( const char* trackbar_name, const char* window_name, - int* val, int count, CvTrackbarCallback on_notify, - CvTrackbarCallback2 on_notify2, void* userdata ) +icvCreateTrackbar(const char* trackbar_name, const char* window_name, + int* val, int count, CvTrackbarCallback on_notify, + CvTrackbarCallback2 on_notify2, void* userdata) { - int result = 0; + 
CV_FUNCNAME("icvCreateTrackbar"); - CV_FUNCNAME( "icvCreateTrackbar" ); + AutoLock lock(getWindowMutex()); - __BEGIN__; + if (!window_name || !trackbar_name) + CV_Error(Error::StsNullPtr, "NULL window or trackbar name"); - char slider_name[32]; - CvWindow* window = 0; - CvTrackbar* trackbar = 0; - int pos = 0; + if (count < 0) + CV_Error(Error::StsOutOfRange, "Bad trackbar maximal value"); - if( !window_name || !trackbar_name ) - CV_ERROR( CV_StsNullPtr, "NULL window or trackbar name" ); + auto window = icvFindWindowByName(window_name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", window_name)); - if( count < 0 ) - CV_ERROR( CV_StsOutOfRange, "Bad trackbar maximal value" ); + auto trackbar = icvFindTrackbarByName(*window, trackbar_name); + if (!trackbar) + trackbar = createTrackbar_(*window, trackbar_name, count, nullptr, userdata); + CV_Assert(trackbar); - window = icvFindWindowByName(window_name); - if( !window ) - EXIT; + trackbar->notify = on_notify; + trackbar->notify2 = on_notify2; + trackbar->userdata = userdata; + trackbar->data = val; - trackbar = icvFindTrackbarByName(window,trackbar_name); - if( !trackbar ) - { - TBBUTTON tbs = {}; - TBBUTTONINFO tbis = {}; - RECT rect = { 0 }; - int bcount; - int len = (int)strlen( trackbar_name ); + return 1; +} - // create toolbar if it is not created yet - if( !window->toolbar.toolbar ) - { - const int default_height = 30; - - // CreateToolbarEx is deprecated and forces linking against Comctl32.lib. - window->toolbar.toolbar = CreateWindowEx(0, TOOLBARCLASSNAME, NULL, - WS_CHILD | CCS_TOP | TBSTYLE_WRAPABLE | BTNS_AUTOSIZE | BTNS_BUTTON, - 0, 0, 0, 0, - window->frame, NULL, GetModuleHandle(NULL), NULL); - // CreateToolbarEx automatically sends this but CreateWindowEx doesn't. 
- SendMessage(window->toolbar.toolbar, TB_BUTTONSTRUCTSIZE, (WPARAM)sizeof(TBBUTTON), 0); - - GetClientRect(window->frame, &rect); - MoveWindow( window->toolbar.toolbar, 0, 0, - rect.right - rect.left, default_height, TRUE); - SendMessage(window->toolbar.toolbar, TB_AUTOSIZE, 0, 0); - ShowWindow(window->toolbar.toolbar, SW_SHOW); - - window->toolbar.first = 0; - window->toolbar.pos = 0; - window->toolbar.rows = 0; - window->toolbar.toolBarProc = - (WNDPROC)icvGetWindowLongPtr(window->toolbar.toolbar, CV_WNDPROC); - - icvUpdateWindowPos(window); - - // Subclassing from toolbar - icvSetWindowLongPtr(window->toolbar.toolbar, CV_WNDPROC, HGToolbarProc); - icvSetWindowLongPtr(window->toolbar.toolbar, CV_USERDATA, window); - } +static void createToolbar_(CvWindow& window) +{ + CV_Assert(!window.toolbar.toolbar); + + const int default_height = 30; + + // CreateToolbarEx is deprecated and forces linking against Comctl32.lib. + window.toolbar.toolbar = CreateWindowEx(0, TOOLBARCLASSNAME, NULL, + WS_CHILD | CCS_TOP | TBSTYLE_WRAPABLE | BTNS_AUTOSIZE | BTNS_BUTTON, + 0, 0, 0, 0, + window.frame, NULL, GetModuleHandle(NULL), NULL); + // CreateToolbarEx automatically sends this but CreateWindowEx doesn't. 
+ SendMessage(window.toolbar.toolbar, TB_BUTTONSTRUCTSIZE, (WPARAM)sizeof(TBBUTTON), 0); + + RECT rect; + GetClientRect(window.frame, &rect); + MoveWindow(window.toolbar.toolbar, 0, 0, + rect.right - rect.left, default_height, TRUE); + SendMessage(window.toolbar.toolbar, TB_AUTOSIZE, 0, 0); + ShowWindow(window.toolbar.toolbar, SW_SHOW); + + window.toolbar.pos = 0; + window.toolbar.rows = 0; + window.toolbar.toolBarProc = + (WNDPROC)icvGetWindowLongPtr(window.toolbar.toolbar, CV_WNDPROC); - /* Retrieve current buttons count */ - bcount = (int)SendMessage(window->toolbar.toolbar, TB_BUTTONCOUNT, 0, 0); + icvUpdateWindowPos(window); - if(bcount > 1) - { - /* If this is not the first button then we need to - separate it from the previous one */ - tbs.iBitmap = 0; - tbs.idCommand = bcount; // Set button id to it's number - tbs.iString = 0; - tbs.fsStyle = TBSTYLE_SEP; - tbs.fsState = TBSTATE_ENABLED; - SendMessage(window->toolbar.toolbar, TB_ADDBUTTONS, 1, (LPARAM)&tbs); - - // Retrieve current buttons count - bcount = (int)SendMessage(window->toolbar.toolbar, TB_BUTTONCOUNT, 0, 0); - } + // Subclassing from toolbar + icvSetWindowLongPtr(window.toolbar.toolbar, CV_WNDPROC, HGToolbarProc); + icvSetWindowLongPtr(window.toolbar.toolbar, CV_USERDATA, (void*)&window); + +} + +static +std::shared_ptr createTrackbar_(CvWindow& window, const std::string& trackbar_name, + int count, + TrackbarCallback onChange, void* userdata) +{ + // create toolbar if it is not created yet + if (!window.toolbar.toolbar) + { + createToolbar_(window); + } + + TBBUTTON tbs = {}; - /* Add a button which we're going to cover with the slider */ + /* Retrieve current buttons count */ + int bcount = (int)SendMessage(window.toolbar.toolbar, TB_BUTTONCOUNT, 0, 0); + + if (bcount > 1) + { + /* If this is not the first button then we need to + separate it from the previous one */ tbs.iBitmap = 0; tbs.idCommand = bcount; // Set button id to it's number + tbs.iString = 0; + tbs.fsStyle = TBSTYLE_SEP; 
tbs.fsState = TBSTATE_ENABLED; + SendMessage(window.toolbar.toolbar, TB_ADDBUTTONS, 1, (LPARAM)&tbs); + + // Retrieve current buttons count + bcount = (int)SendMessage(window.toolbar.toolbar, TB_BUTTONCOUNT, 0, 0); + } + + /* Add a button which we're going to cover with the slider */ + tbs.iBitmap = 0; + tbs.idCommand = bcount; // Set button id to it's number + tbs.fsState = TBSTATE_ENABLED; #if 0/*!defined WIN64 && !defined EM64T*/ - tbs.fsStyle = 0; - tbs.iString = 0; + tbs.fsStyle = 0; + tbs.iString = 0; #else #ifndef TBSTYLE_AUTOSIZE @@ -2296,320 +2443,640 @@ icvCreateTrackbar( const char* trackbar_name, const char* window_name, #ifndef TBSTYLE_GROUP #define TBSTYLE_GROUP 0x0004 #endif - //tbs.fsStyle = TBSTYLE_AUTOSIZE; - tbs.fsStyle = TBSTYLE_GROUP; - tbs.iString = (INT_PTR)trackbar_text; + //tbs.fsStyle = TBSTYLE_AUTOSIZE; + tbs.fsStyle = TBSTYLE_GROUP; + tbs.iString = (INT_PTR)trackbar_text; #endif - SendMessage(window->toolbar.toolbar, TB_ADDBUTTONS, 1, (LPARAM)&tbs); - - /* Adjust button size to the slider */ - tbis.cbSize = sizeof(tbis); - tbis.dwMask = TBIF_SIZE; - - GetClientRect(window->hwnd, &rect); - tbis.cx = (unsigned short)(rect.right - rect.left); - - SendMessage(window->toolbar.toolbar, TB_SETBUTTONINFO, - (WPARAM)tbs.idCommand, (LPARAM)&tbis); - - /* Get button pos */ - SendMessage(window->toolbar.toolbar, TB_GETITEMRECT, - (WPARAM)tbs.idCommand, (LPARAM)&rect); - - /* Create a slider */ - trackbar = (CvTrackbar*)cvAlloc( sizeof(CvTrackbar) + len + 1 ); - trackbar->signature = CV_TRACKBAR_MAGIC_VAL; - trackbar->notify = 0; - trackbar->notify2 = 0; - trackbar->parent = window; - trackbar->pos = 0; - trackbar->data = 0; - trackbar->id = bcount; - trackbar->next = window->toolbar.first; - trackbar->name = (char*)(trackbar + 1); - memcpy( trackbar->name, trackbar_name, len + 1 ); - window->toolbar.first = trackbar; - - sprintf(slider_name, "Trackbar%p", val); - trackbar->hwnd = CreateWindowEx(0, TRACKBAR_CLASS, slider_name, - WS_CHILD | WS_VISIBLE 
| TBS_AUTOTICKS | - TBS_FIXEDLENGTH | TBS_HORZ | TBS_BOTTOM, - rect.left + HG_BUDDY_WIDTH, rect.top, - rect.right - rect.left - HG_BUDDY_WIDTH, - rect.bottom - rect.top, window->toolbar.toolbar, - (HMENU)(size_t)bcount, hg_hinstance, 0); - - sprintf(slider_name,"Buddy%p", val); - trackbar->buddy = CreateWindowEx(0, "STATIC", slider_name, - WS_CHILD | SS_RIGHT, - rect.left, rect.top, - HG_BUDDY_WIDTH, rect.bottom - rect.top, - window->toolbar.toolbar, 0, hg_hinstance, 0); - - icvSetWindowLongPtr( trackbar->hwnd, CV_USERDATA, trackbar ); - - /* Minimize the number of rows */ - SendMessage( window->toolbar.toolbar, TB_SETROWS, - MAKEWPARAM(1, FALSE), (LPARAM)&rect ); - } - else - { - trackbar->data = 0; - trackbar->notify = 0; - trackbar->notify2 = 0; - } + SendMessage(window.toolbar.toolbar, TB_ADDBUTTONS, 1, (LPARAM)&tbs); + + TBBUTTONINFO tbis = {}; + + /* Adjust button size to the slider */ + tbis.cbSize = sizeof(tbis); + tbis.dwMask = TBIF_SIZE; + + RECT rect = { 0 }; + GetClientRect(window.hwnd, &rect); + tbis.cx = (unsigned short)(rect.right - rect.left); + + SendMessage(window.toolbar.toolbar, TB_SETBUTTONINFO, + (WPARAM)tbs.idCommand, (LPARAM)&tbis); + + /* Get button pos */ + SendMessage(window.toolbar.toolbar, TB_GETITEMRECT, + (WPARAM)tbs.idCommand, (LPARAM)&rect); + + /* Create a slider */ + auto trackbar = std::make_shared(window, trackbar_name); + trackbar->id = bcount; + window.toolbar.trackbars.push_back(trackbar); + + auto slider_name = cv::format("Trackbar%p", trackbar.get()); + trackbar->hwnd = CreateWindowEx(0, TRACKBAR_CLASS, slider_name.c_str(), + WS_CHILD | WS_VISIBLE | TBS_AUTOTICKS | + TBS_FIXEDLENGTH | TBS_HORZ | TBS_BOTTOM, + rect.left + HG_BUDDY_WIDTH, rect.top, + rect.right - rect.left - HG_BUDDY_WIDTH, + rect.bottom - rect.top, window.toolbar.toolbar, + (HMENU)(size_t)bcount, hg_hinstance, 0); + + slider_name = cv::format("Buddy%p", trackbar.get()); + trackbar->buddy = CreateWindowEx(0, "STATIC", slider_name.c_str(), + WS_CHILD | 
SS_RIGHT, + rect.left, rect.top, + HG_BUDDY_WIDTH, rect.bottom - rect.top, + window.toolbar.toolbar, 0, hg_hinstance, 0); + + icvSetWindowLongPtr(trackbar->hwnd, CV_USERDATA, (void*)trackbar.get()); + + /* Minimize the number of rows */ + SendMessage(window.toolbar.toolbar, TB_SETROWS, + MAKEWPARAM(1, FALSE), (LPARAM)&rect); trackbar->maxval = count; /* Adjust slider parameters */ SendMessage(trackbar->hwnd, TBM_SETRANGEMIN, (WPARAM)TRUE, (LPARAM)0); SendMessage(trackbar->hwnd, TBM_SETRANGEMAX, (WPARAM)TRUE, (LPARAM)count); - SendMessage(trackbar->hwnd, TBM_SETTICFREQ, (WPARAM)1, (LPARAM)0 ); - if( val ) - pos = *val; + SendMessage(trackbar->hwnd, TBM_SETTICFREQ, (WPARAM)1, (LPARAM)0); - SendMessage(trackbar->hwnd, TBM_SETPOS, (WPARAM)TRUE, (LPARAM)pos ); - SendMessage(window->toolbar.toolbar, TB_AUTOSIZE, 0, 0); + int pos = 0; + SendMessage(trackbar->hwnd, TBM_SETPOS, (WPARAM)TRUE, (LPARAM)pos); + SendMessage(window.toolbar.toolbar, TB_AUTOSIZE, 0, 0); trackbar->pos = -1; - icvUpdateTrackbar( trackbar, pos ); - ShowWindow( trackbar->buddy, SW_SHOW ); - ShowWindow( trackbar->hwnd, SW_SHOW ); - - trackbar->notify = on_notify; - trackbar->notify2 = on_notify2; - trackbar->userdata = userdata; - trackbar->data = val; + icvUpdateTrackbar(*trackbar, pos); + ShowWindow(trackbar->buddy, SW_SHOW); + ShowWindow(trackbar->hwnd, SW_SHOW); /* Resize the window to reflect the toolbar resizing*/ icvUpdateWindowPos(window); - result = 1; - - __END__; + trackbar->onChangeCallback = onChange; + trackbar->userdata = userdata; - return result; + return trackbar; } CV_IMPL int -cvCreateTrackbar( const char* trackbar_name, const char* window_name, - int* val, int count, CvTrackbarCallback on_notify ) +cvCreateTrackbar(const char* trackbar_name, const char* window_name, + int* val, int count, CvTrackbarCallback on_notify) { - return icvCreateTrackbar( trackbar_name, window_name, val, count, - on_notify, 0, 0 ); + return icvCreateTrackbar(trackbar_name, window_name, val, count, + 
on_notify, 0, 0); } CV_IMPL int -cvCreateTrackbar2( const char* trackbar_name, const char* window_name, - int* val, int count, CvTrackbarCallback2 on_notify2, - void* userdata ) +cvCreateTrackbar2(const char* trackbar_name, const char* window_name, + int* val, int count, CvTrackbarCallback2 on_notify2, + void* userdata) { - return icvCreateTrackbar( trackbar_name, window_name, val, count, - 0, on_notify2, userdata ); + return icvCreateTrackbar(trackbar_name, window_name, val, count, + 0, on_notify2, userdata); } CV_IMPL void -cvSetMouseCallback( const char* window_name, CvMouseCallback on_mouse, void* param ) +cvSetMouseCallback(const char* name, CvMouseCallback on_mouse, void* param) { - CV_FUNCNAME( "cvSetMouseCallback" ); - - __BEGIN__; + CV_FUNCNAME("cvSetMouseCallback"); - CvWindow* window = 0; + if (!name) + CV_Error(Error::StsNullPtr, "NULL window name"); - if( !window_name ) - CV_ERROR( CV_StsNullPtr, "NULL window name" ); + AutoLock lock(getWindowMutex()); - window = icvFindWindowByName(window_name); - if( !window ) - EXIT; + auto window = icvFindWindowByName(name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", name)); window->on_mouse = on_mouse; window->on_mouse_param = param; - - __END__; } -CV_IMPL int cvGetTrackbarPos( const char* trackbar_name, const char* window_name ) +CV_IMPL int cvGetTrackbarPos(const char* trackbar_name, const char* window_name) { - int pos = -1; - - CV_FUNCNAME( "cvGetTrackbarPos" ); + CV_FUNCNAME("cvGetTrackbarPos"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - CvWindow* window; - CvTrackbar* trackbar = 0; + if (trackbar_name == 0 || window_name == 0) + CV_Error(Error::StsNullPtr, "NULL trackbar or window name"); - if( trackbar_name == 0 || window_name == 0 ) - CV_ERROR( CV_StsNullPtr, "NULL trackbar or window name" ); - - window = icvFindWindowByName( window_name ); - if( window ) - trackbar = icvFindTrackbarByName( window, trackbar_name ); - - if( trackbar ) - pos = trackbar->pos; + auto window = 
icvFindWindowByName(window_name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", window_name)); - __END__; + auto trackbar = icvFindTrackbarByName(window, trackbar_name); + if (!trackbar) + CV_Error_(Error::StsNullPtr, ("NULL trackbar: '%s'", trackbar_name)); - return pos; + return trackbar->pos; } -CV_IMPL void cvSetTrackbarPos( const char* trackbar_name, const char* window_name, int pos ) +CV_IMPL void cvSetTrackbarPos(const char* trackbar_name, const char* window_name, int pos) { - CV_FUNCNAME( "cvSetTrackbarPos" ); + CV_FUNCNAME("cvSetTrackbarPos"); - __BEGIN__; + AutoLock lock(getWindowMutex()); - CvWindow* window; - CvTrackbar* trackbar = 0; + if (trackbar_name == 0 || window_name == 0) + CV_Error(Error::StsNullPtr, "NULL trackbar or window name"); - if( trackbar_name == 0 || window_name == 0 ) - CV_ERROR( CV_StsNullPtr, "NULL trackbar or window name" ); + auto window = icvFindWindowByName(window_name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", window_name)); - window = icvFindWindowByName( window_name ); - if( window ) - trackbar = icvFindTrackbarByName( window, trackbar_name ); + auto trackbar = icvFindTrackbarByName(window, trackbar_name); + if (!trackbar) + CV_Error_(Error::StsNullPtr, ("NULL trackbar: '%s'", trackbar_name)); - if( trackbar ) { - if( pos < 0 ) + if (pos < 0) pos = 0; - if( pos > trackbar->maxval ) + if (pos > trackbar->maxval) pos = trackbar->maxval; - SendMessage( trackbar->hwnd, TBM_SETPOS, (WPARAM)TRUE, (LPARAM)pos ); - icvUpdateTrackbar( trackbar, pos ); + SendMessage(trackbar->hwnd, TBM_SETPOS, (WPARAM)TRUE, (LPARAM)pos); + icvUpdateTrackbar(*trackbar, pos); } - - __END__; } CV_IMPL void cvSetTrackbarMax(const char* trackbar_name, const char* window_name, int maxval) { - CV_FUNCNAME( "cvSetTrackbarMax" ); + CV_FUNCNAME("cvSetTrackbarMax"); + + if (trackbar_name == 0 || window_name == 0) + { + CV_Error(Error::StsNullPtr, "NULL trackbar or window name"); + } + + AutoLock 
lock(getWindowMutex()); + + auto window = icvFindWindowByName(window_name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", window_name)); - __BEGIN__; + auto trackbar = icvFindTrackbarByName(window, trackbar_name); + if (!trackbar) + CV_Error_(Error::StsNullPtr, ("NULL trackbar: '%s'", trackbar_name)); + // FIXIT if (maxval >= 0) { - CvWindow* window = 0; - CvTrackbar* trackbar = 0; - if (trackbar_name == 0 || window_name == 0) + // The position will be min(pos, maxval). + trackbar->maxval = (trackbar->minval>maxval)?trackbar->minval:maxval; + SendMessage(trackbar->hwnd, TBM_SETRANGEMAX, (WPARAM)TRUE, (LPARAM)maxval); + } +} + + +CV_IMPL void cvSetTrackbarMin(const char* trackbar_name, const char* window_name, int minval) +{ + CV_FUNCNAME("cvSetTrackbarMin"); + + if (trackbar_name == 0 || window_name == 0) + { + CV_Error(Error::StsNullPtr, "NULL trackbar or window name"); + } + + AutoLock lock(getWindowMutex()); + + auto window = icvFindWindowByName(window_name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", window_name)); + + auto trackbar = icvFindTrackbarByName(window, trackbar_name); + if (!trackbar) + CV_Error_(Error::StsNullPtr, ("NULL trackbar: '%s'", trackbar_name)); + + // FIXIT + if (minval >= 0) + { + // The position will be min(pos, maxval). 
+ trackbar->minval = (minvalmaxval)?minval:trackbar->maxval; + SendMessage(trackbar->hwnd, TBM_SETRANGEMIN, (WPARAM)TRUE, (LPARAM)minval); + } +} + + +CV_IMPL void* cvGetWindowHandle(const char* window_name) +{ + CV_FUNCNAME("cvGetWindowHandle"); + + AutoLock lock(getWindowMutex()); + + if (window_name == 0) + CV_Error(Error::StsNullPtr, "NULL window name"); + + auto window = icvFindWindowByName(window_name); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%s'", window_name)); + + return (void*)window->hwnd; +} + +// FIXIT: result is not safe to use +CV_IMPL const char* cvGetWindowName(void* window_handle) +{ + CV_FUNCNAME("cvGetWindowName"); + + AutoLock lock(getWindowMutex()); + + if (window_handle == 0) + CV_Error(Error::StsNullPtr, "NULL window handle"); + + auto window = icvWindowByHWND((HWND)window_handle); + if (!window) + CV_Error_(Error::StsNullPtr, ("NULL window: '%p'", window_handle)); + + return window->name.c_str(); +} + + +CV_IMPL void +cvSetPreprocessFuncWin32_(const void* callback) +{ + hg_on_preprocess = (CvWin32WindowCallback)callback; +} + +CV_IMPL void +cvSetPostprocessFuncWin32_(const void* callback) +{ + hg_on_postprocess = (CvWin32WindowCallback)callback; +} + + + +namespace cv { namespace impl { + +using namespace cv::highgui_backend; + +class Win32UITrackbar; + +class Win32UIWindow + : public UIWindow + , public std::enable_shared_from_this +{ +protected: + const std::string name_; + std::weak_ptr window_; + std::map > trackbars_; +public: + Win32UIWindow(const std::string& name, const std::shared_ptr& window) + : name_(name) + , window_(window) + { + // nothing + } + + ~Win32UIWindow() CV_OVERRIDE + { + if (!window_.expired()) + destroy(); + CV_LOG_DEBUG(NULL, "OpenCV/UI/Win32UI: Win32UIWindow(" << name_ << ") is disposed"); + } + + const std::string& getID() const CV_OVERRIDE { return name_; } + + bool isActive() const CV_OVERRIDE { return !window_.expired(); } + + void destroy() CV_OVERRIDE + { + cv::AutoLock 
lock(getWindowMutex()); + if (!window_.expired()) { - CV_ERROR(CV_StsNullPtr, "NULL trackbar or window name"); + auto window = window_.lock(); + if (window) + window->destroy(); + window_.reset(); } + } - window = icvFindWindowByName(window_name); - if (window) + void imshow(InputArray image) CV_OVERRIDE + { + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + Mat image_mat = image.getMat(); + showImage_(window, image_mat); + } + + double getProperty(int prop) const CV_OVERRIDE + { + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + // see cvGetWindowProperty + switch ((WindowPropertyFlags)prop) { - trackbar = icvFindTrackbarByName(window, trackbar_name); - if (trackbar) - { - // The position will be min(pos, maxval). - trackbar->maxval = (trackbar->minval>maxval)?trackbar->minval:maxval; - SendMessage(trackbar->hwnd, TBM_SETRANGEMAX, (WPARAM)TRUE, (LPARAM)maxval); - } + case WND_PROP_FULLSCREEN: + return (double)window.status; + + case WND_PROP_AUTOSIZE: + return (window.flags & WINDOW_AUTOSIZE) ? 1.0 : 0.0; + + case WND_PROP_ASPECT_RATIO: + return static_cast(window.width) / window.height; + +#ifdef HAVE_OPENGL + case WND_PROP_OPENGL: + return window.useGl ? 
1.0 : 0.0; +#endif + + case WND_PROP_VISIBLE: + return 1.0; + + case WND_PROP_TOPMOST: + return getPropTopmost_(window); + + case WND_PROP_VSYNC: + return getPropVsync_(window); + + // don't use default, add unsupported cases below: + // case WND_PROP_UNSUPPORTED: // fallthru + // break; } + return std::numeric_limits::quiet_NaN(); } - __END__; -} + bool setProperty(int prop, double value) CV_OVERRIDE + { + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + // see cvSetWindowProperty + switch ((WindowPropertyFlags)prop) + { + case WND_PROP_FULLSCREEN: + if (value != WINDOW_NORMAL && value != WINDOW_FULLSCREEN) // bad arg + break; + setModeWindow_(window, (int)value); + return true; + case WND_PROP_TOPMOST: + return setPropTopmost_(window, value != 0.0); -CV_IMPL void cvSetTrackbarMin(const char* trackbar_name, const char* window_name, int minval) -{ - CV_FUNCNAME( "cvSetTrackbarMin" ); + case WND_PROP_VSYNC: + return setPropVsync_(window, value != 0.0); - __BEGIN__; + // don't use default, add unsupported cases below: + // case WND_PROP_UNSUPPORTED: // fallthru + case WND_PROP_AUTOSIZE: // fallthru + case WND_PROP_ASPECT_RATIO: // fallthru + case WND_PROP_OPENGL: // fallthru + case WND_PROP_VISIBLE: // fallthru + break; + } + return false; + } - if (minval >= 0) + void resize(int width, int height) CV_OVERRIDE { - CvWindow* window = 0; - CvTrackbar* trackbar = 0; - if (trackbar_name == 0 || window_name == 0) + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + resizeWindow_(window, Size(width, height)); + } + + void move(int x, int y) CV_OVERRIDE + { + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + moveWindow_(window, Point(x, y)); + } + + Rect getImageRect() const CV_OVERRIDE + { + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + return getImageRect_(window); + } + + void 
setTitle(const std::string& title) CV_OVERRIDE + { + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + if (!SetWindowText(window.frame, title.c_str())) + CV_Error_(Error::StsError, ("Failed to set \"%s\" window title to \"%s\"", window.name.c_str(), title.c_str())); + } + + void setMouseCallback(MouseCallback onMouse, void* userdata /*= 0*/) CV_OVERRIDE + { + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + window.on_mouse = onMouse; + window.on_mouse_param = userdata; + } + + std::shared_ptr createTrackbar( + const std::string& name, + int count, + TrackbarCallback onChange /*= 0*/, + void* userdata /*= 0*/ + ) CV_OVERRIDE + { + auto window_ptr = window_.lock(); + CV_Assert(window_ptr); + CvWindow& window = *window_ptr; + CV_LOG_INFO(NULL, "OpenCV/UI: Creating Win32UI trackbar at '" << name_ << "': '" << name << "'"); + auto trackbar = createTrackbar_(window, name, count, onChange, userdata); + auto ui_trackbar = std::make_shared(name, trackbar, shared_from_this()); { - CV_ERROR(CV_StsNullPtr, "NULL trackbar or window name"); + cv::AutoLock lock(getWindowMutex()); + trackbars_.emplace(name, ui_trackbar); } + return std::static_pointer_cast(ui_trackbar); + } - window = icvFindWindowByName(window_name); - if (window) + std::shared_ptr findTrackbar(const std::string& name) CV_OVERRIDE + { + cv::AutoLock lock(getWindowMutex()); + auto i = trackbars_.find(name); + if (i != trackbars_.end()) { - trackbar = icvFindTrackbarByName(window, trackbar_name); - if (trackbar) - { - // The position will be min(pos, maxval). 
- trackbar->minval = (minvalmaxval)?minval:trackbar->maxval; - SendMessage(trackbar->hwnd, TBM_SETRANGEMIN, (WPARAM)TRUE, (LPARAM)minval); - } + return std::static_pointer_cast(i->second); } + return std::shared_ptr(); } - - __END__; -} +}; // Win32UIWindow -CV_IMPL void* cvGetWindowHandle( const char* window_name ) +class Win32UITrackbar : public UITrackbar { - void* hwnd = 0; +protected: + /*const*/ std::string name_; + std::weak_ptr trackbar_; + std::weak_ptr parent_; + std::map > trackbars_; +public: + Win32UITrackbar(const std::string& name, const std::shared_ptr& trackbar, const std::shared_ptr& parent) + : trackbar_(trackbar) + , parent_(parent) + { + name_ = std::string("<") + name + ">@" + parent->getID(); + } - CV_FUNCNAME( "cvGetWindowHandle" ); + ~Win32UITrackbar() CV_OVERRIDE + { + if (!trackbar_.expired()) + destroy(); + CV_LOG_DEBUG(NULL, "OpenCV/UI/Win32UI: Win32UITrackbar(" << name_ << ") is disposed"); + } - __BEGIN__; + const std::string& getID() const CV_OVERRIDE { return name_; } - CvWindow* window; + bool isActive() const CV_OVERRIDE { return !trackbar_.expired(); } - if( window_name == 0 ) - CV_ERROR( CV_StsNullPtr, "NULL window name" ); + void destroy() CV_OVERRIDE + { + // nothing (destroyed with parent window, dedicated trackbar removal is not supported) + } - window = icvFindWindowByName( window_name ); - if( window ) - hwnd = (void*)window->hwnd; + int getPos() const CV_OVERRIDE + { + auto trackbar_ptr = trackbar_.lock(); + CV_Assert(trackbar_ptr); + CvTrackbar& trackbar = *trackbar_ptr; + return trackbar.pos; + } + void setPos(int pos) CV_OVERRIDE + { + auto trackbar_ptr = trackbar_.lock(); + CV_Assert(trackbar_ptr); + CvTrackbar& trackbar = *trackbar_ptr; + SendMessage(trackbar.hwnd, TBM_SETPOS, (WPARAM)TRUE, (LPARAM)pos); + icvUpdateTrackbar(trackbar, pos); + } - __END__; + cv::Range getRange() const CV_OVERRIDE + { + auto trackbar_ptr = trackbar_.lock(); + CV_Assert(trackbar_ptr); + CvTrackbar& trackbar = *trackbar_ptr; + return 
cv::Range(trackbar.minval, trackbar.maxval); + } - return hwnd; -} + void setRange(const cv::Range& range) CV_OVERRIDE + { + auto trackbar_ptr = trackbar_.lock(); + CV_Assert(trackbar_ptr); + CvTrackbar& trackbar = *trackbar_ptr; + CV_CheckLE(range.start, range.end, "Invalid trackbar range"); + trackbar.minval = range.start; + trackbar.maxval = range.start; + SendMessage(trackbar.hwnd, TBM_SETRANGEMIN, (WPARAM)TRUE, (LPARAM)trackbar.minval); + SendMessage(trackbar.hwnd, TBM_SETRANGEMAX, (WPARAM)TRUE, (LPARAM)trackbar.maxval); + } +}; // Win32UITrackbar -CV_IMPL const char* cvGetWindowName( void* window_handle ) +class Win32BackendUI : public UIBackend { - const char* window_name = ""; +public: + ~Win32BackendUI() CV_OVERRIDE + { + destroyAllWindows(); + } - CV_FUNCNAME( "cvGetWindowName" ); + void destroyAllWindows() CV_OVERRIDE + { + cvDestroyAllWindows(); + } - __BEGIN__; + // namedWindow + virtual std::shared_ptr createWindow( + const std::string& winname, + int flags + ) CV_OVERRIDE + { + CV_LOG_INFO(NULL, "OpenCV/UI: Creating Win32UI window: " << winname << " (" << flags << ")"); + auto window = namedWindow_(winname, flags); + auto ui_window = std::make_shared(winname, window); + return ui_window; + } - CvWindow* window; + int waitKeyEx(int delay) CV_OVERRIDE + { + return cvWaitKey(delay); + } + int pollKey() CV_OVERRIDE + { + return pollKey_W32(); + } +}; // Win32BackendUI - if( window_handle == 0 ) - CV_ERROR( CV_StsNullPtr, "NULL window" ); +static +std::shared_ptr& getInstance() +{ + static std::shared_ptr g_instance = std::make_shared(); + return g_instance; +} - window = icvWindowByHWND( (HWND)window_handle ); - if( window ) - window_name = window->name; +} // namespace impl - __END__; +#ifndef BUILD_PLUGIN +namespace highgui_backend { - return window_name; +std::shared_ptr createUIBackendWin32UI() +{ + return impl::getInstance(); } +} // namespace highgui_backend +#endif -CV_IMPL void -cvSetPreprocessFuncWin32_(const void* callback) +} // namespace + + 
+#ifdef BUILD_PLUGIN + +#define ABI_VERSION 0 +#define API_VERSION 0 +#include "plugin_api.hpp" + +static +CvResult cv_getInstance(CV_OUT CvPluginUIBackend* handle) CV_NOEXCEPT { - hg_on_preprocess = (CvWin32WindowCallback)callback; + try + { + if (!handle) + return CV_ERROR_FAIL; + *handle = cv::impl::getInstance().get(); + return CV_ERROR_OK; + } + catch (...) + { + return CV_ERROR_FAIL; + } } -CV_IMPL void -cvSetPostprocessFuncWin32_(const void* callback) +static const OpenCV_UI_Plugin_API plugin_api = { - hg_on_postprocess = (CvWin32WindowCallback)callback; + { + sizeof(OpenCV_UI_Plugin_API), ABI_VERSION, API_VERSION, + CV_VERSION_MAJOR, CV_VERSION_MINOR, CV_VERSION_REVISION, CV_VERSION_STATUS, + "Win32 OpenCV UI plugin" + }, + { + /* 1*/cv_getInstance + } +}; + +const OpenCV_UI_Plugin_API* CV_API_CALL opencv_ui_plugin_init_v0(int requested_abi_version, int requested_api_version, void* /*reserved=NULL*/) CV_NOEXCEPT +{ + if (requested_abi_version == ABI_VERSION && requested_api_version <= API_VERSION) + return &plugin_api; + return NULL; } -#endif //_WIN32 +#endif // BUILD_PLUGIN + +#endif // HAVE_WIN32UI From 55e1dfb778b806272262871c3060ae32a4ff20d0 Mon Sep 17 00:00:00 2001 From: SamFC10 Date: Sun, 20 Jun 2021 13:19:29 +0530 Subject: [PATCH 011/128] Fix BatchNorm reinitialization --- modules/dnn/src/layers/batch_norm_layer.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index 27c3db6c4414..42676c79386f 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -29,6 +29,7 @@ namespace dnn class BatchNormLayerImpl CV_FINAL : public BatchNormLayer { public: + Mat origin_weights, origin_bias; Mat weights_, bias_; UMat umat_weight, umat_bias; mutable int dims; @@ -82,11 +83,11 @@ class BatchNormLayerImpl CV_FINAL : public BatchNormLayer const float* weightsData = hasWeights ? 
blobs[weightsBlobIndex].ptr() : 0; const float* biasData = hasBias ? blobs[biasBlobIndex].ptr() : 0; - weights_.create(1, (int)n, CV_32F); - bias_.create(1, (int)n, CV_32F); + origin_weights.create(1, (int)n, CV_32F); + origin_bias.create(1, (int)n, CV_32F); - float* dstWeightsData = weights_.ptr(); - float* dstBiasData = bias_.ptr(); + float* dstWeightsData = origin_weights.ptr(); + float* dstBiasData = origin_bias.ptr(); for (size_t i = 0; i < n; ++i) { @@ -94,15 +95,12 @@ class BatchNormLayerImpl CV_FINAL : public BatchNormLayer dstWeightsData[i] = w; dstBiasData[i] = (hasBias ? biasData[i] : 0.0f) - w * meanData[i] * varMeanScale; } - // We will use blobs to store origin weights and bias to restore them in case of reinitialization. - weights_.copyTo(blobs[0].reshape(1, 1)); - bias_.copyTo(blobs[1].reshape(1, 1)); } virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE { - blobs[0].reshape(1, 1).copyTo(weights_); - blobs[1].reshape(1, 1).copyTo(bias_); + origin_weights.reshape(1, 1).copyTo(weights_); + origin_bias.reshape(1, 1).copyTo(bias_); } void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE From bb60cb0bf97338848859e5402518ff9b53c2a6d1 Mon Sep 17 00:00:00 2001 From: kikaxa Date: Thu, 20 May 2021 16:01:36 +0300 Subject: [PATCH 012/128] Reenable filesystem for ios builds --- .../core/include/opencv2/core/utils/filesystem.private.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/include/opencv2/core/utils/filesystem.private.hpp b/modules/core/include/opencv2/core/utils/filesystem.private.hpp index ea2591c9de1d..72b2bb947968 100644 --- a/modules/core/include/opencv2/core/utils/filesystem.private.hpp +++ b/modules/core/include/opencv2/core/utils/filesystem.private.hpp @@ -16,8 +16,8 @@ # define OPENCV_HAVE_FILESYSTEM_SUPPORT 1 # elif defined(__APPLE__) # include -# if (defined(TARGET_OS_OSX) && TARGET_OS_OSX) || (!defined(TARGET_OS_OSX) && !TARGET_OS_IPHONE) -# define 
OPENCV_HAVE_FILESYSTEM_SUPPORT 1 // OSX only +# if (defined(TARGET_OS_OSX) && TARGET_OS_OSX) || (defined(TARGET_OS_IOS) && TARGET_OS_IOS) +# define OPENCV_HAVE_FILESYSTEM_SUPPORT 1 // OSX, iOS only # endif # else /* unknown */ From 8be86cbdfd3c941fc12500bcb61a034a9fa9e148 Mon Sep 17 00:00:00 2001 From: Dale Phurrough Date: Tue, 22 Jun 2021 05:32:54 +0200 Subject: [PATCH 013/128] add usageFlags to UMat static factories - add abi compatible overloads - add test case --- modules/core/include/opencv2/core/mat.hpp | 27 ++++++++++++++------- modules/core/src/matrix_operations.cpp | 8 +++---- modules/core/src/umatrix.cpp | 28 +++++++++++----------- modules/core/test/ocl/test_matrix_expr.cpp | 18 ++++++++++++++ 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 5e667b213242..6768be76834b 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -2451,7 +2451,8 @@ class CV_EXPORTS UMat //! <0 - a diagonal from the lower half) UMat diag(int d=0) const; //! constructs a square diagonal matrix which main diagonal is vector "d" - static UMat diag(const UMat& d); + static UMat diag(const UMat& d, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat diag(const UMat& d) { return diag(d, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload //! returns deep copy of the matrix, i.e. the data is copied UMat clone() const CV_NODISCARD; @@ -2485,14 +2486,22 @@ class CV_EXPORTS UMat double dot(InputArray m) const; //! 
Matlab-style matrix initialization - static UMat zeros(int rows, int cols, int type); - static UMat zeros(Size size, int type); - static UMat zeros(int ndims, const int* sz, int type); - static UMat ones(int rows, int cols, int type); - static UMat ones(Size size, int type); - static UMat ones(int ndims, const int* sz, int type); - static UMat eye(int rows, int cols, int type); - static UMat eye(Size size, int type); + static UMat zeros(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat zeros(Size size, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat zeros(int ndims, const int* sz, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat zeros(int rows, int cols, int type) { return zeros(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + static UMat zeros(Size size, int type) { return zeros(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + static UMat zeros(int ndims, const int* sz, int type) { return zeros(ndims, sz, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + static UMat ones(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat ones(Size size, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat ones(int ndims, const int* sz, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat ones(int rows, int cols, int type) { return ones(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + static UMat ones(Size size, int type) { return ones(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + static UMat ones(int ndims, const int* sz, int type) { return ones(ndims, sz, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + static UMat eye(int rows, int cols, int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat eye(Size size, 
int type, UMatUsageFlags usageFlags /*= USAGE_DEFAULT*/); + static UMat eye(int rows, int cols, int type) { return eye(rows, cols, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload + static UMat eye(Size size, int type) { return eye(size, type, USAGE_DEFAULT); } // OpenCV 5.0: remove abi compatibility overload //! allocates new matrix data unless the matrix already has specified size and type. // previous data is unreferenced if needed. diff --git a/modules/core/src/matrix_operations.cpp b/modules/core/src/matrix_operations.cpp index 83c8aaeb5705..227c7aaef774 100644 --- a/modules/core/src/matrix_operations.cpp +++ b/modules/core/src/matrix_operations.cpp @@ -229,14 +229,14 @@ void cv::setIdentity( InputOutputArray _m, const Scalar& s ) namespace cv { -UMat UMat::eye(int rows, int cols, int type) +UMat UMat::eye(int rows, int cols, int type, UMatUsageFlags usageFlags) { - return UMat::eye(Size(cols, rows), type); + return UMat::eye(Size(cols, rows), type, usageFlags); } -UMat UMat::eye(Size size, int type) +UMat UMat::eye(Size size, int type, UMatUsageFlags usageFlags) { - UMat m(size, type); + UMat m(size, type, usageFlags); setIdentity(m); return m; } diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index c80d240ecc02..bf5dfb68a318 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -951,11 +951,11 @@ UMat UMat::reshape(int new_cn, int new_rows) const return hdr; } -UMat UMat::diag(const UMat& d) +UMat UMat::diag(const UMat& d, UMatUsageFlags usageFlags) { CV_Assert( d.cols == 1 || d.rows == 1 ); int len = d.rows + d.cols - 1; - UMat m(len, len, d.type(), Scalar(0)); + UMat m(len, len, d.type(), Scalar(0), usageFlags); UMat md = m.diag(); if( d.cols == 1 ) d.copyTo(md); @@ -1323,34 +1323,34 @@ UMat UMat::t() const return m; } -UMat UMat::zeros(int rows, int cols, int type) +UMat UMat::zeros(int rows, int cols, int type, UMatUsageFlags usageFlags) { - return UMat(rows, cols, type, 
Scalar::all(0)); + return UMat(rows, cols, type, Scalar::all(0), usageFlags); } -UMat UMat::zeros(Size size, int type) +UMat UMat::zeros(Size size, int type, UMatUsageFlags usageFlags) { - return UMat(size, type, Scalar::all(0)); + return UMat(size, type, Scalar::all(0), usageFlags); } -UMat UMat::zeros(int ndims, const int* sz, int type) +UMat UMat::zeros(int ndims, const int* sz, int type, UMatUsageFlags usageFlags) { - return UMat(ndims, sz, type, Scalar::all(0)); + return UMat(ndims, sz, type, Scalar::all(0), usageFlags); } -UMat UMat::ones(int rows, int cols, int type) +UMat UMat::ones(int rows, int cols, int type, UMatUsageFlags usageFlags) { - return UMat::ones(Size(cols, rows), type); + return UMat(rows, cols, type, Scalar(1), usageFlags); } -UMat UMat::ones(Size size, int type) +UMat UMat::ones(Size size, int type, UMatUsageFlags usageFlags) { - return UMat(size, type, Scalar(1)); + return UMat(size, type, Scalar(1), usageFlags); } -UMat UMat::ones(int ndims, const int* sz, int type) +UMat UMat::ones(int ndims, const int* sz, int type, UMatUsageFlags usageFlags) { - return UMat(ndims, sz, type, Scalar(1)); + return UMat(ndims, sz, type, Scalar(1), usageFlags); } } diff --git a/modules/core/test/ocl/test_matrix_expr.cpp b/modules/core/test/ocl/test_matrix_expr.cpp index 7a5ff72cb24e..f11c0a6ebb6d 100644 --- a/modules/core/test/ocl/test_matrix_expr.cpp +++ b/modules/core/test/ocl/test_matrix_expr.cpp @@ -76,6 +76,24 @@ OCL_TEST_P(UMatExpr, Ones) } } +//////////////////////////////// with usageFlags ///////////////////////////////////////////////// + +OCL_TEST_P(UMatExpr, WithUsageFlags) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + UMat u0 = UMat::zeros(size, type, cv::USAGE_ALLOCATE_HOST_MEMORY); + UMat u1 = UMat::ones(size, type, cv::USAGE_ALLOCATE_HOST_MEMORY); + UMat u8 = UMat::eye(size, type, cv::USAGE_ALLOCATE_HOST_MEMORY); + + EXPECT_EQ(cv::USAGE_ALLOCATE_HOST_MEMORY, u0.usageFlags); + 
EXPECT_EQ(cv::USAGE_ALLOCATE_HOST_MEMORY, u1.usageFlags); + EXPECT_EQ(cv::USAGE_ALLOCATE_HOST_MEMORY, u8.usageFlags); + } +} + //////////////////////////////// Instantiation ///////////////////////////////////////////////// OCL_INSTANTIATE_TEST_CASE_P(MatrixOperation, UMatExpr, Combine(OCL_ALL_DEPTHS_16F, OCL_ALL_CHANNELS)); From b68057d92701e24765d9ff199011f5171d320143 Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Wed, 23 Jun 2021 21:27:54 +0200 Subject: [PATCH 014/128] Do not use = 0 for a cv::Mat. There are several operator= overloads and some compilers can be confused. --- modules/calib3d/src/chessboard.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/calib3d/src/chessboard.cpp b/modules/calib3d/src/chessboard.cpp index dbc47722cba9..18e2605f53b5 100644 --- a/modules/calib3d/src/chessboard.cpp +++ b/modules/calib3d/src/chessboard.cpp @@ -3924,7 +3924,7 @@ bool findChessboardCornersSB(cv::InputArray image_, cv::Size pattern_size, { meta_.create(int(board.rowCount()),int(board.colCount()),CV_8UC1); cv::Mat meta = meta_.getMat(); - meta = 0; + meta.setTo(cv::Scalar::all(0)); for(int row =0;row < meta.rows-1;++row) { for(int col=0;col< meta.cols-1;++col) From dc5199feeae9cef33bd55cc8c161917bc3ef367b Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Mon, 21 Jun 2021 11:11:14 +0300 Subject: [PATCH 015/128] skipping missing layers and layer failures --- apps/model-diagnostics/model_diagnostics.cpp | 6 +-- modules/dnn/src/tensorflow/tf_importer.cpp | 43 +++++++++++++++++--- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/apps/model-diagnostics/model_diagnostics.cpp b/apps/model-diagnostics/model_diagnostics.cpp index 2ffeaa1ea5b9..d3934577aec6 100644 --- a/apps/model-diagnostics/model_diagnostics.cpp +++ b/apps/model-diagnostics/model_diagnostics.cpp @@ -1,6 +1,6 @@ /************************************************* USAGE: -./model_diagnostics -m +./model_diagnostics -m 
**************************************************/ #include #include @@ -32,7 +32,7 @@ static std::string checkFileExists(const std::string& fileName) } std::string diagnosticKeys = - "{ model m | | Path to the model .onnx file. }" + "{ model m | | Path to the model file. }" "{ config c | | Path to the model configuration file. }" "{ framework f | | [Optional] Name of the model framework. }"; @@ -41,7 +41,7 @@ std::string diagnosticKeys = int main( int argc, const char** argv ) { CommandLineParser argParser(argc, argv, diagnosticKeys); - argParser.about("Use this tool to run the diagnostics of provided ONNX model" + argParser.about("Use this tool to run the diagnostics of provided ONNX/TF model" "to obtain the information about its support (supported layers)."); if (argc == 1) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index ed6a40792053..15f88007b4d1 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -32,6 +32,8 @@ namespace cv { namespace dnn { CV__DNN_INLINE_NS_BEGIN +extern bool DNN_DIAGNOSTICS_RUN; + #if HAVE_PROTOBUF using ::google::protobuf::RepeatedField; @@ -471,6 +473,7 @@ class TFImporter TFImporter(Net& net, const char *dataModel, size_t lenModel, const char *dataConfig = NULL, size_t lenConfig = 0); protected: + std::unique_ptr utilNet; Net& dstNet; void populateNet(); @@ -2337,7 +2340,8 @@ void TFImporter::parseCustomLayer(tensorflow::GraphDef& net, const tensorflow::N } TFImporter::TFImporter(Net& net, const char *model, const char *config) - : dstNet(net), dispatch(buildDispatchMap()) + : utilNet(DNN_DIAGNOSTICS_RUN ? new Net : nullptr), + dstNet(DNN_DIAGNOSTICS_RUN ? 
*utilNet : net), dispatch(buildDispatchMap()) { if (model && model[0]) { @@ -2358,7 +2362,8 @@ TFImporter::TFImporter( const char *dataModel, size_t lenModel, const char *dataConfig, size_t lenConfig ) - : dstNet(net), dispatch(buildDispatchMap()) + : utilNet(DNN_DIAGNOSTICS_RUN ? new Net : nullptr), + dstNet(DNN_DIAGNOSTICS_RUN ? *utilNet : net), dispatch(buildDispatchMap()) { if (dataModel != NULL && lenModel > 0) { @@ -2615,6 +2620,11 @@ DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) return it->second; } +Ptr dummy_constructor(LayerParams & params) +{ + return new Layer(params); +} + void TFImporter::populateNet() { CV_Assert(netBin.ByteSize() || netTxt.ByteSize()); @@ -2757,9 +2767,9 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer) const std::string& name = layer.name(); const std::string& type = layer.op(); + LayerParams layerParams; try { - LayerParams layerParams; if (layers_to_ignore.find(name) != layers_to_ignore.end()) { @@ -2777,13 +2787,36 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer) } else { + if (DNN_DIAGNOSTICS_RUN && !LayerFactory::createLayerInstance(type, layerParams)) + { + CV_LOG_ERROR(NULL, "DNN/TF: Node='" << name << "' of type='"<< type + << "' is not supported. This error won't be displayed again."); + LayerFactory::registerLayer(type, dummy_constructor); + } + parseCustomLayer(net, layer, layerParams); } } catch (const std::exception& e) { - CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "'. Exception: " << e.what()); - throw; + if (!DNN_DIAGNOSTICS_RUN) + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "' of type='" << type + << "'. Exception: " << e.what()); + throw; + } + else + { + CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "' of type='" << type + << "'. 
Exception: " << e.what()); + + // internal layer failure (didn't call addLayer) + if (dstNet.getLayerId(name) == -1) + { + int id = dstNet.addLayer(name, type, layerParams); + layer_id[name] = id; + } + } } } From c95a56450dd8c612b3f8e83d680a0a6c6533acf0 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Sat, 26 Jun 2021 00:09:33 +0300 Subject: [PATCH 016/128] Merge pull request #20156 from smirnov-alexey:as/gapi_remote_infer G-API: Support remote inference * Extend MediaFrame to be able to extract additional info besides access * Add API for remote inference * Add default implementation for blobParams() * Add default implementation for blobParams() * Address review comments * Fix any_cast usage * Add comment on the default blobParams() * Address review comments * Add missing rctx * Minor fix * Fix indentation and comment * Address review comments * Add documentation --- .../gapi/include/opencv2/gapi/infer/ie.hpp | 44 +++++++++++++++++-- modules/gapi/src/backends/ie/giebackend.cpp | 40 ++++++++++++++++- .../src/backends/ie/giebackend/giewrapper.hpp | 26 +++++++++-- 3 files changed, 100 insertions(+), 10 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/infer/ie.hpp b/modules/gapi/include/opencv2/gapi/infer/ie.hpp index 70712ba74039..2be739e51840 100644 --- a/modules/gapi/include/opencv2/gapi/infer/ie.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/ie.hpp @@ -74,7 +74,11 @@ struct ParamDesc { std::map> reshape_table; std::unordered_set layer_names_to_reshape; + // NB: Number of asynchronous infer requests size_t nireq; + + // NB: An optional config to setup RemoteContext for IE + cv::util::any context_config; }; } // namespace detail @@ -115,7 +119,8 @@ template class Params { , {} , {} , {} - , 1u} { + , 1u + , {}} { }; /** @overload @@ -135,7 +140,8 @@ template class Params { , {} , {} , {} - , 1u} { + , 1u + , {}} { }; /** @brief Specifies sequence of network input layers names for inference.
@@ -217,6 +223,30 @@ template class Params { return *this; } + /** @brief Specifies configuration for RemoteContext in InferenceEngine. + + When RemoteContext is configured the backend imports the networks using the context. + It also expects cv::MediaFrames to be actually remote, to operate with blobs via the context. + + @param ctx_cfg cv::util::any value which holds InferenceEngine::ParamMap. + @return reference to this parameter structure. + */ + Params& cfgContextParams(const cv::util::any& ctx_cfg) { + desc.context_config = ctx_cfg; + return *this; + } + + /** @overload + Function with an rvalue parameter. + + @param ctx_cfg cv::util::any value which holds InferenceEngine::ParamMap. + @return reference to this parameter structure. + */ + Params& cfgContextParams(cv::util::any&& ctx_cfg) { + desc.context_config = std::move(ctx_cfg); + return *this; + } + /** @brief Specifies number of asynchronous inference requests. @param nireq Number of inference asynchronous requests. @@ -318,7 +348,10 @@ class Params { const std::string &model, const std::string &weights, const std::string &device) - : desc{ model, weights, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Load, true, {}, {}, {}, 1u}, m_tag(tag) { + : desc{ model, weights, device, {}, {}, {}, 0u, 0u, + detail::ParamDesc::Kind::Load, true, {}, {}, {}, 1u, + {}}, + m_tag(tag) { }; /** @overload @@ -333,7 +366,10 @@ class Params { Params(const std::string &tag, const std::string &model, const std::string &device) - : desc{ model, {}, device, {}, {}, {}, 0u, 0u, detail::ParamDesc::Kind::Import, true, {}, {}, {}, 1u}, m_tag(tag) { + : desc{ model, {}, device, {}, {}, {}, 0u, 0u, + detail::ParamDesc::Kind::Import, true, {}, {}, {}, 1u, + {}}, + m_tag(tag) { }; /** @see ie::Params::pluginConfig. 
*/ diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 46b6bdbb97ab..77a6515f8530 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -222,8 +222,17 @@ struct IEUnit { IE::ExecutableNetwork this_network; cv::gimpl::ie::wrap::Plugin this_plugin; + InferenceEngine::RemoteContext::Ptr rctx = nullptr; + explicit IEUnit(const cv::gapi::ie::detail::ParamDesc &pp) : params(pp) { + InferenceEngine::ParamMap* ctx_params = + cv::util::any_cast(&params.context_config); + if (ctx_params != nullptr) { + auto ie_core = cv::gimpl::ie::wrap::getCore(); + rctx = ie_core.CreateContext(params.device_id, *ctx_params); + } + if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { net = cv::gimpl::ie::wrap::readNetwork(params); inputs = net.getInputsInfo(); @@ -231,7 +240,7 @@ struct IEUnit { } else if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Import) { this_plugin = cv::gimpl::ie::wrap::getPlugin(params); this_plugin.SetConfig(params.config); - this_network = cv::gimpl::ie::wrap::importNetwork(this_plugin, params); + this_network = cv::gimpl::ie::wrap::importNetwork(this_plugin, params, rctx); // FIXME: ICNNetwork returns InputsDataMap/OutputsDataMap, // but ExecutableNetwork returns ConstInputsDataMap/ConstOutputsDataMap inputs = cv::gimpl::ie::wrap::toInputsDataMap(this_network.GetInputsInfo()); @@ -279,7 +288,8 @@ struct IEUnit { // for loadNetwork they can be obtained by using readNetwork non_const_this->this_plugin = cv::gimpl::ie::wrap::getPlugin(params); non_const_this->this_plugin.SetConfig(params.config); - non_const_this->this_network = cv::gimpl::ie::wrap::loadNetwork(non_const_this->this_plugin, net, params); + non_const_this->this_network = cv::gimpl::ie::wrap::loadNetwork(non_const_this->this_plugin, + net, params, rctx); } return {params, this_plugin, this_network}; @@ -481,7 +491,32 @@ using GConstGIEModel = ade::ConstTypedGraph ,
IECallable >; +inline IE::Blob::Ptr extractRemoteBlob(IECallContext& ctx, std::size_t i) { + GAPI_Assert(ctx.inShape(i) == cv::GShape::GFRAME && + "Remote blob is supported for MediaFrame only"); + + cv::util::any any_blob_params = ctx.inFrame(i).blobParams(); + auto ie_core = cv::gimpl::ie::wrap::getCore(); + + using ParamType = std::pair; + + ParamType* blob_params = cv::util::any_cast(&any_blob_params); + if (blob_params == nullptr) { + GAPI_Assert(false && "Incorrect type of blobParams: " + "expected std::pair"); + } + + return ctx.uu.rctx->CreateBlob(blob_params->first, + blob_params->second); +} + inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i) { + if (ctx.uu.rctx != nullptr) { + return extractRemoteBlob(ctx, i); + } + switch (ctx.inShape(i)) { case cv::GShape::GFRAME: { const auto& frame = ctx.inFrame(i); @@ -1060,6 +1095,7 @@ struct InferList: public cv::detail::KernelTag { } IE::Blob::Ptr this_blob = extractBlob(*ctx, 1); + std::vector> cached_dims(ctx->uu.params.num_out); for (auto i : ade::util::iota(ctx->uu.params.num_out)) { const IE::DataPtr& ie_out = ctx->uu.outputs.at(ctx->uu.params.output_names[i]); diff --git a/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp b/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp index 3927c802b713..7e67cb8989d6 100644 --- a/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp +++ b/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp @@ -13,6 +13,7 @@ #include #include +#include #include "opencv2/gapi/infer/ie.hpp" @@ -50,12 +51,29 @@ GAPI_EXPORTS IE::Core getCore(); GAPI_EXPORTS IE::Core getPlugin(const GIEParam& params); GAPI_EXPORTS inline IE::ExecutableNetwork loadNetwork( IE::Core& core, const IE::CNNNetwork& net, - const GIEParam& params) { - return core.LoadNetwork(net, params.device_id); + const GIEParam& params, + IE::RemoteContext::Ptr rctx = nullptr) { + if (rctx != nullptr) { + return core.LoadNetwork(net, rctx); + } else { + return core.LoadNetwork(net, 
params.device_id); + } } GAPI_EXPORTS inline IE::ExecutableNetwork importNetwork( IE::Core& core, - const GIEParam& param) { - return core.ImportNetwork(param.model_path, param.device_id, {}); + const GIEParam& params, + IE::RemoteContext::Ptr rctx = nullptr) { + if (rctx != nullptr) { + std::filebuf blobFile; + if (!blobFile.open(params.model_path, std::ios::in | std::ios::binary)) + { + blobFile.close(); + throw std::runtime_error("Could not open file"); + } + std::istream graphBlob(&blobFile); + return core.ImportNetwork(graphBlob, rctx); + } else { + return core.ImportNetwork(params.model_path, params.device_id, {}); + } } #endif // INF_ENGINE_RELEASE < 2019020000 }}}} From 42d644ef9134bcb620dd00f7ab7a6d7d039bfdf2 Mon Sep 17 00:00:00 2001 From: xzvno Date: Sun, 27 Jun 2021 05:01:31 +0800 Subject: [PATCH 017/128] Merge pull request #20293 from endjkv:fix-mem-leak-when-throw * fix memory leak when exception is thrown --- modules/core/src/system.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index af4a62181697..441457d50fd2 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -1835,7 +1835,15 @@ void* TLSDataContainer::getData() const { // Create new data instance and save it to TLS storage pData = createDataInstance(); - getTlsStorage().setData(key_, pData); + try + { + getTlsStorage().setData(key_, pData); + } + catch (...) 
+ { + deleteDataInstance(pData); + throw; + } } return pData; } From 61a5378aeb3a6be13cbca1a1e6a6874358996fb4 Mon Sep 17 00:00:00 2001 From: Giles Payne Date: Sun, 27 Jun 2021 21:08:25 +0900 Subject: [PATCH 018/128] Improvements/fixes for unsigned type handling in Swift/Kotlin --- modules/core/misc/java/src/java/core+MatAt.kt | 93 ++++++++-- modules/core/misc/objc/common/Mat.mm | 2 +- modules/core/misc/objc/common/MatExt.swift | 146 +++++++++++++-- modules/core/misc/objc/test/MatTest.swift | 172 ++++++++++++++++-- 4 files changed, 366 insertions(+), 47 deletions(-) diff --git a/modules/core/misc/java/src/java/core+MatAt.kt b/modules/core/misc/java/src/java/core+MatAt.kt index f48a3deaedf9..c81e21057f27 100644 --- a/modules/core/misc/java/src/java/core+MatAt.kt +++ b/modules/core/misc/java/src/java/core+MatAt.kt @@ -3,6 +3,16 @@ package org.opencv.core import org.opencv.core.Mat.* import java.lang.RuntimeException +fun Mat.get(row: Int, col: Int, data: UByteArray) = this.get(row, col, data.asByteArray()) +fun Mat.get(indices: IntArray, data: UByteArray) = this.get(indices, data.asByteArray()) +fun Mat.put(row: Int, col: Int, data: UByteArray) = this.put(row, col, data.asByteArray()) +fun Mat.put(indices: IntArray, data: UByteArray) = this.put(indices, data.asByteArray()) + +fun Mat.get(row: Int, col: Int, data: UShortArray) = this.get(row, col, data.asShortArray()) +fun Mat.get(indices: IntArray, data: UShortArray) = this.get(indices, data.asShortArray()) +fun Mat.put(row: Int, col: Int, data: UShortArray) = this.put(row, col, data.asShortArray()) +fun Mat.put(indices: IntArray, data: UShortArray) = this.put(indices, data.asShortArray()) + /*** * Example use: * @@ -19,6 +29,7 @@ inline fun Mat.at(row: Int, col: Int) : Atable = col ) UByte::class -> AtableUByte(this, row, col) as Atable + UShort::class -> AtableUShort(this, row, col) as Atable else -> throw RuntimeException("Unsupported class type") } @@ -30,6 +41,7 @@ inline fun Mat.at(idx: IntArray) : Atable = idx 
) UByte::class -> AtableUByte(this, idx) as Atable + UShort::class -> AtableUShort(this, idx) as Atable else -> throw RuntimeException("Unsupported class type") } @@ -38,46 +50,95 @@ class AtableUByte(val mat: Mat, val indices: IntArray): Atable { constructor(mat: Mat, row: Int, col: Int) : this(mat, intArrayOf(row, col)) override fun getV(): UByte { - val data = ByteArray(1) - mat[indices, data] - return data[0].toUByte() + val data = UByteArray(1) + mat.get(indices, data) + return data[0] } override fun setV(v: UByte) { - val data = byteArrayOf(v.toByte()) + val data = ubyteArrayOf(v) mat.put(indices, data) } override fun getV2c(): Tuple2 { - val data = ByteArray(2) - mat[indices, data] - return Tuple2(data[0].toUByte(), data[1].toUByte()) + val data = UByteArray(2) + mat.get(indices, data) + return Tuple2(data[0], data[1]) } override fun setV2c(v: Tuple2) { - val data = byteArrayOf(v._0.toByte(), v._1.toByte()) + val data = ubyteArrayOf(v._0, v._1) mat.put(indices, data) } override fun getV3c(): Tuple3 { - val data = ByteArray(3) - mat[indices, data] - return Tuple3(data[0].toUByte(), data[1].toUByte(), data[2].toUByte()) + val data = UByteArray(3) + mat.get(indices, data) + return Tuple3(data[0], data[1], data[2]) } override fun setV3c(v: Tuple3) { - val data = byteArrayOf(v._0.toByte(), v._1.toByte(), v._2.toByte()) + val data = ubyteArrayOf(v._0, v._1, v._2) mat.put(indices, data) } override fun getV4c(): Tuple4 { - val data = ByteArray(4) - mat[indices, data] - return Tuple4(data[0].toUByte(), data[1].toUByte(), data[2].toUByte(), data[3].toUByte()) + val data = UByteArray(4) + mat.get(indices, data) + return Tuple4(data[0], data[1], data[2], data[3]) } override fun setV4c(v: Tuple4) { - val data = byteArrayOf(v._0.toByte(), v._1.toByte(), v._2.toByte(), v._3.toByte()) + val data = ubyteArrayOf(v._0, v._1, v._2, v._3) + mat.put(indices, data) + } +} + +class AtableUShort(val mat: Mat, val indices: IntArray): Atable { + + constructor(mat: Mat, row: Int, col: 
Int) : this(mat, intArrayOf(row, col)) + + override fun getV(): UShort { + val data = UShortArray(1) + mat.get(indices, data) + return data[0] + } + + override fun setV(v: UShort) { + val data = ushortArrayOf(v) + mat.put(indices, data) + } + + override fun getV2c(): Tuple2 { + val data = UShortArray(2) + mat.get(indices, data) + return Tuple2(data[0], data[1]) + } + + override fun setV2c(v: Tuple2) { + val data = ushortArrayOf(v._0, v._1) + mat.put(indices, data) + } + + override fun getV3c(): Tuple3 { + val data = UShortArray(3) + mat.get(indices, data) + return Tuple3(data[0], data[1], data[2]) + } + + override fun setV3c(v: Tuple3) { + val data = ushortArrayOf(v._0, v._1, v._2) + mat.put(indices, data) + } + + override fun getV4c(): Tuple4 { + val data = UShortArray(4) + mat.get(indices, data) + return Tuple4(data[0], data[1], data[2], data[3]) + } + + override fun setV4c(v: Tuple4) { + val data = ushortArrayOf(v._0, v._1, v._2, v._3) mat.put(indices, data) } } diff --git a/modules/core/misc/objc/common/Mat.mm b/modules/core/misc/objc/common/Mat.mm index 5d41a3622e71..045bd8393ea3 100644 --- a/modules/core/misc/objc/common/Mat.mm +++ b/modules/core/misc/objc/common/Mat.mm @@ -548,7 +548,7 @@ - (void)put:(uchar*)dest data:(NSArray*)data offset:(int)offset count if (depth == CV_8U) { putData(dest, count, ^uchar (int index) { return cv::saturate_cast(data[offset + index].doubleValue);} ); } else if (depth == CV_8S) { - putData(dest, count, ^char (int index) { return cv::saturate_cast(data[offset + index].doubleValue);} ); + putData(dest, count, ^schar (int index) { return cv::saturate_cast(data[offset + index].doubleValue);} ); } else if (depth == CV_16U) { putData(dest, count, ^ushort (int index) { return cv::saturate_cast(data[offset + index].doubleValue);} ); } else if (depth == CV_16S) { diff --git a/modules/core/misc/objc/common/MatExt.swift b/modules/core/misc/objc/common/MatExt.swift index 5ce3a5e6fb56..a6ba548599d8 100644 --- 
a/modules/core/misc/objc/common/MatExt.swift +++ b/modules/core/misc/objc/common/MatExt.swift @@ -62,6 +62,21 @@ public extension Mat { } } + @discardableResult func get(indices:[Int32], data:inout [UInt8]) throws -> Int32 { + let channels = CvType.channels(Int32(type())) + if Int32(data.count) % channels != 0 { + try throwIncompatibleBufferSize(count: data.count, channels: channels) + } else if depth() != CvType.CV_8U { + try throwIncompatibleDataType(typeName: CvType.type(toString: type())) + } + let count = Int32(data.count) + return data.withUnsafeMutableBufferPointer { body in + body.withMemoryRebound(to: Int8.self) { reboundBody in + return __get(indices as [NSNumber], count: count, byteBuffer: reboundBody.baseAddress!) + } + } + } + @discardableResult func get(indices:[Int32], data:inout [Double]) throws -> Int32 { let channels = CvType.channels(Int32(type())) if Int32(data.count) % channels != 0 { @@ -114,10 +129,29 @@ public extension Mat { } } + @discardableResult func get(indices:[Int32], data:inout [UInt16]) throws -> Int32 { + let channels = CvType.channels(Int32(type())) + if Int32(data.count) % channels != 0 { + try throwIncompatibleBufferSize(count: data.count, channels: channels) + } else if depth() != CvType.CV_16U { + try throwIncompatibleDataType(typeName: CvType.type(toString: type())) + } + let count = Int32(data.count) + return data.withUnsafeMutableBufferPointer { body in + body.withMemoryRebound(to: Int16.self) { reboundBody in + return __get(indices as [NSNumber], count: count, shortBuffer: reboundBody.baseAddress!) 
+ } + } + } + @discardableResult func get(row: Int32, col: Int32, data:inout [Int8]) throws -> Int32 { return try get(indices: [row, col], data: &data) } + @discardableResult func get(row: Int32, col: Int32, data:inout [UInt8]) throws -> Int32 { + return try get(indices: [row, col], data: &data) + } + @discardableResult func get(row: Int32, col: Int32, data:inout [Double]) throws -> Int32 { return try get(indices: [row, col], data: &data) } @@ -134,6 +168,10 @@ public extension Mat { return try get(indices: [row, col], data: &data) } + @discardableResult func get(row: Int32, col: Int32, data:inout [UInt16]) throws -> Int32 { + return try get(indices: [row, col], data: &data) + } + @discardableResult func put(indices:[Int32], data:[Int8]) throws -> Int32 { let channels = CvType.channels(Int32(type())) if Int32(data.count) % channels != 0 { @@ -147,6 +185,21 @@ public extension Mat { } } + @discardableResult func put(indices:[Int32], data:[UInt8]) throws -> Int32 { + let channels = CvType.channels(Int32(type())) + if Int32(data.count) % channels != 0 { + try throwIncompatibleBufferSize(count: data.count, channels: channels) + } else if depth() != CvType.CV_8U { + try throwIncompatibleDataType(typeName: CvType.type(toString: type())) + } + let count = Int32(data.count) + return data.withUnsafeBufferPointer { body in + body.withMemoryRebound(to: Int8.self) { reboundBody in + return __put(indices as [NSNumber], count: count, byteBuffer: reboundBody.baseAddress!) 
+ } + } + } + @discardableResult func put(indices:[Int32], data:[Int8], offset: Int, length: Int32) throws -> Int32 { let channels = CvType.channels(Int32(type())) if Int32(data.count) % channels != 0 { @@ -214,10 +267,29 @@ public extension Mat { } } + @discardableResult func put(indices:[Int32], data:[UInt16]) throws -> Int32 { + let channels = CvType.channels(Int32(type())) + if Int32(data.count) % channels != 0 { + try throwIncompatibleBufferSize(count: data.count, channels: channels) + } else if depth() != CvType.CV_16U { + try throwIncompatibleDataType(typeName: CvType.type(toString: type())) + } + let count = Int32(data.count) + return data.withUnsafeBufferPointer { body in + body.withMemoryRebound(to: Int16.self) { reboundBody in + return __put(indices as [NSNumber], count: count, shortBuffer: reboundBody.baseAddress!) + } + } + } + @discardableResult func put(row: Int32, col: Int32, data:[Int8]) throws -> Int32 { return try put(indices: [row, col], data: data) } + @discardableResult func put(row: Int32, col: Int32, data:[UInt8]) throws -> Int32 { + return try put(indices: [row, col], data: data) + } + @discardableResult func put(row: Int32, col: Int32, data: [Int8], offset: Int, length: Int32) throws -> Int32 { return try put(indices: [row, col], data: data, offset: offset, length: length) } @@ -238,6 +310,10 @@ public extension Mat { return try put(indices: [row, col], data: data) } + @discardableResult func put(row: Int32, col: Int32, data: [UInt16]) throws -> Int32 { + return try put(indices: [row, col], data: data) + } + @discardableResult func get(row: Int32, col: Int32) -> [Double] { return get(indices: [row, col]) } @@ -303,46 +379,46 @@ public class MatAt { extension UInt8: Atable { public static func getAt(m: Mat, indices:[Int32]) -> UInt8 { - var tmp = [Int8](repeating: 0, count: 1) + var tmp = [UInt8](repeating: 0, count: 1) try! 
m.get(indices: indices, data: &tmp) - return UInt8(bitPattern: tmp[0]) + return tmp[0] } public static func putAt(m: Mat, indices: [Int32], v: UInt8) { - let tmp = [Int8(bitPattern: v)] + let tmp = [v] try! m.put(indices: indices, data: tmp) } public static func getAt2c(m: Mat, indices:[Int32]) -> (UInt8, UInt8) { - var tmp = [Int8](repeating: 0, count: 2) + var tmp = [UInt8](repeating: 0, count: 2) try! m.get(indices: indices, data: &tmp) - return (UInt8(bitPattern: tmp[0]), UInt8(bitPattern: tmp[1])) + return (tmp[0], tmp[1]) } public static func putAt2c(m: Mat, indices: [Int32], v: (UInt8, UInt8)) { - let tmp = [Int8(bitPattern: v.0), Int8(bitPattern: v.1)] + let tmp = [v.0, v.1] try! m.put(indices: indices, data: tmp) } public static func getAt3c(m: Mat, indices:[Int32]) -> (UInt8, UInt8, UInt8) { - var tmp = [Int8](repeating: 0, count: 3) + var tmp = [UInt8](repeating: 0, count: 3) try! m.get(indices: indices, data: &tmp) - return (UInt8(bitPattern: tmp[0]), UInt8(bitPattern: tmp[1]), UInt8(bitPattern: tmp[2])) + return (tmp[0], tmp[1], tmp[2]) } public static func putAt3c(m: Mat, indices: [Int32], v: (UInt8, UInt8, UInt8)) { - let tmp = [Int8(bitPattern: v.0), Int8(bitPattern: v.1), Int8(bitPattern: v.2)] + let tmp = [v.0, v.1, v.2] try! m.put(indices: indices, data: tmp) } public static func getAt4c(m: Mat, indices:[Int32]) -> (UInt8, UInt8, UInt8, UInt8) { - var tmp = [Int8](repeating: 0, count: 4) + var tmp = [UInt8](repeating: 0, count: 4) try! m.get(indices: indices, data: &tmp) - return (UInt8(bitPattern: tmp[0]), UInt8(bitPattern: tmp[1]), UInt8(bitPattern: tmp[2]), UInt8(bitPattern: tmp[3])) + return (tmp[0], tmp[1], tmp[2], tmp[3]) } public static func putAt4c(m: Mat, indices: [Int32], v: (UInt8, UInt8, UInt8, UInt8)) { - let tmp = [Int8(bitPattern: v.0), Int8(bitPattern: v.1), Int8(bitPattern: v.2), Int8(bitPattern: v.3)] + let tmp = [v.0, v.1, v.2, v.3] try! 
m.put(indices: indices, data: tmp) } } @@ -531,6 +607,52 @@ extension Int32: Atable { } } +extension UInt16: Atable { + public static func getAt(m: Mat, indices:[Int32]) -> UInt16 { + var tmp = [UInt16](repeating: 0, count: 1) + try! m.get(indices: indices, data: &tmp) + return tmp[0] + } + + public static func putAt(m: Mat, indices: [Int32], v: UInt16) { + let tmp = [v] + try! m.put(indices: indices, data: tmp) + } + + public static func getAt2c(m: Mat, indices:[Int32]) -> (UInt16, UInt16) { + var tmp = [UInt16](repeating: 0, count: 2) + try! m.get(indices: indices, data: &tmp) + return (tmp[0], tmp[1]) + } + + public static func putAt2c(m: Mat, indices: [Int32], v: (UInt16, UInt16)) { + let tmp = [v.0, v.1] + try! m.put(indices: indices, data: tmp) + } + + public static func getAt3c(m: Mat, indices:[Int32]) -> (UInt16, UInt16, UInt16) { + var tmp = [UInt16](repeating: 0, count: 3) + try! m.get(indices: indices, data: &tmp) + return (tmp[0], tmp[1], tmp[2]) + } + + public static func putAt3c(m: Mat, indices: [Int32], v: (UInt16, UInt16, UInt16)) { + let tmp = [v.0, v.1, v.2] + try! m.put(indices: indices, data: tmp) + } + + public static func getAt4c(m: Mat, indices:[Int32]) -> (UInt16, UInt16, UInt16, UInt16) { + var tmp = [UInt16](repeating: 0, count: 4) + try! m.get(indices: indices, data: &tmp) + return (tmp[0], tmp[1], tmp[2], tmp[3]) + } + + public static func putAt4c(m: Mat, indices: [Int32], v: (UInt16, UInt16, UInt16, UInt16)) { + let tmp = [v.0, v.1, v.2, v.3] + try! 
m.put(indices: indices, data: tmp) + } +} + extension Int16: Atable { public static func getAt(m: Mat, indices:[Int32]) -> Int16 { var tmp = [Int16](repeating: 0, count: 1) diff --git a/modules/core/misc/objc/test/MatTest.swift b/modules/core/misc/objc/test/MatTest.swift index 14c440b5eb88..8a513505cc14 100644 --- a/modules/core/misc/objc/test/MatTest.swift +++ b/modules/core/misc/objc/test/MatTest.swift @@ -308,15 +308,15 @@ class MatTests: OpenCVTestCase { XCTAssert([340] == sm.get(row: 1, col: 1)) } - func testGetIntIntByteArray() throws { - let m = try getTestMat(size: 5, type: CvType.CV_8UC3) + func testGetIntIntInt8Array() throws { + let m = try getTestMat(size: 5, type: CvType.CV_8SC3) var goodData = [Int8](repeating: 0, count: 9) // whole Mat var bytesNum = try m.get(row: 1, col: 1, data: &goodData) XCTAssertEqual(9, bytesNum) - XCTAssert([110, 111, 112, 120, 121, 122, -126, -125, -124] == goodData) + XCTAssert([110, 111, 112, 120, 121, 122, 127, 127, 127] == goodData) var badData = [Int8](repeating: 0, count: 7) XCTAssertThrowsError(bytesNum = try m.get(row: 0, col: 0, data: &badData)) @@ -326,11 +326,36 @@ class MatTests: OpenCVTestCase { var buff00 = [Int8](repeating: 0, count: 3) bytesNum = try sm.get(row: 0, col: 0, data: &buff00) XCTAssertEqual(3, bytesNum) - XCTAssert(buff00 == [-26, -25, -24]) + XCTAssert(buff00 == [127, 127, 127]) var buff11 = [Int8](repeating: 0, count: 3) bytesNum = try sm.get(row: 1, col: 1, data: &buff11) XCTAssertEqual(3, bytesNum) - XCTAssert(buff11 == [-1, -1, -1]) + XCTAssert(buff11 == [127, 127, 127]) + } + + func testGetIntIntUInt8Array() throws { + let m = try getTestMat(size: 5, type: CvType.CV_8UC3) + var goodData = [UInt8](repeating: 0, count: 9) + + // whole Mat + var bytesNum = try m.get(row: 1, col: 1, data: &goodData) + + XCTAssertEqual(9, bytesNum) + XCTAssert([110, 111, 112, 120, 121, 122, 130, 131, 132] == goodData) + + var badData = [UInt8](repeating: 0, count: 7) + XCTAssertThrowsError(bytesNum = try 
m.get(row: 0, col: 0, data: &badData)) + + // sub-Mat + let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5) + var buff00 = [UInt8](repeating: 0, count: 3) + bytesNum = try sm.get(row: 0, col: 0, data: &buff00) + XCTAssertEqual(3, bytesNum) + XCTAssert(buff00 == [230, 231, 232]) + var buff11 = [UInt8](repeating: 0, count: 3) + bytesNum = try sm.get(row: 1, col: 1, data: &buff11) + XCTAssertEqual(3, bytesNum) + XCTAssert(buff11 == [255, 255, 255]) } func testGetIntIntDoubleArray() throws { @@ -399,7 +424,7 @@ class MatTests: OpenCVTestCase { XCTAssert(buff11 == [340, 341, 0, 0]) } - func testGetIntIntShortArray() throws { + func testGetIntIntInt16Array() throws { let m = try getTestMat(size: 5, type: CvType.CV_16SC2) var buff = [Int16](repeating: 0, count: 6) @@ -421,6 +446,28 @@ class MatTests: OpenCVTestCase { XCTAssert(buff11 == [340, 341, 0, 0]) } + func testGetIntIntUInt16Array() throws { + let m = try getTestMat(size: 5, type: CvType.CV_16UC2) + var buff = [UInt16](repeating: 0, count: 6) + + // whole Mat + var bytesNum = try m.get(row: 1, col: 1, data: &buff) + + XCTAssertEqual(12, bytesNum); + XCTAssert(buff == [110, 111, 120, 121, 130, 131]) + + // sub-Mat + let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5) + var buff00 = [UInt16](repeating: 0, count: 4) + bytesNum = try sm.get(row: 0, col: 0, data: &buff00) + XCTAssertEqual(8, bytesNum) + XCTAssert(buff00 == [230, 231, 240, 241]) + var buff11 = [UInt16](repeating: 0, count: 4) + bytesNum = try sm.get(row: 1, col: 1, data: &buff11) + XCTAssertEqual(4, bytesNum); + XCTAssert(buff11 == [340, 341, 0, 0]) + } + func testHeight() { XCTAssertEqual(gray0.rows(), gray0.height()) XCTAssertEqual(rgbLena.rows(), rgbLena.height()) @@ -653,7 +700,7 @@ class MatTests: OpenCVTestCase { try assertMatEqual(truth!, m1, OpenCVTestCase.EPS) } - func testPutIntIntByteArray() throws { + func testPutIntIntInt8Array() throws { let m = Mat(rows: 5, cols: 5, type: CvType.CV_8SC3, scalar: Scalar(1, 2, 3)) 
let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5) var buff = [Int8](repeating: 0, count: 6) @@ -683,7 +730,37 @@ class MatTests: OpenCVTestCase { XCTAssert(buff == buff0) } - func testPutIntArrayByteArray() throws { + func testPutIntIntUInt8Array() throws { + let m = Mat(rows: 5, cols: 5, type: CvType.CV_8UC3, scalar: Scalar(1, 2, 3)) + let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5) + var buff = [UInt8](repeating: 0, count: 6) + let buff0:[UInt8] = [10, 20, 30, 40, 50, 60] + let buff1:[UInt8] = [255, 254, 253, 252, 251, 250] + + var bytesNum = try m.put(row:1, col:2, data:buff0) + + XCTAssertEqual(6, bytesNum) + bytesNum = try m.get(row: 1, col: 2, data: &buff) + XCTAssertEqual(6, bytesNum) + XCTAssert(buff == buff0) + + bytesNum = try sm.put(row:0, col:0, data:buff1) + + XCTAssertEqual(6, bytesNum) + bytesNum = try sm.get(row: 0, col: 0, data: &buff) + XCTAssertEqual(6, bytesNum) + XCTAssert(buff == buff1) + bytesNum = try m.get(row: 2, col: 3, data: &buff) + XCTAssertEqual(6, bytesNum); + XCTAssert(buff == buff1) + + let m1 = m.row(1) + bytesNum = try m1.get(row: 0, col: 2, data: &buff) + XCTAssertEqual(6, bytesNum) + XCTAssert(buff == buff0) + } + + func testPutIntArrayInt8Array() throws { let m = Mat(sizes: [5, 5, 5], type: CvType.CV_8SC3, scalar: Scalar(1, 2, 3)) let sm = m.submat(ranges: [Range(start: 0, end: 2), Range(start: 1, end: 3), Range(start: 2, end: 4)]) var buff = [Int8](repeating: 0, count: 6) @@ -714,10 +791,41 @@ class MatTests: OpenCVTestCase { XCTAssert(buff == buff0) } + func testPutIntArrayUInt8Array() throws { + let m = Mat(sizes: [5, 5, 5], type: CvType.CV_8UC3, scalar: Scalar(1, 2, 3)) + let sm = m.submat(ranges: [Range(start: 0, end: 2), Range(start: 1, end: 3), Range(start: 2, end: 4)]) + var buff = [UInt8](repeating: 0, count: 6) + let buff0:[UInt8] = [10, 20, 30, 40, 50, 60] + let buff1:[UInt8] = [255, 254, 253, 252, 251, 250] + + var bytesNum = try m.put(indices:[1, 2, 0], data:buff0) + + 
XCTAssertEqual(6, bytesNum) + bytesNum = try m.get(indices: [1, 2, 0], data: &buff) + XCTAssertEqual(6, bytesNum) + XCTAssert(buff == buff0) + + bytesNum = try sm.put(indices: [0, 0, 0], data: buff1) + + XCTAssertEqual(6, bytesNum) + bytesNum = try sm.get(indices: [0, 0, 0], data: &buff) + XCTAssertEqual(6, bytesNum) + XCTAssert(buff == buff1) + + bytesNum = try m.get(indices: [0, 1, 2], data: &buff) + XCTAssertEqual(6, bytesNum) + XCTAssert(buff == buff1) + + let m1 = m.submat(ranges: [Range(start: 1,end: 2), Range.all(), Range.all()]) + bytesNum = try m1.get(indices: [0, 2, 0], data: &buff) + XCTAssertEqual(6, bytesNum) + XCTAssert(buff == buff0) + } + func testPutIntIntDoubleArray() throws { - let m = Mat(rows: 5, cols: 5, type: CvType.CV_8SC3, scalar: Scalar(1, 2, 3)) + let m = Mat(rows: 5, cols: 5, type: CvType.CV_8UC3, scalar: Scalar(1, 2, 3)) let sm = m.submat(rowStart: 2, rowEnd: 4, colStart: 3, colEnd: 5) - var buff = [Int8](repeating: 0, count: 6) + var buff = [UInt8](repeating: 0, count: 6) var bytesNum = try m.put(row: 1, col: 2, data: [10, 20, 30, 40, 50, 60] as [Double]) @@ -731,16 +839,16 @@ class MatTests: OpenCVTestCase { XCTAssertEqual(6, bytesNum) bytesNum = try sm.get(row: 0, col: 0, data: &buff) XCTAssertEqual(6, bytesNum); - XCTAssert(buff == [-1, -2, -3, -4, -5, -6]) + XCTAssert(buff == [255, 254, 253, 252, 251, 250]) bytesNum = try m.get(row: 2, col: 3, data: &buff) XCTAssertEqual(6, bytesNum); - XCTAssert(buff == [-1, -2, -3, -4, -5, -6]) + XCTAssert(buff == [255, 254, 253, 252, 251, 250]) } func testPutIntArrayDoubleArray() throws { - let m = Mat(sizes: [5, 5, 5], type: CvType.CV_8SC3, scalar: Scalar(1, 2, 3)) + let m = Mat(sizes: [5, 5, 5], type: CvType.CV_8UC3, scalar: Scalar(1, 2, 3)) let sm = m.submat(ranges: [Range(start: 0, end: 2), Range(start: 1, end: 3), Range(start: 2, end: 4)]) - var buff = [Int8](repeating: 0, count: 6) + var buff = [UInt8](repeating: 0, count: 6) var bytesNum = try m.put(indices: [1, 2, 0], data: [10, 20, 30, 
40, 50, 60] as [Double]) @@ -754,10 +862,10 @@ class MatTests: OpenCVTestCase { XCTAssertEqual(6, bytesNum); bytesNum = try sm.get(indices: [0, 0, 0], data: &buff) XCTAssertEqual(6, bytesNum); - XCTAssert(buff == [-1, -2, -3, -4, -5, -6]) + XCTAssert(buff == [255, 254, 253, 252, 251, 250]) bytesNum = try m.get(indices: [0, 1, 2], data: &buff) XCTAssertEqual(6, bytesNum) - XCTAssert(buff == [-1, -2, -3, -4, -5, -6]) + XCTAssert(buff == [255, 254, 253, 252, 251, 250]) } func testPutIntIntFloatArray() throws { @@ -820,7 +928,7 @@ class MatTests: OpenCVTestCase { XCTAssert([40, 50, 60] == m.get(indices: [0, 1, 0])) } - func testPutIntIntShortArray() throws { + func testPutIntIntInt16Array() throws { let m = Mat(rows: 5, cols: 5, type: CvType.CV_16SC3, scalar: Scalar(-1, -2, -3)) let elements: [Int16] = [ 10, 20, 30, 40, 50, 60] @@ -834,7 +942,21 @@ class MatTests: OpenCVTestCase { XCTAssert([40, 50, 60] == m.get(row: 2, col: 4)) } - func testPutIntArrayShortArray() throws { + func testPutIntIntUInt16Array() throws { + let m = Mat(rows: 5, cols: 5, type: CvType.CV_16UC3, scalar: Scalar(-1, -2, -3)) + let elements: [UInt16] = [ 10, 20, 30, 40, 50, 60] + + var bytesNum = try m.put(row: 2, col: 3, data: elements) + + XCTAssertEqual(Int32(elements.count * 2), bytesNum) + let m1 = m.col(3) + var buff = [UInt16](repeating: 0, count: 3) + bytesNum = try m1.get(row: 2, col: 0, data: &buff) + XCTAssert(buff == [10, 20, 30]) + XCTAssert([40, 50, 60] == m.get(row: 2, col: 4)) + } + + func testPutIntArrayInt16Array() throws { let m = Mat(sizes: [5, 5, 5], type: CvType.CV_16SC3, scalar: Scalar(-1, -2, -3)) let elements: [Int16] = [ 10, 20, 30, 40, 50, 60] @@ -848,6 +970,20 @@ class MatTests: OpenCVTestCase { XCTAssert([40, 50, 60] == m.get(indices: [0, 2, 4])) } + func testPutIntArrayUInt16Array() throws { + let m = Mat(sizes: [5, 5, 5], type: CvType.CV_16UC3, scalar: Scalar(-1, -2, -3)) + let elements: [UInt16] = [ 10, 20, 30, 40, 50, 60] + + var bytesNum = try m.put(indices: [0, 
2, 3], data: elements) + + XCTAssertEqual(Int32(elements.count * 2), bytesNum) + let m1 = m.submat(ranges: [Range.all(), Range.all(), Range(start: 3, end: 4)]) + var buff = [UInt16](repeating: 0, count: 3) + bytesNum = try m1.get(indices: [0, 2, 0], data: &buff) + XCTAssert(buff == [10, 20, 30]) + XCTAssert([40, 50, 60] == m.get(indices: [0, 2, 4])) + } + func testReshapeInt() throws { let src = Mat(rows: 4, cols: 4, type: CvType.CV_8U, scalar: Scalar(0)) dst = src.reshape(channels: 4) From 4eac198270783d8924ed26ecfb82f8aa54d9e67d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 29 Jun 2021 09:00:10 +0000 Subject: [PATCH 019/128] core(persistence): fix types format handling, fix 16F support --- modules/core/src/persistence.cpp | 19 +++++++--- modules/core/test/test_io.cpp | 65 ++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 4bf52a3134df..32328361e874 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -143,17 +143,17 @@ static const char symbols[9] = "ucwsifdh"; static char typeSymbol(int depth) { CV_StaticAssert(CV_64F == 6, ""); - CV_Assert(depth >=0 && depth <= CV_64F); + CV_CheckDepth(depth, depth >=0 && depth <= CV_16F, ""); return symbols[depth]; } static int symbolToType(char c) { + if (c == 'r') + return CV_SEQ_ELTYPE_PTR; const char* pos = strchr( symbols, c ); if( !pos ) CV_Error( CV_StsBadArg, "Invalid data type specification" ); - if (c == 'r') - return CV_SEQ_ELTYPE_PTR; return static_cast(pos - symbols); } @@ -245,8 +245,12 @@ int calcStructSize( const char* dt, int initial_size ) { int size = calcElemSize( dt, initial_size ); size_t elem_max_size = 0; - for ( const char * type = dt; *type != '\0'; type++ ) { - switch ( *type ) + for ( const char * type = dt; *type != '\0'; type++ ) + { + char v = *type; + if (v >= '0' && v <= '9') + continue; // skip vector size + switch (v) { case 
'u': { elem_max_size = std::max( elem_max_size, sizeof(uchar ) ); break; } case 'c': { elem_max_size = std::max( elem_max_size, sizeof(schar ) ); break; } @@ -255,7 +259,9 @@ int calcStructSize( const char* dt, int initial_size ) case 'i': { elem_max_size = std::max( elem_max_size, sizeof(int ) ); break; } case 'f': { elem_max_size = std::max( elem_max_size, sizeof(float ) ); break; } case 'd': { elem_max_size = std::max( elem_max_size, sizeof(double) ); break; } - default: break; + case 'h': { elem_max_size = std::max(elem_max_size, sizeof(float16_t)); break; } + default: + CV_Error_(Error::StsNotImplemented, ("Unknown type identifier: '%c' in '%s'", (char)(*type), dt)); } } size = cvAlign( size, static_cast(elem_max_size) ); @@ -1054,6 +1060,7 @@ class FileStorage::Impl : public FileStorage_API CV_Assert(write_mode); size_t elemSize = fs::calcStructSize(dt.c_str(), 0); + CV_Assert(elemSize); CV_Assert( len % elemSize == 0 ); len /= elemSize; diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index d30c48536888..82bd05372da7 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -1837,4 +1837,69 @@ TEST(Core_InputOutput, FileStorage_copy_constructor_17412_heap) EXPECT_EQ(0, remove(fname.c_str())); } + +static void test_20279(FileStorage& fs) +{ + Mat m32fc1(5, 10, CV_32FC1, Scalar::all(0)); + for (size_t i = 0; i < m32fc1.total(); i++) + { + float v = (float)i; + m32fc1.at((int)i) = v * 0.5f; + } + Mat m16fc1; + // produces CV_16S output: convertFp16(m32fc1, m16fc1); + m32fc1.convertTo(m16fc1, CV_16FC1); + EXPECT_EQ(CV_16FC1, m16fc1.type()) << typeToString(m16fc1.type()); + //std::cout << m16fc1 << std::endl; + + Mat m32fc3(4, 3, CV_32FC3, Scalar::all(0)); + for (size_t i = 0; i < m32fc3.total(); i++) + { + float v = (float)i; + m32fc3.at((int)i) = Vec3f(v, v * 0.2f, -v); + } + Mat m16fc3; + m32fc3.convertTo(m16fc3, CV_16FC3); + EXPECT_EQ(CV_16FC3, m16fc3.type()) << typeToString(m16fc3.type()); + //std::cout << 
m16fc3 << std::endl; + + fs << "m16fc1" << m16fc1; + fs << "m16fc3" << m16fc3; + + string content = fs.releaseAndGetString(); + if (cvtest::debugLevel > 0) std::cout << content << std::endl; + + FileStorage fs_read(content, FileStorage::READ + FileStorage::MEMORY); + Mat m16fc1_result; + Mat m16fc3_result; + fs_read["m16fc1"] >> m16fc1_result; + ASSERT_FALSE(m16fc1_result.empty()); + EXPECT_EQ(CV_16FC1, m16fc1_result.type()) << typeToString(m16fc1_result.type()); + EXPECT_LE(cvtest::norm(m16fc1_result, m16fc1, NORM_INF), 1e-2); + + fs_read["m16fc3"] >> m16fc3_result; + ASSERT_FALSE(m16fc3_result.empty()); + EXPECT_EQ(CV_16FC3, m16fc3_result.type()) << typeToString(m16fc3_result.type()); + EXPECT_LE(cvtest::norm(m16fc3_result, m16fc3, NORM_INF), 1e-2); +} + +TEST(Core_InputOutput, FileStorage_16F_xml) +{ + FileStorage fs("test.xml", cv::FileStorage::WRITE | cv::FileStorage::MEMORY); + test_20279(fs); +} + +TEST(Core_InputOutput, FileStorage_16F_yml) +{ + FileStorage fs("test.yml", cv::FileStorage::WRITE | cv::FileStorage::MEMORY); + test_20279(fs); +} + +TEST(Core_InputOutput, FileStorage_16F_json) +{ + FileStorage fs("test.json", cv::FileStorage::WRITE | cv::FileStorage::MEMORY); + test_20279(fs); +} + + }} // namespace From 7d842f5bcffc54e25b365935a79b99b96c49e01d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 29 Jun 2021 18:48:21 +0000 Subject: [PATCH 020/128] dnn: use OpenVINO 2021.4 defines --- cmake/OpenCVDetectInferenceEngine.cmake | 4 ++-- modules/dnn/src/op_inf_engine.hpp | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake index aa4bb4a864ae..829ddbfe7e2a 100644 --- a/cmake/OpenCVDetectInferenceEngine.cmake +++ b/cmake/OpenCVDetectInferenceEngine.cmake @@ -141,9 +141,9 @@ endif() if(INF_ENGINE_TARGET) if(NOT INF_ENGINE_RELEASE) - message(WARNING "InferenceEngine version has not been set, 2021.3 will be used by default. 
Set INF_ENGINE_RELEASE variable if you experience build errors.") + message(WARNING "InferenceEngine version has not been set, 2021.4 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") endif() - set(INF_ENGINE_RELEASE "2021030000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") + set(INF_ENGINE_RELEASE "2021040000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") set_target_properties(${INF_ENGINE_TARGET} PROPERTIES INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" ) diff --git a/modules/dnn/src/op_inf_engine.hpp b/modules/dnn/src/op_inf_engine.hpp index 42008b0f10b8..a825431627bd 100644 --- a/modules/dnn/src/op_inf_engine.hpp +++ b/modules/dnn/src/op_inf_engine.hpp @@ -30,10 +30,11 @@ #define INF_ENGINE_RELEASE_2021_1 2021010000 #define INF_ENGINE_RELEASE_2021_2 2021020000 #define INF_ENGINE_RELEASE_2021_3 2021030000 +#define INF_ENGINE_RELEASE_2021_4 2021040000 #ifndef INF_ENGINE_RELEASE -#warning("IE version have not been provided via command-line. Using 2021.3 by default") -#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_3 +#warning("IE version have not been provided via command-line. 
Using 2021.4 by default") +#define INF_ENGINE_RELEASE INF_ENGINE_RELEASE_2021_4 #endif #define INF_ENGINE_VER_MAJOR_GT(ver) (((INF_ENGINE_RELEASE) / 10000) > ((ver) / 10000)) From db4b1e613ccdb449fad83c0711a3d44cb21662bc Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 29 Jun 2021 09:00:10 +0000 Subject: [PATCH 021/128] core(persistence): fix types format handling partial backport of 4eac198270783d8924ed26ecfb82f8aa54d9e67d --- modules/core/src/persistence.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 40d1cdfa0796..7e9d107c3524 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -6,6 +6,8 @@ #include "precomp.hpp" #include "persistence.hpp" +using namespace cv; + char* icv_itoa( int _val, char* buffer, int /*radix*/ ) { const int radix = 10; @@ -519,12 +521,16 @@ static const char symbols[9] = "ucwsifdr"; char icvTypeSymbol(int depth) { - CV_Assert(depth >=0 && depth < 9); + CV_StaticAssert(CV_64F == 6, ""); + CV_Assert(depth >=0 && depth <= CV_64F); + CV_CheckDepth(depth, depth >=0 && depth <= CV_64F, ""); return symbols[depth]; } static int icvSymbolToType(char c) { + if (c == 'r') + return CV_SEQ_ELTYPE_PTR; const char* pos = strchr( symbols, c ); if( !pos ) CV_Error( CV_StsBadArg, "Invalid data type specification" ); @@ -618,8 +624,12 @@ int icvCalcStructSize( const char* dt, int initial_size ) { int size = icvCalcElemSize( dt, initial_size ); size_t elem_max_size = 0; - for ( const char * type = dt; *type != '\0'; type++ ) { - switch ( *type ) + for ( const char * type = dt; *type != '\0'; type++ ) + { + char v = *type; + if (v >= '0' && v <= '9') + continue; // skip vector size + switch (v) { case 'u': { elem_max_size = std::max( elem_max_size, sizeof(uchar ) ); break; } case 'c': { elem_max_size = std::max( elem_max_size, sizeof(schar ) ); break; } @@ -628,7 +638,8 @@ int icvCalcStructSize( 
const char* dt, int initial_size ) case 'i': { elem_max_size = std::max( elem_max_size, sizeof(int ) ); break; } case 'f': { elem_max_size = std::max( elem_max_size, sizeof(float ) ); break; } case 'd': { elem_max_size = std::max( elem_max_size, sizeof(double) ); break; } - default: break; + default: + CV_Error_(Error::StsNotImplemented, ("Unknown type identifier: '%c' in '%s'", (char)(*type), dt)); } } size = cvAlign( size, static_cast(elem_max_size) ); From fb7ef76e742f408cc4e23ba7b67c99db06d0140d Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Wed, 30 Jun 2021 12:04:09 +0300 Subject: [PATCH 022/128] Merge pull request #20271 from TolyaTalamanov:at/extend-python-bindings G-API: Extend python bindings * Extend G-API bindings * Wrap timestamp, seqNo, seq_id * Wrap copy * Wrap parseSSD, parseYolo * Rewrap cv.gapi.networks * Add test for metabackend in pytnon * Remove int64 pyopencv_to --- modules/gapi/include/opencv2/gapi/gcommon.hpp | 2 + .../gapi/include/opencv2/gapi/gstreaming.hpp | 2 +- modules/gapi/include/opencv2/gapi/infer.hpp | 6 +- .../include/opencv2/gapi/infer/parsers.hpp | 20 +- .../include/opencv2/gapi/streaming/format.hpp | 2 +- .../gapi/misc/python/package/gapi/__init__.py | 25 ++ modules/gapi/misc/python/pyopencv_gapi.hpp | 220 ++++++++++-------- modules/gapi/misc/python/python_bridge.hpp | 3 + modules/gapi/misc/python/shadow_gapi.hpp | 29 +-- .../misc/python/test/test_gapi_streaming.py | 42 +++- modules/gapi/src/api/ginfer.cpp | 4 + .../gapi/src/backends/common/gmetabackend.cpp | 13 ++ modules/python/src2/cv2.cpp | 6 - 13 files changed, 235 insertions(+), 139 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/gcommon.hpp b/modules/gapi/include/opencv2/gapi/gcommon.hpp index a9cb0159014e..d3c280816ff9 100644 --- a/modules/gapi/include/opencv2/gapi/gcommon.hpp +++ b/modules/gapi/include/opencv2/gapi/gcommon.hpp @@ -44,6 +44,7 @@ namespace detail CV_UNKNOWN, // Unknown, generic, opaque-to-GAPI data type unsupported in graph 
seriallization CV_BOOL, // bool user G-API data CV_INT, // int user G-API data + CV_INT64, // int64_t user G-API data CV_DOUBLE, // double user G-API data CV_FLOAT, // float user G-API data CV_UINT64, // uint64_t user G-API data @@ -61,6 +62,7 @@ namespace detail template struct GOpaqueTraits; template struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_UNKNOWN; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_INT; }; + template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_INT64; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_DOUBLE; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_FLOAT; }; template<> struct GOpaqueTraits { static constexpr const OpaqueKind kind = OpaqueKind::CV_UINT64; }; diff --git a/modules/gapi/include/opencv2/gapi/gstreaming.hpp b/modules/gapi/include/opencv2/gapi/gstreaming.hpp index 5bbed5e12dda..47e103fd0ea7 100644 --- a/modules/gapi/include/opencv2/gapi/gstreaming.hpp +++ b/modules/gapi/include/opencv2/gapi/gstreaming.hpp @@ -196,7 +196,7 @@ class GAPI_EXPORTS_W_SIMPLE GStreamingCompiled * @param s a shared pointer to IStreamSource representing the * input video stream. 
*/ - GAPI_WRAP void setSource(const gapi::wip::IStreamSource::Ptr& s); + void setSource(const gapi::wip::IStreamSource::Ptr& s); /** * @brief Constructs and specifies an input video stream for a diff --git a/modules/gapi/include/opencv2/gapi/infer.hpp b/modules/gapi/include/opencv2/gapi/infer.hpp index 93701856bbdb..807c82d31f89 100644 --- a/modules/gapi/include/opencv2/gapi/infer.hpp +++ b/modules/gapi/include/opencv2/gapi/infer.hpp @@ -136,11 +136,12 @@ class GInferInputsTyped } template - void setInput(const std::string& name, U in) + GInferInputsTyped& setInput(const std::string& name, U in) { m_priv->blobs.emplace(std::piecewise_construct, std::forward_as_tuple(name), std::forward_as_tuple(in)); + return *this; } using StorageT = cv::util::variant; @@ -654,7 +655,7 @@ namespace gapi { // A type-erased form of network parameters. // Similar to how a type-erased GKernel is represented and used. /// @private -struct GAPI_EXPORTS GNetParam { +struct GAPI_EXPORTS_W_SIMPLE GNetParam { std::string tag; // FIXME: const? GBackend backend; // Specifies the execution model util::any params; // Backend-interpreted parameter structure @@ -671,6 +672,7 @@ struct GAPI_EXPORTS GNetParam { */ struct GAPI_EXPORTS_W_SIMPLE GNetPackage { GAPI_WRAP GNetPackage() = default; + GAPI_WRAP explicit GNetPackage(std::vector nets); explicit GNetPackage(std::initializer_list ii); std::vector backends() const; std::vector networks; diff --git a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp index 22c8701a6c2e..c7308dd39f47 100644 --- a/modules/gapi/include/opencv2/gapi/infer/parsers.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/parsers.hpp @@ -64,10 +64,10 @@ detection is smaller than confidence threshold, detection is rejected. given label will get to the output. @return a tuple with a vector of detected boxes and a vector of appropriate labels. 
*/ -GAPI_EXPORTS std::tuple, GArray> parseSSD(const GMat& in, - const GOpaque& inSz, - const float confidenceThreshold = 0.5f, - const int filterLabel = -1); +GAPI_EXPORTS_W std::tuple, GArray> parseSSD(const GMat& in, + const GOpaque& inSz, + const float confidenceThreshold = 0.5f, + const int filterLabel = -1); /** @brief Parses output of SSD network. @@ -113,12 +113,12 @@ If 1.f, nms is not performed and no boxes are rejected. documentation. @return a tuple with a vector of detected boxes and a vector of appropriate labels. */ -GAPI_EXPORTS std::tuple, GArray> parseYolo(const GMat& in, - const GOpaque& inSz, - const float confidenceThreshold = 0.5f, - const float nmsThreshold = 0.5f, - const std::vector& anchors - = nn::parsers::GParseYolo::defaultAnchors()); +GAPI_EXPORTS_W std::tuple, GArray> parseYolo(const GMat& in, + const GOpaque& inSz, + const float confidenceThreshold = 0.5f, + const float nmsThreshold = 0.5f, + const std::vector& anchors + = nn::parsers::GParseYolo::defaultAnchors()); } // namespace gapi } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/streaming/format.hpp b/modules/gapi/include/opencv2/gapi/streaming/format.hpp index c9d2fa3e0a29..f7c3bd457dfb 100644 --- a/modules/gapi/include/opencv2/gapi/streaming/format.hpp +++ b/modules/gapi/include/opencv2/gapi/streaming/format.hpp @@ -74,7 +74,7 @@ e.g when graph's input needs to be passed directly to output, like in Streaming @param in Input image @return Copy of the input */ -GAPI_EXPORTS GMat copy(const GMat& in); +GAPI_EXPORTS_W GMat copy(const GMat& in); /** @brief Makes a copy of the input frame. Note that this copy may be not real (no actual data copied). 
Use this function to maintain graph contracts, diff --git a/modules/gapi/misc/python/package/gapi/__init__.py b/modules/gapi/misc/python/package/gapi/__init__.py index 23f5f41846f3..587f641fd33a 100644 --- a/modules/gapi/misc/python/package/gapi/__init__.py +++ b/modules/gapi/misc/python/package/gapi/__init__.py @@ -11,11 +11,36 @@ def parameterized(func): return parameterized +@register('cv2.gapi') +def networks(*args): + return cv.gapi_GNetPackage(list(map(cv.detail.strip, args))) + + @register('cv2.gapi') def compile_args(*args): return list(map(cv.GCompileArg, args)) +@register('cv2') +def GIn(*args): + return [*args] + + +@register('cv2') +def GOut(*args): + return [*args] + + +@register('cv2') +def gin(*args): + return [*args] + + +@register('cv2.gapi') +def descr_of(*args): + return [*args] + + @register('cv2') class GOpaque(): # NB: Inheritance from c++ class cause segfault. diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index 6b782cfc8dd8..6cd79e4a7318 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -17,6 +17,7 @@ using gapi_ie_PyParams = cv::gapi::ie::PyParams; using gapi_wip_IStreamSource_Ptr = cv::Ptr; using detail_ExtractArgsCallback = cv::detail::ExtractArgsCallback; using detail_ExtractMetaCallback = cv::detail::ExtractMetaCallback; +using vector_GNetParam = std::vector; // NB: Python wrapper generate T_U for T // This behavior is only observed for inputs @@ -138,6 +139,7 @@ PyObject* pyopencv_from(const cv::GArg& value) { HANDLE_CASE(BOOL, bool); HANDLE_CASE(INT, int); + HANDLE_CASE(INT64, int64_t); HANDLE_CASE(DOUBLE, double); HANDLE_CASE(FLOAT, float); HANDLE_CASE(STRING, std::string); @@ -164,23 +166,29 @@ bool pyopencv_to(PyObject* obj, cv::GArg& value, const ArgInfo& info) } template <> -bool pyopencv_to(PyObject* obj, std::vector& value, const ArgInfo& info) +bool pyopencv_to(PyObject* obj, std::vector& value, const ArgInfo& info) 
{ return pyopencv_to_generic_vec(obj, value, info); } template <> -PyObject* pyopencv_from(const std::vector& value) +PyObject* pyopencv_from(const std::vector& value) { return pyopencv_from_generic_vec(value); } template <> -bool pyopencv_to(PyObject* obj, GRunArgs& value, const ArgInfo& info) +bool pyopencv_to(PyObject* obj, std::vector& value, const ArgInfo& info) { return pyopencv_to_generic_vec(obj, value, info); } +template <> +PyObject* pyopencv_from(const std::vector& value) +{ + return pyopencv_from_generic_vec(value); +} + template<> PyObject* pyopencv_from(const cv::detail::OpaqueRef& o) { @@ -188,6 +196,7 @@ PyObject* pyopencv_from(const cv::detail::OpaqueRef& o) { case cv::detail::OpaqueKind::CV_BOOL : return pyopencv_from(o.rref()); case cv::detail::OpaqueKind::CV_INT : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_INT64 : return pyopencv_from(o.rref()); case cv::detail::OpaqueKind::CV_DOUBLE : return pyopencv_from(o.rref()); case cv::detail::OpaqueKind::CV_FLOAT : return pyopencv_from(o.rref()); case cv::detail::OpaqueKind::CV_STRING : return pyopencv_from(o.rref()); @@ -213,6 +222,7 @@ PyObject* pyopencv_from(const cv::detail::VectorRef& v) { case cv::detail::OpaqueKind::CV_BOOL : return pyopencv_from_generic_vec(v.rref()); case cv::detail::OpaqueKind::CV_INT : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_INT64 : return pyopencv_from_generic_vec(v.rref()); case cv::detail::OpaqueKind::CV_DOUBLE : return pyopencv_from_generic_vec(v.rref()); case cv::detail::OpaqueKind::CV_FLOAT : return pyopencv_from_generic_vec(v.rref()); case cv::detail::OpaqueKind::CV_STRING : return pyopencv_from_generic_vec(v.rref()); @@ -285,18 +295,6 @@ PyObject* pyopencv_from(const GRunArgs& value) return list; } -template<> -bool pyopencv_to(PyObject* obj, GMetaArgs& value, const ArgInfo& info) -{ - return pyopencv_to_generic_vec(obj, value, info); -} - -template<> -PyObject* pyopencv_from(const GMetaArgs& value) -{ - 
return pyopencv_from_generic_vec(value); -} - template void pyopencv_to_with_check(PyObject* from, T& to, const std::string& msg = "") { @@ -318,16 +316,16 @@ void pyopencv_to_generic_vec_with_check(PyObject* from, } template -static PyObject* extract_proto_args(PyObject* py_args, PyObject* kw) +static T extract_proto_args(PyObject* py_args) { using namespace cv; GProtoArgs args; - Py_ssize_t size = PyTuple_Size(py_args); + Py_ssize_t size = PyList_Size(py_args); args.reserve(size); for (int i = 0; i < size; ++i) { - PyObject* item = PyTuple_GetItem(py_args, i); + PyObject* item = PyList_GetItem(py_args, i); if (PyObject_TypeCheck(item, reinterpret_cast(pyopencv_GScalar_TypePtr))) { args.emplace_back(reinterpret_cast(item)->v); @@ -346,22 +344,11 @@ static PyObject* extract_proto_args(PyObject* py_args, PyObject* kw) } else { - PyErr_SetString(PyExc_TypeError, "Unsupported type for cv.GIn()/cv.GOut()"); - return NULL; + util::throw_error(std::logic_error("Unsupported type for GProtoArgs")); } } - return pyopencv_from(T{std::move(args)}); -} - -static PyObject* pyopencv_cv_GIn(PyObject* , PyObject* py_args, PyObject* kw) -{ - return extract_proto_args(py_args, kw); -} - -static PyObject* pyopencv_cv_GOut(PyObject* , PyObject* py_args, PyObject* kw) -{ - return extract_proto_args(py_args, kw); + return T(std::move(args)); } static cv::detail::OpaqueRef extract_opaque_ref(PyObject* from, cv::detail::OpaqueKind kind) @@ -386,6 +373,7 @@ static cv::detail::OpaqueRef extract_opaque_ref(PyObject* from, cv::detail::Opaq HANDLE_CASE(RECT, cv::Rect); HANDLE_CASE(UNKNOWN, cv::GArg); UNSUPPORTED(UINT64); + UNSUPPORTED(INT64); UNSUPPORTED(SCALAR); UNSUPPORTED(MAT); UNSUPPORTED(DRAW_PRIM); @@ -419,6 +407,7 @@ static cv::detail::VectorRef extract_vector_ref(PyObject* from, cv::detail::Opaq HANDLE_CASE(MAT, cv::Mat); HANDLE_CASE(UNKNOWN, cv::GArg); UNSUPPORTED(UINT64); + UNSUPPORTED(INT64); UNSUPPORTED(DRAW_PRIM); #undef HANDLE_CASE #undef UNSUPPORTED @@ -470,13 +459,15 @@ static 
cv::GRunArg extract_run_arg(const cv::GTypeInfo& info, PyObject* item) static cv::GRunArgs extract_run_args(const cv::GTypesInfo& info, PyObject* py_args) { + GAPI_Assert(PyList_Check(py_args)); + cv::GRunArgs args; - Py_ssize_t tuple_size = PyTuple_Size(py_args); - args.reserve(tuple_size); + Py_ssize_t list_size = PyList_Size(py_args); + args.reserve(list_size); - for (int i = 0; i < tuple_size; ++i) + for (int i = 0; i < list_size; ++i) { - args.push_back(extract_run_arg(info[i], PyTuple_GetItem(py_args, i))); + args.push_back(extract_run_arg(info[i], PyList_GetItem(py_args, i))); } return args; @@ -517,13 +508,15 @@ static cv::GMetaArg extract_meta_arg(const cv::GTypeInfo& info, PyObject* item) static cv::GMetaArgs extract_meta_args(const cv::GTypesInfo& info, PyObject* py_args) { + GAPI_Assert(PyList_Check(py_args)); + cv::GMetaArgs metas; - Py_ssize_t tuple_size = PyTuple_Size(py_args); - metas.reserve(tuple_size); + Py_ssize_t list_size = PyList_Size(py_args); + metas.reserve(list_size); - for (int i = 0; i < tuple_size; ++i) + for (int i = 0; i < list_size; ++i) { - metas.push_back(extract_meta_arg(info[i], PyTuple_GetItem(py_args, i))); + metas.push_back(extract_meta_arg(info[i], PyList_GetItem(py_args, i))); } return metas; @@ -589,8 +582,27 @@ static cv::GRunArgs run_py_kernel(cv::detail::PyObjectHolder kernel, // NB: In fact it's impossible situation, becase errors were handled above. GAPI_Assert(result.get() && "Python kernel returned NULL!"); - outs = out_info.size() == 1 ? 
cv::GRunArgs{extract_run_arg(out_info[0], result.get())} - : extract_run_args(out_info, result.get()); + if (out_info.size() == 1) + { + outs = cv::GRunArgs{extract_run_arg(out_info[0], result.get())}; + } + else if (out_info.size() > 1) + { + GAPI_Assert(PyTuple_Check(result.get())); + + Py_ssize_t tuple_size = PyTuple_Size(result.get()); + outs.reserve(tuple_size); + + for (int i = 0; i < tuple_size; ++i) + { + outs.push_back(extract_run_arg(out_info[i], PyTuple_GetItem(result.get(), i))); + } + } + else + { + // Seems to be impossible case. + GAPI_Assert(false); + } } catch (...) { @@ -756,23 +768,6 @@ static PyObject* pyopencv_cv_gapi_kernels(PyObject* , PyObject* py_args, PyObjec return pyopencv_from(pkg); } -static PyObject* pyopencv_cv_gapi_networks(PyObject*, PyObject* py_args, PyObject*) -{ - using namespace cv; - gapi::GNetPackage pkg; - Py_ssize_t size = PyTuple_Size(py_args); - for (int i = 0; i < size; ++i) - { - gapi_ie_PyParams params; - PyObject* item = PyTuple_GetItem(py_args, i); - if (pyopencv_to(item, params, ArgInfo("PyParams", false))) - { - pkg += gapi::networks(params); - } - } - return pyopencv_from(pkg); -} - static PyObject* pyopencv_cv_gapi_op(PyObject* , PyObject* py_args, PyObject*) { using namespace cv; @@ -834,53 +829,54 @@ static PyObject* pyopencv_cv_gapi_op(PyObject* , PyObject* py_args, PyObject*) return pyopencv_from(cv::gapi::wip::op(id, outMetaWrapper, std::move(args))); } -static PyObject* pyopencv_cv_gin(PyObject*, PyObject* py_args, PyObject*) +template<> +bool pyopencv_to(PyObject* obj, cv::detail::ExtractArgsCallback& value, const ArgInfo&) { - cv::detail::PyObjectHolder holder{py_args}; - auto callback = cv::detail::ExtractArgsCallback{[=](const cv::GTypesInfo& info) - { - PyGILState_STATE gstate; - gstate = PyGILState_Ensure(); + cv::detail::PyObjectHolder holder{obj}; + value = cv::detail::ExtractArgsCallback{[=](const cv::GTypesInfo& info) + { + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); - cv::GRunArgs 
args; - try - { - args = extract_run_args(info, holder.get()); - } - catch (...) - { - PyGILState_Release(gstate); - throw; - } + cv::GRunArgs args; + try + { + args = extract_run_args(info, holder.get()); + } + catch (...) + { PyGILState_Release(gstate); - return args; - }}; - - return pyopencv_from(callback); + throw; + } + PyGILState_Release(gstate); + return args; + }}; + return true; } -static PyObject* pyopencv_cv_descr_of(PyObject*, PyObject* py_args, PyObject*) +template<> +bool pyopencv_to(PyObject* obj, cv::detail::ExtractMetaCallback& value, const ArgInfo&) { - Py_INCREF(py_args); - auto callback = cv::detail::ExtractMetaCallback{[=](const cv::GTypesInfo& info) - { - PyGILState_STATE gstate; - gstate = PyGILState_Ensure(); + cv::detail::PyObjectHolder holder{obj}; + value = cv::detail::ExtractMetaCallback{[=](const cv::GTypesInfo& info) + { + PyGILState_STATE gstate; + gstate = PyGILState_Ensure(); - cv::GMetaArgs args; - try - { - args = extract_meta_args(info, py_args); - } - catch (...) - { - PyGILState_Release(gstate); - throw; - } + cv::GMetaArgs args; + try + { + args = extract_meta_args(info, holder.get()); + } + catch (...) + { PyGILState_Release(gstate); - return args; - }}; - return pyopencv_from(callback); + throw; + } + PyGILState_Release(gstate); + return args; + }}; + return true; } template @@ -929,11 +925,39 @@ struct PyOpenCV_Converter> } }; +template<> +bool pyopencv_to(PyObject* obj, cv::GProtoInputArgs& value, const ArgInfo& info) +{ + try + { + value = extract_proto_args(obj); + return true; + } + catch (...) + { + failmsg("Can't parse cv::GProtoInputArgs"); + return false; + } +} + +template<> +bool pyopencv_to(PyObject* obj, cv::GProtoOutputArgs& value, const ArgInfo& info) +{ + try + { + value = extract_proto_args(obj); + return true; + } + catch (...) 
+ { + failmsg("Can't parse cv::GProtoOutputArgs"); + return false; + } +} // extend cv.gapi methods #define PYOPENCV_EXTRA_METHODS_GAPI \ {"kernels", CV_PY_FN_WITH_KW(pyopencv_cv_gapi_kernels), "kernels(...) -> GKernelPackage"}, \ - {"networks", CV_PY_FN_WITH_KW(pyopencv_cv_gapi_networks), "networks(...) -> GNetPackage"}, \ {"__op", CV_PY_FN_WITH_KW(pyopencv_cv_gapi_op), "__op(...) -> retval\n"}, diff --git a/modules/gapi/misc/python/python_bridge.hpp b/modules/gapi/misc/python/python_bridge.hpp index 0d1c6d51c574..b212babe4599 100644 --- a/modules/gapi/misc/python/python_bridge.hpp +++ b/modules/gapi/misc/python/python_bridge.hpp @@ -27,6 +27,7 @@ #define GARRAY_TYPE_LIST_G(G, G2) \ WRAP_ARGS(bool , cv::gapi::ArgType::CV_BOOL, G) \ WRAP_ARGS(int , cv::gapi::ArgType::CV_INT, G) \ +WRAP_ARGS(int64_t , cv::gapi::ArgType::CV_INT64, G) \ WRAP_ARGS(double , cv::gapi::ArgType::CV_DOUBLE, G) \ WRAP_ARGS(float , cv::gapi::ArgType::CV_FLOAT, G) \ WRAP_ARGS(std::string , cv::gapi::ArgType::CV_STRING, G) \ @@ -42,6 +43,7 @@ WRAP_ARGS(cv::GMat , cv::gapi::ArgType::CV_GMAT, G2) \ #define GOPAQUE_TYPE_LIST_G(G, G2) \ WRAP_ARGS(bool , cv::gapi::ArgType::CV_BOOL, G) \ WRAP_ARGS(int , cv::gapi::ArgType::CV_INT, G) \ +WRAP_ARGS(int64_t , cv::gapi::ArgType::CV_INT64, G) \ WRAP_ARGS(double , cv::gapi::ArgType::CV_DOUBLE, G) \ WRAP_ARGS(float , cv::gapi::ArgType::CV_FLOAT, G) \ WRAP_ARGS(std::string , cv::gapi::ArgType::CV_STRING, G) \ @@ -58,6 +60,7 @@ namespace gapi { enum ArgType { CV_BOOL, CV_INT, + CV_INT64, CV_DOUBLE, CV_FLOAT, CV_STRING, diff --git a/modules/gapi/misc/python/shadow_gapi.hpp b/modules/gapi/misc/python/shadow_gapi.hpp index 941250c2fb45..e777aa5d934b 100644 --- a/modules/gapi/misc/python/shadow_gapi.hpp +++ b/modules/gapi/misc/python/shadow_gapi.hpp @@ -8,31 +8,20 @@ namespace cv GAPI_WRAP GCompileArg(gapi::GNetPackage pkg); }; - // NB: This classes doesn't exist in *.so - // HACK: Mark them as a class to force python wrapper generate code for this entities - 
class GAPI_EXPORTS_W_SIMPLE GProtoArg { }; - class GAPI_EXPORTS_W_SIMPLE GProtoInputArgs { }; - class GAPI_EXPORTS_W_SIMPLE GProtoOutputArgs { }; - class GAPI_EXPORTS_W_SIMPLE GRunArg { }; - class GAPI_EXPORTS_W_SIMPLE GMetaArg { GAPI_WRAP GMetaArg(); }; - - using GProtoInputArgs = GIOProtoArgs; - using GProtoOutputArgs = GIOProtoArgs; - class GAPI_EXPORTS_W_SIMPLE GInferInputs { public: GAPI_WRAP GInferInputs(); - GAPI_WRAP void setInput(const std::string& name, const cv::GMat& value); - GAPI_WRAP void setInput(const std::string& name, const cv::GFrame& value); + GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GMat& value); + GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GFrame& value); }; class GAPI_EXPORTS_W_SIMPLE GInferListInputs { public: GAPI_WRAP GInferListInputs(); - GAPI_WRAP void setInput(const std::string& name, const cv::GArray& value); - GAPI_WRAP void setInput(const std::string& name, const cv::GArray& value); + GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); + GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); }; class GAPI_EXPORTS_W_SIMPLE GInferOutputs @@ -51,12 +40,18 @@ namespace cv namespace detail { - struct GAPI_EXPORTS_W_SIMPLE ExtractArgsCallback { }; - struct GAPI_EXPORTS_W_SIMPLE ExtractMetaCallback { }; + gapi::GNetParam GAPI_EXPORTS_W strip(gapi::ie::PyParams params); } // namespace detail namespace gapi { + namespace streaming + { + // FIXME: Extend to work with an arbitrary G-type. 
+ cv::GOpaque GAPI_EXPORTS_W timestamp(cv::GMat); + cv::GOpaque GAPI_EXPORTS_W seqNo(cv::GMat); + cv::GOpaque GAPI_EXPORTS_W seq_id(cv::GMat); + } // namespace streaming namespace wip { class GAPI_EXPORTS_W IStreamSource { }; diff --git a/modules/gapi/misc/python/test/test_gapi_streaming.py b/modules/gapi/misc/python/test/test_gapi_streaming.py index f1cce4fb72fc..4ea88878eeab 100644 --- a/modules/gapi/misc/python/test/test_gapi_streaming.py +++ b/modules/gapi/misc/python/test/test_gapi_streaming.py @@ -28,7 +28,7 @@ def test_image_input(self): g_in = cv.GMat() g_out = cv.gapi.medianBlur(g_in, 3) c = cv.GComputation(g_in, g_out) - ccomp = c.compileStreaming(cv.descr_of(in_mat)) + ccomp = c.compileStreaming(cv.gapi.descr_of(in_mat)) ccomp.setSource(cv.gin(in_mat)) ccomp.start() @@ -52,7 +52,7 @@ def test_video_input(self): ccomp = c.compileStreaming() source = cv.gapi.wip.make_capture_src(path) - ccomp.setSource(source) + ccomp.setSource(cv.gin(source)) ccomp.start() # Assert @@ -87,7 +87,7 @@ def test_video_split3(self): ccomp = c.compileStreaming() source = cv.gapi.wip.make_capture_src(path) - ccomp.setSource(source) + ccomp.setSource(cv.gin(source)) ccomp.start() # Assert @@ -176,7 +176,7 @@ def test_video_good_features_to_track(self): ccomp = c.compileStreaming() source = cv.gapi.wip.make_capture_src(path) - ccomp.setSource(source) + ccomp.setSource(cv.gin(source)) ccomp.start() # Assert @@ -209,6 +209,40 @@ def test_video_good_features_to_track(self): break + def test_gapi_streaming_meta(self): + ksize = 3 + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + + # G-API + g_in = cv.GMat() + g_ts = cv.gapi.streaming.timestamp(g_in) + g_seqno = cv.gapi.streaming.seqNo(g_in) + g_seqid = cv.gapi.streaming.seq_id(g_in) + + c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_ts, g_seqno, g_seqid)) + + ccomp = c.compileStreaming() + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(cv.gin(source)) + ccomp.start() + + # Assert + 
max_num_frames = 10 + curr_frame_number = 0 + while True: + has_frame, (ts, seqno, seqid) = ccomp.pull() + + if not has_frame: + break + + self.assertEqual(curr_frame_number, seqno) + self.assertEqual(curr_frame_number, seqid) + + curr_frame_number += 1 + if curr_frame_number == max_num_frames: + break + + except unittest.SkipTest as e: message = str(e) diff --git a/modules/gapi/src/api/ginfer.cpp b/modules/gapi/src/api/ginfer.cpp index e3cc94041c32..9db05a43c369 100644 --- a/modules/gapi/src/api/ginfer.cpp +++ b/modules/gapi/src/api/ginfer.cpp @@ -15,6 +15,10 @@ cv::gapi::GNetPackage::GNetPackage(std::initializer_list ii) : networks(ii) { } +cv::gapi::GNetPackage::GNetPackage(std::vector nets) + : networks(nets) { +} + std::vector cv::gapi::GNetPackage::backends() const { std::unordered_set unique_set; for (const auto &nn : networks) unique_set.insert(nn.backend); diff --git a/modules/gapi/src/backends/common/gmetabackend.cpp b/modules/gapi/src/backends/common/gmetabackend.cpp index c535569b0cef..40e87c3ea0aa 100644 --- a/modules/gapi/src/backends/common/gmetabackend.cpp +++ b/modules/gapi/src/backends/common/gmetabackend.cpp @@ -85,6 +85,19 @@ class GGraphMetaBackendImpl final: public cv::gapi::GBackend::Priv { const std::vector&) const override { return EPtr{new GraphMetaExecutable(graph, nodes)}; } + + virtual bool controlsMerge() const override + { + return true; + } + + virtual bool allowsMerge(const cv::gimpl::GIslandModel::Graph &, + const ade::NodeHandle &, + const ade::NodeHandle &, + const ade::NodeHandle &) const override + { + return false; + } }; cv::gapi::GBackend graph_meta_backend() { diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index 9e8a6ee13bd9..795afb13f276 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -2219,12 +2219,6 @@ static PyMethodDef special_methods[] = { #ifdef HAVE_OPENCV_DNN {"dnn_registerLayer", CV_PY_FN_WITH_KW(pyopencv_cv_dnn_registerLayer), "registerLayer(type, class) -> 
None"}, {"dnn_unregisterLayer", CV_PY_FN_WITH_KW(pyopencv_cv_dnn_unregisterLayer), "unregisterLayer(type) -> None"}, -#endif -#ifdef HAVE_OPENCV_GAPI - {"GIn", CV_PY_FN_WITH_KW(pyopencv_cv_GIn), "GIn(...) -> GInputProtoArgs"}, - {"GOut", CV_PY_FN_WITH_KW(pyopencv_cv_GOut), "GOut(...) -> GOutputProtoArgs"}, - {"gin", CV_PY_FN_WITH_KW(pyopencv_cv_gin), "gin(...) -> ExtractArgsCallback"}, - {"descr_of", CV_PY_FN_WITH_KW(pyopencv_cv_descr_of), "descr_of(...) -> ExtractMetaCallback"}, #endif {NULL, NULL}, }; From 5e80bd3cc922bad5eb1fa02b56bc3db44abb96e9 Mon Sep 17 00:00:00 2001 From: APrigarina Date: Wed, 30 Jun 2021 12:50:21 +0300 Subject: [PATCH 023/128] fix samples 3.4 --- samples/python/camera_calibration_show_extrinsics.py | 2 +- samples/python/gaussian_mix.py | 2 +- samples/python/hist.py | 2 +- samples/python/lk_homography.py | 6 +++--- samples/python/lk_track.py | 2 +- samples/python/video_v4l2.py | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/samples/python/camera_calibration_show_extrinsics.py b/samples/python/camera_calibration_show_extrinsics.py index 0118b5b913d5..d676691f15d7 100755 --- a/samples/python/camera_calibration_show_extrinsics.py +++ b/samples/python/camera_calibration_show_extrinsics.py @@ -188,7 +188,7 @@ def main(): fig = plt.figure() ax = fig.gca(projection='3d') - ax.set_aspect("equal") + ax.set_aspect("auto") cam_width = args.cam_width cam_height = args.cam_height diff --git a/samples/python/gaussian_mix.py b/samples/python/gaussian_mix.py index 5f2dfcc44093..6a656647ddcf 100755 --- a/samples/python/gaussian_mix.py +++ b/samples/python/gaussian_mix.py @@ -32,7 +32,7 @@ def draw_gaussain(img, mean, cov, color): w, u, _vt = cv.SVDecomp(cov) ang = np.arctan2(u[1, 0], u[0, 0])*(180/np.pi) s1, s2 = np.sqrt(w)*3.0 - cv.ellipse(img, (x, y), (s1, s2), ang, 0, 360, color, 1, cv.LINE_AA) + cv.ellipse(img, (int(x), int(y)), (int(s1), int(s2)), ang, 0, 360, color, 1, cv.LINE_AA) def main(): diff --git a/samples/python/hist.py 
b/samples/python/hist.py index 4c2c1ad395ef..157d5ff0ba3e 100755 --- a/samples/python/hist.py +++ b/samples/python/hist.py @@ -48,7 +48,7 @@ def hist_lines(im): cv.normalize(hist_item,hist_item,0,255,cv.NORM_MINMAX) hist=np.int32(np.around(hist_item)) for x,y in enumerate(hist): - cv.line(h,(x,0),(x,y),(255,255,255)) + cv.line(h,(x,0),(x,y[0]),(255,255,255)) y = np.flipud(h) return y diff --git a/samples/python/lk_homography.py b/samples/python/lk_homography.py index 808f30965f0d..38a05f63b6a5 100755 --- a/samples/python/lk_homography.py +++ b/samples/python/lk_homography.py @@ -77,8 +77,8 @@ def run(self): for (x0, y0), (x1, y1), good in zip(self.p0[:,0], self.p1[:,0], status[:,0]): if good: - cv.line(vis, (x0, y0), (x1, y1), (0, 128, 0)) - cv.circle(vis, (x1, y1), 2, (red, green)[good], -1) + cv.line(vis, (int(x0), int(y0)), (int(x1), int(y1)), (0, 128, 0)) + cv.circle(vis, (int(x1), int(y1)), 2, (red, green)[good], -1) draw_str(vis, (20, 20), 'track count: %d' % len(self.p1)) if self.use_ransac: draw_str(vis, (20, 40), 'RANSAC') @@ -86,7 +86,7 @@ def run(self): p = cv.goodFeaturesToTrack(frame_gray, **feature_params) if p is not None: for x, y in p[:,0]: - cv.circle(vis, (x, y), 2, green, -1) + cv.circle(vis, (int(x), int(y)), 2, green, -1) draw_str(vis, (20, 20), 'feature count: %d' % len(p)) cv.imshow('lk_homography', vis) diff --git a/samples/python/lk_track.py b/samples/python/lk_track.py index 7b77f1b33595..97a8c40241e2 100755 --- a/samples/python/lk_track.py +++ b/samples/python/lk_track.py @@ -65,7 +65,7 @@ def run(self): if len(tr) > self.track_len: del tr[0] new_tracks.append(tr) - cv.circle(vis, (x, y), 2, (0, 255, 0), -1) + cv.circle(vis, (int(x), int(y)), 2, (0, 255, 0), -1) self.tracks = new_tracks cv.polylines(vis, [np.int32(tr) for tr in self.tracks], False, (0, 255, 0)) draw_str(vis, (20, 20), 'track count: %d' % len(self.tracks)) diff --git a/samples/python/video_v4l2.py b/samples/python/video_v4l2.py index 61b1e3580483..abebb2a2cacc 100644 --- 
a/samples/python/video_v4l2.py +++ b/samples/python/video_v4l2.py @@ -30,7 +30,7 @@ def decode_fourcc(v): color = (0, 255, 0) cap = cv.VideoCapture(0) - cap.set(cv.CAP_PROP_AUTOFOCUS, False) # Known bug: https://github.com/opencv/opencv/pull/5474 + cap.set(cv.CAP_PROP_AUTOFOCUS, 0) # Known bug: https://github.com/opencv/opencv/pull/5474 cv.namedWindow("Video") @@ -67,7 +67,7 @@ def decode_fourcc(v): break elif k == ord('g'): convert_rgb = not convert_rgb - cap.set(cv.CAP_PROP_CONVERT_RGB, convert_rgb) + cap.set(cv.CAP_PROP_CONVERT_RGB, 1 if convert_rgb else 0) print('Done') From 90be83ae99cb719619f26fedf6d199f6422b551e Mon Sep 17 00:00:00 2001 From: Vladimir <10669582+Wovchena@users.noreply.github.com> Date: Wed, 30 Jun 2021 10:15:58 +0300 Subject: [PATCH 024/128] Fix an arg for calcHist() in demos `float* histRange = { range };` doesn't make much sense. `histRange` is an array of array(s), so it should have a type of ptr to ptr. Strangely some domos are correct as well as the example for the function https://docs.opencv.org/master/d6/dc7/group__imgproc__hist.html#ga4b2b5fd75503ff9e6844cc4dcdaed35d --- .../Histograms_Matching/MatchTemplate_Demo.cpp | 2 +- .../Histograms_Matching/calcBackProject_Demo1.cpp | 6 +++--- .../tutorial_code/Histograms_Matching/calcHist_Demo.cpp | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp b/samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp index 5bcc878965a2..f9abbae94527 100644 --- a/samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp +++ b/samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp @@ -89,7 +89,7 @@ void MatchingMethod( int, void* ) //! 
[create_result_matrix] /// Create the result matrix - int result_cols = img.cols - templ.cols + 1; + int result_cols = img.cols - templ.cols + 1; int result_rows = img.rows - templ.rows + 1; result.create( result_rows, result_cols, CV_32FC1 ); diff --git a/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo1.cpp b/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo1.cpp index 61b6d607ceb6..bcb547a2fb9f 100644 --- a/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo1.cpp +++ b/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo1.cpp @@ -72,18 +72,18 @@ void Hist_and_Backproj(int, void* ) //! [initialize] int histSize = MAX( bins, 2 ); float hue_range[] = { 0, 180 }; - const float* ranges = { hue_range }; + const float* ranges[] = { hue_range }; //! [initialize] //! [Get the Histogram and normalize it] Mat hist; - calcHist( &hue, 1, 0, Mat(), hist, 1, &histSize, &ranges, true, false ); + calcHist( &hue, 1, 0, Mat(), hist, 1, &histSize, ranges, true, false ); normalize( hist, hist, 0, 255, NORM_MINMAX, -1, Mat() ); //! [Get the Histogram and normalize it] //! [Get Backprojection] Mat backproj; - calcBackProject( &hue, 1, 0, hist, backproj, &ranges, 1, true ); + calcBackProject( &hue, 1, 0, hist, backproj, ranges, 1, true ); //! [Get Backprojection] //! [Draw the backproj] diff --git a/samples/cpp/tutorial_code/Histograms_Matching/calcHist_Demo.cpp b/samples/cpp/tutorial_code/Histograms_Matching/calcHist_Demo.cpp index 86167e519a2f..a7582e42820a 100644 --- a/samples/cpp/tutorial_code/Histograms_Matching/calcHist_Demo.cpp +++ b/samples/cpp/tutorial_code/Histograms_Matching/calcHist_Demo.cpp @@ -37,7 +37,7 @@ int main(int argc, char** argv) //! [Set the ranges ( for B,G,R) )] float range[] = { 0, 256 }; //the upper boundary is exclusive - const float* histRange = { range }; + const float* histRange[] = { range }; //! [Set the ranges ( for B,G,R) )] //! 
[Set histogram param] @@ -46,9 +46,9 @@ int main(int argc, char** argv) //! [Compute the histograms] Mat b_hist, g_hist, r_hist; - calcHist( &bgr_planes[0], 1, 0, Mat(), b_hist, 1, &histSize, &histRange, uniform, accumulate ); - calcHist( &bgr_planes[1], 1, 0, Mat(), g_hist, 1, &histSize, &histRange, uniform, accumulate ); - calcHist( &bgr_planes[2], 1, 0, Mat(), r_hist, 1, &histSize, &histRange, uniform, accumulate ); + calcHist( &bgr_planes[0], 1, 0, Mat(), b_hist, 1, &histSize, histRange, uniform, accumulate ); + calcHist( &bgr_planes[1], 1, 0, Mat(), g_hist, 1, &histSize, histRange, uniform, accumulate ); + calcHist( &bgr_planes[2], 1, 0, Mat(), r_hist, 1, &histSize, histRange, uniform, accumulate ); //! [Compute the histograms] //! [Draw the histograms for B, G and R] From 6a3d925a47d54945eab7b50a769531703cde99a2 Mon Sep 17 00:00:00 2001 From: Joe Howse Date: Mon, 21 Jun 2021 00:46:32 -0300 Subject: [PATCH 025/128] OpenCL: core support for FP16, more channel orders * Support cl_image conversion for CL_HALF_FLOAT (float16) * Support cl_image conversion for additional channel orders: CL_A, CL_INTENSITY, CL_LUMINANCE, CL_RG, CL_RA * Comment on why cl_image conversion is unsupported for CL_RGB * Predict optimal vector width for float16 * ocl::kernelToStr: support float16 * ocl::Device::halfFPConfig: drop artificial requirement for OpenCL version >= 1.2. Even OpenCL 1.0 supports the underlying config property, CL_DEVICE_HALF_FP_CONFIG. 
* dumpOpenCLInformation: provide info on OpenCL half-float support and preferred half-float vector width * randu: support default range [-1.0, 1.0] for float16 * TestBase::warmup: support float16 --- .../opencv2/core/opencl/opencl_info.hpp | 7 +++ modules/core/src/ocl.cpp | 45 ++++++++++++++----- modules/ts/src/ocl_perf.cpp | 2 +- modules/ts/src/ts_perf.cpp | 2 +- 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/modules/core/include/opencv2/core/opencl/opencl_info.hpp b/modules/core/include/opencv2/core/opencl/opencl_info.hpp index 5e5c846ad059..3ead76e5c46e 100644 --- a/modules/core/include/opencv2/core/opencl/opencl_info.hpp +++ b/modules/core/include/opencv2/core/opencl/opencl_info.hpp @@ -144,6 +144,10 @@ static void dumpOpenCLInformation() DUMP_MESSAGE_STDOUT(" Double support = " << doubleSupportStr); DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0); + const char* halfSupportStr = device.halfFPConfig() > 0 ? "Yes" : "No"; + DUMP_MESSAGE_STDOUT(" Half support = " << halfSupportStr); + DUMP_CONFIG_PROPERTY("cv_ocl_current_haveHalfSupport", device.halfFPConfig() > 0); + const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No"; DUMP_MESSAGE_STDOUT(" Host unified memory = " << isUnifiedMemoryStr); DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory()); @@ -191,6 +195,9 @@ static void dumpOpenCLInformation() DUMP_MESSAGE_STDOUT(" Preferred vector width double = " << device.preferredVectorWidthDouble()); DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble()); + + DUMP_MESSAGE_STDOUT(" Preferred vector width half = " << device.preferredVectorWidthHalf()); + DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthHalf", device.preferredVectorWidthHalf()); } catch (...) 
{ diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 0e97cf52feb3..46185446f726 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -1566,6 +1566,7 @@ struct Device::Impl version_ = getStrProp(CL_DEVICE_VERSION); extensions_ = getStrProp(CL_DEVICE_EXTENSIONS); doubleFPConfig_ = getProp(CL_DEVICE_DOUBLE_FP_CONFIG); + halfFPConfig_ = getProp(CL_DEVICE_HALF_FP_CONFIG); hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY); maxComputeUnits_ = getProp(CL_DEVICE_MAX_COMPUTE_UNITS); maxWorkGroupSize_ = getProp(CL_DEVICE_MAX_WORK_GROUP_SIZE); @@ -1678,6 +1679,7 @@ struct Device::Impl String version_; std::string extensions_; int doubleFPConfig_; + int halfFPConfig_; bool hostUnifiedMemory_; int maxComputeUnits_; size_t maxWorkGroupSize_; @@ -1827,11 +1829,7 @@ int Device::singleFPConfig() const { return p ? p->getProp(CL_DEVICE_SINGLE_FP_CONFIG) : 0; } int Device::halfFPConfig() const -#ifdef CL_VERSION_1_2 -{ return p ? p->getProp(CL_DEVICE_HALF_FP_CONFIG) : 0; } -#else -{ CV_REQUIRE_OPENCL_1_2_ERROR; } -#endif +{ return p ? p->halfFPConfig_ : 0; } bool Device::endianLittle() const { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; } @@ -6668,6 +6666,10 @@ void convertFromImage(void* cl_mem_image, UMat& dst) depth = CV_32F; break; + case CL_HALF_FLOAT: + depth = CV_16F; + break; + default: CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type"); } @@ -6676,9 +6678,23 @@ void convertFromImage(void* cl_mem_image, UMat& dst) switch (fmt.image_channel_order) { case CL_R: + case CL_A: + case CL_INTENSITY: + case CL_LUMINANCE: type = CV_MAKE_TYPE(depth, 1); break; + case CL_RG: + case CL_RA: + type = CV_MAKE_TYPE(depth, 2); + break; + + // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with + // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010. 
+ /*case CL_RGB: + type = CV_MAKE_TYPE(depth, 3); + break;*/ + case CL_RGBA: case CL_BGRA: case CL_ARGB: @@ -7068,6 +7084,13 @@ static std::string kerToStr(const Mat & k) stream << "DIG(" << data[i] << "f)"; stream << "DIG(" << data[width] << "f)"; } + else if (depth == CV_16F) + { + stream.setf(std::ios_base::showpoint); + for (int i = 0; i < width; ++i) + stream << "DIG(" << (float)data[i] << "h)"; + stream << "DIG(" << (float)data[width] << "h)"; + } else { for (int i = 0; i < width; ++i) @@ -7091,7 +7114,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name) typedef std::string (* func_t)(const Mat &); static const func_t funcs[] = { kerToStr, kerToStr, kerToStr, kerToStr, - kerToStr, kerToStr, kerToStr, 0 }; + kerToStr, kerToStr, kerToStr, kerToStr }; const func_t func = funcs[ddepth]; CV_Assert(func != 0); @@ -7130,14 +7153,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(), d.preferredVectorWidthShort(), d.preferredVectorWidthShort(), d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(), - d.preferredVectorWidthDouble(), -1 }; + d.preferredVectorWidthDouble(), d.preferredVectorWidthHalf() }; // if the device says don't use vectors if (vectorWidths[0] == 1) { // it's heuristic vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4; - vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2; + vectorWidths[CV_16U] = vectorWidths[CV_16S] = vectorWidths[CV_16F] = 2; vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1; } @@ -7225,10 +7248,12 @@ struct Image2D::Impl { cl_image_format format; static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16, - CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 }; + CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, CL_HALF_FLOAT }; static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16, CL_SNORM_INT16, -1, -1, -1, -1 }; - 
static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA }; + // CL_RGB has no mappings to OpenCV types because CL_RGB can only be used with + // CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, or CL_UNORM_INT_101010. + static const int channelOrders[] = { -1, CL_R, CL_RG, /*CL_RGB*/ -1, CL_RGBA }; int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth]; int channelOrder = channelOrders[cn]; diff --git a/modules/ts/src/ocl_perf.cpp b/modules/ts/src/ocl_perf.cpp index 8dacf219f64b..fe521f2c00d9 100644 --- a/modules/ts/src/ocl_perf.cpp +++ b/modules/ts/src/ocl_perf.cpp @@ -70,7 +70,7 @@ void randu(InputOutputArray dst) cv::randu(dst, -128, 128); else if (dst.depth() == CV_16U) cv::randu(dst, 0, 1024); - else if (dst.depth() == CV_32F || dst.depth() == CV_64F) + else if (dst.depth() == CV_32F || dst.depth() == CV_64F || dst.depth() == CV_16F) cv::randu(dst, -1.0, 1.0); else if (dst.depth() == CV_16S || dst.depth() == CV_32S) cv::randu(dst, -4096, 4096); diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp index 2a9169fd13a5..5a42ca01cdc4 100644 --- a/modules/ts/src/ts_perf.cpp +++ b/modules/ts/src/ts_perf.cpp @@ -1297,7 +1297,7 @@ void TestBase::warmup(cv::InputOutputArray a, WarmUpType wtype) cv::randu(a, -128, 128); else if (depth == CV_16U) cv::randu(a, 0, 1024); - else if (depth == CV_32F || depth == CV_64F) + else if (depth == CV_32F || depth == CV_64F || depth == CV_16F) cv::randu(a, -1.0, 1.0); else if (depth == CV_16S || depth == CV_32S) cv::randu(a, -4096, 4096); From bf489feef11f365f8bd911bf4491d207d04ca46f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20H=20Tib=C3=A3es?= Date: Wed, 30 Jun 2021 19:08:24 -0300 Subject: [PATCH 026/128] Merge pull request #20327 from tibaes:MSMF-Slow-Webcam-Startup * fixes MSMF slow webcam startup * add variable to change MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS at runtime --- modules/videoio/src/cap_msmf.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/modules/videoio/src/cap_msmf.cpp b/modules/videoio/src/cap_msmf.cpp index 73288c3d03b1..9e45fd1bacce 100644 --- a/modules/videoio/src/cap_msmf.cpp +++ b/modules/videoio/src/cap_msmf.cpp @@ -708,9 +708,10 @@ bool CvCapture_MSMF::initStream(DWORD streamID, const MediaType& mt) _ComPtr CvCapture_MSMF::getDefaultSourceConfig(UINT32 num) { CV_Assert(num > 0); + const bool OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS = utils::getConfigurationParameterBool("OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS", true); _ComPtr res; if (FAILED(MFCreateAttributes(&res, num)) || - FAILED(res->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true)) || + FAILED(res->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, OPENCV_VIDEOIO_MSMF_ENABLE_HW_TRANSFORMS)) || FAILED(res->SetUINT32(MF_SOURCE_READER_DISABLE_DXVA, false)) || FAILED(res->SetUINT32(MF_SOURCE_READER_ENABLE_VIDEO_PROCESSING, false)) || FAILED(res->SetUINT32(MF_SOURCE_READER_ENABLE_ADVANCED_VIDEO_PROCESSING, true)) From 6797fd65a5de608d30273ace911c5de14ac7693e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 29 Jun 2021 20:25:22 +0000 Subject: [PATCH 027/128] dnn(test): update tests for OpenVINO 2021.4 --- modules/dnn/test/test_backends.cpp | 10 +++++----- modules/dnn/test/test_ie_models.cpp | 9 +++++++++ modules/dnn/test/test_torch_importer.cpp | 9 +++++++-- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/modules/dnn/test/test_backends.cpp b/modules/dnn/test/test_backends.cpp index b9958c107ebc..5426a11a3f51 100644 --- a/modules/dnn/test/test_backends.cpp +++ b/modules/dnn/test/test_backends.cpp @@ -196,7 +196,7 @@ TEST_P(DNNTestNetwork, MobileNet_SSD_Caffe) Mat inp = blobFromImage(sample, 1.0f / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); float diffScores = (target == DNN_TARGET_OPENCL_FP16 || target == DNN_TARGET_MYRIAD) ? 1.5e-2 : 0.0; float diffSquares = (target == DNN_TARGET_MYRIAD) ? 0.063 : 0.0; - float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 
0.252 : FLT_MIN; + float detectionConfThresh = (target == DNN_TARGET_MYRIAD) ? 0.262 : FLT_MIN; processNet("dnn/MobileNetSSD_deploy.caffemodel", "dnn/MobileNetSSD_deploy.prototxt", inp, "detection_out", "", diffScores, diffSquares, detectionConfThresh); expectNoFallbacksFromIE(net); @@ -301,8 +301,8 @@ TEST_P(DNNTestNetwork, OpenPose_pose_coco) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif - const float l1 = (target == DNN_TARGET_MYRIAD) ? 0.0056 : 0.0; - const float lInf = (target == DNN_TARGET_MYRIAD) ? 0.072 : 0.0; + const float l1 = (target == DNN_TARGET_MYRIAD) ? 0.009 : 0.0; + const float lInf = (target == DNN_TARGET_MYRIAD) ? 0.09 : 0.0; processNet("dnn/openpose_pose_coco.caffemodel", "dnn/openpose_pose_coco.prototxt", Size(46, 46), "", "", l1, lInf); expectNoFallbacksFromIE(net); @@ -321,8 +321,8 @@ TEST_P(DNNTestNetwork, OpenPose_pose_mpi) #endif // output range: [-0.001, 0.97] - const float l1 = (target == DNN_TARGET_MYRIAD) ? 0.012 : 0.0; - const float lInf = (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 0.16 : 0.0; + const float l1 = (target == DNN_TARGET_MYRIAD) ? 0.02 : 0.0; + const float lInf = (target == DNN_TARGET_MYRIAD || target == DNN_TARGET_OPENCL_FP16) ? 
0.2 : 0.0; processNet("dnn/openpose_pose_mpi.caffemodel", "dnn/openpose_pose_mpi.prototxt", Size(46, 46), "", "", l1, lInf); expectNoFallbacksFromIE(net); diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index 2ba7d80f5865..da6cbd6fbc2f 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -288,6 +288,15 @@ TEST_P(DNNTestOpenVINO, models) ASSERT_FALSE(backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) << "Inference Engine backend is required"; +#if INF_ENGINE_VER_MAJOR_EQ(2021040000) + if (targetId == DNN_TARGET_MYRIAD && ( + modelName == "person-detection-retail-0013" || // ncDeviceOpen:1013 Failed to find booted device after boot + modelName == "age-gender-recognition-retail-0013" // ncDeviceOpen:1013 Failed to find booted device after boot + ) + ) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + #if INF_ENGINE_VER_MAJOR_GE(2020020000) if (targetId == DNN_TARGET_MYRIAD && backendId == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) { diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp index 8738e5e25cc4..7316a0685630 100644 --- a/modules/dnn/test/test_torch_importer.cpp +++ b/modules/dnn/test/test_torch_importer.cpp @@ -254,9 +254,14 @@ TEST_P(Test_Torch_layers, net_padding) TEST_P(Test_Torch_layers, net_non_spatial) { -#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2021030000) +#if defined(INF_ENGINE_RELEASE) && ( \ + INF_ENGINE_VER_MAJOR_EQ(2021030000) || \ + INF_ENGINE_VER_MAJOR_EQ(2021040000) \ +) if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) - applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // crash + // 2021.3: crash + // 2021.4: [ GENERAL_ERROR ] AssertionFailed: !out.networkInputs.empty() + 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); // exception if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) From f2057ce1ab7d92f2deb4de29e7a81f401965a127 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 29 Jun 2021 21:55:18 +0000 Subject: [PATCH 028/128] dnn(ie): replace deprecated calls --- modules/dnn/src/ie_ngraph.cpp | 51 +++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/modules/dnn/src/ie_ngraph.cpp b/modules/dnn/src/ie_ngraph.cpp index 3b87243717a4..e6c219f13e5a 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -654,7 +654,11 @@ void InfEngineNgraphNet::initPlugin(InferenceEngine::CNNNetwork& net) try { InferenceEngine::IExtensionPtr extension = +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2021_4) + std::make_shared(libName); +#else InferenceEngine::make_so_pointer(libName); +#endif ie.AddExtension(extension, "CPU"); CV_LOG_INFO(NULL, "DNN-IE: Loaded extension plugin: " << libName); @@ -1002,35 +1006,54 @@ void InfEngineNgraphNet::forward(const std::vector >& outBlo reqWrapper->req.SetInput(inpBlobs); reqWrapper->req.SetOutput(outBlobs); +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2021_4) + InferenceEngine::InferRequest infRequest = reqWrapper->req; + NgraphReqWrapper* wrapperPtr = reqWrapper.get(); + CV_Assert(wrapperPtr && "Internal error"); +#else InferenceEngine::IInferRequest::Ptr infRequestPtr = reqWrapper->req; - infRequestPtr->SetUserData(reqWrapper.get(), 0); + CV_Assert(infRequestPtr); + InferenceEngine::IInferRequest& infRequest = *infRequestPtr.get(); + infRequest.SetUserData(reqWrapper.get(), 0); +#endif - infRequestPtr->SetCompletionCallback( - [](InferenceEngine::IInferRequest::Ptr request, InferenceEngine::StatusCode 
status) +#if INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2021_4) + // do NOT capture 'reqWrapper' (smart ptr) in the lambda callback + infRequest.SetCompletionCallback>( + [wrapperPtr](InferenceEngine::InferRequest /*request*/, InferenceEngine::StatusCode status) +#else + infRequest.SetCompletionCallback( + [](InferenceEngine::IInferRequest::Ptr requestPtr, InferenceEngine::StatusCode status) +#endif { CV_LOG_DEBUG(NULL, "DNN(nGraph): completionCallback(" << (int)status << ")"); +#if !INF_ENGINE_VER_MAJOR_GE(INF_ENGINE_RELEASE_2021_4) + CV_Assert(requestPtr); + InferenceEngine::IInferRequest& request = *requestPtr.get(); - NgraphReqWrapper* wrapper; - request->GetUserData((void**)&wrapper, 0); - CV_Assert(wrapper && "Internal error"); + NgraphReqWrapper* wrapperPtr; + request.GetUserData((void**)&wrapperPtr, 0); + CV_Assert(wrapperPtr && "Internal error"); +#endif + NgraphReqWrapper& wrapper = *wrapperPtr; size_t processedOutputs = 0; try { - for (; processedOutputs < wrapper->outProms.size(); ++processedOutputs) + for (; processedOutputs < wrapper.outProms.size(); ++processedOutputs) { - const std::string& name = wrapper->outsNames[processedOutputs]; - Mat m = ngraphBlobToMat(wrapper->req.GetBlob(name)); + const std::string& name = wrapper.outsNames[processedOutputs]; + Mat m = ngraphBlobToMat(wrapper.req.GetBlob(name)); try { CV_Assert(status == InferenceEngine::StatusCode::OK); - wrapper->outProms[processedOutputs].setValue(m.clone()); + wrapper.outProms[processedOutputs].setValue(m.clone()); } catch (...) { try { - wrapper->outProms[processedOutputs].setException(std::current_exception()); + wrapper.outProms[processedOutputs].setException(std::current_exception()); } catch(...) { CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation"); } @@ -1040,16 +1063,16 @@ void InfEngineNgraphNet::forward(const std::vector >& outBlo catch (...) 
{ std::exception_ptr e = std::current_exception(); - for (; processedOutputs < wrapper->outProms.size(); ++processedOutputs) + for (; processedOutputs < wrapper.outProms.size(); ++processedOutputs) { try { - wrapper->outProms[processedOutputs].setException(e); + wrapper.outProms[processedOutputs].setException(e); } catch(...) { CV_LOG_ERROR(NULL, "DNN: Exception occurred during async inference exception propagation"); } } } - wrapper->isReady = true; + wrapper.isReady = true; } ); } From 5b8c10f2f85eb54fe3c945d1b5c8d0e4344ddbbd Mon Sep 17 00:00:00 2001 From: SamFC10 Date: Wed, 30 Jun 2021 21:55:42 +0530 Subject: [PATCH 029/128] modified onnx importer to concat const input blobs --- modules/dnn/src/onnx/onnx_importer.cpp | 17 +++++++++++++++++ modules/dnn/test/test_onnx_importer.cpp | 1 + 2 files changed, 18 insertions(+) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 651a2ab33344..3668c9b51e5d 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -1792,6 +1792,23 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) addConstant(layerParams.name, concatenated[0]); return; } + else + { + for (int i = 0; i < node_proto.input_size(); ++i) + { + if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) + { + LayerParams constParams; + constParams.name = node_proto.input(i); + constParams.type = "Const"; + constParams.blobs.push_back(getBlob(node_proto, i)); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(constParams, proto); + } + } + } } else if (layer_type == "Resize") { diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index c4cb87717200..600f727d7db4 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -327,6 +327,7 @@ TEST_P(Test_ONNX_layers, Concatenation) if (target == DNN_TARGET_MYRIAD) 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); } testONNXModels("concatenation"); + testONNXModels("concat_const_blobs"); } TEST_P(Test_ONNX_layers, Eltwise3D) From 9fe49497bb8b3e9c5b2043b4c74d3af495a66927 Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Thu, 1 Jul 2021 12:36:19 +0300 Subject: [PATCH 030/128] Merge pull request #20284 from TolyaTalamanov:at/wrap-render G-API: Wrap render functionality to python * Wrap render Rect prim * Add all primitives and tests * Cover mosaic and image * Handle error in pyopencv_to(Prim) * Move Mosaic and Rect ctors wrappers to shadow file * Use GAPI_PROP_RW * Fix indent --- .../gapi/include/opencv2/gapi/own/exports.hpp | 2 + .../include/opencv2/gapi/render/render.hpp | 24 +- .../opencv2/gapi/render/render_types.hpp | 100 ++++---- .../gapi/misc/python/package/gapi/__init__.py | 45 +++- modules/gapi/misc/python/pyopencv_gapi.hpp | 116 ++++++--- modules/gapi/misc/python/python_bridge.hpp | 33 +-- modules/gapi/misc/python/shadow_gapi.hpp | 127 +++++----- .../gapi/misc/python/test/test_gapi_render.py | 227 ++++++++++++++++++ modules/gapi/src/api/render_ocv.cpp | 6 +- .../test/render/gapi_render_tests_ocv.cpp | 8 +- modules/python/src2/hdr_parser.py | 1 + 11 files changed, 518 insertions(+), 171 deletions(-) create mode 100644 modules/gapi/misc/python/test/test_gapi_render.py diff --git a/modules/gapi/include/opencv2/gapi/own/exports.hpp b/modules/gapi/include/opencv2/gapi/own/exports.hpp index 1978991b7518..c36f4003d0fb 100644 --- a/modules/gapi/include/opencv2/gapi/own/exports.hpp +++ b/modules/gapi/include/opencv2/gapi/own/exports.hpp @@ -13,11 +13,13 @@ # define GAPI_EXPORTS CV_EXPORTS /* special informative macros for wrapper generators */ # define GAPI_PROP CV_PROP +# define GAPI_PROP_RW CV_PROP_RW # define GAPI_WRAP CV_WRAP # define GAPI_EXPORTS_W_SIMPLE CV_EXPORTS_W_SIMPLE # define GAPI_EXPORTS_W CV_EXPORTS_W # else # define GAPI_PROP +# define GAPI_PROP_RW # define GAPI_WRAP # 
define GAPI_EXPORTS # define GAPI_EXPORTS_W_SIMPLE diff --git a/modules/gapi/include/opencv2/gapi/render/render.hpp b/modules/gapi/include/opencv2/gapi/render/render.hpp index 6bfe92388a12..537541222414 100644 --- a/modules/gapi/include/opencv2/gapi/render/render.hpp +++ b/modules/gapi/include/opencv2/gapi/render/render.hpp @@ -81,9 +81,9 @@ using GMatDesc2 = std::tuple; @param prims vector of drawing primitivies @param args graph compile time parameters */ -void GAPI_EXPORTS render(cv::Mat& bgr, - const Prims& prims, - cv::GCompileArgs&& args = {}); +void GAPI_EXPORTS_W render(cv::Mat& bgr, + const Prims& prims, + cv::GCompileArgs&& args = {}); /** @brief The function renders on two NV12 planes passed drawing primitivies @@ -92,10 +92,10 @@ void GAPI_EXPORTS render(cv::Mat& bgr, @param prims vector of drawing primitivies @param args graph compile time parameters */ -void GAPI_EXPORTS render(cv::Mat& y_plane, - cv::Mat& uv_plane, - const Prims& prims, - cv::GCompileArgs&& args = {}); +void GAPI_EXPORTS_W render(cv::Mat& y_plane, + cv::Mat& uv_plane, + const Prims& prims, + cv::GCompileArgs&& args = {}); /** @brief The function renders on the input media frame passed drawing primitivies @@ -139,7 +139,7 @@ Output image must be 8-bit unsigned planar 3-channel image @param src input image: 8-bit unsigned 3-channel image @ref CV_8UC3 @param prims draw primitives */ -GAPI_EXPORTS GMat render3ch(const GMat& src, const GArray& prims); +GAPI_EXPORTS_W GMat render3ch(const GMat& src, const GArray& prims); /** @brief Renders on two planes @@ -150,9 +150,9 @@ uv image must be 8-bit unsigned planar 2-channel image @ref CV_8UC2 @param uv input image: 8-bit unsigned 2-channel image @ref CV_8UC2 @param prims draw primitives */ -GAPI_EXPORTS GMat2 renderNV12(const GMat& y, - const GMat& uv, - const GArray& prims); +GAPI_EXPORTS_W GMat2 renderNV12(const GMat& y, + const GMat& uv, + const GArray& prims); /** @brief Renders Media Frame @@ -177,7 +177,7 @@ namespace render { namespace 
ocv { - GAPI_EXPORTS cv::gapi::GKernelPackage kernels(); + GAPI_EXPORTS_W cv::gapi::GKernelPackage kernels(); } // namespace ocv } // namespace render diff --git a/modules/gapi/include/opencv2/gapi/render/render_types.hpp b/modules/gapi/include/opencv2/gapi/render/render_types.hpp index ca403be361ee..6d70e3a877dd 100644 --- a/modules/gapi/include/opencv2/gapi/render/render_types.hpp +++ b/modules/gapi/include/opencv2/gapi/render/render_types.hpp @@ -41,7 +41,7 @@ struct freetype_font * * Parameters match cv::putText(). */ -struct Text +struct GAPI_EXPORTS_W_SIMPLE Text { /** * @brief Text constructor @@ -55,6 +55,7 @@ struct Text * @param lt_ The line type. See #LineTypes * @param bottom_left_origin_ When true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner */ + GAPI_WRAP Text(const std::string& text_, const cv::Point& org_, int ff_, @@ -68,17 +69,18 @@ struct Text { } + GAPI_WRAP Text() = default; /*@{*/ - std::string text; //!< The text string to be drawn - cv::Point org; //!< The bottom-left corner of the text string in the image - int ff; //!< The font type, see #HersheyFonts - double fs; //!< The font scale factor that is multiplied by the font-specific base size - cv::Scalar color; //!< The text color - int thick; //!< The thickness of the lines used to draw a text - int lt; //!< The line type. See #LineTypes - bool bottom_left_origin; //!< When true, the image data origin is at the bottom-left corner. 
Otherwise, it is at the top-left corner + GAPI_PROP_RW std::string text; //!< The text string to be drawn + GAPI_PROP_RW cv::Point org; //!< The bottom-left corner of the text string in the image + GAPI_PROP_RW int ff; //!< The font type, see #HersheyFonts + GAPI_PROP_RW double fs; //!< The font scale factor that is multiplied by the font-specific base size + GAPI_PROP_RW cv::Scalar color; //!< The text color + GAPI_PROP_RW int thick; //!< The thickness of the lines used to draw a text + GAPI_PROP_RW int lt; //!< The line type. See #LineTypes + GAPI_PROP_RW bool bottom_left_origin; //!< When true, the image data origin is at the bottom-left corner. Otherwise, it is at the top-left corner /*@{*/ }; @@ -122,7 +124,7 @@ struct FText * * Parameters match cv::rectangle(). */ -struct Rect +struct GAPI_EXPORTS_W_SIMPLE Rect { /** * @brief Rect constructor @@ -142,14 +144,15 @@ struct Rect { } + GAPI_WRAP Rect() = default; /*@{*/ - cv::Rect rect; //!< Coordinates of the rectangle - cv::Scalar color; //!< The rectangle color or brightness (grayscale image) - int thick; //!< The thickness of lines that make up the rectangle. Negative values, like #FILLED, mean that the function has to draw a filled rectangle - int lt; //!< The type of the line. See #LineTypes - int shift; //!< The number of fractional bits in the point coordinates + GAPI_PROP_RW cv::Rect rect; //!< Coordinates of the rectangle + GAPI_PROP_RW cv::Scalar color; //!< The rectangle color or brightness (grayscale image) + GAPI_PROP_RW int thick; //!< The thickness of lines that make up the rectangle. Negative values, like #FILLED, mean that the function has to draw a filled rectangle + GAPI_PROP_RW int lt; //!< The type of the line. See #LineTypes + GAPI_PROP_RW int shift; //!< The number of fractional bits in the point coordinates /*@{*/ }; @@ -158,7 +161,7 @@ struct Rect * * Parameters match cv::circle(). 
*/ -struct Circle +struct GAPI_EXPORTS_W_SIMPLE Circle { /** * @brief Circle constructor @@ -170,6 +173,7 @@ struct Circle * @param lt_ The Type of the circle boundary. See #LineTypes * @param shift_ The Number of fractional bits in the coordinates of the center and in the radius value */ + GAPI_WRAP Circle(const cv::Point& center_, int radius_, const cv::Scalar& color_, @@ -180,15 +184,16 @@ struct Circle { } + GAPI_WRAP Circle() = default; /*@{*/ - cv::Point center; //!< The center of the circle - int radius; //!< The radius of the circle - cv::Scalar color; //!< The color of the circle - int thick; //!< The thickness of the circle outline, if positive. Negative values, like #FILLED, mean that a filled circle is to be drawn - int lt; //!< The Type of the circle boundary. See #LineTypes - int shift; //!< The Number of fractional bits in the coordinates of the center and in the radius value + GAPI_PROP_RW cv::Point center; //!< The center of the circle + GAPI_PROP_RW int radius; //!< The radius of the circle + GAPI_PROP_RW cv::Scalar color; //!< The color of the circle + GAPI_PROP_RW int thick; //!< The thickness of the circle outline, if positive. Negative values, like #FILLED, mean that a filled circle is to be drawn + GAPI_PROP_RW int lt; //!< The Type of the circle boundary. See #LineTypes + GAPI_PROP_RW int shift; //!< The Number of fractional bits in the coordinates of the center and in the radius value /*@{*/ }; @@ -197,7 +202,7 @@ struct Circle * * Parameters match cv::line(). */ -struct Line +struct GAPI_EXPORTS_W_SIMPLE Line { /** * @brief Line constructor @@ -209,6 +214,7 @@ struct Line * @param lt_ The Type of the line. 
See #LineTypes * @param shift_ The number of fractional bits in the point coordinates */ + GAPI_WRAP Line(const cv::Point& pt1_, const cv::Point& pt2_, const cv::Scalar& color_, @@ -219,15 +225,16 @@ struct Line { } + GAPI_WRAP Line() = default; /*@{*/ - cv::Point pt1; //!< The first point of the line segment - cv::Point pt2; //!< The second point of the line segment - cv::Scalar color; //!< The line color - int thick; //!< The thickness of line - int lt; //!< The Type of the line. See #LineTypes - int shift; //!< The number of fractional bits in the point coordinates + GAPI_PROP_RW cv::Point pt1; //!< The first point of the line segment + GAPI_PROP_RW cv::Point pt2; //!< The second point of the line segment + GAPI_PROP_RW cv::Scalar color; //!< The line color + GAPI_PROP_RW int thick; //!< The thickness of line + GAPI_PROP_RW int lt; //!< The Type of the line. See #LineTypes + GAPI_PROP_RW int shift; //!< The number of fractional bits in the point coordinates /*@{*/ }; @@ -236,7 +243,7 @@ struct Line * * Mosaicing is a very basic method to obfuscate regions in the image. */ -struct Mosaic +struct GAPI_EXPORTS_W_SIMPLE Mosaic { /** * @brief Mosaic constructor @@ -252,12 +259,13 @@ struct Mosaic { } + GAPI_WRAP Mosaic() : cellSz(0), decim(0) {} /*@{*/ - cv::Rect mos; //!< Coordinates of the mosaic - int cellSz; //!< Cell size (same for X, Y) - int decim; //!< Decimation (0 stands for no decimation) + GAPI_PROP_RW cv::Rect mos; //!< Coordinates of the mosaic + GAPI_PROP_RW int cellSz; //!< Cell size (same for X, Y) + GAPI_PROP_RW int decim; //!< Decimation (0 stands for no decimation) /*@{*/ }; @@ -266,7 +274,7 @@ struct Mosaic * * Image is blended on a frame using the specified mask. 
*/ -struct Image +struct GAPI_EXPORTS_W_SIMPLE Image { /** * @brief Mosaic constructor @@ -275,6 +283,7 @@ struct Image * @param img_ Image to draw * @param alpha_ Alpha channel for image to draw (same size and number of channels) */ + GAPI_WRAP Image(const cv::Point& org_, const cv::Mat& img_, const cv::Mat& alpha_) : @@ -282,19 +291,20 @@ struct Image { } + GAPI_WRAP Image() = default; /*@{*/ - cv::Point org; //!< The bottom-left corner of the image - cv::Mat img; //!< Image to draw - cv::Mat alpha; //!< Alpha channel for image to draw (same size and number of channels) + GAPI_PROP_RW cv::Point org; //!< The bottom-left corner of the image + GAPI_PROP_RW cv::Mat img; //!< Image to draw + GAPI_PROP_RW cv::Mat alpha; //!< Alpha channel for image to draw (same size and number of channels) /*@{*/ }; /** * @brief This structure represents a polygon to draw. */ -struct Poly +struct GAPI_EXPORTS_W_SIMPLE Poly { /** * @brief Mosaic constructor @@ -305,6 +315,7 @@ struct Poly * @param lt_ The Type of the line. See #LineTypes * @param shift_ The number of fractional bits in the point coordinate */ + GAPI_WRAP Poly(const std::vector& points_, const cv::Scalar& color_, int thick_ = 1, @@ -314,14 +325,15 @@ struct Poly { } + GAPI_WRAP Poly() = default; /*@{*/ - std::vector points; //!< Points to connect - cv::Scalar color; //!< The line color - int thick; //!< The thickness of line - int lt; //!< The Type of the line. See #LineTypes - int shift; //!< The number of fractional bits in the point coordinate + GAPI_PROP_RW std::vector points; //!< Points to connect + GAPI_PROP_RW cv::Scalar color; //!< The line color + GAPI_PROP_RW int thick; //!< The thickness of line + GAPI_PROP_RW int lt; //!< The Type of the line. See #LineTypes + GAPI_PROP_RW int shift; //!< The number of fractional bits in the point coordinate /*@{*/ }; @@ -336,7 +348,7 @@ using Prim = util::variant , Poly >; -using Prims = std::vector; +using Prims = std::vector; //! 
@} gapi_draw_prims } // namespace draw diff --git a/modules/gapi/misc/python/package/gapi/__init__.py b/modules/gapi/misc/python/package/gapi/__init__.py index 587f641fd33a..dc874f0b0ca5 100644 --- a/modules/gapi/misc/python/package/gapi/__init__.py +++ b/modules/gapi/misc/python/package/gapi/__init__.py @@ -84,6 +84,10 @@ class Rect(): def __new__(self): return cv.GOpaqueT(cv.gapi.CV_RECT) + class Prim(): + def __new__(self): + return cv.GOpaqueT(cv.gapi.CV_DRAW_PRIM) + class Any(): def __new__(self): return cv.GOpaqueT(cv.gapi.CV_ANY) @@ -143,6 +147,10 @@ class GMat(): def __new__(self): return cv.GArrayT(cv.gapi.CV_GMAT) + class Prim(): + def __new__(self): + return cv.GArray(cv.gapi.CV_DRAW_PRIM) + class Any(): def __new__(self): return cv.GArray(cv.gapi.CV_ANY) @@ -164,6 +172,7 @@ def op(op_id, in_types, out_types): cv.GArray.Scalar: cv.gapi.CV_SCALAR, cv.GArray.Mat: cv.gapi.CV_MAT, cv.GArray.GMat: cv.gapi.CV_GMAT, + cv.GArray.Prim: cv.gapi.CV_DRAW_PRIM, cv.GArray.Any: cv.gapi.CV_ANY } @@ -179,22 +188,24 @@ def op(op_id, in_types, out_types): cv.GOpaque.Point2f: cv.gapi.CV_POINT2F, cv.GOpaque.Size: cv.gapi.CV_SIZE, cv.GOpaque.Rect: cv.gapi.CV_RECT, + cv.GOpaque.Prim: cv.gapi.CV_DRAW_PRIM, cv.GOpaque.Any: cv.gapi.CV_ANY } type2str = { - cv.gapi.CV_BOOL: 'cv.gapi.CV_BOOL' , - cv.gapi.CV_INT: 'cv.gapi.CV_INT' , - cv.gapi.CV_DOUBLE: 'cv.gapi.CV_DOUBLE' , - cv.gapi.CV_FLOAT: 'cv.gapi.CV_FLOAT' , - cv.gapi.CV_STRING: 'cv.gapi.CV_STRING' , - cv.gapi.CV_POINT: 'cv.gapi.CV_POINT' , - cv.gapi.CV_POINT2F: 'cv.gapi.CV_POINT2F' , - cv.gapi.CV_SIZE: 'cv.gapi.CV_SIZE', - cv.gapi.CV_RECT: 'cv.gapi.CV_RECT', - cv.gapi.CV_SCALAR: 'cv.gapi.CV_SCALAR', - cv.gapi.CV_MAT: 'cv.gapi.CV_MAT', - cv.gapi.CV_GMAT: 'cv.gapi.CV_GMAT' + cv.gapi.CV_BOOL: 'cv.gapi.CV_BOOL' , + cv.gapi.CV_INT: 'cv.gapi.CV_INT' , + cv.gapi.CV_DOUBLE: 'cv.gapi.CV_DOUBLE' , + cv.gapi.CV_FLOAT: 'cv.gapi.CV_FLOAT' , + cv.gapi.CV_STRING: 'cv.gapi.CV_STRING' , + cv.gapi.CV_POINT: 'cv.gapi.CV_POINT' , + 
cv.gapi.CV_POINT2F: 'cv.gapi.CV_POINT2F' , + cv.gapi.CV_SIZE: 'cv.gapi.CV_SIZE', + cv.gapi.CV_RECT: 'cv.gapi.CV_RECT', + cv.gapi.CV_SCALAR: 'cv.gapi.CV_SCALAR', + cv.gapi.CV_MAT: 'cv.gapi.CV_MAT', + cv.gapi.CV_GMAT: 'cv.gapi.CV_GMAT', + cv.gapi.CV_DRAW_PRIM: 'cv.gapi.CV_DRAW_PRIM' } # NB: Second lvl decorator takes class to decorate @@ -274,3 +285,13 @@ def kernel_with_params(cls): return cls return kernel_with_params + + +# FIXME: On the c++ side every class is placed in cv2 module. +cv.gapi.wip.draw.Rect = cv.gapi_wip_draw_Rect +cv.gapi.wip.draw.Text = cv.gapi_wip_draw_Text +cv.gapi.wip.draw.Circle = cv.gapi_wip_draw_Circle +cv.gapi.wip.draw.Line = cv.gapi_wip_draw_Line +cv.gapi.wip.draw.Mosaic = cv.gapi_wip_draw_Mosaic +cv.gapi.wip.draw.Image = cv.gapi_wip_draw_Image +cv.gapi.wip.draw.Poly = cv.gapi_wip_draw_Poly diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index 6cd79e4a7318..3c428dde6d82 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -43,6 +43,7 @@ using GArray_Rect = cv::GArray; using GArray_Scalar = cv::GArray; using GArray_Mat = cv::GArray; using GArray_GMat = cv::GArray; +using GArray_Prim = cv::GArray; // FIXME: Python wrapper generate code without namespace std, // so it cause error: "string wasn't declared" @@ -125,6 +126,65 @@ PyObject* pyopencv_from(const cv::detail::PyObjectHolder& v) return o; } +// #FIXME: Is it possible to implement pyopencv_from/pyopencv_to for generic +// cv::variant ? 
+template <> +PyObject* pyopencv_from(const cv::gapi::wip::draw::Prim& prim) +{ + switch (prim.index()) { + case cv::gapi::wip::draw::Prim::index_of(): + return pyopencv_from(cv::util::get(prim)); + case cv::gapi::wip::draw::Prim::index_of(): + return pyopencv_from(cv::util::get(prim)); + case cv::gapi::wip::draw::Prim::index_of(): + return pyopencv_from(cv::util::get(prim)); + case cv::gapi::wip::draw::Prim::index_of(): + return pyopencv_from(cv::util::get(prim)); + case cv::gapi::wip::draw::Prim::index_of(): + return pyopencv_from(cv::util::get(prim)); + case cv::gapi::wip::draw::Prim::index_of(): + return pyopencv_from(cv::util::get(prim)); + case cv::gapi::wip::draw::Prim::index_of(): + return pyopencv_from(cv::util::get(prim)); + } + + util::throw_error(std::logic_error("Unsupported draw primitive type")); +} + +template <> +PyObject* pyopencv_from(const cv::gapi::wip::draw::Prims& value) +{ + return pyopencv_from_generic_vec(value); +} + +template<> +bool pyopencv_to(PyObject* obj, cv::gapi::wip::draw::Prim& value, const ArgInfo& info) +{ +#define TRY_EXTRACT(Prim) \ + if (PyObject_TypeCheck(obj, reinterpret_cast(pyopencv_gapi_wip_draw_##Prim##_TypePtr))) \ + { \ + value = reinterpret_cast(obj)->v; \ + return true; \ + } \ + + TRY_EXTRACT(Rect) + TRY_EXTRACT(Text) + TRY_EXTRACT(Circle) + TRY_EXTRACT(Line) + TRY_EXTRACT(Mosaic) + TRY_EXTRACT(Image) + TRY_EXTRACT(Poly) + + failmsg("Unsupported primitive type"); + return false; +} + +template <> +bool pyopencv_to(PyObject* obj, cv::gapi::wip::draw::Prims& value, const ArgInfo& info) +{ + return pyopencv_to_generic_vec(obj, value, info); +} + template<> PyObject* pyopencv_from(const cv::GArg& value) { @@ -137,21 +197,21 @@ PyObject* pyopencv_from(const cv::GArg& value) #define UNSUPPORTED(T) case cv::detail::OpaqueKind::CV_##T: break switch (value.opaque_kind) { - HANDLE_CASE(BOOL, bool); - HANDLE_CASE(INT, int); + HANDLE_CASE(BOOL, bool); + HANDLE_CASE(INT, int); HANDLE_CASE(INT64, int64_t); - 
HANDLE_CASE(DOUBLE, double); - HANDLE_CASE(FLOAT, float); - HANDLE_CASE(STRING, std::string); - HANDLE_CASE(POINT, cv::Point); - HANDLE_CASE(POINT2F, cv::Point2f); - HANDLE_CASE(SIZE, cv::Size); - HANDLE_CASE(RECT, cv::Rect); - HANDLE_CASE(SCALAR, cv::Scalar); - HANDLE_CASE(MAT, cv::Mat); - HANDLE_CASE(UNKNOWN, cv::detail::PyObjectHolder); + HANDLE_CASE(DOUBLE, double); + HANDLE_CASE(FLOAT, float); + HANDLE_CASE(STRING, std::string); + HANDLE_CASE(POINT, cv::Point); + HANDLE_CASE(POINT2F, cv::Point2f); + HANDLE_CASE(SIZE, cv::Size); + HANDLE_CASE(RECT, cv::Rect); + HANDLE_CASE(SCALAR, cv::Scalar); + HANDLE_CASE(MAT, cv::Mat); + HANDLE_CASE(UNKNOWN, cv::detail::PyObjectHolder); + HANDLE_CASE(DRAW_PRIM, cv::gapi::wip::draw::Prim); UNSUPPORTED(UINT64); - UNSUPPORTED(DRAW_PRIM); #undef HANDLE_CASE #undef UNSUPPORTED } @@ -205,10 +265,10 @@ PyObject* pyopencv_from(const cv::detail::OpaqueRef& o) case cv::detail::OpaqueKind::CV_SIZE : return pyopencv_from(o.rref()); case cv::detail::OpaqueKind::CV_RECT : return pyopencv_from(o.rref()); case cv::detail::OpaqueKind::CV_UNKNOWN : return pyopencv_from(o.rref()); + case cv::detail::OpaqueKind::CV_DRAW_PRIM : return pyopencv_from(o.rref()); case cv::detail::OpaqueKind::CV_UINT64 : break; case cv::detail::OpaqueKind::CV_SCALAR : break; case cv::detail::OpaqueKind::CV_MAT : break; - case cv::detail::OpaqueKind::CV_DRAW_PRIM : break; } PyErr_SetString(PyExc_TypeError, "Unsupported GOpaque type"); @@ -233,8 +293,8 @@ PyObject* pyopencv_from(const cv::detail::VectorRef& v) case cv::detail::OpaqueKind::CV_SCALAR : return pyopencv_from_generic_vec(v.rref()); case cv::detail::OpaqueKind::CV_MAT : return pyopencv_from_generic_vec(v.rref()); case cv::detail::OpaqueKind::CV_UNKNOWN : return pyopencv_from_generic_vec(v.rref()); + case cv::detail::OpaqueKind::CV_DRAW_PRIM : return pyopencv_from_generic_vec(v.rref()); case cv::detail::OpaqueKind::CV_UINT64 : break; - case cv::detail::OpaqueKind::CV_DRAW_PRIM : break; } 
PyErr_SetString(PyExc_TypeError, "Unsupported GArray type"); @@ -394,21 +454,21 @@ static cv::detail::VectorRef extract_vector_ref(PyObject* from, cv::detail::Opaq #define UNSUPPORTED(T) case cv::detail::OpaqueKind::CV_##T: break switch (kind) { - HANDLE_CASE(BOOL, bool); - HANDLE_CASE(INT, int); - HANDLE_CASE(DOUBLE, double); - HANDLE_CASE(FLOAT, float); - HANDLE_CASE(STRING, std::string); - HANDLE_CASE(POINT, cv::Point); - HANDLE_CASE(POINT2F, cv::Point2f); - HANDLE_CASE(SIZE, cv::Size); - HANDLE_CASE(RECT, cv::Rect); - HANDLE_CASE(SCALAR, cv::Scalar); - HANDLE_CASE(MAT, cv::Mat); - HANDLE_CASE(UNKNOWN, cv::GArg); + HANDLE_CASE(BOOL, bool); + HANDLE_CASE(INT, int); + HANDLE_CASE(DOUBLE, double); + HANDLE_CASE(FLOAT, float); + HANDLE_CASE(STRING, std::string); + HANDLE_CASE(POINT, cv::Point); + HANDLE_CASE(POINT2F, cv::Point2f); + HANDLE_CASE(SIZE, cv::Size); + HANDLE_CASE(RECT, cv::Rect); + HANDLE_CASE(SCALAR, cv::Scalar); + HANDLE_CASE(MAT, cv::Mat); + HANDLE_CASE(UNKNOWN, cv::GArg); + HANDLE_CASE(DRAW_PRIM, cv::gapi::wip::draw::Prim); UNSUPPORTED(UINT64); UNSUPPORTED(INT64); - UNSUPPORTED(DRAW_PRIM); #undef HANDLE_CASE #undef UNSUPPORTED } diff --git a/modules/gapi/misc/python/python_bridge.hpp b/modules/gapi/misc/python/python_bridge.hpp index b212babe4599..11d17287308e 100644 --- a/modules/gapi/misc/python/python_bridge.hpp +++ b/modules/gapi/misc/python/python_bridge.hpp @@ -10,6 +10,7 @@ #include #include #include +#include // Prim #define ID(T, E) T #define ID_(T, E) ID(T, E), @@ -24,21 +25,24 @@ GAPI_Assert(false && "Unsupported type"); \ } +using cv::gapi::wip::draw::Prim; + #define GARRAY_TYPE_LIST_G(G, G2) \ -WRAP_ARGS(bool , cv::gapi::ArgType::CV_BOOL, G) \ -WRAP_ARGS(int , cv::gapi::ArgType::CV_INT, G) \ -WRAP_ARGS(int64_t , cv::gapi::ArgType::CV_INT64, G) \ -WRAP_ARGS(double , cv::gapi::ArgType::CV_DOUBLE, G) \ -WRAP_ARGS(float , cv::gapi::ArgType::CV_FLOAT, G) \ -WRAP_ARGS(std::string , cv::gapi::ArgType::CV_STRING, G) \ -WRAP_ARGS(cv::Point , 
cv::gapi::ArgType::CV_POINT, G) \ -WRAP_ARGS(cv::Point2f , cv::gapi::ArgType::CV_POINT2F, G) \ -WRAP_ARGS(cv::Size , cv::gapi::ArgType::CV_SIZE, G) \ -WRAP_ARGS(cv::Rect , cv::gapi::ArgType::CV_RECT, G) \ -WRAP_ARGS(cv::Scalar , cv::gapi::ArgType::CV_SCALAR, G) \ -WRAP_ARGS(cv::Mat , cv::gapi::ArgType::CV_MAT, G) \ -WRAP_ARGS(cv::GArg , cv::gapi::ArgType::CV_ANY, G) \ -WRAP_ARGS(cv::GMat , cv::gapi::ArgType::CV_GMAT, G2) \ +WRAP_ARGS(bool , cv::gapi::ArgType::CV_BOOL, G) \ +WRAP_ARGS(int , cv::gapi::ArgType::CV_INT, G) \ +WRAP_ARGS(int64_t , cv::gapi::ArgType::CV_INT64, G) \ +WRAP_ARGS(double , cv::gapi::ArgType::CV_DOUBLE, G) \ +WRAP_ARGS(float , cv::gapi::ArgType::CV_FLOAT, G) \ +WRAP_ARGS(std::string , cv::gapi::ArgType::CV_STRING, G) \ +WRAP_ARGS(cv::Point , cv::gapi::ArgType::CV_POINT, G) \ +WRAP_ARGS(cv::Point2f , cv::gapi::ArgType::CV_POINT2F, G) \ +WRAP_ARGS(cv::Size , cv::gapi::ArgType::CV_SIZE, G) \ +WRAP_ARGS(cv::Rect , cv::gapi::ArgType::CV_RECT, G) \ +WRAP_ARGS(cv::Scalar , cv::gapi::ArgType::CV_SCALAR, G) \ +WRAP_ARGS(cv::Mat , cv::gapi::ArgType::CV_MAT, G) \ +WRAP_ARGS(Prim , cv::gapi::ArgType::CV_DRAW_PRIM, G) \ +WRAP_ARGS(cv::GArg , cv::gapi::ArgType::CV_ANY, G) \ +WRAP_ARGS(cv::GMat , cv::gapi::ArgType::CV_GMAT, G2) \ #define GOPAQUE_TYPE_LIST_G(G, G2) \ WRAP_ARGS(bool , cv::gapi::ArgType::CV_BOOL, G) \ @@ -71,6 +75,7 @@ enum ArgType { CV_SCALAR, CV_MAT, CV_GMAT, + CV_DRAW_PRIM, CV_ANY, }; diff --git a/modules/gapi/misc/python/shadow_gapi.hpp b/modules/gapi/misc/python/shadow_gapi.hpp index e777aa5d934b..41d0f1973223 100644 --- a/modules/gapi/misc/python/shadow_gapi.hpp +++ b/modules/gapi/misc/python/shadow_gapi.hpp @@ -3,58 +3,77 @@ namespace cv { - struct GAPI_EXPORTS_W_SIMPLE GCompileArg { - GAPI_WRAP GCompileArg(gapi::GKernelPackage pkg); - GAPI_WRAP GCompileArg(gapi::GNetPackage pkg); - }; - - class GAPI_EXPORTS_W_SIMPLE GInferInputs - { - public: - GAPI_WRAP GInferInputs(); - GAPI_WRAP GInferInputs& setInput(const std::string& name, const 
cv::GMat& value); - GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GFrame& value); - }; - - class GAPI_EXPORTS_W_SIMPLE GInferListInputs - { - public: - GAPI_WRAP GInferListInputs(); - GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); - GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); - }; - - class GAPI_EXPORTS_W_SIMPLE GInferOutputs - { - public: - GAPI_WRAP GInferOutputs(); - GAPI_WRAP cv::GMat at(const std::string& name); - }; - - class GAPI_EXPORTS_W_SIMPLE GInferListOutputs - { - public: - GAPI_WRAP GInferListOutputs(); - GAPI_WRAP cv::GArray at(const std::string& name); - }; - - namespace detail - { - gapi::GNetParam GAPI_EXPORTS_W strip(gapi::ie::PyParams params); - } // namespace detail - - namespace gapi - { - namespace streaming - { - // FIXME: Extend to work with an arbitrary G-type. - cv::GOpaque GAPI_EXPORTS_W timestamp(cv::GMat); - cv::GOpaque GAPI_EXPORTS_W seqNo(cv::GMat); - cv::GOpaque GAPI_EXPORTS_W seq_id(cv::GMat); - } // namespace streaming - namespace wip - { - class GAPI_EXPORTS_W IStreamSource { }; - } // namespace wip - } // namespace gapi +struct GAPI_EXPORTS_W_SIMPLE GCompileArg { + GAPI_WRAP GCompileArg(gapi::GKernelPackage pkg); + GAPI_WRAP GCompileArg(gapi::GNetPackage pkg); +}; + +class GAPI_EXPORTS_W_SIMPLE GInferInputs +{ +public: + GAPI_WRAP GInferInputs(); + GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GMat& value); + GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GFrame& value); +}; + +class GAPI_EXPORTS_W_SIMPLE GInferListInputs +{ +public: + GAPI_WRAP GInferListInputs(); + GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); + GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); +}; + +class GAPI_EXPORTS_W_SIMPLE GInferOutputs +{ +public: + GAPI_WRAP GInferOutputs(); + GAPI_WRAP cv::GMat at(const std::string& name); +}; + 
+class GAPI_EXPORTS_W_SIMPLE GInferListOutputs +{ +public: + GAPI_WRAP GInferListOutputs(); + GAPI_WRAP cv::GArray at(const std::string& name); +}; + +namespace gapi +{ +namespace wip +{ +class GAPI_EXPORTS_W IStreamSource { }; +namespace draw +{ + // NB: These render primitives are partially wrapped in shadow file + // because cv::Rect conflicts with cv::gapi::wip::draw::Rect in python generator + // and cv::Rect2i breaks standalone mode. + struct Rect + { + GAPI_WRAP Rect(const cv::Rect2i& rect_, + const cv::Scalar& color_, + int thick_ = 1, + int lt_ = 8, + int shift_ = 0); + }; + + struct Mosaic + { + GAPI_WRAP Mosaic(const cv::Rect2i& mos_, int cellSz_, int decim_); + }; +} // namespace draw +} // namespace wip +namespace streaming +{ + // FIXME: Extend to work with an arbitrary G-type. + cv::GOpaque GAPI_EXPORTS_W timestamp(cv::GMat); + cv::GOpaque GAPI_EXPORTS_W seqNo(cv::GMat); + cv::GOpaque GAPI_EXPORTS_W seq_id(cv::GMat); +} // namespace streaming +} // namespace gapi + +namespace detail +{ + gapi::GNetParam GAPI_EXPORTS_W strip(gapi::ie::PyParams params); +} // namespace detail } // namespace cv diff --git a/modules/gapi/misc/python/test/test_gapi_render.py b/modules/gapi/misc/python/test/test_gapi_render.py new file mode 100644 index 000000000000..70601a72e57d --- /dev/null +++ b/modules/gapi/misc/python/test/test_gapi_render.py @@ -0,0 +1,227 @@ +#!/usr/bin/env python + +import numpy as np +import cv2 as cv +import os +import sys +import unittest + +from tests_common import NewOpenCVTests + +try: + + if sys.version_info[:2] < (3, 0): + raise unittest.SkipTest('Python 2.x is not supported') + + # FIXME: FText isn't supported yet. + class gapi_render_test(NewOpenCVTests): + def __init__(self, *args): + super().__init__(*args) + + self.size = (300, 300, 3) + + # Rect + self.rect = (30, 30, 50, 50) + self.rcolor = (0, 255, 0) + self.rlt = cv.LINE_4 + self.rthick = 2 + self.rshift = 3 + + # Text + self.text = 'Hello, world!' 
+ self.org = (100, 100) + self.ff = cv.FONT_HERSHEY_SIMPLEX + self.fs = 1.0 + self.tthick = 2 + self.tlt = cv.LINE_8 + self.tcolor = (255, 255, 255) + self.blo = False + + # Circle + self.center = (200, 200) + self.radius = 200 + self.ccolor = (255, 255, 0) + self.cthick = 2 + self.clt = cv.LINE_4 + self.cshift = 1 + + # Line + self.pt1 = (50, 50) + self.pt2 = (200, 200) + self.lcolor = (0, 255, 128) + self.lthick = 5 + self.llt = cv.LINE_8 + self.lshift = 2 + + # Poly + self.pts = [(50, 100), (100, 200), (25, 250)] + self.pcolor = (0, 0, 255) + self.pthick = 3 + self.plt = cv.LINE_4 + self.pshift = 1 + + # Image + self.iorg = (150, 150) + img_path = self.find_file('cv/face/david2.jpg', [os.environ.get('OPENCV_TEST_DATA_PATH')]) + self.img = cv.resize(cv.imread(img_path), (50, 50)) + self.alpha = np.full(self.img.shape[:2], 0.8, dtype=np.float32) + + # Mosaic + self.mos = (100, 100, 100, 100) + self.cell_sz = 25 + self.decim = 0 + + # Render primitives + self.prims = [cv.gapi.wip.draw.Rect(self.rect, self.rcolor, self.rthick, self.rlt, self.rshift), + cv.gapi.wip.draw.Text(self.text, self.org, self.ff, self.fs, self.tcolor, self.tthick, self.tlt, self.blo), + cv.gapi.wip.draw.Circle(self.center, self.radius, self.ccolor, self.cthick, self.clt, self.cshift), + cv.gapi.wip.draw.Line(self.pt1, self.pt2, self.lcolor, self.lthick, self.llt, self.lshift), + cv.gapi.wip.draw.Mosaic(self.mos, self.cell_sz, self.decim), + cv.gapi.wip.draw.Image(self.iorg, self.img, self.alpha), + cv.gapi.wip.draw.Poly(self.pts, self.pcolor, self.pthick, self.plt, self.pshift)] + + def cvt_nv12_to_yuv(self, y, uv): + h,w,_ = uv.shape + upsample_uv = cv.resize(uv, (h * 2, w * 2)) + return cv.merge([y, upsample_uv]) + + def cvt_yuv_to_nv12(self, yuv, y_out, uv_out): + chs = cv.split(yuv, [y_out, None, None]) + uv = cv.merge([chs[1], chs[2]]) + uv_out = cv.resize(uv, (uv.shape[0] // 2, uv.shape[1] // 2), dst=uv_out) + return y_out, uv_out + + def cvt_bgr_to_yuv_color(self, bgr): + y = bgr[2] * 
0.299000 + bgr[1] * 0.587000 + bgr[0] * 0.114000; + u = bgr[2] * -0.168736 + bgr[1] * -0.331264 + bgr[0] * 0.500000 + 128; + v = bgr[2] * 0.500000 + bgr[1] * -0.418688 + bgr[0] * -0.081312 + 128; + return (y, u, v) + + def blend_img(self, background, org, img, alpha): + x, y = org + h, w, _ = img.shape + roi_img = background[x:x+w, y:y+h, :] + img32f_w = cv.merge([alpha] * 3).astype(np.float32) + roi32f_w = np.full(roi_img.shape, 1.0, dtype=np.float32) + roi32f_w -= img32f_w + img32f = (img / 255).astype(np.float32) + roi32f = (roi_img / 255).astype(np.float32) + cv.multiply(img32f, img32f_w, dst=img32f) + cv.multiply(roi32f, roi32f_w, dst=roi32f) + roi32f += img32f + roi_img[...] = np.round(roi32f * 255) + + # This is quite naive implementations used as a simple reference + # doesn't consider corner cases. + def draw_mosaic(self, img, mos, cell_sz, decim): + x,y,w,h = mos + mosaic_area = img[x:x+w, y:y+h, :] + for i in range(0, mosaic_area.shape[0], cell_sz): + for j in range(0, mosaic_area.shape[1], cell_sz): + cell_roi = mosaic_area[j:j+cell_sz, i:i+cell_sz, :] + s0, s1, s2 = cv.mean(cell_roi)[:3] + mosaic_area[j:j+cell_sz, i:i+cell_sz] = (round(s0), round(s1), round(s2)) + + def render_primitives_bgr_ref(self, img): + cv.rectangle(img, self.rect, self.rcolor, self.rthick, self.rlt, self.rshift) + cv.putText(img, self.text, self.org, self.ff, self.fs, self.tcolor, self.tthick, self.tlt, self.blo) + cv.circle(img, self.center, self.radius, self.ccolor, self.cthick, self.clt, self.cshift) + cv.line(img, self.pt1, self.pt2, self.lcolor, self.lthick, self.llt, self.lshift) + cv.fillPoly(img, np.expand_dims(np.array([self.pts]), axis=0), self.pcolor, self.plt, self.pshift) + self.draw_mosaic(img, self.mos, self.cell_sz, self.decim) + self.blend_img(img, self.iorg, self.img, self.alpha) + + def render_primitives_nv12_ref(self, y_plane, uv_plane): + yuv = self.cvt_nv12_to_yuv(y_plane, uv_plane) + cv.rectangle(yuv, self.rect, self.cvt_bgr_to_yuv_color(self.rcolor), 
self.rthick, self.rlt, self.rshift) + cv.putText(yuv, self.text, self.org, self.ff, self.fs, self.cvt_bgr_to_yuv_color(self.tcolor), self.tthick, self.tlt, self.blo) + cv.circle(yuv, self.center, self.radius, self.cvt_bgr_to_yuv_color(self.ccolor), self.cthick, self.clt, self.cshift) + cv.line(yuv, self.pt1, self.pt2, self.cvt_bgr_to_yuv_color(self.lcolor), self.lthick, self.llt, self.lshift) + cv.fillPoly(yuv, np.expand_dims(np.array([self.pts]), axis=0), self.cvt_bgr_to_yuv_color(self.pcolor), self.plt, self.pshift) + self.draw_mosaic(yuv, self.mos, self.cell_sz, self.decim) + self.blend_img(yuv, self.iorg, cv.cvtColor(self.img, cv.COLOR_BGR2YUV), self.alpha) + self.cvt_yuv_to_nv12(yuv, y_plane, uv_plane) + + def test_render_primitives_on_bgr_graph(self): + expected = np.zeros(self.size, dtype=np.uint8) + actual = np.array(expected, copy=True) + + # OpenCV + self.render_primitives_bgr_ref(expected) + + # G-API + g_in = cv.GMat() + g_prims = cv.GArray.Prim() + g_out = cv.gapi.wip.draw.render3ch(g_in, g_prims) + + + comp = cv.GComputation(cv.GIn(g_in, g_prims), cv.GOut(g_out)) + actual = comp.apply(cv.gin(actual, self.prims)) + + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) + + def test_render_primitives_on_bgr_function(self): + expected = np.zeros(self.size, dtype=np.uint8) + actual = np.array(expected, copy=True) + + # OpenCV + self.render_primitives_bgr_ref(expected) + + # G-API + cv.gapi.wip.draw.render(actual, self.prims) + self.assertEqual(0.0, cv.norm(expected, actual, cv.NORM_INF)) + + def test_render_primitives_on_nv12_graph(self): + y_expected = np.zeros((self.size[0], self.size[1], 1), dtype=np.uint8) + uv_expected = np.zeros((self.size[0] // 2, self.size[1] // 2, 2), dtype=np.uint8) + + y_actual = np.array(y_expected, copy=True) + uv_actual = np.array(uv_expected, copy=True) + + # OpenCV + self.render_primitives_nv12_ref(y_expected, uv_expected) + + # G-API + g_y = cv.GMat() + g_uv = cv.GMat() + g_prims = cv.GArray.Prim() + g_out_y, 
g_out_uv = cv.gapi.wip.draw.renderNV12(g_y, g_uv, g_prims) + + comp = cv.GComputation(cv.GIn(g_y, g_uv, g_prims), cv.GOut(g_out_y, g_out_uv)) + y_actual, uv_actual = comp.apply(cv.gin(y_actual, uv_actual, self.prims)) + + self.assertEqual(0.0, cv.norm(y_expected, y_actual, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(uv_expected, uv_actual, cv.NORM_INF)) + + def test_render_primitives_on_nv12_function(self): + y_expected = np.zeros((self.size[0], self.size[1], 1), dtype=np.uint8) + uv_expected = np.zeros((self.size[0] // 2, self.size[1] // 2, 2), dtype=np.uint8) + + y_actual = np.array(y_expected, copy=True) + uv_actual = np.array(uv_expected, copy=True) + + # OpenCV + self.render_primitives_nv12_ref(y_expected, uv_expected) + + # G-API + cv.gapi.wip.draw.render(y_actual, uv_actual, self.prims) + + self.assertEqual(0.0, cv.norm(y_expected, y_actual, cv.NORM_INF)) + self.assertEqual(0.0, cv.norm(uv_expected, uv_actual, cv.NORM_INF)) + + +except unittest.SkipTest as e: + + message = str(e) + + class TestSkip(unittest.TestCase): + def setUp(self): + self.skipTest('Skip tests: ' + message) + + def test_skip(): + pass + + pass + +if __name__ == '__main__': + NewOpenCVTests.bootstrap() diff --git a/modules/gapi/src/api/render_ocv.cpp b/modules/gapi/src/api/render_ocv.cpp index 5ab2e1dd07c2..f1e9be4b4893 100644 --- a/modules/gapi/src/api/render_ocv.cpp +++ b/modules/gapi/src/api/render_ocv.cpp @@ -159,7 +159,7 @@ void drawPrimitivesOCV(cv::Mat& in, { const auto& rp = cv::util::get(p); const auto color = converter.cvtColor(rp.color); - cv::rectangle(in, rp.rect, color , rp.thick); + cv::rectangle(in, rp.rect, color, rp.thick, rp.lt, rp.shift); break; } @@ -198,7 +198,7 @@ void drawPrimitivesOCV(cv::Mat& in, { const auto& cp = cv::util::get(p); const auto color = converter.cvtColor(cp.color); - cv::circle(in, cp.center, cp.radius, color, cp.thick); + cv::circle(in, cp.center, cp.radius, color, cp.thick, cp.lt, cp.shift); break; } @@ -206,7 +206,7 @@ void 
drawPrimitivesOCV(cv::Mat& in, { const auto& lp = cv::util::get(p); const auto color = converter.cvtColor(lp.color); - cv::line(in, lp.pt1, lp.pt2, color, lp.thick); + cv::line(in, lp.pt1, lp.pt2, color, lp.thick, lp.lt, lp.shift); break; } diff --git a/modules/gapi/test/render/gapi_render_tests_ocv.cpp b/modules/gapi/test/render/gapi_render_tests_ocv.cpp index 010df5dff75b..95f695415609 100644 --- a/modules/gapi/test/render/gapi_render_tests_ocv.cpp +++ b/modules/gapi/test/render/gapi_render_tests_ocv.cpp @@ -639,8 +639,8 @@ INSTANTIATE_TEST_CASE_P(RenderBGROCVTestRectsImpl, RenderBGROCVTestRects, Values(cv::Rect(100, 100, 200, 200)), Values(cv::Scalar(100, 50, 150)), Values(2), - Values(LINE_8), - Values(0))); + Values(LINE_8, LINE_4), + Values(0, 1))); INSTANTIATE_TEST_CASE_P(RenderNV12OCVTestRectsImpl, RenderNV12OCVTestRects, Combine(Values(cv::Size(1280, 720)), @@ -673,8 +673,8 @@ INSTANTIATE_TEST_CASE_P(RenderNV12OCVTestCirclesImpl, RenderNV12OCVTestCircles, Values(10), Values(cv::Scalar(100, 50, 150)), Values(2), - Values(LINE_8), - Values(0))); + Values(LINE_8, LINE_4), + Values(0, 1))); INSTANTIATE_TEST_CASE_P(RenderMFrameOCVTestCirclesImpl, RenderMFrameOCVTestCircles, Combine(Values(cv::Size(1280, 720)), diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index 412d41a4df3f..1e0f9b3a954d 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -832,6 +832,7 @@ def parse(self, hname, wmode=True): ("GAPI_EXPORTS_W_SIMPLE","CV_EXPORTS_W_SIMPLE"), ("GAPI_WRAP", "CV_WRAP"), ("GAPI_PROP", "CV_PROP"), + ("GAPI_PROP_RW", "CV_PROP_RW"), ('defined(GAPI_STANDALONE)', '0'), ]) From d70053aba528ec5ddf8d6906109c7cc9310654a5 Mon Sep 17 00:00:00 2001 From: Maxim Pashchenkov Date: Thu, 1 Jul 2021 13:27:28 +0300 Subject: [PATCH 031/128] Merge pull request #20144 from mpashchenkov:mp/python-ge G-API: Python. Gaze Estimation sample. 
* GE pep8 * Added function description, wrapped copy * Applying review comments * One more change * Added gin * Rstrt bb --- .../misc/python/samples/gaze_estimation.py | 467 ++++++++++++++++++ 1 file changed, 467 insertions(+) create mode 100644 modules/gapi/misc/python/samples/gaze_estimation.py diff --git a/modules/gapi/misc/python/samples/gaze_estimation.py b/modules/gapi/misc/python/samples/gaze_estimation.py new file mode 100644 index 000000000000..db190f67bb99 --- /dev/null +++ b/modules/gapi/misc/python/samples/gaze_estimation.py @@ -0,0 +1,467 @@ +import argparse +import time +import numpy as np +import cv2 as cv + +# ------------------------Service operations------------------------ +def weight_path(model_path): + """ Get path of weights based on path to IR + + Params: + model_path: the string contains path to IR file + + Return: + Path to weights file + """ + assert model_path.endswith('.xml'), "Wrong topology path was provided" + return model_path[:-3] + 'bin' + + +def build_argparser(): + """ Parse arguments from command line + + Return: + Pack of arguments from command line + """ + parser = argparse.ArgumentParser(description='This is an OpenCV-based version of Gaze Estimation example') + + parser.add_argument('--input', + help='Path to the input video file') + parser.add_argument('--out', + help='Path to the output video file') + parser.add_argument('--facem', + default='face-detection-retail-0005.xml', + help='Path to OpenVINO face detection model (.xml)') + parser.add_argument('--faced', + default='CPU', + help='Target device for the face detection' + + '(e.g. CPU, GPU, VPU, ...)') + parser.add_argument('--headm', + default='head-pose-estimation-adas-0001.xml', + help='Path to OpenVINO head pose estimation model (.xml)') + parser.add_argument('--headd', + default='CPU', + help='Target device for the head pose estimation inference ' + + '(e.g. 
CPU, GPU, VPU, ...)') + parser.add_argument('--landm', + default='facial-landmarks-35-adas-0002.xml', + help='Path to OpenVINO landmarks detector model (.xml)') + parser.add_argument('--landd', + default='CPU', + help='Target device for the landmarks detector (e.g. CPU, GPU, VPU, ...)') + parser.add_argument('--gazem', + default='gaze-estimation-adas-0002.xml', + help='Path to OpenVINO gaze vector estimaiton model (.xml)') + parser.add_argument('--gazed', + default='CPU', + help='Target device for the gaze vector estimation inference ' + + '(e.g. CPU, GPU, VPU, ...)') + parser.add_argument('--eyem', + default='open-closed-eye-0001.xml', + help='Path to OpenVINO open closed eye model (.xml)') + parser.add_argument('--eyed', + default='CPU', + help='Target device for the eyes state inference (e.g. CPU, GPU, VPU, ...)') + return parser + + +# ------------------------Support functions for custom kernels------------------------ +def intersection(surface, rect): + """ Remove zone of out of bound from ROI + + Params: + surface: image bounds is rect representation (top left coordinates and width and height) + rect: region of interest is also has rect representation + + Return: + Modified ROI with correct bounds + """ + l_x = max(surface[0], rect[0]) + l_y = max(surface[1], rect[1]) + width = min(surface[0] + surface[2], rect[0] + rect[2]) - l_x + height = min(surface[1] + surface[3], rect[1] + rect[3]) - l_y + if width < 0 or height < 0: + return (0, 0, 0, 0) + return (l_x, l_y, width, height) + + +def process_landmarks(r_x, r_y, r_w, r_h, landmarks): + """ Create points from result of inference of facial-landmarks network and size of input image + + Params: + r_x: x coordinate of top left corner of input image + r_y: y coordinate of top left corner of input image + r_w: width of input image + r_h: height of input image + landmarks: result of inference of facial-landmarks network + + Return: + Array of landmarks points for one face + """ + lmrks = landmarks[0] + raw_x = 
lmrks[::2] * r_w + r_x + raw_y = lmrks[1::2] * r_h + r_y + return np.array([[int(x), int(y)] for x, y in zip(raw_x, raw_y)]) + + +def eye_box(p_1, p_2, scale=1.8): + """ Get bounding box of eye + + Params: + p_1: point of left edge of eye + p_2: point of right edge of eye + scale: change size of box with this value + + Return: + Bounding box of eye and its midpoint + """ + + size = np.linalg.norm(p_1 - p_2) + midpoint = (p_1 + p_2) / 2 + width = scale * size + height = width + p_x = midpoint[0] - (width / 2) + p_y = midpoint[1] - (height / 2) + return (int(p_x), int(p_y), int(width), int(height)), list(map(int, midpoint)) + + +# ------------------------Custom graph operations------------------------ +@cv.gapi.op('custom.GProcessPoses', + in_types=[cv.GArray.GMat, cv.GArray.GMat, cv.GArray.GMat], + out_types=[cv.GArray.GMat]) +class GProcessPoses: + @staticmethod + def outMeta(arr_desc0, arr_desc1, arr_desc2): + return cv.empty_array_desc() + + +@cv.gapi.op('custom.GParseEyes', + in_types=[cv.GArray.GMat, cv.GArray.Rect, cv.GOpaque.Size], + out_types=[cv.GArray.Rect, cv.GArray.Rect, cv.GArray.Point, cv.GArray.Point]) +class GParseEyes: + @staticmethod + def outMeta(arr_desc0, arr_desc1, arr_desc2): + return cv.empty_array_desc(), cv.empty_array_desc(), \ + cv.empty_array_desc(), cv.empty_array_desc() + + +@cv.gapi.op('custom.GGetStates', + in_types=[cv.GArray.GMat, cv.GArray.GMat], + out_types=[cv.GArray.Int, cv.GArray.Int]) +class GGetStates: + @staticmethod + def outMeta(arr_desc0, arr_desc1): + return cv.empty_array_desc(), cv.empty_array_desc() + + +# ------------------------Custom kernels------------------------ +@cv.gapi.kernel(GProcessPoses) +class GProcessPosesImpl: + """ Custom kernel. 
Processed poses of heads + """ + @staticmethod + def run(in_ys, in_ps, in_rs): + """ Сustom kernel executable code + + Params: + in_ys: yaw angle of head + in_ps: pitch angle of head + in_rs: roll angle of head + + Return: + Arrays with heads poses + """ + out_poses = [] + size = len(in_ys) + for i in range(size): + out_poses.append(np.array([in_ys[i][0], in_ps[i][0], in_rs[i][0]]).T) + return out_poses + + +@cv.gapi.kernel(GParseEyes) +class GParseEyesImpl: + """ Custom kernel. Get information about eyes + """ + @staticmethod + def run(in_landm_per_face, in_face_rcs, frame_size): + """ Сustom kernel executable code + + Params: + in_landm_per_face: landmarks from inference of facial-landmarks network for each face + in_face_rcs: bounding boxes for each face + frame_size: size of input image + + Return: + Arrays of ROI for left and right eyes, array of midpoints and + array of landmarks points + """ + left_eyes = [] + right_eyes = [] + midpoints = [] + lmarks = [] + num_faces = len(in_landm_per_face) + surface = (0, 0, *frame_size) + for i in range(num_faces): + rect = in_face_rcs[i] + points = process_landmarks(*rect, in_landm_per_face[i]) + for p in points: + lmarks.append(p) + size = int(len(in_landm_per_face[i][0]) / 2) + + rect, midpoint_l = eye_box(lmarks[0 + i * size], lmarks[1 + i * size]) + left_eyes.append(intersection(surface, rect)) + rect, midpoint_r = eye_box(lmarks[2 + i * size], lmarks[3 + i * size]) + right_eyes.append(intersection(surface, rect)) + midpoints += [midpoint_l, midpoint_r] + return left_eyes, right_eyes, midpoints, lmarks + + +@cv.gapi.kernel(GGetStates) +class GGetStatesImpl: + """ Custom kernel. 
Get state of eye - open or closed + """ + @staticmethod + def run(eyesl, eyesr): + """ Сustom kernel executable code + + Params: + eyesl: result of inference of open-closed-eye network for left eye + eyesr: result of inference of open-closed-eye network for right eye + + Return: + States of left eyes and states of right eyes + """ + size = len(eyesl) + out_l_st = [] + out_r_st = [] + for i in range(size): + for st in eyesl[i]: + out_l_st += [1 if st[0] < st[1] else 0] + for st in eyesr[i]: + out_r_st += [1 if st[0] < st[1] else 0] + return out_l_st, out_r_st + + +if __name__ == '__main__': + ARGUMENTS = build_argparser().parse_args() + + # ------------------------Demo's graph------------------------ + g_in = cv.GMat() + + # Detect faces + face_inputs = cv.GInferInputs() + face_inputs.setInput('data', g_in) + face_outputs = cv.gapi.infer('face-detection', face_inputs) + faces = face_outputs.at('detection_out') + + # Parse faces + sz = cv.gapi.streaming.size(g_in) + faces_rc = cv.gapi.parseSSD(faces, sz, 0.5, False, False) + + # Detect poses + head_inputs = cv.GInferInputs() + head_inputs.setInput('data', g_in) + face_outputs = cv.gapi.infer('head-pose', faces_rc, head_inputs) + angles_y = face_outputs.at('angle_y_fc') + angles_p = face_outputs.at('angle_p_fc') + angles_r = face_outputs.at('angle_r_fc') + + # Parse poses + heads_pos = GProcessPoses.on(angles_y, angles_p, angles_r) + + # Detect landmarks + landmark_inputs = cv.GInferInputs() + landmark_inputs.setInput('data', g_in) + landmark_outputs = cv.gapi.infer('facial-landmarks', faces_rc, + landmark_inputs) + landmark = landmark_outputs.at('align_fc3') + + # Parse landmarks + left_eyes, right_eyes, mids, lmarks = GParseEyes.on(landmark, faces_rc, sz) + + # Detect eyes + eyes_inputs = cv.GInferInputs() + eyes_inputs.setInput('input.1', g_in) + eyesl_outputs = cv.gapi.infer('open-closed-eye', left_eyes, eyes_inputs) + eyesr_outputs = cv.gapi.infer('open-closed-eye', right_eyes, eyes_inputs) + eyesl = 
eyesl_outputs.at('19') + eyesr = eyesr_outputs.at('19') + + # Process eyes states + l_eye_st, r_eye_st = GGetStates.on(eyesl, eyesr) + + # Gaze estimation + gaze_inputs = cv.GInferListInputs() + gaze_inputs.setInput('left_eye_image', left_eyes) + gaze_inputs.setInput('right_eye_image', right_eyes) + gaze_inputs.setInput('head_pose_angles', heads_pos) + gaze_outputs = cv.gapi.infer2('gaze-estimation', g_in, gaze_inputs) + gaze_vectors = gaze_outputs.at('gaze_vector') + + out = cv.gapi.copy(g_in) + # ------------------------End of graph------------------------ + + comp = cv.GComputation(cv.GIn(g_in), cv.GOut(out, + faces_rc, + left_eyes, + right_eyes, + gaze_vectors, + angles_y, + angles_p, + angles_r, + l_eye_st, + r_eye_st, + mids, + lmarks)) + + # Networks + face_net = cv.gapi.ie.params('face-detection', ARGUMENTS.facem, + weight_path(ARGUMENTS.facem), ARGUMENTS.faced) + head_pose_net = cv.gapi.ie.params('head-pose', ARGUMENTS.headm, + weight_path(ARGUMENTS.headm), ARGUMENTS.headd) + landmarks_net = cv.gapi.ie.params('facial-landmarks', ARGUMENTS.landm, + weight_path(ARGUMENTS.landm), ARGUMENTS.landd) + gaze_net = cv.gapi.ie.params('gaze-estimation', ARGUMENTS.gazem, + weight_path(ARGUMENTS.gazem), ARGUMENTS.gazed) + eye_net = cv.gapi.ie.params('open-closed-eye', ARGUMENTS.eyem, + weight_path(ARGUMENTS.eyem), ARGUMENTS.eyed) + + nets = cv.gapi.networks(face_net, head_pose_net, landmarks_net, gaze_net, eye_net) + + # Kernels pack + kernels = cv.gapi.kernels(GParseEyesImpl, GProcessPosesImpl, GGetStatesImpl) + + # ------------------------Execution part------------------------ + ccomp = comp.compileStreaming(args=cv.gapi.compile_args(kernels, nets)) + source = cv.gapi.wip.make_capture_src(ARGUMENTS.input) + ccomp.setSource(cv.gin(source)) + ccomp.start() + + frames = 0 + fps = 0 + print('Processing') + START_TIME = time.time() + + while True: + start_time_cycle = time.time() + has_frame, (oimg, + outr, + l_eyes, + r_eyes, + outg, + out_y, + out_p, + out_r, + 
out_st_l, + out_st_r, + out_mids, + outl) = ccomp.pull() + + if not has_frame: + break + + # Draw + GREEN = (0, 255, 0) + RED = (0, 0, 255) + WHITE = (255, 255, 255) + BLUE = (255, 0, 0) + PINK = (255, 0, 255) + YELLOW = (0, 255, 255) + + M_PI_180 = np.pi / 180 + M_PI_2 = np.pi / 2 + M_PI = np.pi + + FACES_SIZE = len(outr) + + for i, out_rect in enumerate(outr): + # Face box + cv.rectangle(oimg, out_rect, WHITE, 1) + rx, ry, rwidth, rheight = out_rect + + # Landmarks + lm_radius = int(0.01 * rwidth + 1) + lmsize = int(len(outl) / FACES_SIZE) + for j in range(lmsize): + cv.circle(oimg, outl[j + i * lmsize], lm_radius, YELLOW, -1) + + # Headposes + yaw = out_y[i] + pitch = out_p[i] + roll = out_r[i] + sin_y = np.sin(yaw[:] * M_PI_180) + sin_p = np.sin(pitch[:] * M_PI_180) + sin_r = np.sin(roll[:] * M_PI_180) + + cos_y = np.cos(yaw[:] * M_PI_180) + cos_p = np.cos(pitch[:] * M_PI_180) + cos_r = np.cos(roll[:] * M_PI_180) + + axis_length = 0.4 * rwidth + x_center = int(rx + rwidth / 2) + y_center = int(ry + rheight / 2) + + # center to right + cv.line(oimg, [x_center, y_center], + [int(x_center + axis_length * (cos_r * cos_y + sin_y * sin_p * sin_r)), + int(y_center + axis_length * cos_p * sin_r)], + RED, 2) + + # center to top + cv.line(oimg, [x_center, y_center], + [int(x_center + axis_length * (cos_r * sin_y * sin_p + cos_y * sin_r)), + int(y_center - axis_length * cos_p * cos_r)], + GREEN, 2) + + # center to forward + cv.line(oimg, [x_center, y_center], + [int(x_center + axis_length * sin_y * cos_p), + int(y_center + axis_length * sin_p)], + PINK, 2) + + scale_box = 0.002 * rwidth + cv.putText(oimg, "head pose: (y=%0.0f, p=%0.0f, r=%0.0f)" % + (np.round(yaw), np.round(pitch), np.round(roll)), + [int(rx), int(ry + rheight + 5 * rwidth / 100)], + cv.FONT_HERSHEY_PLAIN, scale_box * 2, WHITE, 1) + + # Eyes boxes + color_l = GREEN if out_st_l[i] else RED + cv.rectangle(oimg, l_eyes[i], color_l, 1) + color_r = GREEN if out_st_r[i] else RED + cv.rectangle(oimg, r_eyes[i], 
color_r, 1) + + # Gaze vectors + norm_gazes = np.linalg.norm(outg[i][0]) + gaze_vector = outg[i][0] / norm_gazes + + arrow_length = 0.4 * rwidth + gaze_arrow = [arrow_length * gaze_vector[0], -arrow_length * gaze_vector[1]] + left_arrow = [int(a+b) for a, b in zip(out_mids[0 + i * 2], gaze_arrow)] + right_arrow = [int(a+b) for a, b in zip(out_mids[1 + i * 2], gaze_arrow)] + if out_st_l[i]: + cv.arrowedLine(oimg, out_mids[0 + i * 2], left_arrow, BLUE, 2) + if out_st_r[i]: + cv.arrowedLine(oimg, out_mids[1 + i * 2], right_arrow, BLUE, 2) + + v0, v1, v2 = outg[i][0] + + gaze_angles = [180 / M_PI * (M_PI_2 + np.arctan2(v2, v0)), + 180 / M_PI * (M_PI_2 - np.arccos(v1 / norm_gazes))] + cv.putText(oimg, "gaze angles: (h=%0.0f, v=%0.0f)" % + (np.round(gaze_angles[0]), np.round(gaze_angles[1])), + [int(rx), int(ry + rheight + 12 * rwidth / 100)], + cv.FONT_HERSHEY_PLAIN, scale_box * 2, WHITE, 1) + + # Add FPS value to frame + cv.putText(oimg, "FPS: %0i" % (fps), [int(20), int(40)], + cv.FONT_HERSHEY_PLAIN, 2, RED, 2) + + # Show result + cv.imshow('Gaze Estimation', oimg) + + fps = int(1. 
/ (time.time() - start_time_cycle)) + frames += 1 + EXECUTION_TIME = time.time() - START_TIME + print('Execution successful') + print('Mean FPS is ', int(frames / EXECUTION_TIME)) From fc799191f4f19c523343c41a99ec9e6e4269da9e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 1 Jul 2021 13:48:11 +0000 Subject: [PATCH 032/128] gapi(ie): replace deprecated calls --- modules/gapi/src/backends/ie/giebackend/giewrapper.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp b/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp index ba0632d4f0f2..d4ec806e4846 100644 --- a/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp +++ b/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp @@ -124,7 +124,11 @@ IE::Core giewrap::getPlugin(const GIEParam& params) { { try { +#if INF_ENGINE_RELEASE >= 2021040000 + plugin.AddExtension(std::make_shared(extlib), params.device_id); +#else plugin.AddExtension(IE::make_so_pointer(extlib), params.device_id); +#endif CV_LOG_INFO(NULL, "DNN-IE: Loaded extension plugin: " << extlib); break; } From 0f24d4d2a14d8f79eaa4ca0710d7770aeeb833f2 Mon Sep 17 00:00:00 2001 From: APrigarina Date: Tue, 29 Jun 2021 22:15:37 +0300 Subject: [PATCH 033/128] fix samples --- samples/python/gaussian_mix.py | 2 +- samples/python/hist.py | 2 +- samples/python/morphology.py | 7 +++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/samples/python/gaussian_mix.py b/samples/python/gaussian_mix.py index 6a656647ddcf..4c1f86794cd6 100755 --- a/samples/python/gaussian_mix.py +++ b/samples/python/gaussian_mix.py @@ -28,7 +28,7 @@ def make_gaussians(cluster_n, img_size): return points, ref_distrs def draw_gaussain(img, mean, cov, color): - x, y = np.int32(mean) + x, y = mean w, u, _vt = cv.SVDecomp(cov) ang = np.arctan2(u[1, 0], u[0, 0])*(180/np.pi) s1, s2 = np.sqrt(w)*3.0 diff --git a/samples/python/hist.py b/samples/python/hist.py index 157d5ff0ba3e..8c1f4546a817 100755 --- 
a/samples/python/hist.py +++ b/samples/python/hist.py @@ -46,7 +46,7 @@ def hist_lines(im): im = cv.cvtColor(im,cv.COLOR_BGR2GRAY) hist_item = cv.calcHist([im],[0],None,[256],[0,256]) cv.normalize(hist_item,hist_item,0,255,cv.NORM_MINMAX) - hist=np.int32(np.around(hist_item)) + hist = np.int32(np.around(hist_item)) for x,y in enumerate(hist): cv.line(h,(x,0),(x,y[0]),(255,255,255)) y = np.flipud(h) diff --git a/samples/python/morphology.py b/samples/python/morphology.py index 9ecf5b0682e7..183f5e828815 100755 --- a/samples/python/morphology.py +++ b/samples/python/morphology.py @@ -50,8 +50,11 @@ def main(): cur_str_mode = str_modes.next() def update(dummy=None): - sz = cv.getTrackbarPos('op/size', 'morphology') - iters = cv.getTrackbarPos('iters', 'morphology') + try: # do not get trackbar position while trackbar is not created + sz = cv.getTrackbarPos('op/size', 'morphology') + iters = cv.getTrackbarPos('iters', 'morphology') + except: + return opers = cur_mode.split('/') if len(opers) > 1: sz = sz - 10 From 05f1939b0284c55f987b6caa92f34affd50dd454 Mon Sep 17 00:00:00 2001 From: Maxim Pashchenkov Date: Thu, 1 Jul 2021 22:06:35 +0300 Subject: [PATCH 034/128] Merge pull request #20298 from mpashchenkov:mp/python-desync G-API: Python. Desync. * Desync. GMat. 
* Alignment --- .../gapi/include/opencv2/gapi/gstreaming.hpp | 11 +- modules/gapi/misc/python/pyopencv_gapi.hpp | 112 ++++++++++++------ modules/gapi/misc/python/shadow_gapi.hpp | 31 ++--- .../misc/python/test/test_gapi_streaming.py | 68 ++++++++++- modules/gapi/src/compiler/gstreaming.cpp | 18 ++- modules/gapi/src/compiler/gstreaming_priv.hpp | 1 + .../gapi/src/executor/gstreamingexecutor.cpp | 78 ++++++++++++ .../gapi/src/executor/gstreamingexecutor.hpp | 3 + .../test/streaming/gapi_streaming_tests.cpp | 81 ++++++++++++- 9 files changed, 334 insertions(+), 69 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/gstreaming.hpp b/modules/gapi/include/opencv2/gapi/gstreaming.hpp index 47e103fd0ea7..50abe69f87b7 100644 --- a/modules/gapi/include/opencv2/gapi/gstreaming.hpp +++ b/modules/gapi/include/opencv2/gapi/gstreaming.hpp @@ -71,6 +71,15 @@ using GOptRunArgP = util::variant< >; using GOptRunArgsP = std::vector; +using GOptRunArg = util::variant< + optional, + optional, + optional, + optional, + optional +>; +using GOptRunArgs = std::vector; + namespace detail { template inline GOptRunArgP wrap_opt_arg(optional& arg) { @@ -255,7 +264,7 @@ class GAPI_EXPORTS_W_SIMPLE GStreamingCompiled // NB: Used from python /// @private -- Exclude this function from OpenCV documentation - GAPI_WRAP std::tuple pull(); + GAPI_WRAP std::tuple> pull(); /** * @brief Get some next available data from the pipeline. 
diff --git a/modules/gapi/misc/python/pyopencv_gapi.hpp b/modules/gapi/misc/python/pyopencv_gapi.hpp index 3c428dde6d82..d378a91b5fd6 100644 --- a/modules/gapi/misc/python/pyopencv_gapi.hpp +++ b/modules/gapi/misc/python/pyopencv_gapi.hpp @@ -131,7 +131,8 @@ PyObject* pyopencv_from(const cv::detail::PyObjectHolder& v) template <> PyObject* pyopencv_from(const cv::gapi::wip::draw::Prim& prim) { - switch (prim.index()) { + switch (prim.index()) + { case cv::gapi::wip::draw::Prim::index_of(): return pyopencv_from(cv::util::get(prim)); case cv::gapi::wip::draw::Prim::index_of(): @@ -319,40 +320,69 @@ PyObject* pyopencv_from(const GRunArg& v) return pyopencv_from(util::get(v)); } - PyErr_SetString(PyExc_TypeError, "Failed to unpack GRunArgs"); + PyErr_SetString(PyExc_TypeError, "Failed to unpack GRunArgs. Index of variant is unknown"); return NULL; } -template<> -PyObject* pyopencv_from(const GRunArgs& value) +template +PyObject* pyopencv_from(const cv::optional& opt) { - size_t i, n = value.size(); + if (!opt.has_value()) + { + Py_RETURN_NONE; + } + return pyopencv_from(*opt); +} - // NB: It doesn't make sense to return list with a single element - if (n == 1) +template <> +PyObject* pyopencv_from(const GOptRunArg& v) +{ + switch (v.index()) { - PyObject* item = pyopencv_from(value[0]); - if(!item) - { - return NULL; - } - return item; + case GOptRunArg::index_of>(): + return pyopencv_from(util::get>(v)); + + case GOptRunArg::index_of>(): + return pyopencv_from(util::get>(v)); + + case GOptRunArg::index_of>(): + return pyopencv_from(util::get>(v)); + + case GOptRunArg::index_of>(): + return pyopencv_from(util::get>(v)); } - PyObject* list = PyList_New(n); - for(i = 0; i < n; ++i) + PyErr_SetString(PyExc_TypeError, "Failed to unpack GOptRunArg. Index of variant is unknown"); + return NULL; +} + +template<> +PyObject* pyopencv_from(const GRunArgs& value) +{ + return value.size() == 1 ? 
pyopencv_from(value[0]) : pyopencv_from_generic_vec(value); +} + +template<> +PyObject* pyopencv_from(const GOptRunArgs& value) +{ + return value.size() == 1 ? pyopencv_from(value[0]) : pyopencv_from_generic_vec(value); +} + +// FIXME: cv::variant should be wrapped once for all types. +template <> +PyObject* pyopencv_from(const cv::util::variant& v) +{ + using RunArgs = cv::util::variant; + switch (v.index()) { - PyObject* item = pyopencv_from(value[i]); - if(!item) - { - Py_DECREF(list); - PyErr_SetString(PyExc_TypeError, "Failed to unpack GRunArgs"); - return NULL; - } - PyList_SetItem(list, i, item); + case RunArgs::index_of(): + return pyopencv_from(util::get(v)); + case RunArgs::index_of(): + return pyopencv_from(util::get(v)); } - return list; + PyErr_SetString(PyExc_TypeError, "Failed to recognize kind of RunArgs. Index of variant is unknown"); + return NULL; } template @@ -634,7 +664,8 @@ static cv::GRunArgs run_py_kernel(cv::detail::PyObjectHolder kernel, cv::detail::PyObjectHolder result( PyObject_CallObject(kernel.get(), args.get()), false); - if (PyErr_Occurred()) { + if (PyErr_Occurred()) + { PyErr_PrintEx(0); PyErr_Clear(); throw std::logic_error("Python kernel failed with error!"); @@ -717,8 +748,9 @@ static cv::GMetaArgs get_meta_args(PyObject* tuple) } static GMetaArgs run_py_meta(cv::detail::PyObjectHolder out_meta, - const cv::GMetaArgs &meta, - const cv::GArgs &gargs) { + const cv::GMetaArgs &meta, + const cv::GArgs &gargs) +{ PyGILState_STATE gstate; gstate = PyGILState_Ensure(); @@ -760,7 +792,8 @@ static GMetaArgs run_py_meta(cv::detail::PyObjectHolder out_meta, cv::detail::PyObjectHolder result( PyObject_CallObject(out_meta.get(), args.get()), false); - if (PyErr_Occurred()) { + if (PyErr_Occurred()) + { PyErr_PrintEx(0); PyErr_Clear(); throw std::logic_error("Python outMeta failed with error!"); @@ -792,21 +825,24 @@ static PyObject* pyopencv_cv_gapi_kernels(PyObject* , PyObject* py_args, PyObjec PyObject* user_kernel = 
PyTuple_GetItem(py_args, i); PyObject* id_obj = PyObject_GetAttrString(user_kernel, "id"); - if (!id_obj) { + if (!id_obj) + { PyErr_SetString(PyExc_TypeError, "Python kernel should contain id, please use cv.gapi.kernel to define kernel"); return NULL; } PyObject* out_meta = PyObject_GetAttrString(user_kernel, "outMeta"); - if (!out_meta) { + if (!out_meta) + { PyErr_SetString(PyExc_TypeError, "Python kernel should contain outMeta, please use cv.gapi.kernel to define kernel"); return NULL; } PyObject* run = PyObject_GetAttrString(user_kernel, "run"); - if (!run) { + if (!run) + { PyErr_SetString(PyExc_TypeError, "Python kernel should contain run, please use cv.gapi.kernel to define kernel"); return NULL; @@ -951,9 +987,12 @@ struct PyOpenCV_Converter> if (PyObject_TypeCheck(obj, reinterpret_cast(pyopencv_GArrayT_TypePtr))) { auto& array = reinterpret_cast(obj)->v; - try { + try + { value = cv::util::get>(array.arg()); - } catch (...) { + } + catch (...) + { return false; } return true; @@ -974,9 +1013,12 @@ struct PyOpenCV_Converter> if (PyObject_TypeCheck(obj, reinterpret_cast(pyopencv_GOpaqueT_TypePtr))) { auto& opaque = reinterpret_cast(obj)->v; - try { + try + { value = cv::util::get>(opaque.arg()); - } catch (...) { + } + catch (...) 
+ { return false; } return true; diff --git a/modules/gapi/misc/python/shadow_gapi.hpp b/modules/gapi/misc/python/shadow_gapi.hpp index 41d0f1973223..0b489dde0f55 100644 --- a/modules/gapi/misc/python/shadow_gapi.hpp +++ b/modules/gapi/misc/python/shadow_gapi.hpp @@ -3,39 +3,40 @@ namespace cv { -struct GAPI_EXPORTS_W_SIMPLE GCompileArg { - GAPI_WRAP GCompileArg(gapi::GKernelPackage pkg); - GAPI_WRAP GCompileArg(gapi::GNetPackage pkg); +struct GAPI_EXPORTS_W_SIMPLE GCompileArg +{ + GAPI_WRAP GCompileArg(gapi::GKernelPackage pkg); + GAPI_WRAP GCompileArg(gapi::GNetPackage pkg); }; class GAPI_EXPORTS_W_SIMPLE GInferInputs { public: - GAPI_WRAP GInferInputs(); - GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GMat& value); - GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GFrame& value); + GAPI_WRAP GInferInputs(); + GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GMat& value); + GAPI_WRAP GInferInputs& setInput(const std::string& name, const cv::GFrame& value); }; class GAPI_EXPORTS_W_SIMPLE GInferListInputs { public: - GAPI_WRAP GInferListInputs(); - GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); - GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); + GAPI_WRAP GInferListInputs(); + GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); + GAPI_WRAP GInferListInputs setInput(const std::string& name, const cv::GArray& value); }; class GAPI_EXPORTS_W_SIMPLE GInferOutputs { public: - GAPI_WRAP GInferOutputs(); - GAPI_WRAP cv::GMat at(const std::string& name); + GAPI_WRAP GInferOutputs(); + GAPI_WRAP cv::GMat at(const std::string& name); }; class GAPI_EXPORTS_W_SIMPLE GInferListOutputs { public: - GAPI_WRAP GInferListOutputs(); - GAPI_WRAP cv::GArray at(const std::string& name); + GAPI_WRAP GInferListOutputs(); + GAPI_WRAP cv::GArray at(const std::string& name); }; namespace gapi @@ -69,11 +70,13 @@ 
namespace streaming cv::GOpaque GAPI_EXPORTS_W timestamp(cv::GMat); cv::GOpaque GAPI_EXPORTS_W seqNo(cv::GMat); cv::GOpaque GAPI_EXPORTS_W seq_id(cv::GMat); + + GAPI_EXPORTS_W cv::GMat desync(const cv::GMat &g); } // namespace streaming } // namespace gapi namespace detail { - gapi::GNetParam GAPI_EXPORTS_W strip(gapi::ie::PyParams params); + gapi::GNetParam GAPI_EXPORTS_W strip(gapi::ie::PyParams params); } // namespace detail } // namespace cv diff --git a/modules/gapi/misc/python/test/test_gapi_streaming.py b/modules/gapi/misc/python/test/test_gapi_streaming.py index 4ea88878eeab..7ede1b5cf38d 100644 --- a/modules/gapi/misc/python/test/test_gapi_streaming.py +++ b/modules/gapi/misc/python/test/test_gapi_streaming.py @@ -5,16 +5,35 @@ import os import sys import unittest +import time from tests_common import NewOpenCVTests try: - if sys.version_info[:2] < (3, 0): raise unittest.SkipTest('Python 2.x is not supported') + @cv.gapi.op('custom.delay', in_types=[cv.GMat], out_types=[cv.GMat]) + class GDelay: + """Delay for 10 ms.""" + + @staticmethod + def outMeta(desc): + return desc + + + @cv.gapi.kernel(GDelay) + class GDelayImpl: + """Implementation for GDelay operation.""" + + @staticmethod + def run(img): + time.sleep(0.01) + return img + + class test_gapi_streaming(NewOpenCVTests): def test_image_input(self): @@ -148,7 +167,7 @@ def test_video_add(self): proc_num_frames += 1 if proc_num_frames == max_num_frames: - break; + break def test_video_good_features_to_track(self): @@ -242,6 +261,51 @@ def test_gapi_streaming_meta(self): if curr_frame_number == max_num_frames: break + def test_desync(self): + path = self.find_file('cv/video/768x576.avi', [os.environ['OPENCV_TEST_DATA_PATH']]) + + # G-API + g_in = cv.GMat() + g_out1 = cv.gapi.copy(g_in) + des = cv.gapi.streaming.desync(g_in) + g_out2 = GDelay.on(des) + + c = cv.GComputation(cv.GIn(g_in), cv.GOut(g_out1, g_out2)) + + kernels = cv.gapi.kernels(GDelayImpl) + ccomp = 
c.compileStreaming(args=cv.gapi.compile_args(kernels)) + source = cv.gapi.wip.make_capture_src(path) + ccomp.setSource(cv.gin(source)) + ccomp.start() + + # Assert + max_num_frames = 10 + proc_num_frames = 0 + + out_counter = 0 + desync_out_counter = 0 + none_counter = 0 + while True: + has_frame, (out1, out2) = ccomp.pull() + if not has_frame: + break + + if not out1 is None: + out_counter += 1 + if not out2 is None: + desync_out_counter += 1 + else: + none_counter += 1 + + proc_num_frames += 1 + if proc_num_frames == max_num_frames: + ccomp.stop() + break + + self.assertLess(0, proc_num_frames) + self.assertLess(desync_out_counter, out_counter) + self.assertLess(0, none_counter) + except unittest.SkipTest as e: diff --git a/modules/gapi/src/compiler/gstreaming.cpp b/modules/gapi/src/compiler/gstreaming.cpp index 3bdc0323b5c7..e45e77042755 100644 --- a/modules/gapi/src/compiler/gstreaming.cpp +++ b/modules/gapi/src/compiler/gstreaming.cpp @@ -75,6 +75,11 @@ bool cv::GStreamingCompiled::Priv::pull(cv::GOptRunArgsP &&outs) return m_exec->pull(std::move(outs)); } +std::tuple> cv::GStreamingCompiled::Priv::pull() +{ + return m_exec->pull(); +} + bool cv::GStreamingCompiled::Priv::try_pull(cv::GRunArgsP &&outs) { return m_exec->try_pull(std::move(outs)); @@ -123,18 +128,9 @@ bool cv::GStreamingCompiled::pull(cv::GRunArgsP &&outs) return m_priv->pull(std::move(outs)); } -std::tuple cv::GStreamingCompiled::pull() +std::tuple> cv::GStreamingCompiled::pull() { - GRunArgs run_args; - GRunArgsP outs; - const auto& out_info = m_priv->outInfo(); - run_args.reserve(out_info.size()); - outs.reserve(out_info.size()); - - cv::detail::constructGraphOutputs(m_priv->outInfo(), run_args, outs); - - bool is_over = m_priv->pull(std::move(outs)); - return std::make_tuple(is_over, run_args); + return m_priv->pull(); } bool cv::GStreamingCompiled::pull(cv::GOptRunArgsP &&outs) diff --git a/modules/gapi/src/compiler/gstreaming_priv.hpp b/modules/gapi/src/compiler/gstreaming_priv.hpp index 
59b19d425261..1b559ba31030 100644 --- a/modules/gapi/src/compiler/gstreaming_priv.hpp +++ b/modules/gapi/src/compiler/gstreaming_priv.hpp @@ -46,6 +46,7 @@ class GAPI_EXPORTS GStreamingCompiled::Priv void start(); bool pull(cv::GRunArgsP &&outs); bool pull(cv::GOptRunArgsP &&outs); + std::tuple> pull(); bool try_pull(cv::GRunArgsP &&outs); void stop(); diff --git a/modules/gapi/src/executor/gstreamingexecutor.cpp b/modules/gapi/src/executor/gstreamingexecutor.cpp index 74c96bdf3ef3..27049aef6327 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.cpp +++ b/modules/gapi/src/executor/gstreamingexecutor.cpp @@ -1017,6 +1017,49 @@ void check_DesyncObjectConsumedByMultipleIslands(const cv::gimpl::GIslandModel:: } // for(nodes) } +// NB: Construct GRunArgsP based on passed info and store the memory in passed cv::GRunArgs. +// Needed for python bridge, because in case python user doesn't pass output arguments to apply. +void constructOptGraphOutputs(const cv::GTypesInfo &out_info, + cv::GOptRunArgs &args, + cv::GOptRunArgsP &outs) +{ + for (auto&& info : out_info) + { + switch (info.shape) + { + case cv::GShape::GMAT: + { + args.emplace_back(cv::optional{}); + outs.emplace_back(&cv::util::get>(args.back())); + break; + } + case cv::GShape::GSCALAR: + { + args.emplace_back(cv::optional{}); + outs.emplace_back(&cv::util::get>(args.back())); + break; + } + case cv::GShape::GARRAY: + { + cv::detail::VectorRef ref; + cv::util::get(info.ctor)(ref); + args.emplace_back(cv::util::make_optional(std::move(ref))); + outs.emplace_back(wrap_opt_arg(cv::util::get>(args.back()))); + break; + } + case cv::GShape::GOPAQUE: + { + cv::detail::OpaqueRef ref; + cv::util::get(info.ctor)(ref); + args.emplace_back(cv::util::make_optional(std::move(ref))); + outs.emplace_back(wrap_opt_arg(cv::util::get>(args.back()))); + break; + } + default: + cv::util::throw_error(std::logic_error("Unsupported optional output shape for Python")); + } + } +} } // anonymous namespace class 
cv::gimpl::GStreamingExecutor::Synchronizer final { @@ -1320,6 +1363,16 @@ cv::gimpl::GStreamingExecutor::GStreamingExecutor(std::unique_ptr && // per the same input frame, so the output traffic multiplies) GAPI_Assert(m_collector_map.size() > 0u); m_out_queue.set_capacity(queue_capacity * m_collector_map.size()); + + // FIXME: The code duplicates logic of collectGraphInfo() + cv::gimpl::GModel::ConstGraph cgr(*m_orig_graph); + auto meta = cgr.metadata().get().out_nhs; + out_info.reserve(meta.size()); + + ade::util::transform(meta, std::back_inserter(out_info), [&cgr](const ade::NodeHandle& nh) { + const auto& data = cgr.metadata(nh).get(); + return cv::GTypeInfo{data.shape, data.kind, data.ctor}; + }); } cv::gimpl::GStreamingExecutor::~GStreamingExecutor() @@ -1653,6 +1706,31 @@ bool cv::gimpl::GStreamingExecutor::pull(cv::GOptRunArgsP &&outs) return true; } +std::tuple> cv::gimpl::GStreamingExecutor::pull() +{ + using RunArgs = cv::util::variant; + bool is_over = false; + + if (m_desync) { + GOptRunArgs opt_run_args; + GOptRunArgsP opt_outs; + opt_outs.reserve(out_info.size()); + opt_run_args.reserve(out_info.size()); + + constructOptGraphOutputs(out_info, opt_run_args, opt_outs); + is_over = pull(std::move(opt_outs)); + return std::make_tuple(is_over, RunArgs(opt_run_args)); + } + + GRunArgs run_args; + GRunArgsP outs; + run_args.reserve(out_info.size()); + outs.reserve(out_info.size()); + + constructGraphOutputs(out_info, run_args, outs); + is_over = pull(std::move(outs)); + return std::make_tuple(is_over, RunArgs(run_args)); +} bool cv::gimpl::GStreamingExecutor::try_pull(cv::GRunArgsP &&outs) { diff --git a/modules/gapi/src/executor/gstreamingexecutor.hpp b/modules/gapi/src/executor/gstreamingexecutor.hpp index 40b787268228..b4aadcbbaf4d 100644 --- a/modules/gapi/src/executor/gstreamingexecutor.hpp +++ b/modules/gapi/src/executor/gstreamingexecutor.hpp @@ -195,6 +195,8 @@ class GStreamingExecutor final void wait_shutdown(); + cv::GTypesInfo out_info; + 
public: explicit GStreamingExecutor(std::unique_ptr &&g_model, const cv::GCompileArgs &comp_args); @@ -203,6 +205,7 @@ class GStreamingExecutor final void start(); bool pull(cv::GRunArgsP &&outs); bool pull(cv::GOptRunArgsP &&outs); + std::tuple> pull(); bool try_pull(cv::GRunArgsP &&outs); void stop(); bool running() const; diff --git a/modules/gapi/test/streaming/gapi_streaming_tests.cpp b/modules/gapi/test/streaming/gapi_streaming_tests.cpp index f3179a70813a..5386d1736f67 100644 --- a/modules/gapi/test/streaming/gapi_streaming_tests.cpp +++ b/modules/gapi/test/streaming/gapi_streaming_tests.cpp @@ -244,6 +244,35 @@ class NV12Source : public cv::gapi::wip::GCaptureSource { } }; +void checkPullOverload(const cv::Mat& ref, + const bool has_output, + cv::util::variant& args) { + EXPECT_TRUE(has_output); + using runArgs = cv::util::variant; + cv::Mat out_mat; + switch (args.index()) { + case runArgs::index_of(): + { + auto outputs = util::get(args); + EXPECT_EQ(1u, outputs.size()); + out_mat = cv::util::get(outputs[0]); + break; + } + case runArgs::index_of(): + { + auto outputs = util::get(args); + EXPECT_EQ(1u, outputs.size()); + auto opt_mat = cv::util::get>(outputs[0]); + ASSERT_TRUE(opt_mat.has_value()); + out_mat = *opt_mat; + break; + } + default: GAPI_Assert(false && "Incorrect type of Args"); + } + + EXPECT_EQ(0., cv::norm(ref, out_mat, cv::NORM_INF)); +} + } // anonymous namespace TEST_P(GAPI_Streaming, SmokeTest_ConstInput_GMat) @@ -1336,13 +1365,45 @@ TEST(Streaming, Python_Pull_Overload) bool has_output; cv::GRunArgs outputs; - std::tie(has_output, outputs) = ccomp.pull(); + using RunArgs = cv::util::variant; + RunArgs args; - EXPECT_TRUE(has_output); - EXPECT_EQ(1u, outputs.size()); + std::tie(has_output, args) = ccomp.pull(); + + checkPullOverload(in_mat, has_output, args); + + ccomp.stop(); + EXPECT_FALSE(ccomp.running()); +} + +TEST(GAPI_Streaming_Desync, Python_Pull_Overload) +{ + cv::GMat in; + cv::GMat out = cv::gapi::streaming::desync(in); + 
cv::GComputation c(in, out); + + cv::Size sz(3,3); + cv::Mat in_mat(sz, CV_8UC3); + cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar(255)); - auto out_mat = cv::util::get(outputs[0]); - EXPECT_EQ(0., cv::norm(in_mat, out_mat, cv::NORM_INF)); + auto ccomp = c.compileStreaming(); + + EXPECT_TRUE(ccomp); + EXPECT_FALSE(ccomp.running()); + + ccomp.setSource(cv::gin(in_mat)); + + ccomp.start(); + EXPECT_TRUE(ccomp.running()); + + bool has_output; + cv::GRunArgs outputs; + using RunArgs = cv::util::variant; + RunArgs args; + + std::tie(has_output, args) = ccomp.pull(); + + checkPullOverload(in_mat, has_output, args); ccomp.stop(); EXPECT_FALSE(ccomp.running()); @@ -2132,9 +2193,17 @@ TEST(GAPI_Streaming, TestPythonAPI) bool is_over = false; cv::GRunArgs out_args; + using RunArgs = cv::util::variant; + RunArgs args; // NB: Used by python bridge - std::tie(is_over, out_args) = cc.pull(); + std::tie(is_over, args) = cc.pull(); + + switch (args.index()) { + case RunArgs::index_of(): + out_args = util::get(args); break; + default: GAPI_Assert(false && "Incorrect type of return value"); + } ASSERT_EQ(1u, out_args.size()); ASSERT_TRUE(cv::util::holds_alternative(out_args[0])); From 90df3af6cffec4a3255ae0f217471d013fd68492 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 30 Jun 2021 21:41:25 +0000 Subject: [PATCH 035/128] build: winpack_dldt with dldt 2021.4.0 --- ...-dldt-disable-multidevice-autoplugin.patch | 16 ++ ...20210630-dldt-disable-unused-targets.patch | 219 ++++++++++++++++++ .../2021.4/20210630-dldt-pdb.patch | 15 ++ .../2021.4/20210630-dldt-vs-version.patch | 16 ++ platforms/winpack_dldt/2021.4/build.config.py | 1 + platforms/winpack_dldt/2021.4/patch.config.py | 4 + .../winpack_dldt/2021.4/sysroot.config.py | 56 +++++ platforms/winpack_dldt/build_package.py | 14 +- 8 files changed, 336 insertions(+), 5 deletions(-) create mode 100644 platforms/winpack_dldt/2021.4/20210630-dldt-disable-multidevice-autoplugin.patch create mode 100644 
platforms/winpack_dldt/2021.4/20210630-dldt-disable-unused-targets.patch create mode 100644 platforms/winpack_dldt/2021.4/20210630-dldt-pdb.patch create mode 100644 platforms/winpack_dldt/2021.4/20210630-dldt-vs-version.patch create mode 100644 platforms/winpack_dldt/2021.4/build.config.py create mode 100644 platforms/winpack_dldt/2021.4/patch.config.py create mode 100644 platforms/winpack_dldt/2021.4/sysroot.config.py diff --git a/platforms/winpack_dldt/2021.4/20210630-dldt-disable-multidevice-autoplugin.patch b/platforms/winpack_dldt/2021.4/20210630-dldt-disable-multidevice-autoplugin.patch new file mode 100644 index 000000000000..f1e748744277 --- /dev/null +++ b/platforms/winpack_dldt/2021.4/20210630-dldt-disable-multidevice-autoplugin.patch @@ -0,0 +1,16 @@ +diff --git a/inference-engine/src/CMakeLists.txt b/inference-engine/src/CMakeLists.txt +index 0ba0dd78..7d34e7cb 100644 +--- a/inference-engine/src/CMakeLists.txt ++++ b/inference-engine/src/CMakeLists.txt +@@ -26,9 +26,9 @@ endif() + + add_subdirectory(hetero_plugin) + +-add_subdirectory(auto_plugin) ++#add_subdirectory(auto_plugin) + +-add_subdirectory(multi_device) ++#add_subdirectory(multi_device) + + add_subdirectory(transformations) + diff --git a/platforms/winpack_dldt/2021.4/20210630-dldt-disable-unused-targets.patch b/platforms/winpack_dldt/2021.4/20210630-dldt-disable-unused-targets.patch new file mode 100644 index 000000000000..9d44cdadc6cd --- /dev/null +++ b/platforms/winpack_dldt/2021.4/20210630-dldt-disable-unused-targets.patch @@ -0,0 +1,219 @@ +diff --git a/cmake/developer_package/add_ie_target.cmake b/cmake/developer_package/add_ie_target.cmake +index d49f16a4d..2726ca787 100644 +--- a/cmake/developer_package/add_ie_target.cmake ++++ b/cmake/developer_package/add_ie_target.cmake +@@ -92,7 +92,7 @@ function(addIeTarget) + if (ARG_TYPE STREQUAL EXECUTABLE) + add_executable(${ARG_NAME} ${all_sources}) + elseif(ARG_TYPE STREQUAL STATIC OR ARG_TYPE STREQUAL SHARED) +- add_library(${ARG_NAME} 
${ARG_TYPE} ${all_sources}) ++ add_library(${ARG_NAME} ${ARG_TYPE} EXCLUDE_FROM_ALL ${all_sources}) + else() + message(SEND_ERROR "Invalid target type ${ARG_TYPE} specified for target name ${ARG_NAME}") + endif() +diff --git a/inference-engine/CMakeLists.txt b/inference-engine/CMakeLists.txt +index 1ac7fd8bf..df7091e51 100644 +--- a/inference-engine/CMakeLists.txt ++++ b/inference-engine/CMakeLists.txt +@@ -39,7 +39,7 @@ if(ENABLE_TESTS) + add_subdirectory(tests) + endif() + +-add_subdirectory(tools) ++#add_subdirectory(tools) + + function(ie_build_samples) + # samples should be build with the same flags as from OpenVINO package, +@@ -58,7 +58,7 @@ endfunction() + + # gflags and format_reader targets are kept inside of samples directory and + # they must be built even if samples build is disabled (required for tests and tools). +-ie_build_samples() ++#ie_build_samples() + + if(ENABLE_PYTHON) + add_subdirectory(ie_bridges/python) +@@ -142,7 +142,7 @@ endif() + # Developer package + # + +-openvino_developer_export_targets(COMPONENT openvino_common TARGETS format_reader gflags ie_samples_utils) ++#openvino_developer_export_targets(COMPONENT openvino_common TARGETS format_reader gflags ie_samples_utils) + + # for Template plugin + if(NGRAPH_INTERPRETER_ENABLE) +@@ -166,7 +166,7 @@ function(ie_generate_dev_package_config) + @ONLY) + endfunction() + +-ie_generate_dev_package_config() ++#ie_generate_dev_package_config() + + # + # Coverage +diff --git a/inference-engine/src/inference_engine/CMakeLists.txt b/inference-engine/src/inference_engine/CMakeLists.txt +index e8ed1a5c4..1fc9fc3ff 100644 +--- a/inference-engine/src/inference_engine/CMakeLists.txt ++++ b/inference-engine/src/inference_engine/CMakeLists.txt +@@ -110,7 +110,7 @@ add_cpplint_target(${TARGET_NAME}_plugin_api_cpplint FOR_SOURCES ${plugin_api_sr + + # Create object library + +-add_library(${TARGET_NAME}_obj OBJECT ++add_library(${TARGET_NAME}_obj OBJECT EXCLUDE_FROM_ALL + ${LIBRARY_SRC} + ${LIBRARY_HEADERS} 
+ ${PUBLIC_HEADERS}) +@@ -181,7 +181,7 @@ ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) + + # Static library used for unit tests which are always built + +-add_library(${TARGET_NAME}_s STATIC ++add_library(${TARGET_NAME}_s STATIC EXCLUDE_FROM_ALL + $ + $ + ${IE_STATIC_DEPENDENT_FILES}) +diff --git a/inference-engine/src/legacy_api/CMakeLists.txt b/inference-engine/src/legacy_api/CMakeLists.txt +index 8eae82bd2..e0e6745b1 100644 +--- a/inference-engine/src/legacy_api/CMakeLists.txt ++++ b/inference-engine/src/legacy_api/CMakeLists.txt +@@ -26,7 +26,7 @@ endif() + + file(TOUCH ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) + +-add_library(${TARGET_NAME}_obj OBJECT ++add_library(${TARGET_NAME}_obj OBJECT EXCLUDE_FROM_ALL + ${LIBRARY_SRC} + ${PUBLIC_HEADERS}) + +diff --git a/inference-engine/src/mkldnn_plugin/CMakeLists.txt b/inference-engine/src/mkldnn_plugin/CMakeLists.txt +index fe57b29dd..07831e2fb 100644 +--- a/inference-engine/src/mkldnn_plugin/CMakeLists.txt ++++ b/inference-engine/src/mkldnn_plugin/CMakeLists.txt +@@ -67,7 +67,7 @@ ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) + + # add test object library + +-add_library(${TARGET_NAME}_obj OBJECT ${SOURCES} ${HEADERS}) ++add_library(${TARGET_NAME}_obj OBJECT EXCLUDE_FROM_ALL ${SOURCES} ${HEADERS}) + target_link_libraries(${TARGET_NAME}_obj PUBLIC mkldnn) + + target_include_directories(${TARGET_NAME}_obj PRIVATE $ +diff --git a/inference-engine/src/preprocessing/CMakeLists.txt b/inference-engine/src/preprocessing/CMakeLists.txt +index f9548339d..ef962145a 100644 +--- a/inference-engine/src/preprocessing/CMakeLists.txt ++++ b/inference-engine/src/preprocessing/CMakeLists.txt +@@ -101,7 +101,7 @@ endif() + + # Create object library + +-add_library(${TARGET_NAME}_obj OBJECT ++add_library(${TARGET_NAME}_obj OBJECT EXCLUDE_FROM_ALL + ${LIBRARY_SRC} + ${LIBRARY_HEADERS}) + +@@ -153,7 +153,7 @@ ie_add_api_validator_post_build_step(TARGET ${TARGET_NAME}) + + # Static library used for unit tests 
which are always built + +-add_library(${TARGET_NAME}_s STATIC ++add_library(${TARGET_NAME}_s STATIC EXCLUDE_FROM_ALL + $) + + set_ie_threading_interface_for(${TARGET_NAME}_s) +diff --git a/inference-engine/src/vpu/common/CMakeLists.txt b/inference-engine/src/vpu/common/CMakeLists.txt +index 249e47c28..4ddf63049 100644 +--- a/inference-engine/src/vpu/common/CMakeLists.txt ++++ b/inference-engine/src/vpu/common/CMakeLists.txt +@@ -5,7 +5,7 @@ + file(GLOB_RECURSE SOURCES *.cpp *.hpp *.h) + + function(add_common_target TARGET_NAME STATIC_IE) +- add_library(${TARGET_NAME} STATIC ${SOURCES}) ++ add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${SOURCES}) + + ie_faster_build(${TARGET_NAME} + UNITY +@@ -60,7 +60,7 @@ add_common_target("vpu_common_lib" FALSE) + + # Unit tests support for graph transformer + if(WIN32) +- add_common_target("vpu_common_lib_test_static" TRUE) ++ #add_common_target("vpu_common_lib_test_static" TRUE) + else() + add_library("vpu_common_lib_test_static" ALIAS "vpu_common_lib") + endif() +diff --git a/inference-engine/src/vpu/graph_transformer/CMakeLists.txt b/inference-engine/src/vpu/graph_transformer/CMakeLists.txt +index bc73ab5b1..b4c1547fc 100644 +--- a/inference-engine/src/vpu/graph_transformer/CMakeLists.txt ++++ b/inference-engine/src/vpu/graph_transformer/CMakeLists.txt +@@ -5,7 +5,7 @@ + file(GLOB_RECURSE SOURCES *.cpp *.hpp *.h *.inc) + + function(add_graph_transformer_target TARGET_NAME STATIC_IE) +- add_library(${TARGET_NAME} STATIC ${SOURCES}) ++ add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${SOURCES}) + + set_ie_threading_interface_for(${TARGET_NAME}) + +@@ -70,7 +70,7 @@ add_graph_transformer_target("vpu_graph_transformer" FALSE) + + # Unit tests support for graph transformer + if(WIN32) +- add_graph_transformer_target("vpu_graph_transformer_test_static" TRUE) ++ #add_graph_transformer_target("vpu_graph_transformer_test_static" TRUE) + else() + add_library("vpu_graph_transformer_test_static" ALIAS "vpu_graph_transformer") + 
endif() +diff --git a/inference-engine/thirdparty/pugixml/CMakeLists.txt b/inference-engine/thirdparty/pugixml/CMakeLists.txt +index 8bcb2801a..f7e031c01 100644 +--- a/inference-engine/thirdparty/pugixml/CMakeLists.txt ++++ b/inference-engine/thirdparty/pugixml/CMakeLists.txt +@@ -41,7 +41,7 @@ if(BUILD_SHARED_LIBS) + else() + add_library(pugixml STATIC ${SOURCES}) + if (MSVC) +- add_library(pugixml_mt STATIC ${SOURCES}) ++ #add_library(pugixml_mt STATIC ${SOURCES}) + #if (WIN32) + # set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") + # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") +diff --git a/ngraph/core/builder/CMakeLists.txt b/ngraph/core/builder/CMakeLists.txt +index ff5c381e7..2797ec9ab 100644 +--- a/ngraph/core/builder/CMakeLists.txt ++++ b/ngraph/core/builder/CMakeLists.txt +@@ -16,7 +16,7 @@ source_group("src" FILES ${LIBRARY_SRC}) + source_group("include" FILES ${PUBLIC_HEADERS}) + + # Create shared library +-add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS}) ++add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${LIBRARY_SRC} ${PUBLIC_HEADERS}) + + if(COMMAND ie_faster_build) + ie_faster_build(${TARGET_NAME} +diff --git a/ngraph/core/reference/CMakeLists.txt b/ngraph/core/reference/CMakeLists.txt +index ef4a764ab..f6d3172e2 100644 +--- a/ngraph/core/reference/CMakeLists.txt ++++ b/ngraph/core/reference/CMakeLists.txt +@@ -16,7 +16,7 @@ source_group("src" FILES ${LIBRARY_SRC}) + source_group("include" FILES ${PUBLIC_HEADERS}) + + # Create shared library +-add_library(${TARGET_NAME} STATIC ${LIBRARY_SRC} ${PUBLIC_HEADERS}) ++add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${LIBRARY_SRC} ${PUBLIC_HEADERS}) + + if(COMMAND ie_faster_build) + ie_faster_build(${TARGET_NAME} +diff --git a/openvino/itt/CMakeLists.txt b/openvino/itt/CMakeLists.txt +index e9f880b8c..c63f4df63 100644 +--- a/openvino/itt/CMakeLists.txt ++++ b/openvino/itt/CMakeLists.txt +@@ -6,7 +6,7 @@ set(TARGET_NAME itt) + + file(GLOB_RECURSE SOURCES 
"src/*.cpp" "src/*.hpp") + +-add_library(${TARGET_NAME} STATIC ${SOURCES}) ++add_library(${TARGET_NAME} STATIC EXCLUDE_FROM_ALL ${SOURCES}) + + add_library(openvino::itt ALIAS ${TARGET_NAME}) + diff --git a/platforms/winpack_dldt/2021.4/20210630-dldt-pdb.patch b/platforms/winpack_dldt/2021.4/20210630-dldt-pdb.patch new file mode 100644 index 000000000000..65e6f84dc80b --- /dev/null +++ b/platforms/winpack_dldt/2021.4/20210630-dldt-pdb.patch @@ -0,0 +1,15 @@ +iff --git a/CMakeLists.txt b/CMakeLists.txt +index e0706a72e..9a053b1e4 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -6,6 +6,10 @@ cmake_minimum_required(VERSION 3.13) + + project(OpenVINO) + ++set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi /FS") ++set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") ++set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") ++ + set(OpenVINO_MAIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + set(IE_MAIN_SOURCE_DIR ${OpenVINO_MAIN_SOURCE_DIR}/inference-engine) + diff --git a/platforms/winpack_dldt/2021.4/20210630-dldt-vs-version.patch b/platforms/winpack_dldt/2021.4/20210630-dldt-vs-version.patch new file mode 100644 index 000000000000..36b0068775eb --- /dev/null +++ b/platforms/winpack_dldt/2021.4/20210630-dldt-vs-version.patch @@ -0,0 +1,16 @@ +diff --git a/cmake/developer_package/vs_version/vs_version.cmake b/cmake/developer_package/vs_version/vs_version.cmake +index 14d4c0e1e..6a44f73b9 100644 +--- a/cmake/developer_package/vs_version/vs_version.cmake ++++ b/cmake/developer_package/vs_version/vs_version.cmake +@@ -8,9 +8,9 @@ set(IE_VS_VER_FILEVERSION_STR "${IE_VERSION_MAJOR}.${IE_VERSION_MINOR}.${IE_VERS + + set(IE_VS_VER_COMPANY_NAME_STR "Intel Corporation") + set(IE_VS_VER_PRODUCTVERSION_STR "${CI_BUILD_NUMBER}") +-set(IE_VS_VER_PRODUCTNAME_STR "OpenVINO toolkit") ++set(IE_VS_VER_PRODUCTNAME_STR "OpenVINO toolkit (for OpenCV Windows package)") + 
set(IE_VS_VER_COPYRIGHT_STR "Copyright (C) 2018-2021, Intel Corporation") +-set(IE_VS_VER_COMMENTS_STR "https://docs.openvinotoolkit.org/") ++set(IE_VS_VER_COMMENTS_STR "https://github.com/opencv/opencv/wiki/Intel%27s-Deep-Learning-Inference-Engine-backend") + + # + # ie_add_vs_version_file(NAME diff --git a/platforms/winpack_dldt/2021.4/build.config.py b/platforms/winpack_dldt/2021.4/build.config.py new file mode 100644 index 000000000000..33ef1050cad4 --- /dev/null +++ b/platforms/winpack_dldt/2021.4/build.config.py @@ -0,0 +1 @@ +os.environ['CI_BUILD_NUMBER'] = '2021.4.0-opencv_winpack_dldt' diff --git a/platforms/winpack_dldt/2021.4/patch.config.py b/platforms/winpack_dldt/2021.4/patch.config.py new file mode 100644 index 000000000000..7f8715aae2da --- /dev/null +++ b/platforms/winpack_dldt/2021.4/patch.config.py @@ -0,0 +1,4 @@ +applyPatch('20210630-dldt-disable-unused-targets.patch') +applyPatch('20210630-dldt-pdb.patch') +applyPatch('20210630-dldt-disable-multidevice-autoplugin.patch') +applyPatch('20210630-dldt-vs-version.patch') diff --git a/platforms/winpack_dldt/2021.4/sysroot.config.py b/platforms/winpack_dldt/2021.4/sysroot.config.py new file mode 100644 index 000000000000..fa4281107d23 --- /dev/null +++ b/platforms/winpack_dldt/2021.4/sysroot.config.py @@ -0,0 +1,56 @@ +sysroot_bin_dir = prepare_dir(self.sysrootdir / 'bin') +copytree(self.build_dir / 'install', self.sysrootdir / 'ngraph') +#rm_one(self.sysrootdir / 'ngraph' / 'lib' / 'ngraph.dll') + +build_config = 'Release' if not self.config.build_debug else 'Debug' +build_bin_dir = self.build_dir / 'bin' / 'intel64' / build_config + +def copy_bin(name): + global build_bin_dir, sysroot_bin_dir + copytree(build_bin_dir / name, sysroot_bin_dir / name) + +dll_suffix = 'd' if self.config.build_debug else '' +def copy_dll(name): + global copy_bin, dll_suffix + copy_bin(name + dll_suffix + '.dll') + copy_bin(name + dll_suffix + '.pdb') + +copy_bin('cache.json') +copy_dll('clDNNPlugin') 
+copy_dll('HeteroPlugin') +copy_dll('inference_engine') +copy_dll('inference_engine_ir_reader') +#copy_dll('inference_engine_ir_v7_reader') +copy_dll('inference_engine_legacy') +copy_dll('inference_engine_transformations') # runtime +copy_dll('inference_engine_lp_transformations') # runtime +#copy_dll('inference_engine_preproc') # runtime +copy_dll('MKLDNNPlugin') # runtime +copy_dll('myriadPlugin') # runtime +#copy_dll('MultiDevicePlugin') # runtime, not used +copy_dll('ngraph') +copy_bin('plugins.xml') +copy_bin('pcie-ma2x8x.elf') +copy_bin('usb-ma2x8x.mvcmd') + +copytree(self.srcdir / 'inference-engine' / 'temp' / 'tbb' / 'bin', sysroot_bin_dir) +copytree(self.srcdir / 'inference-engine' / 'temp' / 'tbb', self.sysrootdir / 'tbb') + +sysroot_ie_dir = prepare_dir(self.sysrootdir / 'deployment_tools' / 'inference_engine') +sysroot_ie_lib_dir = prepare_dir(sysroot_ie_dir / 'lib' / 'intel64') + +copytree(self.srcdir / 'inference-engine' / 'include', sysroot_ie_dir / 'include') +if not self.config.build_debug: + copytree(build_bin_dir / 'ngraph.lib', sysroot_ie_lib_dir / 'ngraph.lib') + copytree(build_bin_dir / 'inference_engine.lib', sysroot_ie_lib_dir / 'inference_engine.lib') + copytree(build_bin_dir / 'inference_engine_ir_reader.lib', sysroot_ie_lib_dir / 'inference_engine_ir_reader.lib') + copytree(build_bin_dir / 'inference_engine_legacy.lib', sysroot_ie_lib_dir / 'inference_engine_legacy.lib') +else: + copytree(build_bin_dir / 'ngraphd.lib', sysroot_ie_lib_dir / 'ngraphd.lib') + copytree(build_bin_dir / 'inference_engined.lib', sysroot_ie_lib_dir / 'inference_engined.lib') + copytree(build_bin_dir / 'inference_engine_ir_readerd.lib', sysroot_ie_lib_dir / 'inference_engine_ir_readerd.lib') + copytree(build_bin_dir / 'inference_engine_legacyd.lib', sysroot_ie_lib_dir / 'inference_engine_legacyd.lib') + +sysroot_license_dir = prepare_dir(self.sysrootdir / 'etc' / 'licenses') +copytree(self.srcdir / 'LICENSE', sysroot_license_dir / 'dldt-LICENSE') 
+copytree(self.sysrootdir / 'tbb/LICENSE', sysroot_license_dir / 'tbb-LICENSE') diff --git a/platforms/winpack_dldt/build_package.py b/platforms/winpack_dldt/build_package.py index b993f076d901..6fde62241a5e 100644 --- a/platforms/winpack_dldt/build_package.py +++ b/platforms/winpack_dldt/build_package.py @@ -214,7 +214,7 @@ def init_patchset(self): patch_hashsum = hashlib.md5(self.patch_file_contents.encode('utf-8')).hexdigest() except: log.warn("Can't compute hashsum of patches: %s", self.patch_file) - self.patch_hashsum = patch_hashsum + self.patch_hashsum = self.config.override_patch_hashsum if self.config.override_patch_hashsum else patch_hashsum def prepare_sources(self): @@ -355,7 +355,6 @@ def build(self, builderDLDT): BUILD_PERF_TESTS='OFF', ENABLE_CXX11='ON', WITH_INF_ENGINE='ON', - INF_ENGINE_RELEASE=str(self.config.dldt_release), WITH_TBB='ON', CPU_BASELINE='AVX2', CMAKE_INSTALL_PREFIX=str(self.install_dir), @@ -381,6 +380,9 @@ def build(self, builderDLDT): OPENCV_PYTHON_INSTALL_PATH='python', ) + if self.config.dldt_release: + cmake_vars['INF_ENGINE_RELEASE'] = str(self.config.dldt_release) + cmake_vars['INF_ENGINE_LIB_DIRS:PATH'] = str(builderDLDT.sysrootdir / 'deployment_tools/inference_engine/lib/intel64') assert os.path.exists(cmake_vars['INF_ENGINE_LIB_DIRS:PATH']), cmake_vars['INF_ENGINE_LIB_DIRS:PATH'] cmake_vars['INF_ENGINE_INCLUDE_DIRS:PATH'] = str(builderDLDT.sysrootdir / 'deployment_tools/inference_engine/include') @@ -464,8 +466,8 @@ def package_sources(self): def main(): dldt_src_url = 'https://github.com/openvinotoolkit/openvino' - dldt_src_commit = '2021.3' - dldt_release = '2021030000' + dldt_src_commit = '2021.4' + dldt_release = None build_cache_dir_default = os.environ.get('BUILD_CACHE_DIR', '.build_cache') build_subst_drive = os.environ.get('BUILD_SUBST_DRIVE', None) @@ -492,13 +494,15 @@ def main(): parser.add_argument('--dldt_src_branch', help='DLDT checkout branch') parser.add_argument('--dldt_src_commit', 
default=dldt_src_commit, help='DLDT source commit / tag (default: %s)' % dldt_src_commit) parser.add_argument('--dldt_src_git_clone_extra', action='append', help='DLDT git clone extra args') - parser.add_argument('--dldt_release', default=dldt_release, help='DLDT release code for INF_ENGINE_RELEASE (default: %s)' % dldt_release) + parser.add_argument('--dldt_release', default=dldt_release, help='DLDT release code for INF_ENGINE_RELEASE, e.g 2021030000 (default: %s)' % dldt_release) parser.add_argument('--dldt_reference_dir', help='DLDT reference git repository (optional)') parser.add_argument('--dldt_src_dir', help='DLDT custom source repository (skip git checkout and patching, use for TESTING only)') parser.add_argument('--dldt_config', help='Specify DLDT build configuration (defaults to evaluate from DLDT commit/branch)') + parser.add_argument('--override_patch_hashsum', default='', help='(script debug mode)') + args = parser.parse_args() log.basicConfig( From e5841d3126527ff1151ab480ba8e291e29942e07 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 2 Jul 2021 10:41:41 +0000 Subject: [PATCH 036/128] java: force using of 'Ptr<>' for OpenCV classes --- modules/dnn/misc/java/gen_dict.json | 2 +- modules/java/generator/gen_java.py | 34 +++++++++++-------- .../misc/java/test/TrackerCreateTest.java | 7 ++++ 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/modules/dnn/misc/java/gen_dict.json b/modules/dnn/misc/java/gen_dict.json index 5a397eac51c0..65ecfdc25ea7 100644 --- a/modules/dnn/misc/java/gen_dict.json +++ b/modules/dnn/misc/java/gen_dict.json @@ -54,7 +54,7 @@ ] ], - "jni_name": "(*(cv::dnn::DictValue*)%(n)s_nativeObj)", + "jni_name": "(*(*(Ptr*)%(n)s_nativeObj))", "jni_type": "jlong", "suffix": "J", "j_import": "org.opencv.dnn.DictValue" diff --git a/modules/java/generator/gen_java.py b/modules/java/generator/gen_java.py index 6019ca340d25..c5b4f34a8f2b 100755 --- a/modules/java/generator/gen_java.py +++ 
b/modules/java/generator/gen_java.py @@ -258,6 +258,8 @@ def __init__(self, decl, namespaces=[]): # [ 'class/struct cname', ': base', [mo for m in decl[2]: if m.startswith("="): self.jname = m[1:] + if m == '/Simple': + self.smart = False if self.classpath: prefix = self.classpath.replace('.', '_') @@ -445,7 +447,7 @@ def __init__(self): def clear(self): self.namespaces = ["cv"] - classinfo_Mat = ClassInfo([ 'class cv.Mat', '', [], [] ], self.namespaces) + classinfo_Mat = ClassInfo([ 'class cv.Mat', '', ['/Simple'], [] ], self.namespaces) self.classes = { "Mat" : classinfo_Mat } self.module = "" self.Module = "" @@ -466,10 +468,15 @@ def add_class(self, decl): if name in type_dict and not classinfo.base: logging.warning('duplicated: %s', classinfo) return + if self.isSmartClass(classinfo): + jni_name = "*((*(Ptr<"+classinfo.fullNameCPP()+">*)%(n)s_nativeObj).get())" + else: + jni_name = "(*("+classinfo.fullNameCPP()+"*)%(n)s_nativeObj)" type_dict.setdefault(name, {}).update( { "j_type" : classinfo.jname, "jn_type" : "long", "jn_args" : (("__int64", ".nativeObj"),), - "jni_name" : "(*("+classinfo.fullNameCPP()+"*)%(n)s_nativeObj)", "jni_type" : "jlong", + "jni_name" : jni_name, + "jni_type" : "jlong", "suffix" : "J", "j_import" : "org.opencv.%s.%s" % (self.module, classinfo.jname) } @@ -477,7 +484,8 @@ def add_class(self, decl): type_dict.setdefault(name+'*', {}).update( { "j_type" : classinfo.jname, "jn_type" : "long", "jn_args" : (("__int64", ".nativeObj"),), - "jni_name" : "("+classinfo.fullNameCPP()+"*)%(n)s_nativeObj", "jni_type" : "jlong", + "jni_name" : "&("+jni_name+")", + "jni_type" : "jlong", "suffix" : "J", "j_import" : "org.opencv.%s.%s" % (self.module, classinfo.jname) } @@ -966,7 +974,13 @@ def gen_func(self, ci, fi, prop_name=''): ret = "return env->NewStringUTF(_retval_.c_str());" default = 'return env->NewStringUTF("");' elif self.isWrapped(fi.ctype): # wrapped class: - ret = "return (jlong) new %s(_retval_);" % self.fullTypeNameCPP(fi.ctype) + ret 
= None + if fi.ctype in self.classes: + ret_ci = self.classes[fi.ctype] + if self.isSmartClass(ret_ci): + ret = "return (jlong)(new Ptr<%(ctype)s>(new %(ctype)s(_retval_)));" % { 'ctype': ret_ci.fullNameCPP() } + if ret is None: + ret = "return (jlong) new %s(_retval_);" % self.fullTypeNameCPP(fi.ctype) elif fi.ctype.startswith('Ptr_'): c_prologue.append("typedef Ptr<%s> %s;" % (self.fullTypeNameCPP(fi.ctype[4:]), fi.ctype)) ret = "return (jlong)(new %(ctype)s(_retval_));" % { 'ctype':fi.ctype } @@ -1207,17 +1221,7 @@ def isSmartClass(self, ci): if ci.smart != None: return ci.smart - # if parents are smart (we hope) then children are! - # if not we believe the class is smart if it has "create" method - ci.smart = False - if ci.base or ci.name == 'Algorithm': - ci.smart = True - else: - for fi in ci.methods: - if fi.name == "create": - ci.smart = True - break - + ci.smart = True # smart class is not properly handled in case of base/derived classes return ci.smart def smartWrap(self, ci, fullname): diff --git a/modules/video/misc/java/test/TrackerCreateTest.java b/modules/video/misc/java/test/TrackerCreateTest.java index dad696bebfa2..83bbd0b5d5ce 100644 --- a/modules/video/misc/java/test/TrackerCreateTest.java +++ b/modules/video/misc/java/test/TrackerCreateTest.java @@ -1,7 +1,10 @@ package org.opencv.test.video; import org.opencv.core.Core; +import org.opencv.core.CvType; import org.opencv.core.CvException; +import org.opencv.core.Mat; +import org.opencv.core.Rect; import org.opencv.test.OpenCVTestCase; import org.opencv.video.Tracker; @@ -27,6 +30,10 @@ public void testCreateTrackerGOTURN() { public void testCreateTrackerMIL() { Tracker tracker = TrackerMIL.create(); + assert(tracker != null); + Mat mat = new Mat(100, 100, CvType.CV_8UC1); + Rect rect = new Rect(10, 10, 30, 30); + tracker.init(mat, rect); // should not crash (https://github.com/opencv/opencv/issues/19915) } } From 8d1f254dcc2b4f413b1a610ec77b92896702cfca Mon Sep 17 00:00:00 2001 From: Alexander 
Alekhin Date: Fri, 2 Jul 2021 10:41:41 +0000 Subject: [PATCH 037/128] java: force using of 'Ptr<>' for OpenCV classes backport of commit: e5841d3126527ff1151ab480ba8e291e29942e07 --- modules/dnn/misc/java/gen_dict.json | 2 +- modules/java/generator/gen_java.py | 35 ++++++++++++++++------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/modules/dnn/misc/java/gen_dict.json b/modules/dnn/misc/java/gen_dict.json index 5a397eac51c0..65ecfdc25ea7 100644 --- a/modules/dnn/misc/java/gen_dict.json +++ b/modules/dnn/misc/java/gen_dict.json @@ -54,7 +54,7 @@ ] ], - "jni_name": "(*(cv::dnn::DictValue*)%(n)s_nativeObj)", + "jni_name": "(*(*(Ptr*)%(n)s_nativeObj))", "jni_type": "jlong", "suffix": "J", "j_import": "org.opencv.dnn.DictValue" diff --git a/modules/java/generator/gen_java.py b/modules/java/generator/gen_java.py index 8e5c69e78861..f3b5e132d1c6 100755 --- a/modules/java/generator/gen_java.py +++ b/modules/java/generator/gen_java.py @@ -224,6 +224,9 @@ def __init__(self, decl, namespaces=[]): # [ 'class/struct cname', ': base', [mo for m in decl[2]: if m.startswith("="): self.jname = m[1:] + if m == '/Simple': + self.smart = False + self.base = '' if decl[1]: #self.base = re.sub(r"\b"+self.jname+r"\b", "", decl[1].replace(":", "")).strip() @@ -370,7 +373,7 @@ def __init__(self): def clear(self): self.namespaces = ["cv"] - self.classes = { "Mat" : ClassInfo([ 'class Mat', '', [], [] ], self.namespaces) } + self.classes = { "Mat" : ClassInfo([ 'class Mat', '', ['/Simple'], [] ], self.namespaces) } self.module = "" self.Module = "" self.ported_func_list = [] @@ -390,10 +393,15 @@ def add_class(self, decl): if name in type_dict and not classinfo.base: logging.warning('duplicated: %s', classinfo) return + if self.isSmartClass(classinfo): + jni_name = "*((*(Ptr<"+classinfo.fullName(isCPP=True)+">*)%(n)s_nativeObj).get())" + else: + jni_name = "(*("+classinfo.fullName(isCPP=True)+"*)%(n)s_nativeObj)" type_dict.setdefault(name, {}).update( { "j_type" : 
classinfo.jname, "jn_type" : "long", "jn_args" : (("__int64", ".nativeObj"),), - "jni_name" : "(*("+classinfo.fullName(isCPP=True)+"*)%(n)s_nativeObj)", "jni_type" : "jlong", + "jni_name" : jni_name, + "jni_type" : "jlong", "suffix" : "J", "j_import" : "org.opencv.%s.%s" % (self.module, classinfo.jname) } @@ -401,7 +409,8 @@ def add_class(self, decl): type_dict.setdefault(name+'*', {}).update( { "j_type" : classinfo.jname, "jn_type" : "long", "jn_args" : (("__int64", ".nativeObj"),), - "jni_name" : "("+classinfo.fullName(isCPP=True)+"*)%(n)s_nativeObj", "jni_type" : "jlong", + "jni_name" : "&("+jni_name+")", + "jni_type" : "jlong", "suffix" : "J", "j_import" : "org.opencv.%s.%s" % (self.module, classinfo.jname) } @@ -889,7 +898,13 @@ def gen_func(self, ci, fi, prop_name=''): ret = "return env->NewStringUTF(_retval_.c_str());" default = 'return env->NewStringUTF("");' elif self.isWrapped(fi.ctype): # wrapped class: - ret = "return (jlong) new %s(_retval_);" % self.fullTypeName(fi.ctype) + ret = None + if fi.ctype in self.classes: + ret_ci = self.classes[fi.ctype] + if self.isSmartClass(ret_ci): + ret = "return (jlong)(new Ptr<%(ctype)s>(new %(ctype)s(_retval_)));" % { 'ctype': self.fullTypeName(fi.ctype) } + if ret is None: + ret = "return (jlong) new %s(_retval_);" % self.fullTypeName(fi.ctype) elif fi.ctype.startswith('Ptr_'): c_prologue.append("typedef Ptr<%s> %s;" % (self.fullTypeName(fi.ctype[4:]), fi.ctype)) ret = "return (jlong)(new %(ctype)s(_retval_));" % { 'ctype':fi.ctype } @@ -1128,17 +1143,7 @@ def isSmartClass(self, ci): if ci.smart != None: return ci.smart - # if parents are smart (we hope) then children are! 
- # if not we believe the class is smart if it has "create" method - ci.smart = False - if ci.base or ci.name == 'Algorithm': - ci.smart = True - else: - for fi in ci.methods: - if fi.name == "create": - ci.smart = True - break - + ci.smart = True # smart class is not properly handled in case of base/derived classes return ci.smart def smartWrap(self, ci, fullname): From 18dbac203f3b55ad445df26a5728798e26f78633 Mon Sep 17 00:00:00 2001 From: mitruska Date: Fri, 2 Jul 2021 15:29:34 +0200 Subject: [PATCH 038/128] Use explicit version of ngraph NormalizeL2 --- modules/dnn/src/layers/normalize_bbox_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/dnn/src/layers/normalize_bbox_layer.cpp b/modules/dnn/src/layers/normalize_bbox_layer.cpp index cdaa87bde568..001ea3bd16b0 100644 --- a/modules/dnn/src/layers/normalize_bbox_layer.cpp +++ b/modules/dnn/src/layers/normalize_bbox_layer.cpp @@ -328,7 +328,7 @@ class NormalizeBBoxLayerImpl CV_FINAL : public NormalizeBBoxLayer std::iota(axes_data.begin(), axes_data.end(), 1); } auto axes = std::make_shared(ngraph::element::i64, ngraph::Shape{axes_data.size()}, axes_data); - auto norm = std::make_shared(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD); + auto norm = std::make_shared(ieInpNode, axes, epsilon, ngraph::op::EpsMode::ADD); CV_Assert(blobs.empty() || numChannels == blobs[0].total()); std::vector shape(ieInpNode->get_shape().size(), 1); From 9b0d6862c474b4edb012d76492834d58451d5fec Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 2 Jul 2021 21:37:37 +0000 Subject: [PATCH 039/128] cmake(IE): extract INF_ENGINE_RELEASE from InferenceEngine package --- cmake/OpenCVDetectInferenceEngine.cmake | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/cmake/OpenCVDetectInferenceEngine.cmake b/cmake/OpenCVDetectInferenceEngine.cmake index 829ddbfe7e2a..6308d1b42480 100644 --- a/cmake/OpenCVDetectInferenceEngine.cmake +++ 
b/cmake/OpenCVDetectInferenceEngine.cmake @@ -140,12 +140,21 @@ endif() # Add more features to the target if(INF_ENGINE_TARGET) - if(NOT INF_ENGINE_RELEASE) + if(DEFINED InferenceEngine_VERSION) + message(STATUS "InferenceEngine: ${InferenceEngine_VERSION}") + if(NOT INF_ENGINE_RELEASE AND NOT (InferenceEngine_VERSION VERSION_LESS "2021.4")) + math(EXPR INF_ENGINE_RELEASE_INIT "${InferenceEngine_VERSION_MAJOR} * 1000000 + ${InferenceEngine_VERSION_MINOR} * 10000 + ${InferenceEngine_VERSION_PATCH} * 100") + endif() + endif() + if(NOT INF_ENGINE_RELEASE AND NOT INF_ENGINE_RELEASE_INIT) message(WARNING "InferenceEngine version has not been set, 2021.4 will be used by default. Set INF_ENGINE_RELEASE variable if you experience build errors.") + set(INF_ENGINE_RELEASE_INIT "2021040000") + elseif(DEFINED INF_ENGINE_RELEASE) + set(INF_ENGINE_RELEASE_INIT "${INF_ENGINE_RELEASE}") endif() - set(INF_ENGINE_RELEASE "2021040000" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 2020.1.0.2 -> 2020010002)") + set(INF_ENGINE_RELEASE "${INF_ENGINE_RELEASE_INIT}" CACHE STRING "Force IE version, should be in form YYYYAABBCC (e.g. 
2020.1.0.2 -> 2020010002)") set_target_properties(${INF_ENGINE_TARGET} PROPERTIES - INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" + INTERFACE_COMPILE_DEFINITIONS "HAVE_INF_ENGINE=1;INF_ENGINE_RELEASE=${INF_ENGINE_RELEASE}" ) endif() From 0e523618a17a99647fc13a2be1577b9c887d6d64 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 3 Jul 2021 10:57:18 +0000 Subject: [PATCH 040/128] cmake: exclude -pthread from Emscripten default build --- cmake/OpenCVCompilerOptions.cmake | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 0a10bfffcfed..303d4f451e64 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -177,7 +177,13 @@ if(CV_GCC OR CV_CLANG) endif() # We need pthread's - if(UNIX AND NOT ANDROID AND NOT (APPLE AND CV_CLANG)) # TODO + if((UNIX + AND NOT ANDROID + AND NOT (APPLE AND CV_CLANG) + AND NOT EMSCRIPTEN + ) + OR (EMSCRIPTEN AND WITH_PTHREADS_PF) # https://github.com/opencv/opencv/issues/20285 + ) add_extra_compiler_option(-pthread) endif() From 5d0cfa252797e115010eec269043f958c3305a51 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 3 Jul 2021 11:36:29 +0000 Subject: [PATCH 041/128] cmake(highgui): don't allow multiple builtin backends --- modules/highgui/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 5eb9f5ab5e6b..bc31b84c74e1 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -141,7 +141,7 @@ endif() if(TARGET ocv.3rdparty.win32ui) if("win32ui" IN_LIST HIGHGUI_PLUGIN_LIST OR HIGHGUI_PLUGIN_LIST STREQUAL "all") ocv_create_builtin_highgui_plugin(opencv_highgui_win32 ocv.3rdparty.win32ui "window_w32.cpp") - else() + elseif(NOT OPENCV_HIGHGUI_BUILTIN_BACKEND) set(OPENCV_HIGHGUI_BUILTIN_BACKEND "WIN32UI") list(APPEND highgui_srcs 
${CMAKE_CURRENT_LIST_DIR}/src/window_w32.cpp) list(APPEND tgts ocv.3rdparty.win32ui) From 4c3f9b2ef4ca1a74c0fd15f1747bd131e249c57f Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 4 Jul 2021 13:07:34 +0300 Subject: [PATCH 042/128] cmake: update Halide detection --- cmake/OpenCVDetectHalide.cmake | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/cmake/OpenCVDetectHalide.cmake b/cmake/OpenCVDetectHalide.cmake index 790f69205662..4828c299aead 100644 --- a/cmake/OpenCVDetectHalide.cmake +++ b/cmake/OpenCVDetectHalide.cmake @@ -9,9 +9,14 @@ set(HALIDE_ROOT_DIR "${HALIDE_ROOT_DIR}" CACHE PATH "Halide root directory") if(NOT HAVE_HALIDE) find_package(Halide QUIET) # Try CMake-based config files if(Halide_FOUND) - set(HALIDE_INCLUDE_DIRS "${Halide_INCLUDE_DIRS}" CACHE PATH "Halide include directories" FORCE) - set(HALIDE_LIBRARIES "${Halide_LIBRARIES}" CACHE PATH "Halide libraries" FORCE) - set(HAVE_HALIDE TRUE) + if(TARGET Halide::Halide) # modern Halide scripts defines imported target + set(HALIDE_INCLUDE_DIRS "") + set(HALIDE_LIBRARIES "Halide::Halide") + set(HAVE_HALIDE TRUE) + else() + # using HALIDE_INCLUDE_DIRS / Halide_LIBRARIES + set(HAVE_HALIDE TRUE) + endif() endif() endif() @@ -28,18 +33,15 @@ if(NOT HAVE_HALIDE AND HALIDE_ROOT_DIR) ) if(HALIDE_LIBRARY AND HALIDE_INCLUDE_DIR) # TODO try_compile - set(HALIDE_INCLUDE_DIRS "${HALIDE_INCLUDE_DIR}" CACHE PATH "Halide include directories" FORCE) - set(HALIDE_LIBRARIES "${HALIDE_LIBRARY}" CACHE PATH "Halide libraries" FORCE) + set(HALIDE_INCLUDE_DIRS "${HALIDE_INCLUDE_DIR}") + set(HALIDE_LIBRARIES "${HALIDE_LIBRARY}") set(HAVE_HALIDE TRUE) endif() - if(NOT HAVE_HALIDE) - ocv_clear_vars(HALIDE_LIBRARIES HALIDE_INCLUDE_DIRS CACHE) - endif() endif() if(HAVE_HALIDE) - include_directories(${HALIDE_INCLUDE_DIRS}) + if(HALIDE_INCLUDE_DIRS) + include_directories(${HALIDE_INCLUDE_DIRS}) + endif() list(APPEND OPENCV_LINKER_LIBS ${HALIDE_LIBRARIES}) -else() - 
ocv_clear_vars(HALIDE_INCLUDE_DIRS HALIDE_LIBRARIES) endif() From cbff19ff1a8be873c3c2aa0cafa2ff77738cc437 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 4 Jul 2021 17:33:18 +0300 Subject: [PATCH 043/128] highgui: fix win32 backend behavior --- modules/highgui/src/window.cpp | 2 +- modules/highgui/src/window_w32.cpp | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index d1ccd1dbc3a9..d9481de6da24 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -990,7 +990,7 @@ void cv::imshow( const String& winname, InputArray _img ) auto backend = getCurrentUIBackend(); if (backend) { - auto window = backend->createWindow(winname, WINDOW_NORMAL); + auto window = backend->createWindow(winname, WINDOW_AUTOSIZE); if (!window) { CV_LOG_ERROR(NULL, "OpenCV/UI: Can't create window: '" << winname << "'"); diff --git a/modules/highgui/src/window_w32.cpp b/modules/highgui/src/window_w32.cpp index d9a9d732227a..716af1094c29 100644 --- a/modules/highgui/src/window_w32.cpp +++ b/modules/highgui/src/window_w32.cpp @@ -2123,7 +2123,7 @@ static void showSaveDialog(CvWindow& window) SIZE sz; int channels; void* data; - if (icvGetBitmapData(window, sz, channels, data)) + if (!icvGetBitmapData(window, sz, channels, data)) return; // nothing to save char szFileName[MAX_PATH] = ""; @@ -2206,6 +2206,7 @@ static bool handleMessage(MSG& message, int& keyCode) switch (message.message) { case WM_DESTROY: + // fallthru case WM_CHAR: DispatchMessage(&message); keyCode = (int)message.wParam; @@ -2221,6 +2222,20 @@ static bool handleMessage(MSG& message, int& keyCode) break; case WM_KEYDOWN: + // Intercept Ctrl+C for copy to clipboard + if ('C' == message.wParam && (::GetKeyState(VK_CONTROL) >> 15)) + { + ::SendMessage(message.hwnd, WM_COPY, 0, 0); + return false; + } + + // Intercept Ctrl+S for "save as" dialog + if ('S' == message.wParam && 
(::GetKeyState(VK_CONTROL) >> 15)) + { + showSaveDialog(window); + return false; + } + TranslateMessage(&message); if ((message.wParam >= VK_F1 && message.wParam <= VK_F24) || message.wParam == VK_HOME || message.wParam == VK_END || @@ -2235,13 +2250,7 @@ static bool handleMessage(MSG& message, int& keyCode) return true; } - // Intercept Ctrl+C for copy to clipboard - if ('C' == message.wParam && (::GetKeyState(VK_CONTROL) >> 15)) - ::SendMessage(message.hwnd, WM_COPY, 0, 0); - - // Intercept Ctrl+S for "save as" dialog - if ('S' == message.wParam && (::GetKeyState(VK_CONTROL) >> 15)) - showSaveDialog(window); + // fallthru default: DispatchMessage(&message); From 591708903b4393b5d33c3a2f4af5be4daeb94c4d Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sun, 4 Jul 2021 21:10:13 +0000 Subject: [PATCH 044/128] release: OpenCV 3.4.15 --- modules/core/include/opencv2/core/version.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp index a71c13ebd1c4..ecd8e733c7b6 100644 --- a/modules/core/include/opencv2/core/version.hpp +++ b/modules/core/include/opencv2/core/version.hpp @@ -8,7 +8,7 @@ #define CV_VERSION_MAJOR 3 #define CV_VERSION_MINOR 4 #define CV_VERSION_REVISION 15 -#define CV_VERSION_STATUS "-pre" +#define CV_VERSION_STATUS "" #define CVAUX_STR_EXP(__A) #__A #define CVAUX_STR(__A) CVAUX_STR_EXP(__A) From ad6e82942b37be8ee2c71c1d9bc7fe79cd16f7ab Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 5 Jul 2021 12:03:22 +0000 Subject: [PATCH 045/128] release: OpenCV 4.5.3 --- modules/core/include/opencv2/core/version.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp index 4757e30f9b30..c9f4b7ed1165 100644 --- a/modules/core/include/opencv2/core/version.hpp +++ b/modules/core/include/opencv2/core/version.hpp @@ -8,7 +8,7 @@ #define 
CV_VERSION_MAJOR 4 #define CV_VERSION_MINOR 5 #define CV_VERSION_REVISION 3 -#define CV_VERSION_STATUS "-pre" +#define CV_VERSION_STATUS "" #define CVAUX_STR_EXP(__A) #__A #define CVAUX_STR(__A) CVAUX_STR_EXP(__A) From ed2a69839293625ecae9f2b9e53eeefc23ceedd6 Mon Sep 17 00:00:00 2001 From: Maxim Pashchenkov Date: Tue, 6 Jul 2021 21:35:41 +0300 Subject: [PATCH 046/128] Merge pull request #20359 from mpashchenkov:mp/onnx-tests G-API: ONNX. Skip tests. * imread for every test * Changed name for Yolo function --- .../gapi/test/infer/gapi_infer_onnx_test.cpp | 107 ++++++++++-------- 1 file changed, 62 insertions(+), 45 deletions(-) diff --git a/modules/gapi/test/infer/gapi_infer_onnx_test.cpp b/modules/gapi/test/infer/gapi_infer_onnx_test.cpp index ef192b9d6a96..b1bf9c935694 100644 --- a/modules/gapi/test/infer/gapi_infer_onnx_test.cpp +++ b/modules/gapi/test/infer/gapi_infer_onnx_test.cpp @@ -67,17 +67,17 @@ struct ONNXInitPath { static ONNXInitPath g_init_path; cv::Mat initMatrixRandU(const int type, const cv::Size& sz_in) { - const cv::Mat in_mat1 = cv::Mat(sz_in, type); + const cv::Mat in_mat = cv::Mat(sz_in, type); if (CV_MAT_DEPTH(type) < CV_32F) { - cv::randu(in_mat1, cv::Scalar::all(0), cv::Scalar::all(255)); + cv::randu(in_mat, cv::Scalar::all(0), cv::Scalar::all(255)); } else { const int fscale = 256; // avoid bits near ULP, generate stable test input - cv::Mat in_mat32s(in_mat1.size(), CV_MAKE_TYPE(CV_32S, CV_MAT_CN(type))); + cv::Mat in_mat32s(in_mat.size(), CV_MAKE_TYPE(CV_32S, CV_MAT_CN(type))); cv::randu(in_mat32s, cv::Scalar::all(0), cv::Scalar::all(255 * fscale)); - in_mat32s.convertTo(in_mat1, type, 1.0f / fscale, 0); + in_mat32s.convertTo(in_mat, type, 1.0f / fscale, 0); } - return in_mat1; + return in_mat; } } // anonymous namespace namespace opencv_test @@ -319,15 +319,13 @@ class ONNXtest : public ::testing::Test { size_t num_in, num_out; std::vector out_gapi; std::vector out_onnx; - cv::Mat in_mat1; + cv::Mat in_mat; ONNXtest() { 
initTestDataPath(); env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "test"); memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); out_gapi.resize(1); - // FIXME: It should be an image from own (gapi) directory in opencv extra - in_mat1 = cv::imread(findDataFile("cv/dpm/cat.png")); } template @@ -463,13 +461,9 @@ class ONNXMediaFrame : public ONNXClassification { cv::Rect(cv::Point{70, 10}, cv::Size{20, 260}), cv::Rect(cv::Point{5, 15}, cv::Size{200, 160}), }; - cv::Mat m_in_y; - cv::Mat m_in_uv; - virtual void SetUp() { - cv::Size sz{640, 480}; - m_in_y = initMatrixRandU(CV_8UC1, sz); - m_in_uv = initMatrixRandU(CV_8UC2, sz / 2); - } + const cv::Size sz{640, 480}; + const cv::Mat m_in_y = initMatrixRandU(CV_8UC1, sz); + const cv::Mat m_in_uv = initMatrixRandU(CV_8UC2, sz / 2); }; class ONNXGRayScale : public ONNXtest { @@ -545,20 +539,20 @@ class ONNXYoloV3 : public ONNXWithRemap { public: std::vector ins; -private: - virtual void SetUp() { + void constructYoloInputs(const cv::Mat& src) { const int yolo_in_h = 416; const int yolo_in_w = 416; cv::Mat yolov3_input, shape, prep_mat; - cv::resize(in_mat1, yolov3_input, cv::Size(yolo_in_w, yolo_in_h)); + cv::resize(src, yolov3_input, cv::Size(yolo_in_w, yolo_in_h)); shape.create(cv::Size(2, 1), CV_32F); float* ptr = shape.ptr(); - ptr[0] = in_mat1.cols; - ptr[1] = in_mat1.rows; + ptr[0] = src.cols; + ptr[1] = src.rows; preprocess(yolov3_input, prep_mat); ins = {prep_mat, shape}; } +private: void preprocess(const cv::Mat& src, cv::Mat& dst) { cv::Mat cvt; src.convertTo(cvt, CV_32F, 1.f / 255.f); @@ -571,9 +565,10 @@ class ONNXYoloV3 : public ONNXWithRemap { TEST_F(ONNXClassification, Infer) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // ONNX_API code cv::Mat processed_mat; - preprocess(in_mat1, processed_mat); + preprocess(in_mat, processed_mat); infer(processed_mat, out_onnx); // G_API code G_API_NET(SqueezNet, , 
"squeeznet"); @@ -583,7 +578,7 @@ TEST_F(ONNXClassification, Infer) // NOTE: We have to normalize U8 tensor // so cfgMeanStd() is here auto net = cv::gapi::onnx::Params { model_path }.cfgMeanStd({ mean }, { std }); - comp.apply(cv::gin(in_mat1), + comp.apply(cv::gin(in_mat), cv::gout(out_gapi.front()), cv::compile_args(cv::gapi::networks(net))); // Validate @@ -593,9 +588,10 @@ TEST_F(ONNXClassification, Infer) TEST_F(ONNXClassification, InferTensor) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // Create tensor cv::Mat tensor; - preprocess(in_mat1, tensor); + preprocess(in_mat, tensor); // ONNX_API code infer(tensor, out_onnx); // G_API code @@ -614,10 +610,11 @@ TEST_F(ONNXClassification, InferTensor) TEST_F(ONNXClassification, InferROI) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); const auto ROI = rois.at(0); // ONNX_API code cv::Mat roi_mat; - preprocess(in_mat1(ROI), roi_mat); + preprocess(in_mat(ROI), roi_mat); infer(roi_mat, out_onnx); // G_API code G_API_NET(SqueezNet, , "squeeznet"); @@ -628,7 +625,7 @@ TEST_F(ONNXClassification, InferROI) // NOTE: We have to normalize U8 tensor // so cfgMeanStd() is here auto net = cv::gapi::onnx::Params { model_path }.cfgMeanStd({ mean }, { std }); - comp.apply(cv::gin(in_mat1, ROI), + comp.apply(cv::gin(in_mat, ROI), cv::gout(out_gapi.front()), cv::compile_args(cv::gapi::networks(net))); // Validate @@ -638,10 +635,11 @@ TEST_F(ONNXClassification, InferROI) TEST_F(ONNXClassification, InferROIList) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // ONNX_API code for (size_t i = 0; i < rois.size(); ++i) { cv::Mat roi_mat; - preprocess(in_mat1(rois[i]), roi_mat); + preprocess(in_mat(rois[i]), roi_mat); infer(roi_mat, out_onnx); } // G_API code @@ -653,7 +651,7 @@ TEST_F(ONNXClassification, 
InferROIList) // NOTE: We have to normalize U8 tensor // so cfgMeanStd() is here auto net = cv::gapi::onnx::Params { model_path }.cfgMeanStd({ mean }, { std }); - comp.apply(cv::gin(in_mat1, rois), + comp.apply(cv::gin(in_mat, rois), cv::gout(out_gapi), cv::compile_args(cv::gapi::networks(net))); // Validate @@ -663,10 +661,11 @@ TEST_F(ONNXClassification, InferROIList) TEST_F(ONNXClassification, Infer2ROIList) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // ONNX_API code for (size_t i = 0; i < rois.size(); ++i) { cv::Mat roi_mat; - preprocess(in_mat1(rois[i]), roi_mat); + preprocess(in_mat(rois[i]), roi_mat); infer(roi_mat, out_onnx); } // G_API code @@ -678,7 +677,7 @@ TEST_F(ONNXClassification, Infer2ROIList) // NOTE: We have to normalize U8 tensor // so cfgMeanStd() is here auto net = cv::gapi::onnx::Params { model_path }.cfgMeanStd({ mean }, { std }); - comp.apply(cv::gin(in_mat1, rois), + comp.apply(cv::gin(in_mat, rois), cv::gout(out_gapi), cv::compile_args(cv::gapi::networks(net))); // Validate @@ -688,9 +687,10 @@ TEST_F(ONNXClassification, Infer2ROIList) TEST_F(ONNXWithRemap, InferDynamicInputTensor) { useModel("object_detection_segmentation/tiny-yolov2/model/tinyyolov2-8"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // Create tensor cv::Mat cvt, rsz, tensor; - cv::resize(in_mat1, rsz, cv::Size{416, 416}); + cv::resize(in_mat, rsz, cv::Size{416, 416}); rsz.convertTo(cvt, CV_32F, 1.f / 255.f); toCHW(cvt, tensor); tensor = tensor.reshape(1, {1, 3, 416, 416}); @@ -714,9 +714,10 @@ TEST_F(ONNXWithRemap, InferDynamicInputTensor) TEST_F(ONNXGRayScale, InferImage) { useModel("body_analysis/emotion_ferplus/model/emotion-ferplus-8"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // ONNX_API code cv::Mat prep_mat; - preprocess(in_mat1, prep_mat); + preprocess(in_mat, prep_mat); infer(prep_mat, out_onnx); // G_API code G_API_NET(EmotionNet, , 
"emotion-ferplus"); @@ -725,7 +726,7 @@ TEST_F(ONNXGRayScale, InferImage) cv::GComputation comp(cv::GIn(in), cv::GOut(out)); auto net = cv::gapi::onnx::Params { model_path } .cfgNormalize({ false }); // model accepts 0..255 range in FP32; - comp.apply(cv::gin(in_mat1), + comp.apply(cv::gin(in_mat), cv::gout(out_gapi.front()), cv::compile_args(cv::gapi::networks(net))); // Validate @@ -735,8 +736,9 @@ TEST_F(ONNXGRayScale, InferImage) TEST_F(ONNXWithRemap, InferMultiOutput) { useModel("object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_10"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // ONNX_API code - const auto prep_mat = in_mat1.reshape(1, {1, in_mat1.rows, in_mat1.cols, in_mat1.channels()}); + const auto prep_mat = in_mat.reshape(1, {1, in_mat.rows, in_mat.cols, in_mat.channels()}); infer(prep_mat, out_onnx); cv::Mat onnx_conv_out({1, 1, 200, 7}, CV_32F); remapToIESSDOut({out_onnx[3], out_onnx[0], out_onnx[2], out_onnx[1]}, onnx_conv_out); @@ -750,7 +752,7 @@ TEST_F(ONNXWithRemap, InferMultiOutput) auto net = cv::gapi::onnx::Params{ model_path } .cfgOutputLayers({"detection_output"}) .cfgPostProc({cv::GMatDesc{CV_32F, {1, 1, 200, 7}}}, remapSSDPorts); - comp.apply(cv::gin(in_mat1), + comp.apply(cv::gin(in_mat), cv::gout(out_gapi.front()), cv::compile_args(cv::gapi::networks(net))); // Validate @@ -760,12 +762,13 @@ TEST_F(ONNXWithRemap, InferMultiOutput) TEST_F(ONNXMediaFrame, InferBGR) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // ONNX_API code cv::Mat processed_mat; - preprocess(in_mat1, processed_mat); + preprocess(in_mat, processed_mat); infer(processed_mat, out_onnx); // G_API code - auto frame = MediaFrame::Create(in_mat1); + auto frame = MediaFrame::Create(in_mat); G_API_NET(SqueezNet, , "squeeznet"); cv::GFrame in; cv::GMat out = cv::gapi::infer(in); @@ -783,6 +786,7 @@ TEST_F(ONNXMediaFrame, InferBGR) TEST_F(ONNXMediaFrame, InferYUV) { 
useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); const auto frame = MediaFrame::Create(m_in_y, m_in_uv); // ONNX_API code cv::Mat pp; @@ -808,10 +812,11 @@ TEST_F(ONNXMediaFrame, InferYUV) TEST_F(ONNXMediaFrame, InferROIBGR) { useModel("classification/squeezenet/model/squeezenet1.0-9"); - auto frame = MediaFrame::Create(in_mat1); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); + auto frame = MediaFrame::Create(in_mat); // ONNX_API code cv::Mat roi_mat; - preprocess(in_mat1(rois.front()), roi_mat); + preprocess(in_mat(rois.front()), roi_mat); infer(roi_mat, out_onnx); // G_API code G_API_NET(SqueezNet, , "squeeznet"); @@ -832,6 +837,7 @@ TEST_F(ONNXMediaFrame, InferROIBGR) TEST_F(ONNXMediaFrame, InferROIYUV) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); const auto frame = MediaFrame::Create(m_in_y, m_in_uv); // ONNX_API code cv::Mat pp; @@ -858,11 +864,12 @@ TEST_F(ONNXMediaFrame, InferROIYUV) TEST_F(ONNXMediaFrame, InferListBGR) { useModel("classification/squeezenet/model/squeezenet1.0-9"); - const auto frame = MediaFrame::Create(in_mat1); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); + const auto frame = MediaFrame::Create(in_mat); // ONNX_API code for (size_t i = 0; i < rois.size(); ++i) { cv::Mat roi_mat; - preprocess(in_mat1(rois[i]), roi_mat); + preprocess(in_mat(rois[i]), roi_mat); infer(roi_mat, out_onnx); } // G_API code @@ -884,6 +891,7 @@ TEST_F(ONNXMediaFrame, InferListBGR) TEST_F(ONNXMediaFrame, InferListYUV) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); const auto frame = MediaFrame::Create(m_in_y, m_in_uv); // ONNX_API code cv::Mat pp; @@ -911,8 +919,9 @@ TEST_F(ONNXMediaFrame, InferListYUV) TEST_F(ONNXRCNN, InferWithDisabledOut) { 
useModel("object_detection_segmentation/faster-rcnn/model/FasterRCNN-10"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); cv::Mat pp; - preprocess(in_mat1, pp); + preprocess(in_mat, pp); // ONNX_API code infer(pp, out_onnx, {"6379", "6383"}); // G_API code @@ -937,11 +946,12 @@ TEST_F(ONNXRCNN, InferWithDisabledOut) TEST_F(ONNXMediaFrame, InferList2BGR) { useModel("classification/squeezenet/model/squeezenet1.0-9"); - const auto frame = MediaFrame::Create(in_mat1); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); + const auto frame = MediaFrame::Create(in_mat); // ONNX_API code for (size_t i = 0; i < rois.size(); ++i) { cv::Mat roi_mat; - preprocess(in_mat1(rois[i]), roi_mat); + preprocess(in_mat(rois[i]), roi_mat); infer(roi_mat, out_onnx); } // G_API code @@ -963,6 +973,7 @@ TEST_F(ONNXMediaFrame, InferList2BGR) TEST_F(ONNXMediaFrame, InferList2YUV) { useModel("classification/squeezenet/model/squeezenet1.0-9"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); const auto frame = MediaFrame::Create(m_in_y, m_in_uv); // ONNX_API code cv::Mat pp; @@ -991,6 +1002,8 @@ TEST_F(ONNXMediaFrame, InferList2YUV) TEST_F(ONNXYoloV3, InferConstInput) { useModel("object_detection_segmentation/yolov3/model/yolov3-10"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); + constructYoloInputs(in_mat); // ONNX_API code infer(ins, out_onnx); // G_API code @@ -1022,6 +1035,8 @@ TEST_F(ONNXYoloV3, InferBSConstInput) // and all input layer names are specified. // Const input has the advantage. It is expected behavior. 
useModel("object_detection_segmentation/yolov3/model/yolov3-10"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); + constructYoloInputs(in_mat); // Tensor with incorrect image size // is used for check case when InputLayers and constInput have same names cv::Mat bad_shape; @@ -1059,8 +1074,9 @@ TEST_F(ONNXYoloV3, InferBSConstInput) TEST_F(ONNXRCNN, ConversionInt64to32) { useModel("object_detection_segmentation/faster-rcnn/model/FasterRCNN-10"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); cv::Mat dst; - preprocess(in_mat1, dst); + preprocess(in_mat, dst); // ONNX_API code infer(dst, out_onnx); // G_API code @@ -1087,6 +1103,7 @@ TEST_F(ONNXRCNN, ConversionInt64to32) TEST_F(ONNXWithRemap, InferOutReallocation) { useModel("object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_10"); + in_mat = cv::imread(findDataFile("cv/dpm/cat.png", false)); // G_API code G_API_NET(MobileNet, , "ssd_mobilenet"); auto net = cv::gapi::onnx::Params{model_path} @@ -1096,7 +1113,7 @@ TEST_F(ONNXWithRemap, InferOutReallocation) cv::GMat out1; out1 = cv::gapi::infer(in); cv::GComputation comp(cv::GIn(in), cv::GOut(out1)); - EXPECT_THROW(comp.apply(cv::gin(in_mat1), + EXPECT_THROW(comp.apply(cv::gin(in_mat), cv::gout(out_gapi[0]), cv::compile_args(cv::gapi::networks(net))), std::exception); } From 5627a0cbdf94a053e55aa4749a5c83d8b18ad34c Mon Sep 17 00:00:00 2001 From: Xinguang Bian Date: Wed, 7 Jul 2021 12:35:11 +0800 Subject: [PATCH 047/128] fix scale problem in DefaultViewPort::controlImagePosition() --- modules/highgui/src/window_QT.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp index 68289eb87620..8dff03117e1b 100644 --- a/modules/highgui/src/window_QT.cpp +++ b/modules/highgui/src/window_QT.cpp @@ -2883,18 +2883,19 @@ inline bool DefaultViewPort::isSameSize(IplImage* img1, IplImage* img2) void DefaultViewPort::controlImagePosition() { qreal 
left, top, right, bottom; + qreal factor = 1.0 / param_matrixWorld.m11(); //after check top-left, bottom right corner to avoid getting "out" during zoom/panning param_matrixWorld.map(0,0,&left,&top); if (left > 0) { - param_matrixWorld.translate(-left,0); + param_matrixWorld.translate(-left * factor, 0); left = 0; } if (top > 0) { - param_matrixWorld.translate(0,-top); + param_matrixWorld.translate(0, -top * factor); top = 0; } //------- @@ -2903,12 +2904,12 @@ void DefaultViewPort::controlImagePosition() param_matrixWorld.map(sizeImage.width(),sizeImage.height(),&right,&bottom); if (right < sizeImage.width()) { - param_matrixWorld.translate(sizeImage.width()-right,0); + param_matrixWorld.translate((sizeImage.width() - right) * factor, 0); right = sizeImage.width(); } if (bottom < sizeImage.height()) { - param_matrixWorld.translate(0,sizeImage.height()-bottom); + param_matrixWorld.translate(0, (sizeImage.height() - bottom) * factor); bottom = sizeImage.height(); } From c0f63eb21f044415bf7a21cb98fa6f4ae2586e1b Mon Sep 17 00:00:00 2001 From: Sergey Ivanov Date: Wed, 7 Jul 2021 15:33:40 +0300 Subject: [PATCH 048/128] Merge pull request #20039 from sivanov-work:gapi_empty_input G-API: Implement variant visit() * Add variant visitor, use visitor for check compile args * Fix GAPI UT: variant *compiler * Aling apply_visior with std, fix indentations * Fix compilation (included compiler_hints.hpp) * Fix compilation (due gapi standalone) * Fix compilation2 (Docs) * Add Lambdas overload, Refactor visit() * Add ReturnType auto deduction * Fix comilation * Fix compilation * Fix warnings * Try to fix MSVC14 * Fix docs * Try fix Win compile * Fix Docs again * Revert GAPI empty input fix * Apply comment for `tuple_element` * Add std::decay for std::base_of to work arounf armv7 problem * Apply review comments * Apply review comments: added comment & removed unused args * Fix docs compilation --- .../include/opencv2/gapi/gtype_traits.hpp | 13 - 
modules/gapi/include/opencv2/gapi/gtyped.hpp | 1 - .../gapi/include/opencv2/gapi/util/util.hpp | 37 +++ .../include/opencv2/gapi/util/variant.hpp | 226 +++++++++++++++- modules/gapi/src/api/gproto.cpp | 5 - modules/gapi/test/util/variant_tests.cpp | 250 ++++++++++++++++++ 6 files changed, 512 insertions(+), 20 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp index 0b11b18485c0..2e8dcb1aec7d 100644 --- a/modules/gapi/include/opencv2/gapi/gtype_traits.hpp +++ b/modules/gapi/include/opencv2/gapi/gtype_traits.hpp @@ -43,19 +43,6 @@ namespace detail GOPAQUE, // a cv::GOpaqueU (note - exactly GOpaqueU, not GOpaque!) }; - template - constexpr const char* meta_to_string() noexcept; - template<> - constexpr const char* meta_to_string() noexcept { return "GMatDesc"; } - template<> - constexpr const char* meta_to_string() noexcept { return "GScalarDesc"; } - template<> - constexpr const char* meta_to_string() noexcept { return "GArrayDesc"; } - template<> - constexpr const char* meta_to_string() noexcept { return "GOpaqueDesc"; } - template<> - constexpr const char* meta_to_string() noexcept { return "GFrameDesc";} - // Describe G-API types (G-types) with traits. Mostly used by // cv::GArg to store meta information about types passed into // operation arguments. 
Please note that cv::GComputation is diff --git a/modules/gapi/include/opencv2/gapi/gtyped.hpp b/modules/gapi/include/opencv2/gapi/gtyped.hpp index 27d977794454..6fe52a62e16b 100644 --- a/modules/gapi/include/opencv2/gapi/gtyped.hpp +++ b/modules/gapi/include/opencv2/gapi/gtyped.hpp @@ -35,7 +35,6 @@ namespace detail template<> struct ProtoToMeta { using type = cv::GScalarDesc; }; template struct ProtoToMeta > { using type = cv::GArrayDesc; }; template struct ProtoToMeta > { using type = cv::GOpaqueDesc; }; - template<> struct ProtoToMeta { using type = cv::GFrameDesc; }; template using ProtoToMetaT = typename ProtoToMeta::type; //workaround for MSVC 19.0 bug diff --git a/modules/gapi/include/opencv2/gapi/util/util.hpp b/modules/gapi/include/opencv2/gapi/util/util.hpp index afcf5596fd60..c6ad0632e268 100644 --- a/modules/gapi/include/opencv2/gapi/util/util.hpp +++ b/modules/gapi/include/opencv2/gapi/util/util.hpp @@ -117,6 +117,43 @@ namespace detail static type get(std::tuple&& objs) { return std::forward>(objs); } }; } // namespace detail + +namespace util +{ +template +struct overload_lamba_set; + +template +struct overload_lamba_set : public L1 +{ + overload_lamba_set(L1&& lambda) : L1(std::move(lambda)) {} + overload_lamba_set(const L1& lambda) : L1(lambda) {} + + using L1::operator(); +}; + +template +struct overload_lamba_set : public L1, public overload_lamba_set +{ + using base_type = overload_lamba_set; + overload_lamba_set(L1 &&lambda1, L&& ...lambdas): + L1(std::move(lambda1)), + base_type(std::forward(lambdas)...) {} + + overload_lamba_set(const L1 &lambda1, L&& ...lambdas): + L1(lambda1), + base_type(std::forward(lambdas)...) 
{} + + using L1::operator(); + using base_type::operator(); +}; + +template +overload_lamba_set overload_lambdas(L&& ...lambdas) +{ + return overload_lamba_set(std::forward(lambdas)...); +} +} } // namespace cv // \endcond diff --git a/modules/gapi/include/opencv2/gapi/util/variant.hpp b/modules/gapi/include/opencv2/gapi/util/variant.hpp index 71a06d2dcf22..f412110deb76 100644 --- a/modules/gapi/include/opencv2/gapi/util/variant.hpp +++ b/modules/gapi/include/opencv2/gapi/util/variant.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include // max_of_t #include @@ -44,6 +45,12 @@ namespace util static const constexpr std::size_t value = detail::type_list_index_helper<0, Target, Types...>::value; }; + template + struct type_list_element + { + using type = typename std::tuple_element >::type; + }; + class bad_variant_access: public std::exception { public: @@ -233,9 +240,87 @@ namespace util template const T& get(const util::variant &v); + template + typename util::type_list_element::type& get(util::variant &v); + + template + const typename util::type_list_element::type& get(const util::variant &v); + template bool holds_alternative(const util::variant &v) noexcept; + + // Visitor + namespace detail + { + struct visitor_interface {}; + + // Class `visitor_return_type_deduction_helper` + // introduces solution for deduction `return_type` in `visit` function in common way + // for both Lambda and class Visitor and keep one interface invocation point: `visit` only + // his helper class is required to unify return_type deduction mechanism because + // for Lambda it is possible to take type of `decltype(visitor(get<0>(var)))` + // but for class Visitor there is no operator() in base case, + // because it provides `operator() (std::size_t index, ...)` + // So `visitor_return_type_deduction_helper` expose `operator()` + // uses only for class Visitor only for deduction `return type` in visit() + template + struct visitor_return_type_deduction_helper + { + using 
return_type = R; + + // to be used in Lambda return type deduction context only + template + return_type operator() (T&&); + }; + } + + // Special purpose `static_visitor` can receive additional arguments + template + struct static_visitor : public detail::visitor_interface, + public detail::visitor_return_type_deduction_helper { + + // assign responsibility for return type deduction to helper class + using return_type = typename detail::visitor_return_type_deduction_helper::return_type; + using detail::visitor_return_type_deduction_helper::operator(); + friend Impl; + + template + return_type operator() (std::size_t index, VariantValue&& value, Args&& ...args) + { + suppress_unused_warning(index); + return static_cast(this)-> visit( + std::forward(value), + std::forward(args)...); + } + }; + + // Special purpose `static_indexed_visitor` can receive additional arguments + // And make forwarding current variant index as runtime function argument to its `Impl` + template + struct static_indexed_visitor : public detail::visitor_interface, + public detail::visitor_return_type_deduction_helper { + + // assign responsibility for return type deduction to helper class + using return_type = typename detail::visitor_return_type_deduction_helper::return_type; + using detail::visitor_return_type_deduction_helper::operator(); + friend Impl; + + template + return_type operator() (std::size_t Index, VariantValue&& value, Args&& ...args) + { + return static_cast(this)-> visit(Index, + std::forward(value), + std::forward(args)...); + } + }; + + template + struct variant_size; + + template + struct variant_size> + : std::integral_constant { }; // FIXME: T&&, const TT&& versions. 
// Implementation ////////////////////////////////////////////////////////// @@ -402,6 +487,22 @@ namespace util throw_error(bad_variant_access()); } + template + typename util::type_list_element::type& get(util::variant &v) + { + using ReturnType = typename util::type_list_element::type; + return const_cast(get(static_cast &>(v))); + } + + template + const typename util::type_list_element::type& get(const util::variant &v) + { + static_assert(Index < sizeof...(Types), + "`Index` it out of bound of `util::variant` type list"); + using ReturnType = typename util::type_list_element::type; + return get(v); + } + template bool holds_alternative(const util::variant &v) noexcept { @@ -428,7 +529,130 @@ namespace util { return !(lhs == rhs); } -} // namespace cv + +namespace detail +{ + // terminate recursion implementation for `non-void` ReturnType + template + ReturnType apply_visitor_impl(Visitor&&, Variant&, + std::true_type, std::false_type, + VisitorArgs&& ...) + { + return {}; + } + + // terminate recursion implementation for `void` ReturnType + template + void apply_visitor_impl(Visitor&&, Variant&, + std::true_type, std::true_type, + VisitorArgs&& ...) + { + } + + // Intermediate resursion processor for Lambda Visitors + template + typename std::enable_if::type>::value, ReturnType>::type + apply_visitor_impl(Visitor&& visitor, Variant&& v, std::false_type not_processed, + std::integral_constant should_no_return, + VisitorArgs&& ...args) + { + static_assert(std::is_same(v)))>::value, + "Different `ReturnType`s detected! All `Visitor::visit` or `overload_lamba_set`" + " must return the same type"); + suppress_unused_warning(not_processed); + if (v.index() == CurIndex) + { + return visitor.operator()(get(v), std::forward(args)... 
); + } + + using is_variant_processed_t = std::integral_constant= ElemCount>; + return apply_visitor_impl( + std::forward(visitor), + std::forward(v), + is_variant_processed_t{}, + should_no_return, + std::forward(args)...); + } + + //Visual Studio 2014 compilation fix: cast visitor to base class before invoke operator() + template + typename std::enable_if::type>, + typename std::decay::type>::value, ReturnType>::type + invoke_class_visitor(Visitor& visitor, Value&& v, VisitorArgs&&...args) + { + return static_cast::type>&>(visitor).operator() (CurIndex, std::forward(v), std::forward(args)... ); + } + + //Visual Studio 2014 compilation fix: cast visitor to base class before invoke operator() + template + typename std::enable_if::type>, + typename std::decay::type>::value, ReturnType>::type + invoke_class_visitor(Visitor& visitor, Value&& v, VisitorArgs&&...args) + { + return static_cast::type>&>(visitor).operator() (CurIndex, std::forward(v), std::forward(args)... ); + } + + // Intermediate recursion processor for special case `visitor_interface` derived Visitors + template + typename std::enable_if::type>::value, ReturnType>::type + apply_visitor_impl(Visitor&& visitor, Variant&& v, std::false_type not_processed, + std::integral_constant should_no_return, + VisitorArgs&& ...args) + { + static_assert(std::is_same(v)))>::value, + "Different `ReturnType`s detected! All `Visitor::visit` or `overload_lamba_set`" + " must return the same type"); + suppress_unused_warning(not_processed); + if (v.index() == CurIndex) + { + return invoke_class_visitor(visitor, get(v), std::forward(args)... 
); + } + + using is_variant_processed_t = std::integral_constant= ElemCount>; + return apply_visitor_impl( + std::forward(visitor), + std::forward(v), + is_variant_processed_t{}, + should_no_return, + std::forward(args)...); + } +} // namespace detail + + template + auto visit(Visitor &visitor, const Variant& var, VisitorArg &&...args) -> decltype(visitor(get<0>(var))) + { + constexpr std::size_t varsize = util::variant_size::value; + static_assert(varsize != 0, "utils::variant must contains one type at least "); + using is_variant_processed_t = std::false_type; + + using ReturnType = decltype(visitor(get<0>(var))); + using return_t = std::is_same; + return detail::apply_visitor_impl( + std::forward(visitor), + var, is_variant_processed_t{}, + return_t{}, + std::forward(args)...); + } + + template + auto visit(Visitor&& visitor, const Variant& var) -> decltype(visitor(get<0>(var))) + { + constexpr std::size_t varsize = util::variant_size::value; + static_assert(varsize != 0, "utils::variant must contains one type at least "); + using is_variant_processed_t = std::false_type; + + using ReturnType = decltype(visitor(get<0>(var))); + using return_t = std::is_same; + return detail::apply_visitor_impl( + std::forward(visitor), + var, is_variant_processed_t{}, + return_t{}); + } } // namespace util +} // namespace cv #endif // OPENCV_GAPI_UTIL_VARIANT_HPP diff --git a/modules/gapi/src/api/gproto.cpp b/modules/gapi/src/api/gproto.cpp index 94234c9b4d70..9b012770caee 100644 --- a/modules/gapi/src/api/gproto.cpp +++ b/modules/gapi/src/api/gproto.cpp @@ -14,7 +14,6 @@ #include "api/gorigin.hpp" #include "api/gproto_priv.hpp" -#include "logger.hpp" // FIXME: it should be a visitor! // FIXME: Reimplement with traits? 
@@ -277,13 +276,9 @@ void cv::validate_input_arg(const GRunArg& arg) void cv::validate_input_args(const GRunArgs& args) { - GAPI_LOG_DEBUG(nullptr, "Total count: " << args.size()); - size_t index = 0; for (const auto& arg : args) { - GAPI_LOG_DEBUG(nullptr, "Process index: " << index); validate_input_arg(arg); - index ++; } } diff --git a/modules/gapi/test/util/variant_tests.cpp b/modules/gapi/test/util/variant_tests.cpp index 65d5e579f81b..7725f9a70211 100644 --- a/modules/gapi/test/util/variant_tests.cpp +++ b/modules/gapi/test/util/variant_tests.cpp @@ -354,6 +354,20 @@ TEST(Variant, Get) EXPECT_THROW(util::get(cv2), util::bad_variant_access); } +TEST(Variant, GetIndexed) +{ + const TestVar cv(42); + + // Test const& get() + EXPECT_EQ(42, util::get<0>(cv)); + EXPECT_THROW(util::get<1>(cv), util::bad_variant_access); + + // Test &get + TestVar cv2(std::string("42")); + EXPECT_EQ("42", util::get<1>(cv2)); + EXPECT_THROW(util::get<0>(cv2), util::bad_variant_access); +} + TEST(Variant, GetWrite) { util::variant v(42); @@ -486,4 +500,240 @@ TEST(Variant, EXT_IndexOf) static_assert(6u == V::index_of(), "Index is incorrect"); } +namespace test_validation +{ +struct MyType +{ + friend std::ostream& operator<<(std::ostream& out, const MyType& src) + { + return out << "MyType"; (void) src; + } +}; +class MyClass +{ + friend std::ostream& operator<<(std::ostream& out, const MyClass& src) + { + return out << "MyClass"; (void) src; + } +}; + +struct MyBoolParamIndexedVisitor : cv::util::static_indexed_visitor +{ + MyBoolParamIndexedVisitor(std::ostream &output) : out(output) {} + + template + bool visit(std::size_t index, Type val, int check) + { + bool result = false; + out << index << ":" << val <<","; + if(std::is_same::value) + { + result = !memcmp(&val, &check, sizeof(int)); + } + return result; + } + + std::ostream &out; +}; + +struct MyBoolNoParamNonIndexedVisitor : cv::util::static_indexed_visitor +{ + MyBoolNoParamNonIndexedVisitor(std::ostream &output) : 
out(output) {} + + template + bool visit(std::size_t index, Type val) + { + out << index << ":" << val <<","; + return true; + } + std::ostream &out; +}; + + +struct MyVoidNoParamNonIndexedVisitor : cv::util::static_visitor +{ + MyVoidNoParamNonIndexedVisitor(std::ostream &output) : out(output) {} + + template + void visit(Type val) + { + out << val << ","; + } + + std::ostream &out; +}; + + +struct MyVoidNoParamIndexedVisitor : cv::util::static_indexed_visitor +{ + MyVoidNoParamIndexedVisitor(std::ostream &output) : out(output) {} + + template + void visit(std::size_t Index, Type val) + { + out << Index << ":" << val <<","; + } + + std::ostream &out; +}; +} + +TEST(Variant, DynamicVisitor) +{ + using V = cv::util::variant; + V var{42}; + { + std::stringstream ss; + test_validation::MyBoolParamIndexedVisitor visitor(ss); + + EXPECT_TRUE(cv::util::visit(visitor, var, int{42})); + EXPECT_EQ(ss.str(), std::string("0:42,")); + } + + std::stringstream ss; + test_validation::MyBoolNoParamNonIndexedVisitor visitor(ss); + + cv::util::visit(visitor, var); + EXPECT_EQ(ss.str(), std::string("0:42,")); + + var = double{1.0}; + EXPECT_TRUE(cv::util::visit(visitor, var)); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,")); + + var = char{'a'}; + EXPECT_TRUE(cv::util::visit(visitor, var)); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,2:a,")); + + var = float{6.0}; + EXPECT_TRUE(cv::util::visit(visitor, var)); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,2:a,3:6,")); + + var = test_validation::MyType{}; + EXPECT_TRUE(cv::util::visit(visitor, var)); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,2:a,3:6,4:MyType,")); + + var = test_validation::MyClass{}; + EXPECT_TRUE(cv::util::visit(visitor, var)); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,2:a,3:6,4:MyType,5:MyClass,")); +} + +TEST(Variant, StaticVisitor) +{ + using V = cv::util::variant; + V var{42}; + std::stringstream ss; + test_validation::MyVoidNoParamNonIndexedVisitor visitor(ss); + + cv::util::visit(visitor, var); + 
EXPECT_EQ(ss.str(), std::string("42,")); + + var = double{1.0}; + cv::util::visit(visitor, var); + EXPECT_EQ(ss.str(), std::string("42,1,")); + + var = char{'a'}; + cv::util::visit(visitor, var); + EXPECT_EQ(ss.str(), std::string("42,1,a,")); + + var = float{6.0}; + cv::util::visit(visitor, var); + EXPECT_EQ(ss.str(), std::string("42,1,a,6,")); + + var = test_validation::MyType{}; + cv::util::visit(visitor, var); + EXPECT_EQ(ss.str(), std::string("42,1,a,6,MyType,")); + + var = test_validation::MyClass{}; + cv::util::visit(visitor, var); + EXPECT_EQ(ss.str(), std::string("42,1,a,6,MyType,MyClass,")); +} + +TEST(Variant, StaticIndexedVisitor) +{ + using V = cv::util::variant; + V var{42}; + + std::stringstream ss; + cv::util::visit(test_validation::MyVoidNoParamIndexedVisitor {ss}, var); + EXPECT_EQ(ss.str(), std::string("0:42,")); + + var = double{1.0}; + cv::util::visit(test_validation::MyVoidNoParamIndexedVisitor (ss), var); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,")); + + var = char{'a'}; + cv::util::visit(test_validation::MyVoidNoParamIndexedVisitor (ss), var); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,2:a,")); + + var = float{6.0}; + cv::util::visit(test_validation::MyVoidNoParamIndexedVisitor (ss), var); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,2:a,3:6,")); + + var = test_validation::MyType{}; + cv::util::visit(test_validation::MyVoidNoParamIndexedVisitor (ss), var); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,2:a,3:6,4:MyType,")); + + var = test_validation::MyClass{}; + cv::util::visit(test_validation::MyVoidNoParamIndexedVisitor (ss), var); + EXPECT_EQ(ss.str(), std::string("0:42,1:1,2:a,3:6,4:MyType,5:MyClass,")); +} + + +TEST(Variant, LambdaVisitor) +{ + using V = cv::util::variant; + V var{42}; + { + cv::util::visit(cv::util::overload_lambdas( + [](int value) { + EXPECT_EQ(value, 42); + }, + [](double) { + ADD_FAILURE() << "can't be called for `double`"; + }, + [](char) { + ADD_FAILURE() << "can't be called for `char`"; + }, + [](float) { + 
ADD_FAILURE() << "can't be called for `float`"; + }, + [](test_validation::MyType) { + ADD_FAILURE() << "can't be called for `MyType`"; + }, + [](test_validation::MyClass) { + ADD_FAILURE() << "can't be called for `MyClass`"; + }, + [](std::string) { + ADD_FAILURE() << "can't be called for `std::string`, invalid type"; + } + ), var); + } + + var = 'c'; + { + cv::util::visit(cv::util::overload_lambdas( + [](int) { + ADD_FAILURE() << "can't be called for `int`"; + }, + [](double) { + ADD_FAILURE() << "can't be called for `double`"; + }, + [](char value) { + EXPECT_EQ(value, 'c'); + }, + [](float) { + ADD_FAILURE() << "can't be called for `float`"; + }, + [](test_validation::MyType) { + ADD_FAILURE() << "can't be called for `MyType`"; + }, + [](test_validation::MyClass) { + ADD_FAILURE() << "can't be called for `MyClass`"; + }, + [](std::string) { + ADD_FAILURE() << "can't be called for `std::string`, invalid type"; + } + ), var); + } +} } // namespace opencv_test From 926535469d82649204a9b2819f3f7341c140b7cb Mon Sep 17 00:00:00 2001 From: kikaxa Date: Wed, 7 Jul 2021 18:31:53 +0300 Subject: [PATCH 049/128] fix videoio/src/container_avi.cpp VideoInputStream alignment --- modules/videoio/src/container_avi.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/videoio/src/container_avi.cpp b/modules/videoio/src/container_avi.cpp index 8664d198df1b..c0c3234f6f95 100644 --- a/modules/videoio/src/container_avi.cpp +++ b/modules/videoio/src/container_avi.cpp @@ -124,6 +124,7 @@ struct RiffList uint32_t m_size; uint32_t m_list_type_cc; }; +#pragma pack(pop) class VideoInputStream { @@ -149,7 +150,6 @@ class VideoInputStream String m_fname; }; -#pragma pack(pop) inline VideoInputStream& operator >> (VideoInputStream& is, AviMainHeader& avih) { From 59ae0e0013d85bf4a6d064e7c258c00e56858b35 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Wed, 7 Jul 2021 22:07:59 +0300 Subject: [PATCH 050/128] Merge pull request #20163 from 
smirnov-alexey:as/gapi_serialization_docs G-API: add documentation on serialization functionality * Add documentation on serialization/deserialization * Add docs on bind() methods * Fix typo * Docs refactoring * Fix s11n docs * Fix deserialize() docs * Change deserialize docs * Fix warning * Address review comments * Fix sample * Fix warnings and errors * Fix docs warnings * Fix warnings * Address review comments * Add prefixes to snippets and fix indentation * Address review comments and move snippets to a single file --- modules/gapi/include/opencv2/gapi.hpp | 3 +- modules/gapi/include/opencv2/gapi/garg.hpp | 41 +++- modules/gapi/include/opencv2/gapi/gproto.hpp | 2 +- modules/gapi/include/opencv2/gapi/s11n.hpp | 185 ++++++++++++++---- .../gapi/include/opencv2/gapi/s11n/base.hpp | 35 +++- modules/gapi/samples/api_ref_snippets.cpp | 121 ++++++++++++ modules/gapi/src/api/s11n.cpp | 6 +- modules/gapi/test/s11n/gapi_s11n_tests.cpp | 2 - 8 files changed, 339 insertions(+), 56 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi.hpp b/modules/gapi/include/opencv2/gapi.hpp index e4b20214796a..f10dfd471dbf 100644 --- a/modules/gapi/include/opencv2/gapi.hpp +++ b/modules/gapi/include/opencv2/gapi.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
// -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation #ifndef OPENCV_GAPI_HPP @@ -19,6 +19,7 @@ @} @defgroup gapi_std_backends G-API Standard Backends @defgroup gapi_compile_args G-API Graph Compilation Arguments + @defgroup gapi_serialization G-API Serialization functionality @} */ diff --git a/modules/gapi/include/opencv2/gapi/garg.hpp b/modules/gapi/include/opencv2/gapi/garg.hpp index 20f2233bf9c3..ee6ee81e1cc6 100644 --- a/modules/gapi/include/opencv2/gapi/garg.hpp +++ b/modules/gapi/include/opencv2/gapi/garg.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. // -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2018-2021 Intel Corporation #ifndef OPENCV_GAPI_GARG_HPP @@ -171,7 +171,7 @@ using GRunArgs = std::vector; * It's an ordinary overload of addition assignment operator. * * Example of usage: - * @snippet dynamic_graph.cpp GRunArgs usage + * @snippet modules/gapi/samples/dynamic_graph.cpp GRunArgs usage * */ inline GRunArgs& operator += (GRunArgs &lhs, const GRunArgs &rhs) @@ -223,7 +223,7 @@ using GRunArgsP = std::vector; * It's an ordinary overload of addition assignment operator. * * Example of usage: - * @snippet dynamic_graph.cpp GRunArgsP usage + * @snippet modules/gapi/samples/dynamic_graph.cpp GRunArgsP usage * */ inline GRunArgsP& operator += (GRunArgsP &lhs, const GRunArgsP &rhs) @@ -235,8 +235,39 @@ inline GRunArgsP& operator += (GRunArgsP &lhs, const GRunArgsP &rhs) namespace gapi { - GAPI_EXPORTS cv::GRunArgsP bind(cv::GRunArgs &results); - GAPI_EXPORTS cv::GRunArg bind(cv::GRunArgP &out); // FIXME: think more about it +/** + * \addtogroup gapi_serialization + * @{ + * + * @brief G-API functions and classes for serialization and deserialization. + */ +/** @brief Wraps deserialized output GRunArgs to GRunArgsP which can be used by GCompiled. 
+ * + * Since it's impossible to get modifiable output arguments from deserialization + * it needs to be wrapped by this function. + * + * Example of usage: + * @snippet modules/gapi/samples/api_ref_snippets.cpp bind after deserialization + * + * @param out_args deserialized GRunArgs. + * @return the same GRunArgs wrapped in GRunArgsP. + * @see deserialize + */ +GAPI_EXPORTS cv::GRunArgsP bind(cv::GRunArgs &out_args); +/** @brief Wraps output GRunArgsP available during graph execution to GRunArgs which can be serialized. + * + * GRunArgsP is pointer-to-value, so to be serialized they need to be binded to real values + * which this function does. + * + * Example of usage: + * @snippet modules/gapi/samples/api_ref_snippets.cpp bind before serialization + * + * @param out output GRunArgsP available during graph execution. + * @return the same GRunArgsP wrapped in serializable GRunArgs. + * @see serialize + */ +GAPI_EXPORTS cv::GRunArg bind(cv::GRunArgP &out); // FIXME: think more about it +/** @} */ } template inline GRunArgs gin(const Ts&... args) diff --git a/modules/gapi/include/opencv2/gapi/gproto.hpp b/modules/gapi/include/opencv2/gapi/gproto.hpp index fbcccb38ea71..6271e470b076 100644 --- a/modules/gapi/include/opencv2/gapi/gproto.hpp +++ b/modules/gapi/include/opencv2/gapi/gproto.hpp @@ -71,7 +71,7 @@ struct GIOProtoArgs * It's an ordinary overload of addition assignment operator. 
* * Example of usage: - * @snippet dynamic_graph.cpp GIOProtoArgs usage + * @snippet modules/gapi/samples/dynamic_graph.cpp GIOProtoArgs usage * */ template diff --git a/modules/gapi/include/opencv2/gapi/s11n.hpp b/modules/gapi/include/opencv2/gapi/s11n.hpp index 5a64410e5abe..ca8e32c98bf9 100644 --- a/modules/gapi/include/opencv2/gapi/s11n.hpp +++ b/modules/gapi/include/opencv2/gapi/s11n.hpp @@ -17,65 +17,135 @@ namespace cv { namespace gapi { +/** +* \addtogroup gapi_serialization +* @{ +*/ + namespace detail { - GAPI_EXPORTS cv::GComputation getGraph(const std::vector &p); + GAPI_EXPORTS cv::GComputation getGraph(const std::vector &bytes); - GAPI_EXPORTS cv::GMetaArgs getMetaArgs(const std::vector &p); + GAPI_EXPORTS cv::GMetaArgs getMetaArgs(const std::vector &bytes); - GAPI_EXPORTS cv::GRunArgs getRunArgs(const std::vector &p); + GAPI_EXPORTS cv::GRunArgs getRunArgs(const std::vector &bytes); - GAPI_EXPORTS std::vector getVectorOfStrings(const std::vector &p); + GAPI_EXPORTS std::vector getVectorOfStrings(const std::vector &bytes); template - cv::GCompileArgs getCompileArgs(const std::vector &p); + cv::GCompileArgs getCompileArgs(const std::vector &bytes); template - cv::GRunArgs getRunArgsWithRMats(const std::vector &p); + cv::GRunArgs getRunArgsWithRMats(const std::vector &bytes); } // namespace detail +/** @brief Serialize a graph represented by GComputation into an array of bytes. + * + * Check different overloads for more examples. + * @param c GComputation to serialize. + * @return serialized vector of bytes. + */ GAPI_EXPORTS std::vector serialize(const cv::GComputation &c); -//namespace{ +/** @overload + * @param ca GCompileArgs to serialize. + */ +GAPI_EXPORTS std::vector serialize(const cv::GCompileArgs& ca); + +/** @overload + * @param ma GMetaArgs to serialize. + */ +GAPI_EXPORTS std::vector serialize(const cv::GMetaArgs& ma); + +/** @overload + * @param ra GRunArgs to serialize. 
+ */ +GAPI_EXPORTS std::vector serialize(const cv::GRunArgs& ra); + +/** @overload + * @param vs std::vector to serialize. + */ +GAPI_EXPORTS std::vector serialize(const std::vector& vs); + +/** + * @private + */ template static inline -T deserialize(const std::vector &p); - -//} //ananymous namespace - -GAPI_EXPORTS std::vector serialize(const cv::GCompileArgs&); -GAPI_EXPORTS std::vector serialize(const cv::GMetaArgs&); -GAPI_EXPORTS std::vector serialize(const cv::GRunArgs&); -GAPI_EXPORTS std::vector serialize(const std::vector&); - +T deserialize(const std::vector &bytes); + +/** @brief Deserialize GComputation from a byte array. + * + * Check different overloads for more examples. + * @param bytes serialized vector of bytes. + * @return deserialized GComputation object. + */ template<> inline -cv::GComputation deserialize(const std::vector &p) { - return detail::getGraph(p); +cv::GComputation deserialize(const std::vector &bytes) { + return detail::getGraph(bytes); } +/** @brief Deserialize GMetaArgs from a byte array. + * + * Check different overloads for more examples. + * @param bytes serialized vector of bytes. + * @return deserialized GMetaArgs object. + */ template<> inline -cv::GMetaArgs deserialize(const std::vector &p) { - return detail::getMetaArgs(p); +cv::GMetaArgs deserialize(const std::vector &bytes) { + return detail::getMetaArgs(bytes); } +/** @brief Deserialize GRunArgs from a byte array. + * + * Check different overloads for more examples. + * @param bytes serialized vector of bytes. + * @return deserialized GRunArgs object. + */ template<> inline -cv::GRunArgs deserialize(const std::vector &p) { - return detail::getRunArgs(p); +cv::GRunArgs deserialize(const std::vector &bytes) { + return detail::getRunArgs(bytes); } +/** @brief Deserialize std::vector from a byte array. + * + * Check different overloads for more examples. + * @param bytes serialized vector of bytes. + * @return deserialized std::vector object. 
+ */ template<> inline -std::vector deserialize(const std::vector &p) { - return detail::getVectorOfStrings(p); +std::vector deserialize(const std::vector &bytes) { + return detail::getVectorOfStrings(bytes); } +/** + * @brief Deserialize GCompileArgs which types were specified in the template from a byte array. + * + * @note cv::gapi::s11n::detail::S11N template specialization must be provided to make a custom type + * in GCompileArgs deserializable. + * + * @param bytes vector of bytes to deserialize GCompileArgs object from. + * @return GCompileArgs object. + * @see GCompileArgs cv::gapi::s11n::detail::S11N + */ template inline typename std::enable_if::value, GCompileArgs>:: -type deserialize(const std::vector &p) { - return detail::getCompileArgs(p); +type deserialize(const std::vector &bytes) { + return detail::getCompileArgs(bytes); } +/** + * @brief Deserialize GRunArgs including RMat objects if any from a byte array. + * + * RMat adapter type is specified in the template. + * @note To be used properly specified adapter type must overload its serialize() and + * deserialize() methods. + * @param bytes vector of bytes to deserialize GRunArgs object from. + * @return GRunArgs including RMat objects if any. + * @see RMat + */ template inline typename std::enable_if::value, GRunArgs>:: -type deserialize(const std::vector &p) { - return detail::getRunArgsWithRMats(p); +type deserialize(const std::vector &bytes) { + return detail::getRunArgsWithRMats(bytes); } } // namespace gapi } // namespace cv @@ -83,6 +153,17 @@ type deserialize(const std::vector &p) { namespace cv { namespace gapi { namespace s11n { + +/** @brief This structure is an interface for serialization routines. + * + * It's main purpose is to provide multiple overloads for operator<<() + * with basic C++ in addition to OpenCV/G-API types. + * + * This sctructure can be inherited and further extended with additional types. 
+ * + * For example, it is utilized in cv::gapi::s11n::detail::S11N as input parameter + * in serialize() method. + */ struct GAPI_EXPORTS IOStream { virtual ~IOStream() = default; // Define the native support for basic C++ types at the API level: @@ -99,6 +180,16 @@ struct GAPI_EXPORTS IOStream { virtual IOStream& operator<< (const std::string&) = 0; }; +/** @brief This structure is an interface for deserialization routines. + * + * It's main purpose is to provide multiple overloads for operator>>() + * with basic C++ in addition to OpenCV/G-API types. + * + * This structure can be inherited and further extended with additional types. + * + * For example, it is utilized in cv::gapi::s11n::detail::S11N as input parameter + * in deserialize() method. + */ struct GAPI_EXPORTS IIStream { virtual ~IIStream() = default; virtual IIStream& operator>> (bool &) = 0; @@ -116,7 +207,7 @@ struct GAPI_EXPORTS IIStream { }; namespace detail { -GAPI_EXPORTS std::unique_ptr getInStream(const std::vector &p); +GAPI_EXPORTS std::unique_ptr getInStream(const std::vector &bytes); } // namespace detail //////////////////////////////////////////////////////////////////////////////// @@ -146,24 +237,26 @@ GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::Mat &m); // FIXME: for GRunArgs serailization #if !defined(GAPI_STANDALONE) -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::UMat &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::UMat &); +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::UMat & um); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::UMat & um); #endif // !defined(GAPI_STANDALONE) GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::RMat &r); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::RMat &r); -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::gapi::wip::IStreamSource::Ptr &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::gapi::wip::IStreamSource::Ptr &); +GAPI_EXPORTS IOStream& operator<< (IOStream& 
os, const cv::gapi::wip::IStreamSource::Ptr &issptr); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::gapi::wip::IStreamSource::Ptr &issptr); -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::detail::VectorRef &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::detail::VectorRef &); +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::detail::VectorRef &vr); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::detail::VectorRef &vr); -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::detail::OpaqueRef &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::detail::OpaqueRef &); +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::detail::OpaqueRef &opr); +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::detail::OpaqueRef &opr); -GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::MediaFrame &); -GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::MediaFrame &); +/// @private -- Exclude this function from OpenCV documentation +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::MediaFrame &mf); +/// @private -- Exclude this function from OpenCV documentation +GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::MediaFrame &mf); // Generic STL types //////////////////////////////////////////////////////////////// template @@ -186,6 +279,7 @@ IIStream& operator>> (IIStream& is, std::map &m) { } return is; } + template IOStream& operator<< (IOStream& os, const std::unordered_map &m) { const uint32_t sz = static_cast(m.size()); @@ -206,6 +300,7 @@ IIStream& operator>> (IIStream& is, std::unordered_map &m) { } return is; } + template IOStream& operator<< (IOStream& os, const std::vector &ts) { const uint32_t sz = static_cast(ts.size()); @@ -233,16 +328,19 @@ template IOStream& put_v(IOStream&, const V&, std::size_t) { GAPI_Assert(false && "variant>>: requested index is invalid"); }; + template IOStream& put_v(IOStream& os, const V& v, std::size_t x) { return (x == 0u) ? 
os << cv::util::get(v) : put_v(os, v, x-1); } + template IIStream& get_v(IIStream&, V&, std::size_t, std::size_t) { GAPI_Assert(false && "variant<<: requested index is invalid"); } + template IIStream& get_v(IIStream& is, V& v, std::size_t i, std::size_t gi) { if (i == gi) { @@ -254,11 +352,13 @@ IIStream& get_v(IIStream& is, V& v, std::size_t i, std::size_t gi) { } } // namespace detail +//! @overload template IOStream& operator<< (IOStream& os, const cv::util::variant &v) { os << static_cast(v.index()); return detail::put_v, Ts...>(os, v, v.index()); } +//! @overload template IIStream& operator>> (IIStream& is, cv::util::variant &v) { int idx = -1; @@ -268,6 +368,7 @@ IIStream& operator>> (IIStream& is, cv::util::variant &v) { } // FIXME: consider a better solution +/// @private -- Exclude this function from OpenCV documentation template void getRunArgByIdx (IIStream& is, cv::util::variant &v, uint32_t idx) { is = detail::get_v, Ts...>(is, v, 0u, idx); @@ -351,8 +452,8 @@ cv::GCompileArgs getCompileArgs(const std::vector &sArgs) { } template -cv::GRunArgs getRunArgsWithRMats(const std::vector &p) { - std::unique_ptr pIs = cv::gapi::s11n::detail::getInStream(p); +cv::GRunArgs getRunArgsWithRMats(const std::vector &bytes) { + std::unique_ptr pIs = cv::gapi::s11n::detail::getInStream(bytes); cv::gapi::s11n::IIStream& is = *pIs; cv::GRunArgs args; @@ -367,6 +468,8 @@ cv::GRunArgs getRunArgsWithRMats(const std::vector &p) { return args; } } // namespace detail +/** @} */ + } // namespace gapi } // namespace cv diff --git a/modules/gapi/include/opencv2/gapi/s11n/base.hpp b/modules/gapi/include/opencv2/gapi/s11n/base.hpp index b8ec8cfaff73..11440b27e5f8 100644 --- a/modules/gapi/include/opencv2/gapi/s11n/base.hpp +++ b/modules/gapi/include/opencv2/gapi/s11n/base.hpp @@ -2,7 +2,7 @@ // It is subject to the license terms in the LICENSE file found in the top-level directory // of this distribution and at http://opencv.org/license.html. 
// -// Copyright (C) 2020 Intel Corporation +// Copyright (C) 2020-2021 Intel Corporation #ifndef OPENCV_GAPI_S11N_BASE_HPP #define OPENCV_GAPI_S11N_BASE_HPP @@ -23,25 +23,54 @@ struct IIStream; namespace detail { +//! @addtogroup gapi_serialization +//! @{ + struct NotImplemented { }; -// The default S11N for custom types is NotImplemented -// Don't! sublass from NotImplemented if you actually implement S11N. +/** @brief This structure allows to implement serialization routines for custom types. + * + * The default S11N for custom types is not implemented. + * + * @note When providing an overloaded implementation for S11N with your type + * don't inherit it from NotImplemented structure. + * + * @note There are lots of overloaded >> and << operators for basic and OpenCV/G-API types + * which can be utilized when serializing a custom type. + * + * Example of usage: + * @snippet modules/gapi/samples/api_ref_snippets.cpp S11N usage + * + */ template struct S11N: public NotImplemented { + /** + * @brief This function allows user to serialize their custom type. + * + * @note The default overload throws an exception if called. User need to + * properly overload the function to use it. + */ static void serialize(IOStream &, const T &) { GAPI_Assert(false && "No serialization routine is provided!"); } + /** + * @brief This function allows user to deserialize their custom type. + * + * @note The default overload throws an exception if called. User need to + * properly overload the function to use it. + */ static T deserialize(IIStream &) { GAPI_Assert(false && "No deserialization routine is provided!"); } }; +/// @private -- Exclude this struct from OpenCV documentation template struct has_S11N_spec { static constexpr bool value = !std::is_base_of::type>>::value; }; +//! 
@} gapi_serialization } // namespace detail } // namespace s11n diff --git a/modules/gapi/samples/api_ref_snippets.cpp b/modules/gapi/samples/api_ref_snippets.cpp index 6c660fb8fa2e..0abcab89b383 100644 --- a/modules/gapi/samples/api_ref_snippets.cpp +++ b/modules/gapi/samples/api_ref_snippets.cpp @@ -4,6 +4,10 @@ #include #include +#include +#include +#include + #include #include @@ -55,6 +59,120 @@ static void typed_example() //! [Typed_Example] } +static void bind_serialization_example() +{ + // ! [bind after deserialization] + cv::GCompiled compd; + std::vector bytes; + auto graph = cv::gapi::deserialize(bytes); + auto meta = cv::gapi::deserialize(bytes); + + compd = graph.compile(std::move(meta), cv::compile_args()); + auto in_args = cv::gapi::deserialize(bytes); + auto out_args = cv::gapi::deserialize(bytes); + compd(std::move(in_args), cv::gapi::bind(out_args)); + // ! [bind after deserialization] +} + +static void bind_deserialization_example() +{ + // ! [bind before serialization] + std::vector graph_outs; + cv::GRunArgs out_args; + + for (auto &&out : graph_outs) { + out_args.emplace_back(cv::gapi::bind(out)); + } + const auto sargsout = cv::gapi::serialize(out_args); + // ! [bind before serialization] +} + +struct SimpleCustomType { + bool val; + bool operator==(const SimpleCustomType& other) const { + return val == other.val; + } +}; + +struct SimpleCustomType2 { + int val; + std::string name; + std::vector vec; + std::map mmap; + bool operator==(const SimpleCustomType2& other) const { + return val == other.val && name == other.name && + vec == other.vec && mmap == other.mmap; + } +}; + +// ! 
[S11N usage] +namespace cv { +namespace gapi { +namespace s11n { +namespace detail { +template<> struct S11N { + static void serialize(IOStream &os, const SimpleCustomType &p) { + os << p.val; + } + static SimpleCustomType deserialize(IIStream &is) { + SimpleCustomType p; + is >> p.val; + return p; + } +}; + +template<> struct S11N { + static void serialize(IOStream &os, const SimpleCustomType2 &p) { + os << p.val << p.name << p.vec << p.mmap; + } + static SimpleCustomType2 deserialize(IIStream &is) { + SimpleCustomType2 p; + is >> p.val >> p.name >> p.vec >> p.mmap; + return p; + } +}; +} // namespace detail +} // namespace s11n +} // namespace gapi +} // namespace cv +// ! [S11N usage] + +namespace cv { +namespace detail { +template<> struct CompileArgTag { + static const char* tag() { + return "org.opencv.test.simple_custom_type"; + } +}; + +template<> struct CompileArgTag { + static const char* tag() { + return "org.opencv.test.simple_custom_type_2"; + } +}; +} // namespace detail +} // namespace cv + +static void s11n_example() +{ + SimpleCustomType customVar1 { false }; + SimpleCustomType2 customVar2 { 1248, "World", {1280, 720, 640, 480}, + { {5, 32434142342}, {7, 34242432} } }; + + std::vector sArgs = cv::gapi::serialize( + cv::compile_args(customVar1, customVar2)); + + cv::GCompileArgs dArgs = cv::gapi::deserialize(sArgs); + + SimpleCustomType dCustomVar1 = cv::gapi::getCompileArg(dArgs).value(); + SimpleCustomType2 dCustomVar2 = cv::gapi::getCompileArg(dArgs).value(); + + (void) dCustomVar1; + (void) dCustomVar2; +} + G_TYPED_KERNEL(IAdd, , "test.custom.add") { static cv::GMatDesc outMeta(const cv::GMatDesc &in) { return in; } }; @@ -128,5 +246,8 @@ int main(int argc, char *argv[]) // unused functions typed_example(); gscalar_example(); + bind_serialization_example(); + bind_deserialization_example(); + s11n_example(); return 0; } diff --git a/modules/gapi/src/api/s11n.cpp b/modules/gapi/src/api/s11n.cpp index d08f47fd26a7..97f5a95c42a6 100644 --- 
a/modules/gapi/src/api/s11n.cpp +++ b/modules/gapi/src/api/s11n.cpp @@ -65,11 +65,11 @@ std::vector cv::gapi::serialize(const std::vector& vs) // FIXME: This function should move from S11N to GRunArg-related entities. // it has nothing to do with the S11N as it is -cv::GRunArgsP cv::gapi::bind(cv::GRunArgs &results) +cv::GRunArgsP cv::gapi::bind(cv::GRunArgs &out_args) { cv::GRunArgsP outputs; - outputs.reserve(results.size()); - for (cv::GRunArg &res_obj : results) + outputs.reserve(out_args.size()); + for (cv::GRunArg &res_obj : out_args) { using T = cv::GRunArg; switch (res_obj.index()) diff --git a/modules/gapi/test/s11n/gapi_s11n_tests.cpp b/modules/gapi/test/s11n/gapi_s11n_tests.cpp index f4a30b394631..c2b17521d966 100644 --- a/modules/gapi/test/s11n/gapi_s11n_tests.cpp +++ b/modules/gapi/test/s11n/gapi_s11n_tests.cpp @@ -754,8 +754,6 @@ TEST_F(S11N_Basic, Test_Deserialize_CompileArgs_RandomOrder) { std::vector sArgs = cv::gapi::serialize( cv::compile_args(simpleCustomVar, simpleCustomVar2)); GCompileArgs dArgs = cv::gapi::deserialize(sArgs); From b42623ff9d6b1baf9fda11b16a8d2f512720d424 Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Thu, 8 Jul 2021 10:42:44 +0300 Subject: [PATCH 051/128] port base64 encoding from 3.4 --- modules/core/src/persistence.cpp | 2393 +++++++++-------- modules/core/src/persistence.hpp | 18 + .../core/src/persistence_base64_encoding.cpp | 370 +++ .../core/src/persistence_base64_encoding.hpp | 127 + modules/core/src/persistence_impl.hpp | 231 ++ modules/core/src/persistence_json.cpp | 69 +- modules/core/src/persistence_xml.cpp | 30 +- modules/core/src/persistence_yml.cpp | 28 +- modules/core/test/test_io.cpp | 29 +- modules/python/test/test_filestorage_io.py | 93 + 10 files changed, 2156 insertions(+), 1232 deletions(-) create mode 100644 modules/core/src/persistence_base64_encoding.cpp create mode 100644 modules/core/src/persistence_base64_encoding.hpp create mode 100644 modules/core/src/persistence_impl.hpp diff --git 
a/modules/core/src/persistence.cpp b/modules/core/src/persistence.cpp index 32328361e874..291931b5ae01 100644 --- a/modules/core/src/persistence.cpp +++ b/modules/core/src/persistence.cpp @@ -4,6 +4,8 @@ #include "precomp.hpp" #include "persistence.hpp" +#include "persistence_impl.hpp" +#include "persistence_base64_encoding.hpp" #include #include @@ -153,7 +155,7 @@ static int symbolToType(char c) return CV_SEQ_ELTYPE_PTR; const char* pos = strchr( symbols, c ); if( !pos ) - CV_Error( CV_StsBadArg, "Invalid data type specification" ); + CV_Error( cv::Error::StsBadArg, "Invalid data type specification" ); return static_cast(pos - symbols); } @@ -192,7 +194,7 @@ int decodeFormat( const char* dt, int* fmt_pairs, int max_len ) } if( count <= 0 ) - CV_Error( CV_StsBadArg, "Invalid data type specification" ); + CV_Error( cv::Error::StsBadArg, "Invalid data type specification" ); fmt_pairs[i] = count; } @@ -208,7 +210,7 @@ int decodeFormat( const char* dt, int* fmt_pairs, int max_len ) { i += 2; if( i >= max_len ) - CV_Error( CV_StsBadArg, "Too long data type specification" ); + CV_Error( cv::Error::StsBadArg, "Too long data type specification" ); } fmt_pairs[i] = 0; } @@ -275,7 +277,7 @@ int decodeSimpleFormat( const char* dt ) fmt_pair_count = decodeFormat( dt, fmt_pairs, CV_FS_MAX_FMT_PAIRS ); if( fmt_pair_count != 1 || fmt_pairs[0] >= CV_CN_MAX) - CV_Error( CV_StsError, "Too complex format for the matrix" ); + CV_Error( cv::Error::StsError, "Too complex format for the matrix" ); elem_type = CV_MAKETYPE( fmt_pairs[1], fmt_pairs[0] ); @@ -345,1450 +347,1483 @@ static inline void writeReal(uchar* p, double fval) #endif } -class FileStorage::Impl : public FileStorage_API -{ -public: - void init() - { - flags = 0; - buffer.clear(); - bufofs = 0; - state = UNDEFINED; - is_opened = false; - dummy_eof = false; - write_mode = false; - mem_mode = false; - space = 0; - wrap_margin = 71; - fmt = 0; - file = 0; - gzfile = 0; - empty_stream = true; - strbufv.clear(); - strbuf = 0; 
- strbufsize = strbufpos = 0; - roots.clear(); - - fs_data.clear(); - fs_data_ptrs.clear(); - fs_data_blksz.clear(); - freeSpaceOfs = 0; - - str_hash.clear(); - str_hash_data.clear(); - str_hash_data.resize(1); - str_hash_data[0] = '\0'; - - filename.clear(); - lineno = 0; - } - - Impl(FileStorage* _fs) - { - fs_ext = _fs; - init(); - } - - virtual ~Impl() - { - release(); - } - void release(String* out=0) - { - if( is_opened ) - { - if(out) - out->clear(); - if( write_mode ) - { - while( write_stack.size() > 1 ) - { - endWriteStruct(); - } - flush(); - if( fmt == FileStorage::FORMAT_XML ) - puts( "\n" ); - else if ( fmt == FileStorage::FORMAT_JSON ) - puts( "}\n" ); - } - if( mem_mode && out ) - { - *out = cv::String(outbuf.begin(), outbuf.end()); +void FileStorage::Impl::init() { + flags = 0; + buffer.clear(); + bufofs = 0; + state = UNDEFINED; + is_using_base64 = false; + state_of_writing_base64 = FileStorage_API::Base64State::Uncertain; + is_write_struct_delayed = false; + delayed_struct_key = nullptr; + delayed_struct_flags = 0; + delayed_type_name = nullptr; + base64_writer = nullptr; + is_opened = false; + dummy_eof = false; + write_mode = false; + mem_mode = false; + space = 0; + wrap_margin = 71; + fmt = 0; + file = 0; + gzfile = 0; + empty_stream = true; + + strbufv.clear(); + strbuf = 0; + strbufsize = strbufpos = 0; + roots.clear(); + + fs_data.clear(); + fs_data_ptrs.clear(); + fs_data_blksz.clear(); + freeSpaceOfs = 0; + + str_hash.clear(); + str_hash_data.clear(); + str_hash_data.resize(1); + str_hash_data[0] = '\0'; + + filename.clear(); + lineno = 0; +} + +FileStorage::Impl::Impl(FileStorage *_fs) { + fs_ext = _fs; + init(); +} + +FileStorage::Impl::~Impl() { + release(); +} + +void FileStorage::Impl::release(String *out) { + if (is_opened) { + if (out) + out->clear(); + if (write_mode) { + while (write_stack.size() > 1) { + endWriteStruct(); } + flush(); + if (fmt == FileStorage::FORMAT_XML) + puts("\n"); + else if (fmt == 
FileStorage::FORMAT_JSON) + puts("}\n"); + } + if (mem_mode && out) { + *out = cv::String(outbuf.begin(), outbuf.end()); } - closeFile(); - init(); } + closeFile(); + init(); +} - void analyze_file_name( const std::string& file_name, std::vector& params ) - { - params.clear(); - static const char not_file_name = '\n'; - static const char parameter_begin = '?'; - static const char parameter_separator = '&'; +void FileStorage::Impl::analyze_file_name(const std::string &file_name, std::vector ¶ms) { + params.clear(); + static const char not_file_name = '\n'; + static const char parameter_begin = '?'; + static const char parameter_separator = '&'; - if( file_name.find(not_file_name, (size_t)0) != std::string::npos ) - return; + if (file_name.find(not_file_name, (size_t) 0) != std::string::npos) + return; - size_t beg = file_name.find_last_of(parameter_begin); - params.push_back(file_name.substr((size_t)0, beg)); + size_t beg = file_name.find_last_of(parameter_begin); + params.push_back(file_name.substr((size_t) 0, beg)); - if( beg != std::string::npos ) - { - size_t end = file_name.size(); - beg++; - for( size_t param_beg = beg, param_end = beg; - param_end < end; - param_beg = param_end + 1 ) - { - param_end = file_name.find_first_of( parameter_separator, param_beg ); - if( (param_end == std::string::npos || param_end != param_beg) && param_beg + 1 < end ) - { - params.push_back( file_name.substr( param_beg, param_end - param_beg ) ); - } + if (beg != std::string::npos) { + size_t end = file_name.size(); + beg++; + for (size_t param_beg = beg, param_end = beg; + param_end < end; + param_beg = param_end + 1) { + param_end = file_name.find_first_of(parameter_separator, param_beg); + if ((param_end == std::string::npos || param_end != param_beg) && param_beg + 1 < end) { + params.push_back(file_name.substr(param_beg, param_end - param_beg)); } } } +} - bool open( const char* filename_or_buf, int _flags, const char* encoding ) - { - _flags &= ~FileStorage::BASE64; - - 
bool ok = true; - release(); +bool FileStorage::Impl::open(const char *filename_or_buf, int _flags, const char *encoding) { + bool ok = true; + release(); - bool append = (_flags & 3) == FileStorage::APPEND; - mem_mode = (_flags & FileStorage::MEMORY) != 0; + bool append = (_flags & 3) == FileStorage::APPEND; + mem_mode = (_flags & FileStorage::MEMORY) != 0; - write_mode = (_flags & 3) != 0; + write_mode = (_flags & 3) != 0; + bool write_base64 = (write_mode || append) && (_flags & FileStorage::BASE64) != 0; - bool isGZ = false; - size_t fnamelen = 0; + bool isGZ = false; + size_t fnamelen = 0; - std::vector params; - //if ( !mem_mode ) - { - analyze_file_name( filename_or_buf, params ); - if( !params.empty() ) - filename = params[0]; + std::vector params; + //if ( !mem_mode ) + { + analyze_file_name(filename_or_buf, params); + if (!params.empty()) + filename = params[0]; - /*if( !write_base64 && params.size() >= 2 && - std::find(params.begin()+1, params.end(), std::string("base64")) != params.end()) - write_base64 = (write_mode || append);*/ - } + if (!write_base64 && params.size() >= 2 && + std::find(params.begin() + 1, params.end(), std::string("base64")) != params.end()) + write_base64 = (write_mode || append); + } - if( filename.size() == 0 && !mem_mode && !write_mode ) - CV_Error( CV_StsNullPtr, "NULL or empty filename" ); + if (filename.size() == 0 && !mem_mode && !write_mode) + CV_Error(cv::Error::StsNullPtr, "NULL or empty filename"); - if( mem_mode && append ) - CV_Error( CV_StsBadFlag, "FileStorage::APPEND and FileStorage::MEMORY are not currently compatible" ); + if (mem_mode && append) + CV_Error(cv::Error::StsBadFlag, "FileStorage::APPEND and FileStorage::MEMORY are not currently compatible"); - flags = _flags; + flags = _flags; - if( !mem_mode ) - { - char* dot_pos = strrchr((char*)filename.c_str(), '.'); - char compression = '\0'; + if (!mem_mode) { + char *dot_pos = strrchr((char *) filename.c_str(), '.'); + char compression = '\0'; - if( dot_pos 
&& dot_pos[1] == 'g' && dot_pos[2] == 'z' && - (dot_pos[3] == '\0' || (cv_isdigit(dot_pos[3]) && dot_pos[4] == '\0')) ) - { - if( append ) - { - CV_Error(CV_StsNotImplemented, "Appending data to compressed file is not implemented" ); - } - isGZ = true; - compression = dot_pos[3]; - if( compression ) - dot_pos[3] = '\0', fnamelen--; + if (dot_pos && dot_pos[1] == 'g' && dot_pos[2] == 'z' && + (dot_pos[3] == '\0' || (cv_isdigit(dot_pos[3]) && dot_pos[4] == '\0'))) { + if (append) { + CV_Error(cv::Error::StsNotImplemented, "Appending data to compressed file is not implemented"); } + isGZ = true; + compression = dot_pos[3]; + if (compression) + dot_pos[3] = '\0', fnamelen--; + } - if( !isGZ ) - { - file = fopen(filename.c_str(), !write_mode ? "rt" : !append ? "wt" : "a+t" ); - if( !file ) - return false; - } - else - { + if (!isGZ) { + file = fopen(filename.c_str(), !write_mode ? "rt" : !append ? "wt" : "a+t"); + if (!file) + return false; + } else { #if USE_ZLIB - char mode[] = { write_mode ? 'w' : 'r', 'b', compression ? compression : '3', '\0' }; - gzfile = gzopen(filename.c_str(), mode); - if( !gzfile ) - return false; + char mode[] = {write_mode ? 'w' : 'r', 'b', compression ? 
compression : '3', '\0'}; + gzfile = gzopen(filename.c_str(), mode); + if (!gzfile) + return false; #else - CV_Error(CV_StsNotImplemented, "There is no compressed file storage support in this configuration"); + CV_Error(cv::Error::StsNotImplemented, "There is no compressed file storage support in this configuration"); #endif - } } + } - roots.clear(); - fs_data.clear(); - wrap_margin = 71; - fmt = FileStorage::FORMAT_AUTO; + roots.clear(); + fs_data.clear(); + wrap_margin = 71; + fmt = FileStorage::FORMAT_AUTO; - if( write_mode ) - { - fmt = flags & FileStorage::FORMAT_MASK; + if (write_mode) { + fmt = flags & FileStorage::FORMAT_MASK; - if( mem_mode ) - outbuf.clear(); + if (mem_mode) + outbuf.clear(); - if( fmt == FileStorage::FORMAT_AUTO && !filename.empty() ) - { - const char* dot_pos = NULL; - const char* dot_pos2 = NULL; - // like strrchr() implementation, but save two last positions simultaneously - for (const char* pos = &filename[0]; pos[0] != 0; pos++) - { - if( pos[0] == '.' ) - { - dot_pos2 = dot_pos; - dot_pos = pos; - } - } - if (fs::strcasecmp(dot_pos, ".gz") == 0 && dot_pos2 != NULL) - { - dot_pos = dot_pos2; + if (fmt == FileStorage::FORMAT_AUTO && !filename.empty()) { + const char *dot_pos = NULL; + const char *dot_pos2 = NULL; + // like strrchr() implementation, but save two last positions simultaneously + for (const char *pos = &filename[0]; pos[0] != 0; pos++) { + if (pos[0] == '.') { + dot_pos2 = dot_pos; + dot_pos = pos; } - fmt = (fs::strcasecmp(dot_pos, ".xml") == 0 || fs::strcasecmp(dot_pos, ".xml.gz") == 0 ) - ? FileStorage::FORMAT_XML - : (fs::strcasecmp(dot_pos, ".json") == 0 || fs::strcasecmp(dot_pos, ".json.gz") == 0) - ? 
FileStorage::FORMAT_JSON - : FileStorage::FORMAT_YAML; - } - else if( fmt == FileStorage::FORMAT_AUTO ) - { - fmt = FileStorage::FORMAT_XML; } - - // we use factor=6 for XML (the longest characters (' and ") are encoded with 6 bytes (' and ") - // and factor=4 for YAML ( as we use 4 bytes for non ASCII characters (e.g. \xAB)) - int buf_size = CV_FS_MAX_LEN*(fmt == FileStorage::FORMAT_XML ? 6 : 4) + 1024; - - if (append) - { - fseek( file, 0, SEEK_END ); - if (ftell(file) == 0) - append = false; + if (fs::strcasecmp(dot_pos, ".gz") == 0 && dot_pos2 != NULL) { + dot_pos = dot_pos2; } + fmt = (fs::strcasecmp(dot_pos, ".xml") == 0 || fs::strcasecmp(dot_pos, ".xml.gz") == 0) + ? FileStorage::FORMAT_XML + : (fs::strcasecmp(dot_pos, ".json") == 0 || fs::strcasecmp(dot_pos, ".json.gz") == 0) + ? FileStorage::FORMAT_JSON + : FileStorage::FORMAT_YAML; + } else if (fmt == FileStorage::FORMAT_AUTO) { + fmt = FileStorage::FORMAT_XML; + } - write_stack.clear(); - empty_stream = true; - write_stack.push_back(FStructData("", FileNode::MAP | FileNode::EMPTY, 0)); - buffer.reserve(buf_size + 1024); - buffer.resize(buf_size); - bufofs = 0; + // we use factor=6 for XML (the longest characters (' and ") are encoded with 6 bytes (' and ") + // and factor=4 for YAML ( as we use 4 bytes for non ASCII characters (e.g. \xAB)) + int buf_size = CV_FS_MAX_LEN * (fmt == FileStorage::FORMAT_XML ? 6 : 4) + 1024; - if( fmt == FileStorage::FORMAT_XML ) - { - size_t file_size = file ? (size_t)ftell(file) : (size_t)0; - if( !append || file_size == 0 ) - { - if( encoding && *encoding != '\0' ) - { - if( fs::strcasecmp(encoding, "UTF-16" ) == 0 ) - { - release(); - CV_Error( CV_StsBadArg, "UTF-16 XML encoding is not supported! 
Use 8-bit encoding\n"); - } + if (append) { + fseek(file, 0, SEEK_END); + if (ftell(file) == 0) + append = false; + } - CV_Assert( strlen(encoding) < 1000 ); - char buf[1100]; - sprintf(buf, "\n", encoding); - puts( buf ); + write_stack.clear(); + empty_stream = true; + write_stack.push_back(FStructData("", FileNode::MAP | FileNode::EMPTY, 0)); + buffer.reserve(buf_size + 1024); + buffer.resize(buf_size); + bufofs = 0; + is_using_base64 = write_base64; + state_of_writing_base64 = FileStorage_API::Base64State::Uncertain; + + if (fmt == FileStorage::FORMAT_XML) { + size_t file_size = file ? (size_t) ftell(file) : (size_t) 0; + if (!append || file_size == 0) { + if (encoding && *encoding != '\0') { + if (fs::strcasecmp(encoding, "UTF-16") == 0) { + release(); + CV_Error(cv::Error::StsBadArg, "UTF-16 XML encoding is not supported! Use 8-bit encoding\n"); } - else - puts( "\n" ); - puts( "\n" ); - } - else - { - int xml_buf_size = 1 << 10; - char substr[] = ""; - int last_occurrence = -1; - xml_buf_size = MIN(xml_buf_size, int(file_size)); - fseek( file, -xml_buf_size, SEEK_END ); - // find the last occurrence of - for(;;) - { - int line_offset = (int)ftell( file ); - const char* ptr0 = this->gets(xml_buf_size); - const char* ptr = NULL; - if( !ptr0 ) + + CV_Assert(strlen(encoding) < 1000); + char buf[1100]; + sprintf(buf, "\n", encoding); + puts(buf); + } else + puts("\n"); + puts("\n"); + } else { + int xml_buf_size = 1 << 10; + char substr[] = ""; + int last_occurrence = -1; + xml_buf_size = MIN(xml_buf_size, int(file_size)); + fseek(file, -xml_buf_size, SEEK_END); + // find the last occurrence of + for (;;) { + int line_offset = (int) ftell(file); + const char *ptr0 = this->gets(xml_buf_size); + const char *ptr = NULL; + if (!ptr0) + break; + ptr = ptr0; + for (;;) { + ptr = strstr(ptr, substr); + if (!ptr) break; - ptr = ptr0; - for(;;) - { - ptr = strstr( ptr, substr ); - if( !ptr ) - break; - last_occurrence = line_offset + (int)(ptr - ptr0); - ptr += 
strlen(substr); - } + last_occurrence = line_offset + (int) (ptr - ptr0); + ptr += strlen(substr); } - if( last_occurrence < 0 ) - { - release(); - CV_Error( CV_StsError, "Could not find in the end of file.\n" ); - } - closeFile(); - file = fopen( filename.c_str(), "r+t" ); - CV_Assert(file != 0); - fseek( file, last_occurrence, SEEK_SET ); - // replace the last "" with " ", which has the same length - puts( " " ); - fseek( file, 0, SEEK_END ); - puts( "\n" ); } - - emitter = createXMLEmitter(this); + if (last_occurrence < 0) { + release(); + CV_Error(cv::Error::StsError, "Could not find in the end of file.\n"); + } + closeFile(); + file = fopen(filename.c_str(), "r+t"); + CV_Assert(file != 0); + fseek(file, last_occurrence, SEEK_SET); + // replace the last "" with " ", which has the same length + puts(" "); + fseek(file, 0, SEEK_END); + puts("\n"); } - else if( fmt == FileStorage::FORMAT_YAML ) - { - if( !append) - puts( "%YAML:1.0\n---\n" ); - else - puts( "...\n---\n" ); - emitter = createYAMLEmitter(this); - } + emitter = createXMLEmitter(this); + } else if (fmt == FileStorage::FORMAT_YAML) { + if (!append) + puts("%YAML:1.0\n---\n"); else - { - CV_Assert( fmt == FileStorage::FORMAT_JSON ); - if( !append ) - puts( "{\n" ); - else - { - bool valid = false; - long roffset = 0; - for ( ; - fseek( file, roffset, SEEK_END ) == 0; - roffset -= 1 ) - { - const char end_mark = '}'; - if ( fgetc( file ) == end_mark ) - { - fseek( file, roffset, SEEK_END ); - valid = true; - break; - } + puts("...\n---\n"); + + emitter = createYAMLEmitter(this); + } else { + CV_Assert(fmt == FileStorage::FORMAT_JSON); + if (!append) + puts("{\n"); + else { + bool valid = false; + long roffset = 0; + for (; + fseek(file, roffset, SEEK_END) == 0; + roffset -= 1) { + const char end_mark = '}'; + if (fgetc(file) == end_mark) { + fseek(file, roffset, SEEK_END); + valid = true; + break; } + } - if ( valid ) - { - closeFile(); - file = fopen( filename.c_str(), "r+t" ); - CV_Assert(file != 0); - 
fseek( file, roffset, SEEK_END ); - fputs( ",", file ); - } - else - { - CV_Error( CV_StsError, "Could not find '}' in the end of file.\n" ); - } + if (valid) { + closeFile(); + file = fopen(filename.c_str(), "r+t"); + CV_Assert(file != 0); + fseek(file, roffset, SEEK_END); + fputs(",", file); + } else { + CV_Error(cv::Error::StsError, "Could not find '}' in the end of file.\n"); } - write_stack.back().indent = 4; - emitter = createJSONEmitter(this); } - is_opened = true; + write_stack.back().indent = 4; + emitter = createJSONEmitter(this); + } + is_opened = true; + } else { + const size_t buf_size0 = 40; + buffer.resize(buf_size0); + if (mem_mode) { + strbuf = (char *) filename_or_buf; + strbufsize = strlen(strbuf); } + + const char *yaml_signature = "%YAML"; + const char *json_signature = "{"; + const char *xml_signature = "gets(16); + CV_Assert(buf); + char *bufPtr = cv_skip_BOM(buf); + size_t bufOffset = bufPtr - buf; + + if (strncmp(bufPtr, yaml_signature, strlen(yaml_signature)) == 0) + fmt = FileStorage::FORMAT_YAML; + else if (strncmp(bufPtr, json_signature, strlen(json_signature)) == 0) + fmt = FileStorage::FORMAT_JSON; + else if (strncmp(bufPtr, xml_signature, strlen(xml_signature)) == 0) + fmt = FileStorage::FORMAT_XML; + else if (strbufsize == bufOffset) + CV_Error(cv::Error::StsBadArg, "Input file is invalid"); else - { - const size_t buf_size0 = 40; - buffer.resize(buf_size0); - if( mem_mode ) - { - strbuf = (char*)filename_or_buf; - strbufsize = strlen(strbuf); - } + CV_Error(cv::Error::StsBadArg, "Unsupported file storage format"); - const char* yaml_signature = "%YAML"; - const char* json_signature = "{"; - const char* xml_signature = "gets(16); - CV_Assert(buf); - char* bufPtr = cv_skip_BOM(buf); - size_t bufOffset = bufPtr - buf; - - if(strncmp( bufPtr, yaml_signature, strlen(yaml_signature) ) == 0) - fmt = FileStorage::FORMAT_YAML; - else if(strncmp( bufPtr, json_signature, strlen(json_signature) ) == 0) - fmt = FileStorage::FORMAT_JSON; - else 
if(strncmp( bufPtr, xml_signature, strlen(xml_signature) ) == 0) - fmt = FileStorage::FORMAT_XML; - else if(strbufsize == bufOffset) - CV_Error(CV_BADARG_ERR, "Input file is invalid"); - else - CV_Error(CV_BADARG_ERR, "Unsupported file storage format"); + rewind(); + strbufpos = bufOffset; + bufofs = 0; - rewind(); - strbufpos = bufOffset; - bufofs = 0; + try { + char *ptr = bufferStart(); + ptr[0] = ptr[1] = ptr[2] = '\0'; + FileNode root_nodes(fs_ext, 0, 0); - try - { - char* ptr = bufferStart(); - ptr[0] = ptr[1] = ptr[2] = '\0'; - FileNode root_nodes(fs_ext, 0, 0); + uchar *rptr = reserveNodeSpace(root_nodes, 9); + *rptr = FileNode::SEQ; + writeInt(rptr + 1, 4); + writeInt(rptr + 5, 0); - uchar* rptr = reserveNodeSpace(root_nodes, 9); - *rptr = FileNode::SEQ; - writeInt(rptr + 1, 4); - writeInt(rptr + 5, 0); + roots.clear(); - roots.clear(); + switch (fmt) { + case FileStorage::FORMAT_XML: + parser = createXMLParser(this); + break; + case FileStorage::FORMAT_YAML: + parser = createYAMLParser(this); + break; + case FileStorage::FORMAT_JSON: + parser = createJSONParser(this); + break; + default: + parser = Ptr(); + } - switch (fmt) - { - case FileStorage::FORMAT_XML: parser = createXMLParser(this); break; - case FileStorage::FORMAT_YAML: parser = createYAMLParser(this); break; - case FileStorage::FORMAT_JSON: parser = createJSONParser(this); break; - default: parser = Ptr(); - } + if (!parser.empty()) { + ok = parser->parse(ptr); + if (ok) { + finalizeCollection(root_nodes); - if( !parser.empty() ) - { - ok = parser->parse(ptr); - if( ok ) - { - finalizeCollection(root_nodes); + CV_Assert(!fs_data_ptrs.empty()); + FileNode roots_node(fs_ext, 0, 0); + size_t i, nroots = roots_node.size(); + FileNodeIterator it = roots_node.begin(); - CV_Assert( !fs_data_ptrs.empty() ); - FileNode roots_node(fs_ext, 0, 0); - size_t i, nroots = roots_node.size(); - FileNodeIterator it = roots_node.begin(); - - for( i = 0; i < nroots; i++, ++it ) - roots.push_back(*it); - } + for (i 
= 0; i < nroots; i++, ++it) + roots.push_back(*it); } } - catch(...) - { - is_opened = true; - release(); - throw; - } - - // release resources that we do not need anymore - closeFile(); + } + catch (...) { is_opened = true; - std::vector tmpbuf; - std::swap(buffer, tmpbuf); - bufofs = 0; + release(); + throw; } - return ok; + + // release resources that we do not need anymore + closeFile(); + is_opened = true; + std::vector tmpbuf; + std::swap(buffer, tmpbuf); + bufofs = 0; } + return ok; +} - void puts( const char* str ) - { - CV_Assert( write_mode ); - if( mem_mode ) - std::copy(str, str + strlen(str), std::back_inserter(outbuf)); - else if( file ) - fputs( str, file ); +void FileStorage::Impl::puts(const char *str) { + CV_Assert(write_mode); + if (mem_mode) + std::copy(str, str + strlen(str), std::back_inserter(outbuf)); + else if (file) + fputs(str, file); #if USE_ZLIB - else if( gzfile ) - gzputs( gzfile, str ); + else if (gzfile) + gzputs(gzfile, str); #endif - else - CV_Error( CV_StsError, "The storage is not opened" ); - } - - char* getsFromFile( char* buf, int count ) - { - if( file ) - return fgets( buf, count, file ); - #if USE_ZLIB - if( gzfile ) - return gzgets( gzfile, buf, count ); - #endif - CV_Error(CV_StsError, "The storage is not opened"); - } + else + CV_Error(cv::Error::StsError, "The storage is not opened"); +} - char* gets( size_t maxCount ) - { - if( strbuf ) - { - size_t i = strbufpos, len = strbufsize; - const char* instr = strbuf; - for( ; i < len; i++ ) - { - char c = instr[i]; - if( c == '\0' || c == '\n' ) - { - if( c == '\n' ) - i++; - break; - } +char *FileStorage::Impl::getsFromFile(char *buf, int count) { + if (file) + return fgets(buf, count, file); +#if USE_ZLIB + if (gzfile) + return gzgets(gzfile, buf, count); +#endif + CV_Error(cv::Error::StsError, "The storage is not opened"); +} + +char *FileStorage::Impl::gets(size_t maxCount) { + if (strbuf) { + size_t i = strbufpos, len = strbufsize; + const char *instr = strbuf; + for 
(; i < len; i++) { + char c = instr[i]; + if (c == '\0' || c == '\n') { + if (c == '\n') + i++; + break; } - size_t count = i - strbufpos; - if( maxCount == 0 || maxCount > count ) - maxCount = count; - buffer.resize(std::max(buffer.size(), maxCount + 8)); - memcpy(&buffer[0], instr + strbufpos, maxCount); - buffer[maxCount] = '\0'; - strbufpos = i; - return maxCount > 0 ? &buffer[0] : 0; } + size_t count = i - strbufpos; + if (maxCount == 0 || maxCount > count) + maxCount = count; + buffer.resize(std::max(buffer.size(), maxCount + 8)); + memcpy(&buffer[0], instr + strbufpos, maxCount); + buffer[maxCount] = '\0'; + strbufpos = i; + return maxCount > 0 ? &buffer[0] : 0; + } + + const size_t MAX_BLOCK_SIZE = INT_MAX / 2; // hopefully, that will be enough + if (maxCount == 0) + maxCount = MAX_BLOCK_SIZE; + else + CV_Assert(maxCount < MAX_BLOCK_SIZE); + size_t ofs = 0; - const size_t MAX_BLOCK_SIZE = INT_MAX/2; // hopefully, that will be enough - if( maxCount == 0 ) - maxCount = MAX_BLOCK_SIZE; - else - CV_Assert(maxCount < MAX_BLOCK_SIZE); - size_t ofs = 0; - - for(;;) - { - int count = (int)std::min(buffer.size() - ofs - 16, maxCount); - char* ptr = getsFromFile( &buffer[ofs], count+1 ); - if( !ptr ) - break; - int delta = (int)strlen(ptr); - ofs += delta; - maxCount -= delta; - if( ptr[delta-1] == '\n' || maxCount == 0 ) - break; - if( delta == count ) - buffer.resize((size_t)(buffer.size()*1.5)); - } - return ofs > 0 ? &buffer[0] : 0; + for (;;) { + int count = (int) std::min(buffer.size() - ofs - 16, maxCount); + char *ptr = getsFromFile(&buffer[ofs], count + 1); + if (!ptr) + break; + int delta = (int) strlen(ptr); + ofs += delta; + maxCount -= delta; + if (ptr[delta - 1] == '\n' || maxCount == 0) + break; + if (delta == count) + buffer.resize((size_t) (buffer.size() * 1.5)); } + return ofs > 0 ? &buffer[0] : 0; +} - char* gets() - { - char* ptr = this->gets(0); - if( !ptr ) - { - ptr = bufferStart(); // FIXIT Why do we need this hack? 
What is about other parsers JSON/YAML? - *ptr = '\0'; - setEof(); - return 0; - } - else - { - size_t l = strlen(ptr); - if( l > 0 && ptr[l-1] != '\n' && ptr[l-1] != '\r' && !eof() ) - { - ptr[l] = '\n'; - ptr[l+1] = '\0'; - } +char *FileStorage::Impl::gets() { + char *ptr = this->gets(0); + if (!ptr) { + ptr = bufferStart(); // FIXIT Why do we need this hack? What is about other parsers JSON/YAML? + *ptr = '\0'; + setEof(); + return 0; + } else { + size_t l = strlen(ptr); + if (l > 0 && ptr[l - 1] != '\n' && ptr[l - 1] != '\r' && !eof()) { + ptr[l] = '\n'; + ptr[l + 1] = '\0'; } - lineno++; - return ptr; } + lineno++; + return ptr; +} - bool eof() - { - if( dummy_eof ) - return true; - if( strbuf ) - return strbufpos >= strbufsize; - if( file ) - return feof(file) != 0; +bool FileStorage::Impl::eof() { + if (dummy_eof) + return true; + if (strbuf) + return strbufpos >= strbufsize; + if (file) + return feof(file) != 0; #if USE_ZLIB - if( gzfile ) - return gzeof(gzfile) != 0; + if (gzfile) + return gzeof(gzfile) != 0; #endif - return false; - } + return false; +} - void setEof() - { - dummy_eof = true; - } +void FileStorage::Impl::setEof() { + dummy_eof = true; +} - void closeFile() - { - if( file ) - fclose( file ); +void FileStorage::Impl::closeFile() { + if (file) + fclose(file); #if USE_ZLIB - else if( gzfile ) - gzclose( gzfile ); + else if (gzfile) + gzclose(gzfile); #endif - file = 0; - gzfile = 0; - strbuf = 0; - strbufpos = 0; - is_opened = false; - } + file = 0; + gzfile = 0; + strbuf = 0; + strbufpos = 0; + is_opened = false; +} - void rewind() - { - if( file ) - ::rewind(file); +void FileStorage::Impl::rewind() { + if (file) + ::rewind(file); #if USE_ZLIB - else if( gzfile ) - gzrewind(gzfile); + else if (gzfile) + gzrewind(gzfile); #endif - strbufpos = 0; - } + strbufpos = 0; +} - char* resizeWriteBuffer( char* ptr, int len ) - { - const char* buffer_end = &buffer[0] + buffer.size(); - if( ptr + len < buffer_end ) - return ptr; +char 
*FileStorage::Impl::resizeWriteBuffer(char *ptr, int len) { + const char *buffer_end = &buffer[0] + buffer.size(); + if (ptr + len < buffer_end) + return ptr; - const char* buffer_start = &buffer[0]; - int written_len = (int)(ptr - buffer_start); + const char *buffer_start = &buffer[0]; + int written_len = (int) (ptr - buffer_start); - CV_Assert(written_len <= (int)buffer.size()); - int new_size = (int)((buffer_end - buffer_start)*3/2); - new_size = MAX( written_len + len, new_size ); - buffer.reserve( new_size + 256 ); - buffer.resize( new_size ); - bufofs = written_len; - return &buffer[0] + bufofs; + CV_Assert(written_len <= (int) buffer.size()); + int new_size = (int) ((buffer_end - buffer_start) * 3 / 2); + new_size = MAX(written_len + len, new_size); + buffer.reserve(new_size + 256); + buffer.resize(new_size); + bufofs = written_len; + return &buffer[0] + bufofs; +} + +char *FileStorage::Impl::flush() { + char *buffer_start = &buffer[0]; + char *ptr = buffer_start + bufofs; + + if (ptr > buffer_start + space) { + ptr[0] = '\n'; + ptr[1] = '\0'; + puts(buffer_start); + bufofs = 0; } - char* flush() - { - char* buffer_start = &buffer[0]; - char* ptr = buffer_start + bufofs; + int indent = write_stack.back().indent; - if( ptr > buffer_start + space ) - { - ptr[0] = '\n'; - ptr[1] = '\0'; - puts( buffer_start ); - bufofs = 0; - } + if (space != indent) { + memset(buffer_start, ' ', indent); + space = indent; + } + bufofs = space; + ptr = buffer_start + bufofs; - int indent = write_stack.back().indent; + return ptr; +} - if( space != indent ) - { - memset( buffer_start, ' ', indent ); - space = indent; - } - bufofs = space; - ptr = buffer_start + bufofs; +void FileStorage::Impl::endWriteStruct() { + CV_Assert(write_mode); - return ptr; - } + check_if_write_struct_is_delayed(false); + if (state_of_writing_base64 != FileStorage_API::Uncertain) + switch_to_Base64_state(FileStorage_API::Uncertain); - void endWriteStruct() - { - CV_Assert( write_mode ); - CV_Assert( 
!write_stack.empty() ); + CV_Assert(!write_stack.empty()); - FStructData& current_struct = write_stack.back(); - if( fmt == FileStorage::FORMAT_JSON && !FileNode::isFlow(current_struct.flags) && write_stack.size() > 1 ) - current_struct.indent = write_stack[write_stack.size() - 2].indent; + FStructData ¤t_struct = write_stack.back(); + if (fmt == FileStorage::FORMAT_JSON && !FileNode::isFlow(current_struct.flags) && write_stack.size() > 1) + current_struct.indent = write_stack[write_stack.size() - 2].indent; - emitter->endWriteStruct(current_struct); + emitter->endWriteStruct(current_struct); - write_stack.pop_back(); - if( !write_stack.empty() ) - write_stack.back().flags &= ~FileNode::EMPTY; - } + write_stack.pop_back(); + if (!write_stack.empty()) + write_stack.back().flags &= ~FileNode::EMPTY; +} - void startWriteStruct( const char* key, int struct_flags, - const char* type_name ) - { - CV_Assert( write_mode ); +void FileStorage::Impl::startWriteStruct_helper(const char *key, int struct_flags, + const char *type_name) { + CV_Assert(write_mode); - struct_flags = (struct_flags & (FileNode::TYPE_MASK|FileNode::FLOW)) | FileNode::EMPTY; - if( !FileNode::isCollection(struct_flags)) - CV_Error( CV_StsBadArg, - "Some collection type: FileNode::SEQ or FileNode::MAP must be specified" ); + struct_flags = (struct_flags & (FileNode::TYPE_MASK | FileNode::FLOW)) | FileNode::EMPTY; + if (!FileNode::isCollection(struct_flags)) + CV_Error(cv::Error::StsBadArg, + "Some collection type: FileNode::SEQ or FileNode::MAP must be specified"); - if( type_name && type_name[0] == '\0' ) - type_name = 0; + if (type_name && type_name[0] == '\0') + type_name = 0; - FStructData s = emitter->startWriteStruct( write_stack.back(), key, struct_flags, type_name ); - write_stack.push_back(s); - size_t write_stack_size = write_stack.size(); - if( write_stack_size > 1 ) - write_stack[write_stack_size-2].flags &= ~FileNode::EMPTY; + FStructData s = emitter->startWriteStruct(write_stack.back(), key, 
struct_flags, type_name); - if( !FileNode::isFlow(s.flags) ) - flush(); + write_stack.push_back(s); + size_t write_stack_size = write_stack.size(); + if (write_stack_size > 1) + write_stack[write_stack_size - 2].flags &= ~FileNode::EMPTY; - if( fmt == FileStorage::FORMAT_JSON && type_name && type_name[0] && FileNode::isMap(struct_flags)) - { - emitter->write("type_id", type_name, false); - } - } + if (fmt != FileStorage::FORMAT_JSON && !FileNode::isFlow(s.flags)) + flush(); - void writeComment( const char* comment, bool eol_comment ) - { - CV_Assert(write_mode); - emitter->writeComment( comment, eol_comment ); + if (fmt == FileStorage::FORMAT_JSON && type_name && type_name[0] && FileNode::isMap(struct_flags)) { + emitter->write("type_id", type_name, false); } +} - void startNextStream() - { - CV_Assert(write_mode); - if( !empty_stream ) - { - while( !write_stack.empty() ) - endWriteStruct(); - flush(); - emitter->startNextStream(); - empty_stream = true; - write_stack.push_back(FStructData("", FileNode::EMPTY, 0)); - bufofs = 0; - } - } +void FileStorage::Impl::startWriteStruct(const char *key, int struct_flags, + const char *type_name) { + check_if_write_struct_is_delayed(false); + if (state_of_writing_base64 == FileStorage_API::NotUse) + switch_to_Base64_state(FileStorage_API::Uncertain); - void write( const String& key, int value ) - { - CV_Assert(write_mode); - emitter->write(key.c_str(), value); - } + if (state_of_writing_base64 == FileStorage_API::Uncertain && FileNode::isSeq(struct_flags) + && is_using_base64 && type_name == 0) { + /* Uncertain whether output Base64 data */ + make_write_struct_delayed(key, struct_flags, type_name); + } else if (type_name && memcmp(type_name, "binary", 6) == 0) { + /* Must output Base64 data */ + if ((FileNode::TYPE_MASK & struct_flags) != FileNode::SEQ) + CV_Error(cv::Error::StsBadArg, "must set 'struct_flags |= CV_NODE_SEQ' if using Base64."); + else if (state_of_writing_base64 != FileStorage_API::Uncertain) + 
CV_Error(cv::Error::StsError, "function \'cvStartWriteStruct\' calls cannot be nested if using Base64."); - void write( const String& key, double value ) - { - CV_Assert(write_mode); - emitter->write(key.c_str(), value); + startWriteStruct_helper(key, struct_flags, "binary"); + + if (state_of_writing_base64 != FileStorage_API::Uncertain) + switch_to_Base64_state(FileStorage_API::Uncertain); + switch_to_Base64_state(FileStorage_API::InUse); + } else { + /* Won't output Base64 data */ + if (state_of_writing_base64 == FileStorage_API::InUse) + CV_Error(cv::Error::StsError, "At the end of the output Base64, `cvEndWriteStruct` is needed."); + + startWriteStruct_helper(key, struct_flags, type_name); + + if (state_of_writing_base64 != FileStorage_API::Uncertain) + switch_to_Base64_state(FileStorage_API::Uncertain); + switch_to_Base64_state(FileStorage_API::NotUse); } +} - void write( const String& key, const String& value ) - { - CV_Assert(write_mode); - emitter->write(key.c_str(), value.c_str(), false); +void FileStorage::Impl::writeComment(const char *comment, bool eol_comment) { + CV_Assert(write_mode); + emitter->writeComment(comment, eol_comment); +} + +void FileStorage::Impl::startNextStream() { + CV_Assert(write_mode); + if (!empty_stream) { + while (!write_stack.empty()) + endWriteStruct(); + flush(); + emitter->startNextStream(); + empty_stream = true; + write_stack.push_back(FStructData("", FileNode::EMPTY, 0)); + bufofs = 0; } +} - void writeRawData( const std::string& dt, const void* _data, size_t len ) - { - CV_Assert(write_mode); +void FileStorage::Impl::write(const String &key, int value) { + CV_Assert(write_mode); + emitter->write(key.c_str(), value); +} - size_t elemSize = fs::calcStructSize(dt.c_str(), 0); - CV_Assert(elemSize); - CV_Assert( len % elemSize == 0 ); - len /= elemSize; +void FileStorage::Impl::write(const String &key, double value) { + CV_Assert(write_mode); + emitter->write(key.c_str(), value); +} - bool explicitZero = fmt == 
FileStorage::FORMAT_JSON; - const uchar* data0 = (const uchar*)_data; - int fmt_pairs[CV_FS_MAX_FMT_PAIRS*2], k, fmt_pair_count; - char buf[256] = ""; +void FileStorage::Impl::write(const String &key, const String &value) { + CV_Assert(write_mode); + emitter->write(key.c_str(), value.c_str(), false); +} - fmt_pair_count = fs::decodeFormat( dt.c_str(), fmt_pairs, CV_FS_MAX_FMT_PAIRS ); +void FileStorage::Impl::writeRawData(const std::string &dt, const void *_data, size_t len) { + CV_Assert(write_mode); - if( !len ) - return; + if (is_using_base64 || state_of_writing_base64 == FileStorage_API::Base64State::InUse) { + writeRawDataBase64(_data, len, dt.c_str()); + return; + } else if (state_of_writing_base64 == FileStorage_API::Base64State::Uncertain) { + switch_to_Base64_state(FileStorage_API::Base64State::NotUse); + } - if( !data0 ) - CV_Error( CV_StsNullPtr, "Null data pointer" ); + size_t elemSize = fs::calcStructSize(dt.c_str(), 0); + CV_Assert(elemSize); + CV_Assert(len % elemSize == 0); + len /= elemSize; - if( fmt_pair_count == 1 ) - { - fmt_pairs[0] *= (int)len; - len = 1; - } + bool explicitZero = fmt == FileStorage::FORMAT_JSON; + const uchar *data0 = (const uchar *) _data; + int fmt_pairs[CV_FS_MAX_FMT_PAIRS * 2], k, fmt_pair_count; + char buf[256] = ""; - for(;len--; data0 += elemSize) - { - int offset = 0; - for( k = 0; k < fmt_pair_count; k++ ) - { - int i, count = fmt_pairs[k*2]; - int elem_type = fmt_pairs[k*2+1]; - int elem_size = CV_ELEM_SIZE(elem_type); - const char *ptr; + fmt_pair_count = fs::decodeFormat(dt.c_str(), fmt_pairs, CV_FS_MAX_FMT_PAIRS); - offset = cvAlign( offset, elem_size ); - const uchar* data = data0 + offset; + if (!len) + return; - for( i = 0; i < count; i++ ) - { - switch( elem_type ) - { + if (!data0) + CV_Error(cv::Error::StsNullPtr, "Null data pointer"); + + if (fmt_pair_count == 1) { + fmt_pairs[0] *= (int) len; + len = 1; + } + + for (; len--; data0 += elemSize) { + int offset = 0; + for (k = 0; k < fmt_pair_count; k++) { 
+ int i, count = fmt_pairs[k * 2]; + int elem_type = fmt_pairs[k * 2 + 1]; + int elem_size = CV_ELEM_SIZE(elem_type); + const char *ptr; + + offset = cvAlign(offset, elem_size); + const uchar *data = data0 + offset; + + for (i = 0; i < count; i++) { + switch (elem_type) { case CV_8U: - ptr = fs::itoa( *(uchar*)data, buf, 10 ); + ptr = fs::itoa(*(uchar *) data, buf, 10); data++; break; case CV_8S: - ptr = fs::itoa( *(char*)data, buf, 10 ); + ptr = fs::itoa(*(char *) data, buf, 10); data++; break; case CV_16U: - ptr = fs::itoa( *(ushort*)data, buf, 10 ); + ptr = fs::itoa(*(ushort *) data, buf, 10); data += sizeof(ushort); break; case CV_16S: - ptr = fs::itoa( *(short*)data, buf, 10 ); + ptr = fs::itoa(*(short *) data, buf, 10); data += sizeof(short); break; case CV_32S: - ptr = fs::itoa( *(int*)data, buf, 10 ); + ptr = fs::itoa(*(int *) data, buf, 10); data += sizeof(int); break; case CV_32F: - ptr = fs::floatToString( buf, *(float*)data, false, explicitZero ); + ptr = fs::floatToString(buf, *(float *) data, false, explicitZero); data += sizeof(float); break; case CV_64F: - ptr = fs::doubleToString( buf, *(double*)data, explicitZero ); + ptr = fs::doubleToString(buf, *(double *) data, explicitZero); data += sizeof(double); break; case CV_16F: /* reference */ - ptr = fs::floatToString( buf, (float)*(float16_t*)data, true, explicitZero ); + ptr = fs::floatToString(buf, (float) *(float16_t *) data, true, explicitZero); data += sizeof(float16_t); break; default: - CV_Error( CV_StsUnsupportedFormat, "Unsupported type" ); + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported type"); return; - } - - emitter->writeScalar(0, ptr); } - offset = (int)(data - data0); + emitter->writeScalar(0, ptr); } + + offset = (int) (data - data0); } } +} - void writeRawDataBase64(const void* /*data*/, int /*len*/, const char* /*dt*/ ) - { +void FileStorage::Impl::workaround() { + check_if_write_struct_is_delayed(false); - } + if (state_of_writing_base64 != 
FileStorage_API::Base64State::Uncertain) + switch_to_Base64_state(FileStorage_API::Base64State::Uncertain); +} - String releaseAndGetString(); +void FileStorage::Impl::switch_to_Base64_state(FileStorage_API::Base64State new_state) { + const char *err_unkonwn_state = "Unexpected error, unable to determine the Base64 state."; + const char *err_unable_to_switch = "Unexpected error, unable to switch to this state."; - FileNode getFirstTopLevelNode() const - { - return roots.empty() ? FileNode() : roots[0]; + /* like a finite state machine */ + switch (state_of_writing_base64) { + case FileStorage_API::Base64State::Uncertain: + switch (new_state) { + case FileStorage_API::Base64State::InUse: + { + CV_DbgAssert(base64_writer == 0); + bool can_indent = (fmt != cv::FileStorage::Mode::FORMAT_JSON); + base64_writer = new base64::Base64Writer(*this, can_indent); + if (!can_indent) { + char *ptr = bufferPtr(); + *ptr++ = '\0'; + puts(bufferStart()); + setBufferPtr(bufferStart()); + memset(bufferStart(), 0, static_cast(space)); + puts("\"$base64$"); + } + break; + } + case FileStorage_API::Base64State::Uncertain: + break; + case FileStorage_API::Base64State::NotUse: + break; + default: + CV_Error(cv::Error::StsError, err_unkonwn_state); + break; + } + break; + case FileStorage_API::Base64State::InUse: + switch (new_state) { + case FileStorage_API::Base64State::InUse: + case FileStorage_API::Base64State::NotUse: + CV_Error(cv::Error::StsError, err_unable_to_switch); + break; + case FileStorage_API::Base64State::Uncertain: + delete base64_writer; + base64_writer = 0; + if ( fmt == cv::FileStorage::FORMAT_JSON ) + { + puts("\""); + setBufferPtr(bufferStart()); + flush(); + memset(bufferStart(), 0, static_cast(space) ); + setBufferPtr(bufferStart()); + } + break; + default: + CV_Error(cv::Error::StsError, err_unkonwn_state); + break; + } + break; + case FileStorage_API::Base64State::NotUse: + switch (new_state) { + case FileStorage_API::Base64State::InUse: + case 
FileStorage_API::Base64State::NotUse: + CV_Error(cv::Error::StsError, err_unable_to_switch); + break; + case FileStorage_API::Base64State::Uncertain: + break; + default: + CV_Error(cv::Error::StsError, err_unkonwn_state); + break; + } + break; + default: + CV_Error(cv::Error::StsError, err_unkonwn_state); + break; } - FileNode root(int streamIdx=0) const - { - return streamIdx >= 0 && streamIdx < (int)roots.size() ? roots[streamIdx] : FileNode(); - } + state_of_writing_base64 = new_state; +} - FileNode operator[](const String& nodename) const - { - return this->operator[](nodename.c_str()); +void FileStorage::Impl::make_write_struct_delayed(const char *key, int struct_flags, const char *type_name) { + CV_Assert(is_write_struct_delayed == false); + CV_DbgAssert(delayed_struct_key == nullptr); + CV_DbgAssert(delayed_struct_flags == 0); + CV_DbgAssert(delayed_type_name == nullptr); + + delayed_struct_flags = struct_flags; + + if (key != nullptr) { + delayed_struct_key = new char[strlen(key) + 1U]; + strcpy(delayed_struct_key, key); } - FileNode operator[](const char* /*nodename*/) const - { - return FileNode(); + if (type_name != nullptr) { + delayed_type_name = new char[strlen(type_name) + 1U]; + strcpy(delayed_type_name, type_name); } - int getFormat() const { return fmt; } + is_write_struct_delayed = true; +} - char* bufferPtr() const { return (char*)(&buffer[0] + bufofs); } - char* bufferStart() const { return (char*)&buffer[0]; } - char* bufferEnd() const { return (char*)(&buffer[0] + buffer.size()); } - void setBufferPtr(char* ptr) - { - char* bufferstart = bufferStart(); - CV_Assert( ptr >= bufferstart && ptr <= bufferEnd() ); - bufofs = ptr - bufferstart; - } - int wrapMargin() const { return wrap_margin; } +void FileStorage::Impl::check_if_write_struct_is_delayed(bool change_type_to_base64) { + if (is_write_struct_delayed) { + /* save data to prevent recursive call errors */ + std::string struct_key; + std::string type_name; + int struct_flags = 
delayed_struct_flags; - FStructData& getCurrentStruct() - { - CV_Assert(!write_stack.empty()); - return write_stack.back(); + if (delayed_struct_key != nullptr && *delayed_struct_key != '\0') { + struct_key.assign(delayed_struct_key); + } + if (delayed_type_name != nullptr && *delayed_type_name != '\0') { + type_name.assign(delayed_type_name); + } + + /* reset */ + delete[] delayed_struct_key; + delete[] delayed_type_name; + delayed_struct_key = nullptr; + delayed_struct_flags = 0; + delayed_type_name = nullptr; + + is_write_struct_delayed = false; + + /* call */ + if (change_type_to_base64) { + startWriteStruct_helper(struct_key.c_str(), struct_flags, "binary"); + if (state_of_writing_base64 != FileStorage_API::Uncertain) + switch_to_Base64_state(FileStorage_API::Uncertain); + switch_to_Base64_state(FileStorage_API::InUse); + } else { + startWriteStruct_helper(struct_key.c_str(), struct_flags, type_name.c_str()); + if (state_of_writing_base64 != FileStorage_API::Uncertain) + switch_to_Base64_state(FileStorage_API::Uncertain); + switch_to_Base64_state(FileStorage_API::NotUse); + } } +} - void setNonEmpty() - { - empty_stream = false; +void FileStorage::Impl::writeRawDataBase64(const void *_data, size_t len, const char *dt) { + CV_Assert(write_mode); + + check_if_write_struct_is_delayed(true); + + if (state_of_writing_base64 == FileStorage_API::Base64State::Uncertain) { + switch_to_Base64_state(FileStorage_API::Base64State::InUse); + } else if (state_of_writing_base64 != FileStorage_API::Base64State::InUse) { + CV_Error(cv::Error::StsError, "Base64 should not be used at present."); } - void processSpecialDouble( char* buf, double* value, char** endptr ) - { - FileStorage_API* fs = this; - char c = buf[0]; - int inf_hi = 0x7ff00000; + base64_writer->write(_data, len, dt); +} - if( c == '-' || c == '+' ) - { - inf_hi = c == '-' ? 0xfff00000 : 0x7ff00000; - c = *++buf; - } +FileNode FileStorage::Impl::getFirstTopLevelNode() const { + return roots.empty() ? 
FileNode() : roots[0]; +} - if( c != '.' ) - CV_PARSE_ERROR_CPP( "Bad format of floating-point constant" ); +FileNode FileStorage::Impl::root(int streamIdx) const { + return streamIdx >= 0 && streamIdx < (int) roots.size() ? roots[streamIdx] : FileNode(); +} - Cv64suf v; - v.f = 0.; - if( toupper(buf[1]) == 'I' && toupper(buf[2]) == 'N' && toupper(buf[3]) == 'F' ) - v.u = (uint64)inf_hi << 32; - else if( toupper(buf[1]) == 'N' && toupper(buf[2]) == 'A' && toupper(buf[3]) == 'N' ) - v.u = (uint64)-1; - else - CV_PARSE_ERROR_CPP( "Bad format of floating-point constant" ); - *value = v.f; - *endptr = buf + 4; - } +FileNode FileStorage::Impl::operator[](const String &nodename) const { + return this->operator[](nodename.c_str()); +} - double strtod( char* ptr, char** endptr ) - { - double fval = ::strtod( ptr, endptr ); - if( **endptr == '.' ) - { - char* dot_pos = *endptr; - *dot_pos = ','; - double fval2 = ::strtod( ptr, endptr ); - *dot_pos = '.'; - if( *endptr > dot_pos ) - fval = fval2; - else - *endptr = dot_pos; - } +FileNode FileStorage::Impl::operator[](const char * /*nodename*/) const { + return FileNode(); +} - if( *endptr == ptr || cv_isalpha(**endptr) ) - processSpecialDouble( ptr, &fval, endptr ); +int FileStorage::Impl::getFormat() const { return fmt; } - return fval; - } +char *FileStorage::Impl::bufferPtr() const { return (char *) (&buffer[0] + bufofs); } - void convertToCollection(int type, FileNode& node) - { - CV_Assert( type == FileNode::SEQ || type == FileNode::MAP ); +char *FileStorage::Impl::bufferStart() const { return (char *) &buffer[0]; } - int node_type = node.type(); - if( node_type == type ) - return; +char *FileStorage::Impl::bufferEnd() const { return (char *) (&buffer[0] + buffer.size()); } - bool named = node.isNamed(); - uchar* ptr = node.ptr() + 1 + (named ? 
4 : 0); +void FileStorage::Impl::setBufferPtr(char *ptr) { + char *bufferstart = bufferStart(); + CV_Assert(ptr >= bufferstart && ptr <= bufferEnd()); + bufofs = ptr - bufferstart; +} - int ival = 0; - double fval = 0; - std::string sval; - bool add_first_scalar = false; +int FileStorage::Impl::wrapMargin() const { return wrap_margin; } - if( node_type != FileNode::NONE ) - { - // scalar nodes can only be converted to sequences, e.g. in XML: - // 5[parser_position]... => create 5 with name "a" - // 5 6[parser_position]... => 5 is converted to [5] and then 6 is added to it - // - // otherwise we don't know where to get the element names from - CV_Assert( type == FileNode::SEQ ); - if( node_type == FileNode::INT ) - { - ival = readInt(ptr); - add_first_scalar = true; - } - else if( node_type == FileNode::REAL ) - { - fval = readReal(ptr); - add_first_scalar = true; - } - else if( node_type == FileNode::STRING ) - { - sval = std::string(node); - add_first_scalar = true; - } - else - CV_Error_(Error::StsError, ("The node of type %d cannot be converted to collection", node_type)); - } +FStructData &FileStorage::Impl::getCurrentStruct() { + CV_Assert(!write_stack.empty()); + return write_stack.back(); +} - ptr = reserveNodeSpace(node, 1 + (named ? 4 : 0) + 4 + 4); - *ptr++ = (uchar)(type | (named ? FileNode::NAMED : 0)); - // name has been copied automatically - if( named ) - ptr += 4; - // set raw_size(collection)==4, nelems(collection)==1 - writeInt(ptr, 4); - writeInt(ptr + 4, 0); - - if( add_first_scalar ) - addNode(node, std::string(), node_type, - node_type == FileNode::INT ? (const void*)&ival : - node_type == FileNode::REAL ? (const void*)&fval : - node_type == FileNode::STRING ? (const void*)sval.c_str() : 0, - -1); - } - - // a) allocates new FileNode (for that just set blockIdx to the last block and ofs to freeSpaceOfs) or - // b) reallocates just created new node (blockIdx and ofs must be taken from FileNode). 
- // If there is no enough space in the current block (it should be the last block added so far), - // the last block is shrunk so that it ends immediately before the reallocated node. Then, - // a new block of sufficient size is allocated and the FileNode is placed in the beginning of it. - // The case (a) can be used to allocate the very first node by setting blockIdx == ofs == 0. - // In the case (b) the existing tag and the name are copied automatically. - uchar* reserveNodeSpace(FileNode& node, size_t sz) - { - bool shrinkBlock = false; - size_t shrinkBlockIdx = 0, shrinkSize = 0; +void FileStorage::Impl::setNonEmpty() { + empty_stream = false; +} - uchar *ptr = 0, *blockEnd = 0; +void FileStorage::Impl::processSpecialDouble(char *buf, double *value, char **endptr) { + FileStorage_API *fs = this; + char c = buf[0]; + int inf_hi = 0x7ff00000; - if( !fs_data_ptrs.empty() ) - { - size_t blockIdx = node.blockIdx; - size_t ofs = node.ofs; - CV_Assert( blockIdx == fs_data_ptrs.size()-1 ); - CV_Assert( ofs <= fs_data_blksz[blockIdx] ); - CV_Assert( freeSpaceOfs <= fs_data_blksz[blockIdx] ); - //CV_Assert( freeSpaceOfs <= ofs + sz ); - - ptr = fs_data_ptrs[blockIdx] + ofs; - blockEnd = fs_data_ptrs[blockIdx] + fs_data_blksz[blockIdx]; - - CV_Assert(ptr >= fs_data_ptrs[blockIdx] && ptr <= blockEnd); - if( ptr + sz <= blockEnd ) - { - freeSpaceOfs = ofs + sz; - return ptr; - } + if (c == '-' || c == '+') { + inf_hi = c == '-' ? 0xfff00000 : 0x7ff00000; + c = *++buf; + } - if (ofs == 0) // FileNode is a first component of this block. Resize current block instead of allocation of new one. 
- { - fs_data[blockIdx]->resize(sz); - ptr = &fs_data[blockIdx]->at(0); - fs_data_ptrs[blockIdx] = ptr; - fs_data_blksz[blockIdx] = sz; - freeSpaceOfs = sz; - return ptr; - } + if (c != '.') + CV_PARSE_ERROR_CPP("Bad format of floating-point constant"); - shrinkBlock = true; - shrinkBlockIdx = blockIdx; - shrinkSize = ofs; + Cv64suf v; + v.f = 0.; + if (toupper(buf[1]) == 'I' && toupper(buf[2]) == 'N' && toupper(buf[3]) == 'F') + v.u = (uint64) inf_hi << 32; + else if (toupper(buf[1]) == 'N' && toupper(buf[2]) == 'A' && toupper(buf[3]) == 'N') + v.u = (uint64) -1; + else + CV_PARSE_ERROR_CPP("Bad format of floating-point constant"); + *value = v.f; + *endptr = buf + 4; +} + +double FileStorage::Impl::strtod(char *ptr, char **endptr) { + double fval = ::strtod(ptr, endptr); + if (**endptr == '.') { + char *dot_pos = *endptr; + *dot_pos = ','; + double fval2 = ::strtod(ptr, endptr); + *dot_pos = '.'; + if (*endptr > dot_pos) + fval = fval2; + else + *endptr = dot_pos; + } + + if (*endptr == ptr || cv_isalpha(**endptr)) + processSpecialDouble(ptr, &fval, endptr); + + return fval; +} + +void FileStorage::Impl::convertToCollection(int type, FileNode &node) { + CV_Assert(type == FileNode::SEQ || type == FileNode::MAP); + + int node_type = node.type(); + if (node_type == type) + return; + + bool named = node.isNamed(); + uchar *ptr = node.ptr() + 1 + (named ? 4 : 0); + + int ival = 0; + double fval = 0; + std::string sval; + bool add_first_scalar = false; + + if (node_type != FileNode::NONE) { + // scalar nodes can only be converted to sequences, e.g. in XML: + // 5[parser_position]... => create 5 with name "a" + // 5 6[parser_position]... 
=> 5 is converted to [5] and then 6 is added to it + // + // otherwise we don't know where to get the element names from + CV_Assert(type == FileNode::SEQ); + if (node_type == FileNode::INT) { + ival = readInt(ptr); + add_first_scalar = true; + } else if (node_type == FileNode::REAL) { + fval = readReal(ptr); + add_first_scalar = true; + } else if (node_type == FileNode::STRING) { + sval = std::string(node); + add_first_scalar = true; + } else + CV_Error_(Error::StsError, ("The node of type %d cannot be converted to collection", node_type)); + } + + ptr = reserveNodeSpace(node, 1 + (named ? 4 : 0) + 4 + 4); + *ptr++ = (uchar) (type | (named ? FileNode::NAMED : 0)); + // name has been copied automatically + if (named) + ptr += 4; + // set raw_size(collection)==4, nelems(collection)==1 + writeInt(ptr, 4); + writeInt(ptr + 4, 0); + + if (add_first_scalar) + addNode(node, std::string(), node_type, + node_type == FileNode::INT ? (const void *) &ival : + node_type == FileNode::REAL ? (const void *) &fval : + node_type == FileNode::STRING ? (const void *) sval.c_str() : 0, + -1); +} + +// a) allocates new FileNode (for that just set blockIdx to the last block and ofs to freeSpaceOfs) or +// b) reallocates just created new node (blockIdx and ofs must be taken from FileNode). +// If there is no enough space in the current block (it should be the last block added so far), +// the last block is shrunk so that it ends immediately before the reallocated node. Then, +// a new block of sufficient size is allocated and the FileNode is placed in the beginning of it. +// The case (a) can be used to allocate the very first node by setting blockIdx == ofs == 0. +// In the case (b) the existing tag and the name are copied automatically. 
+uchar *FileStorage::Impl::reserveNodeSpace(FileNode &node, size_t sz) { + bool shrinkBlock = false; + size_t shrinkBlockIdx = 0, shrinkSize = 0; + + uchar *ptr = 0, *blockEnd = 0; + + if (!fs_data_ptrs.empty()) { + size_t blockIdx = node.blockIdx; + size_t ofs = node.ofs; + CV_Assert(blockIdx == fs_data_ptrs.size() - 1); + CV_Assert(ofs <= fs_data_blksz[blockIdx]); + CV_Assert(freeSpaceOfs <= fs_data_blksz[blockIdx]); + //CV_Assert( freeSpaceOfs <= ofs + sz ); + + ptr = fs_data_ptrs[blockIdx] + ofs; + blockEnd = fs_data_ptrs[blockIdx] + fs_data_blksz[blockIdx]; + + CV_Assert(ptr >= fs_data_ptrs[blockIdx] && ptr <= blockEnd); + if (ptr + sz <= blockEnd) { + freeSpaceOfs = ofs + sz; + return ptr; } - size_t blockSize = std::max((size_t)CV_FS_MAX_LEN*4 - 256, sz) + 256; - Ptr > pv = makePtr >(blockSize); - fs_data.push_back(pv); - uchar* new_ptr = &pv->at(0); - fs_data_ptrs.push_back(new_ptr); - fs_data_blksz.push_back(blockSize); - node.blockIdx = fs_data_ptrs.size()-1; - node.ofs = 0; - freeSpaceOfs = sz; - - if( ptr && ptr + 5 <= blockEnd ) + if (ofs == + 0) // FileNode is a first component of this block. Resize current block instead of allocation of new one. 
{ - new_ptr[0] = ptr[0]; - if( ptr[0] & FileNode::NAMED ) - { - new_ptr[1] = ptr[1]; - new_ptr[2] = ptr[2]; - new_ptr[3] = ptr[3]; - new_ptr[4] = ptr[4]; - } + fs_data[blockIdx]->resize(sz); + ptr = &fs_data[blockIdx]->at(0); + fs_data_ptrs[blockIdx] = ptr; + fs_data_blksz[blockIdx] = sz; + freeSpaceOfs = sz; + return ptr; } - if (shrinkBlock) - { - fs_data[shrinkBlockIdx]->resize(shrinkSize); - fs_data_blksz[shrinkBlockIdx] = shrinkSize; + shrinkBlock = true; + shrinkBlockIdx = blockIdx; + shrinkSize = ofs; + } + + size_t blockSize = std::max((size_t) CV_FS_MAX_LEN * 4 - 256, sz) + 256; + Ptr > pv = makePtr >(blockSize); + fs_data.push_back(pv); + uchar *new_ptr = &pv->at(0); + fs_data_ptrs.push_back(new_ptr); + fs_data_blksz.push_back(blockSize); + node.blockIdx = fs_data_ptrs.size() - 1; + node.ofs = 0; + freeSpaceOfs = sz; + + if (ptr && ptr + 5 <= blockEnd) { + new_ptr[0] = ptr[0]; + if (ptr[0] & FileNode::NAMED) { + new_ptr[1] = ptr[1]; + new_ptr[2] = ptr[2]; + new_ptr[3] = ptr[3]; + new_ptr[4] = ptr[4]; } - - return new_ptr; } - unsigned getStringOfs( const std::string& key ) const - { - str_hash_t::const_iterator it = str_hash.find(key); - return it != str_hash.end() ? it->second : 0; + if (shrinkBlock) { + fs_data[shrinkBlockIdx]->resize(shrinkSize); + fs_data_blksz[shrinkBlockIdx] = shrinkSize; } - FileNode addNode( FileNode& collection, const std::string& key, - int elem_type, const void* value, int len ) - { - FileStorage_API* fs = this; - bool noname = key.empty() || (fmt == FileStorage::FORMAT_XML && strcmp(key.c_str(), "_") == 0); - convertToCollection( noname ? FileNode::SEQ : FileNode::MAP, collection ); - - bool isseq = collection.empty() ? false : collection.isSeq(); - if( noname != isseq ) - CV_PARSE_ERROR_CPP( noname ? 
"Map element should have a name" : - "Sequence element should not have name (use <_>)" ); - unsigned strofs = 0; - if( !noname ) - { - strofs = getStringOfs(key); - if( !strofs ) - { - strofs = (unsigned)str_hash_data.size(); - size_t keysize = key.size() + 1; - str_hash_data.resize(strofs + keysize); - memcpy(&str_hash_data[0] + strofs, &key[0], keysize); - str_hash.insert(std::make_pair(key, strofs)); - } - } + return new_ptr; +} - uchar* cp = collection.ptr(); +unsigned FileStorage::Impl::getStringOfs(const std::string &key) const { + str_hash_t::const_iterator it = str_hash.find(key); + return it != str_hash.end() ? it->second : 0; +} - size_t blockIdx = fs_data_ptrs.size() - 1; - size_t ofs = freeSpaceOfs; - FileNode node(fs_ext, blockIdx, ofs); +FileNode FileStorage::Impl::addNode(FileNode &collection, const std::string &key, + int elem_type, const void *value, int len) { + FileStorage_API *fs = this; + bool noname = key.empty() || (fmt == FileStorage::FORMAT_XML && strcmp(key.c_str(), "_") == 0); + convertToCollection(noname ? FileNode::SEQ : FileNode::MAP, collection); - size_t sz0 = 1 + (noname ? 0 : 4) + 8; - uchar* ptr = reserveNodeSpace(node, sz0); + bool isseq = collection.empty() ? false : collection.isSeq(); + if (noname != isseq) + CV_PARSE_ERROR_CPP(noname ? "Map element should have a name" : + "Sequence element should not have name (use <_>)"); + unsigned strofs = 0; + if (!noname) { + strofs = getStringOfs(key); + if (!strofs) { + strofs = (unsigned) str_hash_data.size(); + size_t keysize = key.size() + 1; + str_hash_data.resize(strofs + keysize); + memcpy(&str_hash_data[0] + strofs, &key[0], keysize); + str_hash.insert(std::make_pair(key, strofs)); + } + } - *ptr++ = (uchar)(elem_type | (noname ? 
0 : FileNode::NAMED)); - if( elem_type == FileNode::NONE ) - freeSpaceOfs -= 8; + uchar *cp = collection.ptr(); - if( !noname ) - { - writeInt(ptr, (int)strofs); - ptr += 4; - } + size_t blockIdx = fs_data_ptrs.size() - 1; + size_t ofs = freeSpaceOfs; + FileNode node(fs_ext, blockIdx, ofs); - if( elem_type == FileNode::SEQ || elem_type == FileNode::MAP ) - { - writeInt(ptr, 4); - writeInt(ptr, 0); - } + size_t sz0 = 1 + (noname ? 0 : 4) + 8; + uchar *ptr = reserveNodeSpace(node, sz0); - if( value ) - node.setValue(elem_type, value, len); + *ptr++ = (uchar) (elem_type | (noname ? 0 : FileNode::NAMED)); + if (elem_type == FileNode::NONE) + freeSpaceOfs -= 8; - if( collection.isNamed() ) - cp += 4; - int nelems = readInt(cp + 5); - writeInt(cp + 5, nelems + 1); + if (!noname) { + writeInt(ptr, (int) strofs); + ptr += 4; + } - return node; + if (elem_type == FileNode::SEQ || elem_type == FileNode::MAP) { + writeInt(ptr, 4); + writeInt(ptr, 0); } - void finalizeCollection( FileNode& collection ) - { - if( !collection.isSeq() && !collection.isMap() ) - return; - uchar* ptr0 = collection.ptr(), *ptr = ptr0 + 1; - if( *ptr0 & FileNode::NAMED ) - ptr += 4; - size_t blockIdx = collection.blockIdx; - size_t ofs = collection.ofs + (size_t)(ptr + 8 - ptr0); - size_t rawSize = 4; - unsigned sz = (unsigned)readInt(ptr + 4); - if( sz > 0 ) - { - size_t lastBlockIdx = fs_data_ptrs.size() - 1; + if (value) + node.setValue(elem_type, value, len); - for( ; blockIdx < lastBlockIdx; blockIdx++ ) - { - rawSize += fs_data_blksz[blockIdx] - ofs; - ofs = 0; - } + if (collection.isNamed()) + cp += 4; + int nelems = readInt(cp + 5); + writeInt(cp + 5, nelems + 1); + + return node; +} + +void FileStorage::Impl::finalizeCollection(FileNode &collection) { + if (!collection.isSeq() && !collection.isMap()) + return; + uchar *ptr0 = collection.ptr(), *ptr = ptr0 + 1; + if (*ptr0 & FileNode::NAMED) + ptr += 4; + size_t blockIdx = collection.blockIdx; + size_t ofs = collection.ofs + (size_t) (ptr + 8 
- ptr0); + size_t rawSize = 4; + unsigned sz = (unsigned) readInt(ptr + 4); + if (sz > 0) { + size_t lastBlockIdx = fs_data_ptrs.size() - 1; + + for (; blockIdx < lastBlockIdx; blockIdx++) { + rawSize += fs_data_blksz[blockIdx] - ofs; + ofs = 0; } - rawSize += freeSpaceOfs - ofs; - writeInt(ptr, (int)rawSize); } + rawSize += freeSpaceOfs - ofs; + writeInt(ptr, (int) rawSize); +} - void normalizeNodeOfs(size_t& blockIdx, size_t& ofs) const - { - while( ofs >= fs_data_blksz[blockIdx] ) - { - if( blockIdx == fs_data_blksz.size() - 1 ) - { - CV_Assert( ofs == fs_data_blksz[blockIdx] ); - break; - } - ofs -= fs_data_blksz[blockIdx]; - blockIdx++; +void FileStorage::Impl::normalizeNodeOfs(size_t &blockIdx, size_t &ofs) const { + while (ofs >= fs_data_blksz[blockIdx]) { + if (blockIdx == fs_data_blksz.size() - 1) { + CV_Assert(ofs == fs_data_blksz[blockIdx]); + break; } + ofs -= fs_data_blksz[blockIdx]; + blockIdx++; } +} - class Base64Decoder - { - public: - Base64Decoder() { ofs = 0; ptr = 0; indent = 0; totalchars = 0; eos = true; } - void init(Ptr& _parser, char* _ptr, int _indent) - { - parser = _parser; - ptr = _ptr; - indent = _indent; - encoded.clear(); - decoded.clear(); - ofs = 0; - totalchars = 0; - eos = false; - } +FileStorage::Impl::Base64State FileStorage::Impl::get_state_of_writing_base64() { + return state_of_writing_base64; +} - bool readMore(int needed) - { - static const uchar base64tab[] = +int FileStorage::Impl::get_space() { + return space; +} + + +FileStorage::Impl::Base64Decoder::Base64Decoder() { + ofs = 0; + ptr = 0; + indent = 0; + totalchars = 0; + eos = true; +} + +void FileStorage::Impl::Base64Decoder::init(Ptr &_parser, char *_ptr, int _indent) { + parser = _parser; + ptr = _ptr; + indent = _indent; + encoded.clear(); + decoded.clear(); + ofs = 0; + totalchars = 0; + eos = false; +} + +bool FileStorage::Impl::Base64Decoder::readMore(int needed) { + static const uchar base64tab[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0, - 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0, + 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - if( eos ) - return false; - - size_t sz = decoded.size(); - CV_Assert( ofs <= sz ); - sz -= ofs; - for( size_t i = 0; i < sz; i++ ) - decoded[i] = decoded[ofs + i]; + if (eos) + return false; - decoded.resize(sz); - ofs = 0; + size_t sz = decoded.size(); + CV_Assert(ofs <= sz); + sz -= ofs; + for (size_t i = 0; i < sz; i++) + decoded[i] = 
decoded[ofs + i]; - CV_Assert( !parser.empty() && ptr ); - char *beg = 0, *end = 0; - bool ok = parser->getBase64Row(ptr, indent, beg, end); - ptr = end; - std::copy(beg, end, std::back_inserter(encoded)); - totalchars += end - beg; + decoded.resize(sz); + ofs = 0; - if( !ok || beg == end ) - { - // in the end of base64 sequence pad it with '=' characters so that - // its total length is multiple of - eos = true; - size_t tc = totalchars; - for( ; tc % 4 != 0; tc++ ) - encoded.push_back('='); - } + CV_Assert(!parser.empty() && ptr); + char *beg = 0, *end = 0; + bool ok = parser->getBase64Row(ptr, indent, beg, end); + ptr = end; + std::copy(beg, end, std::back_inserter(encoded)); + totalchars += end - beg; + + if (!ok || beg == end) { + // in the end of base64 sequence pad it with '=' characters so that + // its total length is multiple of + eos = true; + size_t tc = totalchars; + for (; tc % 4 != 0; tc++) + encoded.push_back('='); + } + + int i = 0, j, n = (int) encoded.size(); + if (n > 0) { + const uchar *tab = base64tab; + char *src = &encoded[0]; + + for (; i <= n - 4; i += 4) { + // dddddd cccccc bbbbbb aaaaaa => ddddddcc ccccbbbb bbaaaaaa + uchar d = tab[(int) (uchar) src[i]], c = tab[(int) (uchar) src[i + 1]]; + uchar b = tab[(int) (uchar) src[i + 2]], a = tab[(int) (uchar) src[i + 3]]; + + decoded.push_back((uchar) ((d << 2) | (c >> 4))); + decoded.push_back((uchar) ((c << 4) | (b >> 2))); + decoded.push_back((uchar) ((b << 6) | a)); + } + } - int i = 0, j, n = (int)encoded.size(); - if( n > 0 ) - { - const uchar* tab = base64tab; - char* src = &encoded[0]; + if (i > 0 && encoded[i - 1] == '=') { + if (i > 1 && encoded[i - 2] == '=' && !decoded.empty()) + decoded.pop_back(); + if (!decoded.empty()) + decoded.pop_back(); + } - for( ; i <= n - 4; i += 4 ) - { - // dddddd cccccc bbbbbb aaaaaa => ddddddcc ccccbbbb bbaaaaaa - uchar d = tab[(int)(uchar)src[i]], c = tab[(int)(uchar)src[i+1]]; - uchar b = tab[(int)(uchar)src[i+2]], a = tab[(int)(uchar)src[i+3]]; + 
n -= i; + for (j = 0; j < n; j++) + encoded[j] = encoded[i + j]; + encoded.resize(n); - decoded.push_back((uchar)((d << 2) | (c >> 4))); - decoded.push_back((uchar)((c << 4) | (b >> 2))); - decoded.push_back((uchar)((b << 6) | a)); - } - } + return (int) decoded.size() >= needed; +} - if( i > 0 && encoded[i-1] == '=' ) - { - if( i > 1 && encoded[i-2] == '=' && !decoded.empty() ) - decoded.pop_back(); - if( !decoded.empty() ) - decoded.pop_back(); - } +uchar FileStorage::Impl::Base64Decoder::getUInt8() { + size_t sz = decoded.size(); + if (ofs >= sz && !readMore(1)) + return (uchar) 0; + return decoded[ofs++]; +} - n -= i; - for( j = 0; j < n; j++ ) - encoded[j] = encoded[i + j]; - encoded.resize(n); +ushort FileStorage::Impl::Base64Decoder::getUInt16() { + size_t sz = decoded.size(); + if (ofs + 2 > sz && !readMore(2)) + return (ushort) 0; + ushort val = (decoded[ofs] + (decoded[ofs + 1] << 8)); + ofs += 2; + return val; +} - return (int)decoded.size() >= needed; - } +int FileStorage::Impl::Base64Decoder::getInt32() { + size_t sz = decoded.size(); + if (ofs + 4 > sz && !readMore(4)) + return 0; + int ival = readInt(&decoded[ofs]); + ofs += 4; + return ival; +} - uchar getUInt8() - { - size_t sz = decoded.size(); - if( ofs >= sz && !readMore(1) ) - return (uchar)0; - return decoded[ofs++]; - } +double FileStorage::Impl::Base64Decoder::getFloat64() { + size_t sz = decoded.size(); + if (ofs + 8 > sz && !readMore(8)) + return 0; + double fval = readReal(&decoded[ofs]); + ofs += 8; + return fval; +} - ushort getUInt16() - { - size_t sz = decoded.size(); - if( ofs + 2 > sz && !readMore(2) ) - return (ushort)0; - ushort val = (decoded[ofs] + (decoded[ofs + 1] << 8)); - ofs += 2; - return val; - } +bool FileStorage::Impl::Base64Decoder::endOfStream() const { return eos; } - int getInt32() - { - size_t sz = decoded.size(); - if( ofs + 4 > sz && !readMore(4) ) - return 0; - int ival = readInt(&decoded[ofs]); - ofs += 4; - return ival; - } +char 
*FileStorage::Impl::Base64Decoder::getPtr() const { return ptr; } - double getFloat64() - { - size_t sz = decoded.size(); - if( ofs + 8 > sz && !readMore(8) ) - return 0; - double fval = readReal(&decoded[ofs]); - ofs += 8; - return fval; - } - bool endOfStream() const { return eos; } - char* getPtr() const { return ptr; } - protected: - - Ptr parser; - char* ptr; - int indent; - std::vector encoded; - std::vector decoded; - size_t ofs; - size_t totalchars; - bool eos; - }; - - char* parseBase64(char* ptr, int indent, FileNode& collection) - { - const int BASE64_HDR_SIZE = 24; - char dt[BASE64_HDR_SIZE+1] = {0}; - base64decoder.init(parser, ptr, indent); +char *FileStorage::Impl::parseBase64(char *ptr, int indent, FileNode &collection) { + const int BASE64_HDR_SIZE = 24; + char dt[BASE64_HDR_SIZE + 1] = {0}; + base64decoder.init(parser, ptr, indent); - int i, k; + int i, k; - for( i = 0; i < BASE64_HDR_SIZE; i++ ) - dt[i] = (char)base64decoder.getUInt8(); - for( i = 0; i < BASE64_HDR_SIZE; i++ ) - if( isspace(dt[i])) - break; - dt[i] = '\0'; + for (i = 0; i < BASE64_HDR_SIZE; i++) + dt[i] = (char) base64decoder.getUInt8(); + for (i = 0; i < BASE64_HDR_SIZE; i++) + if (isspace(dt[i])) + break; + dt[i] = '\0'; - CV_Assert( !base64decoder.endOfStream() ); + CV_Assert(!base64decoder.endOfStream()); - int fmt_pairs[CV_FS_MAX_FMT_PAIRS*2]; - int fmt_pair_count = fs::decodeFormat( dt, fmt_pairs, CV_FS_MAX_FMT_PAIRS ); - int ival = 0; - double fval = 0; + int fmt_pairs[CV_FS_MAX_FMT_PAIRS * 2]; + int fmt_pair_count = fs::decodeFormat(dt, fmt_pairs, CV_FS_MAX_FMT_PAIRS); + int ival = 0; + double fval = 0; - for(;;) - { - for( k = 0; k < fmt_pair_count; k++ ) - { - int elem_type = fmt_pairs[k*2+1]; - int count = fmt_pairs[k*2]; + for (;;) { + for (k = 0; k < fmt_pair_count; k++) { + int elem_type = fmt_pairs[k * 2 + 1]; + int count = fmt_pairs[k * 2]; - for( i = 0; i < count; i++ ) - { - int node_type = FileNode::INT; - switch( elem_type ) - { + for (i = 0; i < count; i++) { 
+ int node_type = FileNode::INT; + switch (elem_type) { case CV_8U: ival = base64decoder.getUInt8(); break; case CV_8S: - ival = (char)base64decoder.getUInt8(); + ival = (char) base64decoder.getUInt8(); break; case CV_16U: ival = base64decoder.getUInt16(); break; case CV_16S: - ival = (short)base64decoder.getUInt16(); + ival = (short) base64decoder.getUInt16(); break; case CV_32S: ival = base64decoder.getInt32(); break; - case CV_32F: - { + case CV_32F: { Cv32suf v; v.i = base64decoder.getInt32(); fval = v.f; node_type = FileNode::REAL; - } + } break; case CV_64F: fval = base64decoder.getFloat64(); node_type = FileNode::REAL; break; case CV_16F: - fval = (float)float16_t::fromBits(base64decoder.getUInt16()); + fval = (float) float16_t::fromBits(base64decoder.getUInt16()); node_type = FileNode::REAL; break; default: - CV_Error( Error::StsUnsupportedFormat, "Unsupported type" ); - } - - if( base64decoder.endOfStream() ) - break; - addNode(collection, std::string(), node_type, - node_type == FileNode::INT ? (void*)&ival : (void*)&fval, -1); + CV_Error(Error::StsUnsupportedFormat, "Unsupported type"); } + + if (base64decoder.endOfStream()) + break; + addNode(collection, std::string(), node_type, + node_type == FileNode::INT ? 
(void *) &ival : (void *) &fval, -1); } - if( base64decoder.endOfStream() ) - break; } - - finalizeCollection(collection); - return base64decoder.getPtr(); - } - - void parseError( const char* func_name, const std::string& err_msg, const char* source_file, int source_line ) - { - std::string msg = format("%s(%d): %s", filename.c_str(), lineno, err_msg.c_str()); - error(Error::StsParseError, func_name, msg.c_str(), source_file, source_line ); - } - - const uchar* getNodePtr(size_t blockIdx, size_t ofs) const - { - CV_Assert( blockIdx < fs_data_ptrs.size()); - CV_Assert( ofs < fs_data_blksz[blockIdx]); - - return fs_data_ptrs[blockIdx] + ofs; - } - - std::string getName( size_t nameofs ) const - { - CV_Assert( nameofs < str_hash_data.size() ); - return std::string(&str_hash_data[nameofs]); + if (base64decoder.endOfStream()) + break; } - FileStorage* getFS() { return fs_ext; } - - FileStorage* fs_ext; - - std::string filename; - int flags; - bool empty_stream; - - FILE* file; - gzFile gzfile; + finalizeCollection(collection); + return base64decoder.getPtr(); +} - bool is_opened; - bool dummy_eof; - bool write_mode; - bool mem_mode; - int fmt; +void FileStorage::Impl::parseError(const char *func_name, const std::string &err_msg, const char *source_file, + int source_line) { + std::string msg = format("%s(%d): %s", filename.c_str(), lineno, err_msg.c_str()); + error(Error::StsParseError, func_name, msg.c_str(), source_file, source_line); +} - State state; //!< current state of the FileStorage (used only for writing) - int space, wrap_margin; - std::deque write_stack; - std::vector buffer; - size_t bufofs; +const uchar *FileStorage::Impl::getNodePtr(size_t blockIdx, size_t ofs) const { + CV_Assert(blockIdx < fs_data_ptrs.size()); + CV_Assert(ofs < fs_data_blksz[blockIdx]); - std::deque outbuf; + return fs_data_ptrs[blockIdx] + ofs; +} - Ptr emitter; - Ptr parser; - Base64Decoder base64decoder; +std::string FileStorage::Impl::getName(size_t nameofs) const { + 
CV_Assert(nameofs < str_hash_data.size()); + return std::string(&str_hash_data[nameofs]); +} - std::vector roots; - std::vector > > fs_data; - std::vector fs_data_ptrs; - std::vector fs_data_blksz; - size_t freeSpaceOfs; - typedef std::unordered_map str_hash_t; - str_hash_t str_hash; - std::vector str_hash_data; +FileStorage *FileStorage::Impl::getFS() { return fs_ext; } - std::vector strbufv; - char* strbuf; - size_t strbufsize; - size_t strbufpos; - int lineno; -}; FileStorage::FileStorage() : state(0) @@ -1807,7 +1842,7 @@ FileStorage::FileStorage(const String& filename, int flags, const String& encodi void FileStorage::startWriteStruct(const String& name, int struct_flags, const String& typeName) { - p->startWriteStruct(name.c_str(), struct_flags, typeName.c_str()); + p->startWriteStruct(name.size() ? name.c_str() : 0, struct_flags, typeName.size() ? typeName.c_str() : 0); elname = String(); if ((struct_flags & FileNode::TYPE_MASK) == FileNode::SEQ) state = FileStorage::VALUE_EXPECTED; @@ -1882,7 +1917,7 @@ std::string FileStorage::getDefaultObjectName(const std::string& _filename) } ptr++; if( ptr == ptr2 ) - CV_Error( CV_StsBadArg, "Invalid filename" ); + CV_Error( cv::Error::StsBadArg, "Invalid filename" ); char* name = name_buf.data(); @@ -2005,12 +2040,14 @@ FileStorage& operator << (FileStorage& fs, const String& str) if( c == '}' || c == ']' ) { if( fs_impl->write_stack.empty() ) - CV_Error_( CV_StsError, ("Extra closing '%c'", *_str) ); + CV_Error_( cv::Error::StsError, ("Extra closing '%c'", *_str) ); + + fs_impl->workaround(); int struct_flags = fs_impl->write_stack.back().flags; char expected_bracket = FileNode::isMap(struct_flags) ? 
'}' : ']'; if( c != expected_bracket ) - CV_Error_( CV_StsError, ("The closing '%c' does not match the opening '%c'", c, expected_bracket)); + CV_Error_( cv::Error::StsError, ("The closing '%c' does not match the opening '%c'", c, expected_bracket)); fs_impl->endWriteStruct(); CV_Assert(!fs_impl->write_stack.empty()); struct_flags = fs_impl->write_stack.back().flags; @@ -2020,7 +2057,7 @@ FileStorage& operator << (FileStorage& fs, const String& str) else if( fs.state == NAME_EXPECTED + INSIDE_MAP ) { if (!cv_isalpha(c) && c != '_') - CV_Error_( CV_StsError, ("Incorrect element name %s; should start with a letter or '_'", _str) ); + CV_Error_( cv::Error::StsError, ("Incorrect element name %s; should start with a letter or '_'", _str) ); fs.elname = str; fs.state = VALUE_EXPECTED + INSIDE_MAP; } @@ -2049,7 +2086,7 @@ FileStorage& operator << (FileStorage& fs, const String& str) } } else - CV_Error( CV_StsError, "Invalid fs.state" ); + CV_Error( cv::Error::StsError, "Invalid fs.state" ); return fs; } diff --git a/modules/core/src/persistence.hpp b/modules/core/src/persistence.hpp index 05c7adc17ce3..1a9dbecf7c5b 100644 --- a/modules/core/src/persistence.hpp +++ b/modules/core/src/persistence.hpp @@ -163,6 +163,24 @@ class FileStorage_API CV_NORETURN virtual void parseError(const char* funcname, const std::string& msg, const char* filename, int lineno) = 0; + +private: + enum Base64State{ + Uncertain, + NotUse, + InUse, + }; + + friend class cv::FileStorage::Impl; + friend class cv::FileStorage; + friend class JSONEmitter; + friend class XMLEmitter; + friend class YAMLEmitter; + + virtual void check_if_write_struct_is_delayed(bool change_type_to_base64 = false) = 0; + virtual void switch_to_Base64_state(Base64State state) = 0; + virtual Base64State get_state_of_writing_base64() = 0; + virtual int get_space() = 0; }; class FileStorageEmitter diff --git a/modules/core/src/persistence_base64_encoding.cpp b/modules/core/src/persistence_base64_encoding.cpp new file mode 
100644 index 000000000000..7d90fd422b2d --- /dev/null +++ b/modules/core/src/persistence_base64_encoding.cpp @@ -0,0 +1,370 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include "precomp.hpp" +#include "persistence_impl.hpp" +#include "persistence_base64_encoding.hpp" + +namespace cv +{ + +class base64::Base64ContextEmitter +{ +public: + explicit Base64ContextEmitter(cv::FileStorage::Impl& fs, bool needs_indent_) + : file_storage(fs) + , needs_indent(needs_indent_) + , binary_buffer(BUFFER_LEN) + , base64_buffer(base64_encode_buffer_size(BUFFER_LEN)) + , src_beg(0) + , src_cur(0) + , src_end(0) + { + src_beg = binary_buffer.data(); + src_end = src_beg + BUFFER_LEN; + src_cur = src_beg; + + CV_Assert(fs.write_mode); + + if (needs_indent) + { + file_storage.flush(); + } + } + + ~Base64ContextEmitter() + { + /* cleaning */ + if (src_cur != src_beg) + flush(); /* encode the rest binary data to base64 buffer */ + } + + Base64ContextEmitter & write(const uchar * beg, const uchar * end) + { + if (beg >= end) + return *this; + + while (beg < end) { + /* collect binary data and copy to binary buffer */ + size_t len = std::min(end - beg, src_end - src_cur); + std::memcpy(src_cur, beg, len); + beg += len; + src_cur += len; + + if (src_cur >= src_end) { + /* binary buffer is full. */ + /* encode it to base64 and send result to fs */ + flush(); + } + } + + return *this; + } + + /* + * a convertor must provide : + * - `operator >> (uchar * & dst)` for writing current binary data to `dst` and moving to next data. + * - `operator bool` for checking if current loaction is valid and not the end. 
+ */ + template inline + Base64ContextEmitter & write(_to_binary_convertor_t & convertor) + { + static const size_t BUFFER_MAX_LEN = 1024U; + + std::vector buffer(BUFFER_MAX_LEN); + uchar * beg = buffer.data(); + uchar * end = beg; + + while (convertor) { + convertor >> end; + write(beg, end); + end = beg; + } + + return *this; + } + + bool flush() + { + /* control line width, so on. */ + size_t len = base64_encode(src_beg, base64_buffer.data(), 0U, src_cur - src_beg); + if (len == 0U) + return false; + + src_cur = src_beg; + + if ( !needs_indent) + { + file_storage.puts((const char*)base64_buffer.data()); + } + else + { + const char newline[] = "\n"; + char space[80]; + int ident = file_storage.write_stack.back().indent; + memset(space, ' ', static_cast(ident)); + space[ident] = '\0'; + + file_storage.puts(space); + file_storage.puts((const char*)base64_buffer.data()); + file_storage.puts(newline); + file_storage.flush(); + } + + return true; + } + +private: + /* because of Base64, we must keep its length a multiple of 3 */ + static const size_t BUFFER_LEN = 48U; + // static_assert(BUFFER_LEN % 3 == 0, "BUFFER_LEN is invalid"); + +private: + cv::FileStorage::Impl& file_storage; + bool needs_indent; + + std::vector binary_buffer; + std::vector base64_buffer; + uchar * src_beg; + uchar * src_cur; + uchar * src_end; +}; + +std::string base64::make_base64_header(const char *dt) { + std::ostringstream oss; + oss << dt << ' '; + std::string buffer(oss.str()); + CV_Assert(buffer.size() < ::base64::HEADER_SIZE); + + buffer.reserve(::base64::HEADER_SIZE); + while (buffer.size() < ::base64::HEADER_SIZE) + buffer += ' '; + + return buffer; +} + +size_t base64::base64_encode(const uint8_t *src, uint8_t *dst, size_t off, size_t cnt) { + if (!src || !dst || !cnt) + return 0; + + /* initialize beginning and end */ + uint8_t * dst_beg = dst; + uint8_t * dst_cur = dst_beg; + + uint8_t const * src_beg = src + off; + uint8_t const * src_cur = src_beg; + uint8_t const * src_end = 
src_cur + cnt / 3U * 3U; + + /* integer multiples part */ + while (src_cur < src_end) { + uint8_t _2 = *src_cur++; + uint8_t _1 = *src_cur++; + uint8_t _0 = *src_cur++; + *dst_cur++ = base64_mapping[ _2 >> 2U]; + *dst_cur++ = base64_mapping[(_1 & 0xF0U) >> 4U | (_2 & 0x03U) << 4U]; + *dst_cur++ = base64_mapping[(_0 & 0xC0U) >> 6U | (_1 & 0x0FU) << 2U]; + *dst_cur++ = base64_mapping[ _0 & 0x3FU]; + } + + /* remainder part */ + size_t rst = src_beg + cnt - src_cur; + if (rst == 1U) { + uint8_t _2 = *src_cur++; + *dst_cur++ = base64_mapping[ _2 >> 2U]; + *dst_cur++ = base64_mapping[(_2 & 0x03U) << 4U]; + } else if (rst == 2U) { + uint8_t _2 = *src_cur++; + uint8_t _1 = *src_cur++; + *dst_cur++ = base64_mapping[ _2 >> 2U]; + *dst_cur++ = base64_mapping[(_2 & 0x03U) << 4U | (_1 & 0xF0U) >> 4U]; + *dst_cur++ = base64_mapping[(_1 & 0x0FU) << 2U]; + } + + /* padding */ + switch (rst) + { + case 1U: *dst_cur++ = base64_padding; + /* fallthrough */ + case 2U: *dst_cur++ = base64_padding; + /* fallthrough */ + default: *dst_cur = 0; + break; + } + + return static_cast(dst_cur - dst_beg); +} + +int base64::icvCalcStructSize(const char *dt, int initial_size) { + int size = cv::fs::calcElemSize( dt, initial_size ); + size_t elem_max_size = 0; + for ( const char * type = dt; *type != '\0'; type++ ) { + switch ( *type ) + { + case 'u': { elem_max_size = std::max( elem_max_size, sizeof(uchar ) ); break; } + case 'c': { elem_max_size = std::max( elem_max_size, sizeof(schar ) ); break; } + case 'w': { elem_max_size = std::max( elem_max_size, sizeof(ushort) ); break; } + case 's': { elem_max_size = std::max( elem_max_size, sizeof(short ) ); break; } + case 'i': { elem_max_size = std::max( elem_max_size, sizeof(int ) ); break; } + case 'f': { elem_max_size = std::max( elem_max_size, sizeof(float ) ); break; } + case 'd': { elem_max_size = std::max( elem_max_size, sizeof(double) ); break; } + default: break; + } + } + size = cvAlign( size, static_cast(elem_max_size) ); + return size; +} 
+ +size_t base64::base64_encode_buffer_size(size_t cnt, bool is_end_with_zero) { + size_t additional = static_cast(is_end_with_zero == true); + return (cnt + 2U) / 3U * 4U + additional; +} + +base64::Base64Writer::Base64Writer(cv::FileStorage::Impl& fs, bool can_indent) + : emitter(new Base64ContextEmitter(fs, can_indent)) + , data_type_string() +{ + CV_Assert(fs.write_mode); +} + +void base64::Base64Writer::write(const void* _data, size_t len, const char* dt) +{ + check_dt(dt); + RawDataToBinaryConvertor convertor(_data, static_cast(len), data_type_string); + emitter->write(convertor); +} + +template inline +void base64::Base64Writer::write(_to_binary_convertor_t & convertor, const char* dt) +{ + check_dt(dt); + emitter->write(convertor); +} + +base64::Base64Writer::~Base64Writer() +{ + delete emitter; +} + +void base64::Base64Writer::check_dt(const char* dt) +{ + if ( dt == 0 ) + CV_Error( cv::Error::StsBadArg, "Invalid \'dt\'." ); + else if (data_type_string.empty()) { + data_type_string = dt; + + /* output header */ + std::string buffer = make_base64_header(dt); + const uchar * beg = reinterpret_cast(buffer.data()); + const uchar * end = beg + buffer.size(); + + emitter->write(beg, end); + } else if ( data_type_string != dt ) + CV_Error( cv::Error::StsBadArg, "\'dt\' does not match." 
); +} + +base64::RawDataToBinaryConvertor::RawDataToBinaryConvertor(const void* src, int len, const std::string & dt) + : beg(reinterpret_cast(src)) + , cur(0) + , end(0) +{ + CV_Assert(src); + CV_Assert(!dt.empty()); + CV_Assert(len > 0); + + /* calc step and to_binary_funcs */ + step_packed = make_to_binary_funcs(dt); + + end = beg; + cur = beg; + + step = icvCalcStructSize(dt.c_str(), 0); + end = beg + static_cast(len); +} + +inline base64::RawDataToBinaryConvertor& base64::RawDataToBinaryConvertor::operator >>(uchar * & dst) +{ + CV_DbgAssert(*this); + + for (size_t i = 0U, n = to_binary_funcs.size(); i < n; i++) { + elem_to_binary_t & pack = to_binary_funcs[i]; + pack.func(cur + pack.offset, dst + pack.offset_packed); + } + cur += step; + dst += step_packed; + + return *this; +} + +inline base64::RawDataToBinaryConvertor::operator bool() const +{ + return cur < end; +} + +size_t base64::RawDataToBinaryConvertor::make_to_binary_funcs(const std::string &dt) +{ + size_t cnt = 0; + size_t offset = 0; + size_t offset_packed = 0; + char type = '\0'; + + std::istringstream iss(dt); + while (!iss.eof()) { + if (!(iss >> cnt)) { + iss.clear(); + cnt = 1; + } + CV_Assert(cnt > 0U); + if (!(iss >> type)) + break; + + while (cnt-- > 0) + { + elem_to_binary_t pack; + + size_t size = 0; + switch (type) + { + case 'u': + case 'c': + size = sizeof(uchar); + pack.func = to_binary; + break; + case 'w': + case 's': + size = sizeof(ushort); + pack.func = to_binary; + break; + case 'i': + size = sizeof(uint); + pack.func = to_binary; + break; + case 'f': + size = sizeof(float); + pack.func = to_binary; + break; + case 'd': + size = sizeof(double); + pack.func = to_binary; + break; + case 'r': + default: + CV_Error(cv::Error::StsError, "type is not supported"); + }; + + offset = static_cast(cvAlign(static_cast(offset), static_cast(size))); + pack.offset = offset; + offset += size; + + pack.offset_packed = offset_packed; + offset_packed += size; + + to_binary_funcs.push_back(pack); 
+ } + } + + CV_Assert(iss.eof()); + return offset_packed; +} + +} \ No newline at end of file diff --git a/modules/core/src/persistence_base64_encoding.hpp b/modules/core/src/persistence_base64_encoding.hpp new file mode 100644 index 000000000000..1ee5201e141f --- /dev/null +++ b/modules/core/src/persistence_base64_encoding.hpp @@ -0,0 +1,127 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_CORE_BASE64_ENCODING_HPP +#define OPENCV_CORE_BASE64_ENCODING_HPP + +namespace cv +{ + +namespace base64 +{ +/* A decorator for CvFileStorage +* - no copyable +* - not safe for now +* - move constructor may be needed if C++11 +*/ +uint8_t const base64_mapping[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +uint8_t const base64_padding = '='; + +std::string make_base64_header(const char * dt); + +size_t base64_encode(uint8_t const * src, uint8_t * dst, size_t off, size_t cnt); + + +int icvCalcStructSize( const char* dt, int initial_size ); + +class Base64ContextEmitter; +class Impl; + +class Base64Writer +{ +public: + Base64Writer(cv::FileStorage::Impl& fs, bool can_indent); + ~Base64Writer(); + void write(const void* _data, size_t len, const char* dt); + template void write(_to_binary_convertor_t & convertor, const char* dt); + +private: + void check_dt(const char* dt); + +private: + // disable copy and assignment + Base64Writer(const Base64Writer &); + Base64Writer & operator=(const Base64Writer &); + +private: + + Base64ContextEmitter * emitter; + std::string data_type_string; +}; + +size_t base64_encode_buffer_size(size_t cnt, bool is_end_with_zero = true); + +template inline size_t +to_binary(_uint_t val, uchar * cur) +{ + size_t delta = CHAR_BIT; + size_t cnt = sizeof(_uint_t); + while (cnt --> static_cast(0U)) { + *cur++ = static_cast(val); + val >>= delta; + } + return 
sizeof(_uint_t); +} + +template<> inline size_t to_binary(double val, uchar * cur) +{ + Cv64suf bit64; + bit64.f = val; + return to_binary(bit64.u, cur); +} + +template<> inline size_t to_binary(float val, uchar * cur) +{ + Cv32suf bit32; + bit32.f = val; + return to_binary(bit32.u, cur); +} + +template inline size_t +to_binary(uchar const * val, uchar * cur) +{ + return to_binary<_primitive_t>(*reinterpret_cast<_primitive_t const *>(val), cur); +} + + + +class RawDataToBinaryConvertor +{ +public: + // NOTE: len is already multiplied by element size here + RawDataToBinaryConvertor(const void* src, int len, const std::string & dt); + + inline RawDataToBinaryConvertor & operator >>(uchar * & dst); + inline operator bool() const; + +private: + typedef size_t(*to_binary_t)(const uchar *, uchar *); + struct elem_to_binary_t + { + size_t offset; + size_t offset_packed; + to_binary_t func; + }; + +private: + size_t make_to_binary_funcs(const std::string &dt); + +private: + const uchar * beg; + const uchar * cur; + const uchar * end; + + size_t step; + size_t step_packed; + std::vector to_binary_funcs; +}; + +} + +} +#endif \ No newline at end of file diff --git a/modules/core/src/persistence_impl.hpp b/modules/core/src/persistence_impl.hpp new file mode 100644 index 000000000000..4ea2dc350282 --- /dev/null +++ b/modules/core/src/persistence_impl.hpp @@ -0,0 +1,231 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef OPENCV_CORE_PERSISTENCE_IMPL_HPP +#define OPENCV_CORE_PERSISTENCE_IMPL_HPP + +#include "persistence.hpp" +#include "persistence_base64_encoding.hpp" +#include +#include + + +namespace cv +{ + +enum Base64State{ + Uncertain, + NotUse, + InUse, +}; + +class cv::FileStorage::Impl : public FileStorage_API +{ +public: + void init(); + + Impl(FileStorage* _fs); + + virtual ~Impl(); + + void release(String* out=0); + + void analyze_file_name( const std::string& file_name, std::vector& params ); + + bool open( const char* filename_or_buf, int _flags, const char* encoding ); + + void puts( const char* str ); + + char* getsFromFile( char* buf, int count ); + + char* gets( size_t maxCount ); + + char* gets(); + + bool eof(); + + void setEof(); + + void closeFile(); + + void rewind(); + + char* resizeWriteBuffer( char* ptr, int len ); + + char* flush(); + + void endWriteStruct(); + + void startWriteStruct_helper( const char* key, int struct_flags, + const char* type_name ); + + void startWriteStruct( const char* key, int struct_flags, + const char* type_name ); + + void writeComment( const char* comment, bool eol_comment ); + + void startNextStream(); + + void write( const String& key, int value ); + + void write( const String& key, double value ); + + void write( const String& key, const String& value ); + + void writeRawData( const std::string& dt, const void* _data, size_t len ); + + void workaround(); + + void switch_to_Base64_state( FileStorage_API::Base64State new_state); + + void make_write_struct_delayed( const char* key, int struct_flags, const char* type_name ); + + void check_if_write_struct_is_delayed( bool change_type_to_base64 ); + + void writeRawDataBase64(const void* _data, size_t len, const char* dt ); + + String releaseAndGetString(); + + FileNode getFirstTopLevelNode() const; + + FileNode 
root(int streamIdx=0) const; + + FileNode operator[](const String& nodename) const; + + FileNode operator[](const char* /*nodename*/) const; + + int getFormat() const; + + char* bufferPtr() const; + char* bufferStart() const; + char* bufferEnd() const; + void setBufferPtr(char* ptr); + int wrapMargin() const; + + FStructData& getCurrentStruct(); + + void setNonEmpty(); + + void processSpecialDouble( char* buf, double* value, char** endptr ); + + double strtod( char* ptr, char** endptr ); + + void convertToCollection(int type, FileNode& node); + + // a) allocates new FileNode (for that just set blockIdx to the last block and ofs to freeSpaceOfs) or + // b) reallocates just created new node (blockIdx and ofs must be taken from FileNode). + // If there is no enough space in the current block (it should be the last block added so far), + // the last block is shrunk so that it ends immediately before the reallocated node. Then, + // a new block of sufficient size is allocated and the FileNode is placed in the beginning of it. + // The case (a) can be used to allocate the very first node by setting blockIdx == ofs == 0. + // In the case (b) the existing tag and the name are copied automatically. 
+ uchar* reserveNodeSpace(FileNode& node, size_t sz); + + unsigned getStringOfs( const std::string& key ) const; + + FileNode addNode( FileNode& collection, const std::string& key, + int elem_type, const void* value, int len ); + + void finalizeCollection( FileNode& collection ); + + void normalizeNodeOfs(size_t& blockIdx, size_t& ofs) const; + + Base64State get_state_of_writing_base64(); + + int get_space(); + + class Base64Decoder + { + public: + Base64Decoder(); + void init(Ptr& _parser, char* _ptr, int _indent); + + bool readMore(int needed); + + uchar getUInt8(); + + ushort getUInt16(); + + int getInt32(); + + double getFloat64(); + + bool endOfStream() const; + char* getPtr() const; + protected: + + Ptr parser; + char* ptr; + int indent; + std::vector encoded; + std::vector decoded; + size_t ofs; + size_t totalchars; + bool eos; + }; + + char* parseBase64(char* ptr, int indent, FileNode& collection); + + void parseError( const char* func_name, const std::string& err_msg, const char* source_file, int source_line ); + + const uchar* getNodePtr(size_t blockIdx, size_t ofs) const; + + std::string getName( size_t nameofs ) const; + + FileStorage* getFS(); + + FileStorage* fs_ext; + + std::string filename; + int flags; + bool empty_stream; + + FILE* file; + gzFile gzfile; + + bool is_opened; + bool dummy_eof; + bool write_mode; + bool mem_mode; + int fmt; + + State state; //!< current state of the FileStorage (used only for writing) + bool is_using_base64; + bool is_write_struct_delayed; + char* delayed_struct_key; + int delayed_struct_flags; + char* delayed_type_name; + FileStorage_API::Base64State state_of_writing_base64; + + int space, wrap_margin; + std::deque write_stack; + std::vector buffer; + size_t bufofs; + + std::deque outbuf; + + Ptr emitter; + Ptr parser; + Base64Decoder base64decoder; + base64::Base64Writer* base64_writer; + + std::vector roots; + std::vector > > fs_data; + std::vector fs_data_ptrs; + std::vector fs_data_blksz; + size_t freeSpaceOfs; 
+ typedef std::unordered_map str_hash_t; + str_hash_t str_hash; + std::vector str_hash_data; + + std::vector strbufv; + char* strbuf; + size_t strbufsize; + size_t strbufpos; + int lineno; +}; + +} + +#endif \ No newline at end of file diff --git a/modules/core/src/persistence_json.cpp b/modules/core/src/persistence_json.cpp index 667895fbc5e7..12a58e80bfa0 100644 --- a/modules/core/src/persistence_json.cpp +++ b/modules/core/src/persistence_json.cpp @@ -23,7 +23,7 @@ class JSONEmitter : public FileStorageEmitter struct_flags = (struct_flags & (FileNode::TYPE_MASK|FileNode::FLOW)) | FileNode::EMPTY; if( !FileNode::isCollection(struct_flags)) - CV_Error( CV_StsBadArg, + CV_Error( cv::Error::StsBadArg, "Some collection type - FileNode::SEQ or FileNode::MAP, must be specified" ); if( type_name && *type_name == '\0' ) @@ -53,29 +53,26 @@ class JSONEmitter : public FileStorageEmitter void endWriteStruct(const FStructData& current_struct) { int struct_flags = current_struct.flags; - CV_Assert( FileNode::isCollection(struct_flags) ); - if( !FileNode::isFlow(struct_flags) ) - { -#if 0 - if ( fs->bufferPtr() <= fs->bufferStart() + fs->space ) - { - /* some bad code for base64_writer... */ - ptr = fs->bufferPtr(); - *ptr++ = '\n'; - *ptr++ = '\0'; - fs->puts( fs->bufferStart() ); - fs->setBufferPtr(fs->bufferStart()); + if (FileNode::isCollection(struct_flags)) { + if (!FileNode::isFlow(struct_flags)) { + if (fs->bufferPtr() <= fs->bufferStart() + fs->get_space()) { + /* some bad code for base64_writer... */ + char *ptr = fs->bufferPtr(); + *ptr++ = '\n'; + *ptr++ = '\0'; + fs->puts(fs->bufferStart()); + fs->setBufferPtr(fs->bufferStart()); + } + fs->flush(); } -#endif - fs->flush(); - } - char* ptr = fs->bufferPtr(); - if( ptr > fs->bufferStart() + current_struct.indent && !FileNode::isEmptyCollection(struct_flags) ) - *ptr++ = ' '; - *ptr++ = FileNode::isMap(struct_flags) ? 
'}' : ']'; - fs->setBufferPtr(ptr); + char *ptr = fs->bufferPtr(); + if (ptr > fs->bufferStart() + current_struct.indent && !FileNode::isEmptyCollection(struct_flags)) + *ptr++ = ' '; + *ptr++ = FileNode::isMap(struct_flags) ? '}' : ']'; + fs->setBufferPtr(ptr); + } } void write(const char* key, int value) @@ -97,11 +94,11 @@ class JSONEmitter : public FileStorageEmitter int i, len; if( !str ) - CV_Error( CV_StsNullPtr, "Null string pointer" ); + CV_Error( cv::Error::StsNullPtr, "Null string pointer" ); len = (int)strlen(str); if( len > CV_FS_MAX_LEN ) - CV_Error( CV_StsBadArg, "The written string is too long" ); + CV_Error( cv::Error::StsBadArg, "The written string is too long" ); if( quote || len == 0 || str[0] != str[len-1] || (str[0] != '\"' && str[0] != '\'') ) { @@ -136,6 +133,20 @@ class JSONEmitter : public FileStorageEmitter void writeScalar(const char* key, const char* data) { + /* check write_struct */ + + fs->check_if_write_struct_is_delayed(false); + if ( fs->get_state_of_writing_base64() == FileStorage_API::Uncertain ) + { + fs->switch_to_Base64_state( FileStorage_API::NotUse ); + } + else if ( fs->get_state_of_writing_base64() == FileStorage_API::InUse ) + { + CV_Error( cv::Error::StsError, "At present, output Base64 data only." 
); + } + + /* check parameters */ + size_t key_len = 0u; if( key && *key == '\0' ) key = 0; @@ -143,9 +154,9 @@ class JSONEmitter : public FileStorageEmitter { key_len = strlen(key); if ( key_len == 0u ) - CV_Error( CV_StsBadArg, "The key is an empty" ); + CV_Error( cv::Error::StsBadArg, "The key is an empty" ); else if ( static_cast(key_len) > CV_FS_MAX_LEN ) - CV_Error( CV_StsBadArg, "The key is too long" ); + CV_Error( cv::Error::StsBadArg, "The key is too long" ); } size_t data_len = 0u; @@ -157,7 +168,7 @@ class JSONEmitter : public FileStorageEmitter if( FileNode::isCollection(struct_flags) ) { if ( (FileNode::isMap(struct_flags) ^ (key != 0)) ) - CV_Error( CV_StsBadArg, "An attempt to add element without a key to a map, " + CV_Error( cv::Error::StsBadArg, "An attempt to add element without a key to a map, " "or add element with key to sequence" ); } else { fs->setNonEmpty(); @@ -199,7 +210,7 @@ class JSONEmitter : public FileStorageEmitter if( key ) { if( !cv_isalpha(key[0]) && key[0] != '_' ) - CV_Error( CV_StsBadArg, "Key must start with a letter or _" ); + CV_Error( cv::Error::StsBadArg, "Key must start with a letter or _" ); ptr = fs->resizeWriteBuffer( ptr, static_cast(key_len) ); *ptr++ = '\"'; @@ -210,7 +221,7 @@ class JSONEmitter : public FileStorageEmitter ptr[i] = c; if( !cv_isalnum(c) && c != '-' && c != '_' && c != ' ' ) - CV_Error( CV_StsBadArg, "Key names may only contain alphanumeric characters [a-zA-Z0-9], '-', '_' and ' '" ); + CV_Error( cv::Error::StsBadArg, "Key names may only contain alphanumeric characters [a-zA-Z0-9], '-', '_' and ' '" ); } ptr += key_len; @@ -233,7 +244,7 @@ class JSONEmitter : public FileStorageEmitter void writeComment(const char* comment, bool eol_comment) { if( !comment ) - CV_Error( CV_StsNullPtr, "Null comment" ); + CV_Error( cv::Error::StsNullPtr, "Null comment" ); int len = static_cast(strlen(comment)); char* ptr = fs->bufferPtr(); diff --git a/modules/core/src/persistence_xml.cpp 
b/modules/core/src/persistence_xml.cpp index 52b53744254e..62b7b1eb59c6 100644 --- a/modules/core/src/persistence_xml.cpp +++ b/modules/core/src/persistence_xml.cpp @@ -45,7 +45,7 @@ class XMLEmitter : public FileStorageEmitter if( FileNode::isCollection(struct_flags) ) { if( FileNode::isMap(struct_flags) ^ (key != 0) ) - CV_Error( CV_StsBadArg, "An attempt to add element without a key to a map, " + CV_Error( cv::Error::StsBadArg, "An attempt to add element without a key to a map, " "or add element with key to sequence" ); } else @@ -61,26 +61,26 @@ class XMLEmitter : public FileStorageEmitter if( !key ) key = "_"; else if( key[0] == '_' && key[1] == '\0' ) - CV_Error( CV_StsBadArg, "A single _ is a reserved tag name" ); + CV_Error( cv::Error::StsBadArg, "A single _ is a reserved tag name" ); len = (int)strlen( key ); *ptr++ = '<'; if( tag_type == CV_XML_CLOSING_TAG ) { if( !attrlist.empty() ) - CV_Error( CV_StsBadArg, "Closing tag should not include any attributes" ); + CV_Error( cv::Error::StsBadArg, "Closing tag should not include any attributes" ); *ptr++ = '/'; } if( !cv_isalpha(key[0]) && key[0] != '_' ) - CV_Error( CV_StsBadArg, "Key should start with a letter or _" ); + CV_Error( cv::Error::StsBadArg, "Key should start with a letter or _" ); ptr = fs->resizeWriteBuffer( ptr, len ); for( i = 0; i < len; i++ ) { char c = key[i]; if( !cv_isalnum(c) && c != '_' && c != '-' ) - CV_Error( CV_StsBadArg, "Key name may only contain alphanumeric characters [a-zA-Z0-9], '-' and '_'" ); + CV_Error( cv::Error::StsBadArg, "Key name may only contain alphanumeric characters [a-zA-Z0-9], '-' and '_'" ); ptr[i] = c; } ptr += len; @@ -158,11 +158,11 @@ class XMLEmitter : public FileStorageEmitter int i, len; if( !str ) - CV_Error( CV_StsNullPtr, "Null string pointer" ); + CV_Error( cv::Error::StsNullPtr, "Null string pointer" ); len = (int)strlen(str); if( len > CV_FS_MAX_LEN ) - CV_Error( CV_StsBadArg, "The written string is too long" ); + CV_Error( cv::Error::StsBadArg, 
"The written string is too long" ); if( quote || len == 0 || str[0] != '\"' || str[0] != str[len-1] ) { @@ -233,6 +233,16 @@ class XMLEmitter : public FileStorageEmitter void writeScalar(const char* key, const char* data) { + fs->check_if_write_struct_is_delayed(false); + if ( fs->get_state_of_writing_base64() == FileStorage_API::Uncertain ) + { + fs->switch_to_Base64_state( FileStorage_API::NotUse ); + } + else if ( fs->get_state_of_writing_base64() == FileStorage_API::InUse ) + { + CV_Error( cv::Error::StsError, "At present, output Base64 data only." ); + } + int len = (int)strlen(data); if( key && *key == '\0' ) key = 0; @@ -255,7 +265,7 @@ class XMLEmitter : public FileStorageEmitter int new_offset = (int)(ptr - fs->bufferStart()) + len; if( key ) - CV_Error( CV_StsBadArg, "elements with keys can not be written to sequence" ); + CV_Error( cv::Error::StsBadArg, "elements with keys can not be written to sequence" ); current_struct.flags = FileNode::SEQ; @@ -281,10 +291,10 @@ class XMLEmitter : public FileStorageEmitter char* ptr; if( !comment ) - CV_Error( CV_StsNullPtr, "Null comment" ); + CV_Error( cv::Error::StsNullPtr, "Null comment" ); if( strstr(comment, "--") != 0 ) - CV_Error( CV_StsBadArg, "Double hyphen \'--\' is not allowed in the comments" ); + CV_Error( cv::Error::StsBadArg, "Double hyphen \'--\' is not allowed in the comments" ); len = (int)strlen(comment); eol = strchr(comment, '\n'); diff --git a/modules/core/src/persistence_yml.cpp b/modules/core/src/persistence_yml.cpp index 3f3742b8d18e..95db1450c62e 100644 --- a/modules/core/src/persistence_yml.cpp +++ b/modules/core/src/persistence_yml.cpp @@ -33,7 +33,7 @@ class YAMLEmitter : public FileStorageEmitter struct_flags = (struct_flags & (FileNode::TYPE_MASK|FileNode::FLOW)) | FileNode::EMPTY; if( !FileNode::isCollection(struct_flags)) - CV_Error( CV_StsBadArg, + CV_Error( cv::Error::StsBadArg, "Some collection type - FileNode::SEQ or FileNode::MAP, must be specified" ); if (type_name && 
memcmp(type_name, "binary", 6) == 0) @@ -120,11 +120,11 @@ class YAMLEmitter : public FileStorageEmitter int i, len; if( !str ) - CV_Error( CV_StsNullPtr, "Null string pointer" ); + CV_Error( cv::Error::StsNullPtr, "Null string pointer" ); len = (int)strlen(str); if( len > CV_FS_MAX_LEN ) - CV_Error( CV_StsBadArg, "The written string is too long" ); + CV_Error( cv::Error::StsBadArg, "The written string is too long" ); if( quote || len == 0 || str[0] != str[len-1] || (str[0] != '\"' && str[0] != '\'') ) { @@ -174,6 +174,16 @@ class YAMLEmitter : public FileStorageEmitter void writeScalar(const char* key, const char* data) { + fs->check_if_write_struct_is_delayed(false); + if ( fs->get_state_of_writing_base64() == FileStorage_API::Uncertain ) + { + fs->switch_to_Base64_state( FileStorage_API::NotUse ); + } + else if ( fs->get_state_of_writing_base64() == FileStorage_API::InUse ) + { + CV_Error( cv::Error::StsError, "At present, output Base64 data only." ); + } + int i, keylen = 0; int datalen = 0; char* ptr; @@ -188,7 +198,7 @@ class YAMLEmitter : public FileStorageEmitter if( FileNode::isCollection(struct_flags) ) { if( (FileNode::isMap(struct_flags) ^ (key != 0)) ) - CV_Error( CV_StsBadArg, "An attempt to add element without a key to a map, " + CV_Error( cv::Error::StsBadArg, "An attempt to add element without a key to a map, " "or add element with key to sequence" ); } else @@ -201,10 +211,10 @@ class YAMLEmitter : public FileStorageEmitter { keylen = (int)strlen(key); if( keylen == 0 ) - CV_Error( CV_StsBadArg, "The key is an empty" ); + CV_Error( cv::Error::StsBadArg, "The key is an empty" ); if( keylen > CV_FS_MAX_LEN ) - CV_Error( CV_StsBadArg, "The key is too long" ); + CV_Error( cv::Error::StsBadArg, "The key is too long" ); } if( data ) @@ -238,7 +248,7 @@ class YAMLEmitter : public FileStorageEmitter if( key ) { if( !cv_isalpha(key[0]) && key[0] != '_' ) - CV_Error( CV_StsBadArg, "Key must start with a letter or _" ); + CV_Error( cv::Error::StsBadArg, "Key 
must start with a letter or _" ); ptr = fs->resizeWriteBuffer( ptr, keylen ); @@ -248,7 +258,7 @@ class YAMLEmitter : public FileStorageEmitter ptr[i] = c; if( !cv_isalnum(c) && c != '-' && c != '_' && c != ' ' ) - CV_Error( CV_StsBadArg, "Key names may only contain alphanumeric characters [a-zA-Z0-9], '-', '_' and ' '" ); + CV_Error( cv::Error::StsBadArg, "Key names may only contain alphanumeric characters [a-zA-Z0-9], '-', '_' and ' '" ); } ptr += keylen; @@ -271,7 +281,7 @@ class YAMLEmitter : public FileStorageEmitter void writeComment(const char* comment, bool eol_comment) { if( !comment ) - CV_Error( CV_StsNullPtr, "Null comment" ); + CV_Error( cv::Error::StsNullPtr, "Null comment" ); int len = (int)strlen(comment); const char* eol = strchr(comment, '\n'); diff --git a/modules/core/test/test_io.cpp b/modules/core/test/test_io.cpp index 82bd05372da7..3712be9f2e39 100644 --- a/modules/core/test/test_io.cpp +++ b/modules/core/test/test_io.cpp @@ -586,6 +586,7 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); CV_Assert(test_info); std::string name = (std::string(test_info->test_case_name()) + "--" + test_info->name() + suffix_name); + std::string name_34 = string(cvtest::TS::ptr()->get_data_path()) + "io/3_4/" + name; if (!testReadWrite) name = string(cvtest::TS::ptr()->get_data_path()) + "io/" + name; @@ -661,7 +662,23 @@ static void test_filestorage_basic(int write_flags, const char* suffix_name, boo std::ifstream f(name.c_str(), std::ios::in|std::ios::binary); f.seekg(0, std::fstream::end); sz = (size_t)f.tellg(); + + f.seekg(0, std::ios::beg); + std::vector test_data(sz); + f.read(&test_data[0], sz); f.close(); + + std::ifstream reference(name_34.c_str(), std::ios::in|std::ios::binary); + ASSERT_TRUE(reference.is_open()); + reference.seekg(0, std::fstream::end); + size_t ref_sz = (size_t)reference.tellg(); + + 
reference.seekg(0, std::ios::beg); + std::vector reference_data(ref_sz); + reference.read(&reference_data[0], ref_sz); + reference.close(); + + EXPECT_EQ(reference_data, test_data); } std::cout << "Storage size: " << sz << std::endl; EXPECT_LE(sz, (size_t)6000); @@ -757,27 +774,27 @@ TEST(Core_InputOutput, filestorage_base64_basic_read_JSON) { test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".json", false); } -TEST(Core_InputOutput, DISABLED_filestorage_base64_basic_rw_XML) +TEST(Core_InputOutput, filestorage_base64_basic_rw_XML) { test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".xml", true); } -TEST(Core_InputOutput, DISABLED_filestorage_base64_basic_rw_YAML) +TEST(Core_InputOutput, filestorage_base64_basic_rw_YAML) { test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".yml", true); } -TEST(Core_InputOutput, DISABLED_filestorage_base64_basic_rw_JSON) +TEST(Core_InputOutput, filestorage_base64_basic_rw_JSON) { test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".json", true); } -TEST(Core_InputOutput, DISABLED_filestorage_base64_basic_memory_XML) +TEST(Core_InputOutput, filestorage_base64_basic_memory_XML) { test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".xml", true, true); } -TEST(Core_InputOutput, DISABLED_filestorage_base64_basic_memory_YAML) +TEST(Core_InputOutput, filestorage_base64_basic_memory_YAML) { test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".yml", true, true); } -TEST(Core_InputOutput, DISABLED_filestorage_base64_basic_memory_JSON) +TEST(Core_InputOutput, filestorage_base64_basic_memory_JSON) { test_filestorage_basic(cv::FileStorage::WRITE_BASE64, ".json", true, true); } diff --git a/modules/python/test/test_filestorage_io.py b/modules/python/test/test_filestorage_io.py index 62b540d79cd8..01e0a72300cc 100755 --- a/modules/python/test/test_filestorage_io.py +++ b/modules/python/test/test_filestorage_io.py @@ -1,6 +1,8 @@ #!/usr/bin/env python """Algorithm serialization test.""" from __future__ import print_function 
+import base64 +import json import tempfile import os import cv2 as cv @@ -109,5 +111,96 @@ def test_yml(self): def test_json(self): self.run_fs_test(".json") + def test_base64(self): + fd, fname = tempfile.mkstemp(prefix="opencv_python_sample_filestorage_base64", suffix=".json") + os.close(fd) + np.random.seed(42) + self.write_base64_json(fname) + os.remove(fname) + + @staticmethod + def get_normal_2d_mat(): + rows = 10 + cols = 20 + cn = 3 + + image = np.zeros((rows, cols, cn), np.uint8) + image[:] = (1, 2, 127) + + for i in range(rows): + for j in range(cols): + image[i, j, 1] = (i + j) % 256 + + return image + + @staticmethod + def get_normal_nd_mat(): + shape = (2, 2, 1, 2) + cn = 4 + + image = np.zeros(shape + (cn,), np.float64) + image[:] = (0.888, 0.111, 0.666, 0.444) + + return image + + @staticmethod + def get_empty_2d_mat(): + shape = (0, 0) + cn = 1 + + image = np.zeros(shape + (cn,), np.uint8) + + return image + + @staticmethod + def get_random_mat(): + rows = 8 + cols = 16 + cn = 1 + + image = np.random.rand(rows, cols, cn) + + return image + + @staticmethod + def decode(data): + # strip $base64$ + encoded = data[8:] + + if len(encoded) == 0: + return b'' + + # strip info about datatype and padding + return base64.b64decode(encoded)[24:] + + def write_base64_json(self, fname): + fs = cv.FileStorage(fname, cv.FileStorage_WRITE_BASE64) + + mats = {'normal_2d_mat': self.get_normal_2d_mat(), + 'normal_nd_mat': self.get_normal_nd_mat(), + 'empty_2d_mat': self.get_empty_2d_mat(), + 'random_mat': self.get_random_mat()} + + for name, mat in mats.items(): + fs.write(name, mat) + + fs.release() + + data = {} + with open(fname) as file: + data = json.load(file) + + for name, mat in mats.items(): + buffer = b'' + + if mat.size != 0: + if hasattr(mat, 'tobytes'): + buffer = mat.tobytes() + else: + buffer = mat.tostring() + + self.assertEqual(buffer, self.decode(data[name]['data'])) + + if __name__ == '__main__': NewOpenCVTests.bootstrap() From 
b928ebdd53b3db10864ff675461c44f04eef3e8c Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli <25690309+fpetrogalli@users.noreply.github.com> Date: Thu, 8 Jul 2021 21:21:21 +0100 Subject: [PATCH 052/128] Merge pull request #19985 from fpetrogalli:disable_threads * [build][option] Introduce `OPENCV_DISABLE_THREAD_SUPPORT` option. The option forces the library to build without thread support. * update handling of OPENCV_DISABLE_THREAD_SUPPORT - reduce amount of #if conditions * [to squash] cmake: apply mode vars in toolchains too Co-authored-by: Alexander Alekhin --- 3rdparty/libwebp/CMakeLists.txt | 4 +- CMakeLists.txt | 15 ++ cmake/OpenCVCompilerOptions.cmake | 11 +- cmake/OpenCVUtils.cmake | 6 + cmake/vars/EnableModeVars.cmake | 18 ++ .../vars/OPENCV_DISABLE_THREAD_SUPPORT.cmake | 28 +++ modules/core/CMakeLists.txt | 4 + modules/core/include/opencv2/core/utility.hpp | 20 ++- modules/core/src/async.cpp | 166 ++++++++++++++++++ modules/core/src/parallel.cpp | 10 +- modules/core/src/system.cpp | 112 ++++++++++++ modules/core/src/umatrix.cpp | 37 +++- modules/core/src/utils/logtagmanager.hpp | 4 +- modules/core/test/test_async.cpp | 5 +- modules/core/test/test_utils.cpp | 5 +- modules/ts/CMakeLists.txt | 6 + 16 files changed, 435 insertions(+), 16 deletions(-) create mode 100644 cmake/vars/EnableModeVars.cmake create mode 100644 cmake/vars/OPENCV_DISABLE_THREAD_SUPPORT.cmake diff --git a/3rdparty/libwebp/CMakeLists.txt b/3rdparty/libwebp/CMakeLists.txt index 80ab0b86ab76..9160e2024ca0 100644 --- a/3rdparty/libwebp/CMakeLists.txt +++ b/3rdparty/libwebp/CMakeLists.txt @@ -32,7 +32,9 @@ endif() # Define the library target: # ---------------------------------------------------------------------------------- -add_definitions(-DWEBP_USE_THREAD) +if(NOT OPENCV_DISABLE_THREAD_SUPPORT) + add_definitions(-DWEBP_USE_THREAD) +endif() add_library(${WEBP_LIBRARY} STATIC ${OPENCV_3RDPARTY_EXCLUDE_FROM_ALL} ${lib_srcs} ${lib_hdrs}) if(ANDROID) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 49abe017a5ee..dd862bb1549e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -512,6 +512,7 @@ OCV_OPTION(OPENCV_GENERATE_SETUPVARS "Generate setup_vars* scripts" ON IF (NOT OCV_OPTION(ENABLE_CONFIG_VERIFICATION "Fail build if actual configuration doesn't match requested (WITH_XXX != HAVE_XXX)" OFF) OCV_OPTION(OPENCV_ENABLE_MEMALIGN "Enable posix_memalign or memalign usage" ON) OCV_OPTION(OPENCV_DISABLE_FILESYSTEM_SUPPORT "Disable filesystem support" OFF) +OCV_OPTION(OPENCV_DISABLE_THREAD_SUPPORT "Build the library without multi-threaded code." OFF) OCV_OPTION(ENABLE_PYLINT "Add target with Pylint checks" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) ) OCV_OPTION(ENABLE_FLAKE8 "Add target with Python flake8 checker" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) ) @@ -666,6 +667,11 @@ if(UNIX) set(HAVE_PTHREAD 1) endif() + # Ensure that libpthread is not listed as one of the libraries to pass to the linker. 
+ if (OPENCV_DISABLE_THREAD_SUPPORT) + list(REMOVE_ITEM OPENCV_LINKER_LIBS pthread) + endif() + if(OPENCV_ENABLE_MEMALIGN) CHECK_SYMBOL_EXISTS(posix_memalign stdlib.h HAVE_POSIX_MEMALIGN) CHECK_INCLUDE_FILE(malloc.h HAVE_MALLOC_H) @@ -1459,6 +1465,15 @@ ocv_build_features_string(parallel_status EXCLUSIVE ELSE "none") status("") status(" Parallel framework:" "${parallel_status}") +if (OPENCV_DISABLE_THREAD_SUPPORT) + status("" "Multi thread code explicitly disabled with OPENCV_DISABLE_THREAD_SUPPORT.") + if(HAVE_PTHREADS_PF OR HAVE_HPX OR HAVE_OPENMP OR HAVE_GCD OR HAVE_CONCURRENCY) + message(FATAL_ERROR "Not all parallel frameworks have been disabled (using ${parallel_status}).") + endif() + if(HAVE_PTHREAD) + message(FATAL_ERROR "Thread execution might be in use in some component.") + endif() +endif() if(CV_TRACE OR OPENCV_TRACE) ocv_build_features_string(trace_status EXCLUSIVE diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 6e56a2e34aa0..2917dd33d5ee 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -178,14 +178,17 @@ if(CV_GCC OR CV_CLANG) add_extra_compiler_option(-Wno-long-long) endif() - # We need pthread's - if((UNIX + # We need pthread's, unless we have explicitly disabled multi-thread execution. 
+ if(NOT OPENCV_DISABLE_THREAD_SUPPORT + AND ( + (UNIX AND NOT ANDROID AND NOT (APPLE AND CV_CLANG) AND NOT EMSCRIPTEN + ) + OR (EMSCRIPTEN AND WITH_PTHREADS_PF) # https://github.com/opencv/opencv/issues/20285 ) - OR (EMSCRIPTEN AND WITH_PTHREADS_PF) # https://github.com/opencv/opencv/issues/20285 - ) + ) # TODO add_extra_compiler_option(-pthread) endif() diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 252078bdf776..39445150a911 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -1973,3 +1973,9 @@ if(NOT BUILD_SHARED_LIBS AND (CMAKE_VERSION VERSION_LESS "3.14.0")) else() ocv_update(OPENCV_3RDPARTY_EXCLUDE_FROM_ALL "EXCLUDE_FROM_ALL") endif() + + +# +# Include configuration override settings +# +include(cmake/vars/EnableModeVars.cmake) diff --git a/cmake/vars/EnableModeVars.cmake b/cmake/vars/EnableModeVars.cmake new file mode 100644 index 000000000000..b3c4e79c46d1 --- /dev/null +++ b/cmake/vars/EnableModeVars.cmake @@ -0,0 +1,18 @@ +set(__OCV_MODE_VARS_DIR "${CMAKE_CURRENT_LIST_DIR}") + +macro(ocv_change_mode_var) + set(__var "${ARGV0}") + set(__mode "${ARGV1}") + set(__value "${ARGV2}") + if(__mode STREQUAL "MODIFIED_ACCESS" AND __value) + if(NOT __applied_mode_${__var}) + include("${__OCV_MODE_VARS_DIR}/${__var}.cmake") + set(__applied_mode_${__var} 1) + else() + #message("Mode is already applied: ${__var}") + endif() + endif() +endmacro() + +variable_watch(OPENCV_DISABLE_THREAD_SUPPORT ocv_change_mode_var) +set(OPENCV_DISABLE_THREAD_SUPPORT "${OPENCV_DISABLE_THREAD_SUPPORT}") diff --git a/cmake/vars/OPENCV_DISABLE_THREAD_SUPPORT.cmake b/cmake/vars/OPENCV_DISABLE_THREAD_SUPPORT.cmake new file mode 100644 index 000000000000..5f5fc0204dfc --- /dev/null +++ b/cmake/vars/OPENCV_DISABLE_THREAD_SUPPORT.cmake @@ -0,0 +1,28 @@ +# Force removal of code conditionally compiled with `#if +# HAVE_PTHREAD`. +ocv_update(HAVE_PTHREAD 0) + +# These components are disabled because they require +# multi-threaded execution. 
+ocv_update(WITH_PROTOBUF OFF) +ocv_update(WITH_GSTREAMER OFF) +ocv_update(WITH_IPP OFF) +ocv_update(WITH_ITT OFF) +ocv_update(WITH_OPENCL OFF) +ocv_update(WITH_VA OFF) +ocv_update(WITH_VA_INTEL OFF) + +# Disable bindings +ocv_update(BUILD_opencv_python2 OFF) +ocv_update(BUILD_opencv_python3 OFF) +ocv_update(BUILD_JAVA OFF) +ocv_update(BUILD_opencv_java OFF) + +# These modules require `#include +# <[thread|mutex|condition_variable|future]>` and linkage into +# `libpthread` to work. +ocv_update(BUILD_opencv_objdetect OFF) +ocv_update(BUILD_opencv_gapi OFF) +ocv_update(BUILD_opencv_dnn OFF) + +set(OPJ_USE_THREAD "OFF" CACHE INTERNAL "") diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index b2797ab31fc1..6a969e5fc358 100644 --- a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -153,6 +153,10 @@ if(OPENCV_CORE_EXCLUDE_C_API) ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_EXCLUDE_C_API=1") endif() +if(OPENCV_DISABLE_THREAD_SUPPORT) + ocv_target_compile_definitions(${the_module} PUBLIC "OPENCV_DISABLE_THREAD_SUPPORT=1") +endif() + if(HAVE_HPX) ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}") endif() diff --git a/modules/core/include/opencv2/core/utility.hpp b/modules/core/include/opencv2/core/utility.hpp index f0368027aa6a..108c0d93e749 100644 --- a/modules/core/include/opencv2/core/utility.hpp +++ b/modules/core/include/opencv2/core/utility.hpp @@ -714,9 +714,27 @@ void Mat::forEach_impl(const Functor& operation) { /////////////////////////// Synchronization Primitives /////////////////////////////// #if !defined(_M_CEE) +#ifndef OPENCV_DISABLE_THREAD_SUPPORT typedef std::recursive_mutex Mutex; typedef std::lock_guard AutoLock; -#endif +#else // OPENCV_DISABLE_THREAD_SUPPORT +// Custom (failing) implementation of `std::recursive_mutex`. 
+struct Mutex { + void lock(){ + CV_Error(cv::Error::StsNotImplemented, + "cv::Mutex is disabled by OPENCV_DISABLE_THREAD_SUPPORT=ON"); + } + void unlock(){ + CV_Error(cv::Error::StsNotImplemented, + "cv::Mutex is disabled by OPENCV_DISABLE_THREAD_SUPPORT=ON"); + } +}; +// Stub for cv::AutoLock when threads are disabled. +struct AutoLock { + AutoLock(Mutex &) { } +}; +#endif // OPENCV_DISABLE_THREAD_SUPPORT +#endif // !defined(_M_CEE) /** @brief Designed for command line parsing diff --git a/modules/core/src/async.cpp b/modules/core/src/async.cpp index a2f4612365b9..78c0a1ee8116 100644 --- a/modules/core/src/async.cpp +++ b/modules/core/src/async.cpp @@ -14,6 +14,7 @@ #define CV_LOG_STRIP_LEVEL CV_LOG_LEVEL_DEBUG + 1 #include +#ifndef OPENCV_DISABLE_THREAD_SUPPORT #ifdef CV_CXX11 #include @@ -236,6 +237,171 @@ struct AsyncArray::Impl } }; +} // namespace + +#else // OPENCV_DISABLE_THREAD_SUPPORT + +namespace cv { + +// no threading +struct AsyncArray::Impl +{ + int refcount; + void addrefFuture() CV_NOEXCEPT { refcount_future++; refcount++; } + void releaseFuture() CV_NOEXCEPT { refcount_future--; if (0 == --refcount) delete this; } + int refcount_future; + void addrefPromise() CV_NOEXCEPT { refcount_promise++; refcount++; } \ + void releasePromise() CV_NOEXCEPT { refcount_promise--; if (0 == --refcount) delete this; } + int refcount_promise; + + mutable bool has_result; // Mat, UMat or exception + + mutable cv::Ptr result_mat; + mutable cv::Ptr result_umat; + + + bool has_exception; +#if CV__EXCEPTION_PTR + std::exception_ptr exception; +#endif + cv::Exception cv_exception; + + mutable bool result_is_fetched; + + bool future_is_returned; + + Impl() + : refcount(1), refcount_future(0), refcount_promise(1) + , has_result(false) + , has_exception(false) + , result_is_fetched(false) + , future_is_returned(false) + { + // nothing + } + + ~Impl() + { + if (has_result && !result_is_fetched) + { + CV_LOG_INFO(NULL, "Asynchronous result has not been fetched"); + } + } + + 
bool get(OutputArray dst, int64 timeoutNs) const + { + CV_Assert(!result_is_fetched); + if (!has_result) + { + CV_UNUSED(timeoutNs); + CV_Error(Error::StsError, "Result is not produced (unable to wait for result in OPENCV_DISABLE_THREAD_SUPPORT mode)"); + } + if (!result_mat.empty()) + { + dst.move(*result_mat.get()); + result_mat.release(); + result_is_fetched = true; + return true; + } + if (!result_umat.empty()) + { + dst.move(*result_umat.get()); + result_umat.release(); + result_is_fetched = true; + return true; + } +#if CV__EXCEPTION_PTR + if (has_exception && exception) + { + result_is_fetched = true; + std::rethrow_exception(exception); + } +#endif + if (has_exception) + { + result_is_fetched = true; + throw cv_exception; + } + CV_Error(Error::StsInternal, "AsyncArray: invalid state of 'has_result = true'"); + return false; + } + + bool valid() const CV_NOEXCEPT + { + if (result_is_fetched) + return false; + if (refcount_promise == 0 && !has_result) + return false; + return true; + } + + bool wait_for(int64 timeoutNs) const + { + CV_Assert(valid()); + if (has_result) + return has_result; + if (timeoutNs == 0) + return has_result; + CV_Error(Error::StsError, "Unable to wait in OPENCV_DISABLE_THREAD_SUPPORT mode"); + } + + AsyncArray getArrayResult() + { + CV_Assert(refcount_future == 0); + AsyncArray result; + addrefFuture(); + result.p = this; + future_is_returned = true; + return result; + } + + void setValue(InputArray value) + { + if (future_is_returned && refcount_future == 0) + CV_Error(Error::StsError, "Associated AsyncArray has been destroyed"); + CV_Assert(!has_result); + int k = value.kind(); + if (k == _InputArray::UMAT) + { + result_umat = makePtr(); + value.copyTo(*result_umat.get()); + } + else + { + result_mat = makePtr(); + value.copyTo(*result_mat.get()); + } + has_result = true; + } + +#if CV__EXCEPTION_PTR + void setException(std::exception_ptr e) + { + if (future_is_returned && refcount_future == 0) + CV_Error(Error::StsError, "Associated 
AsyncArray has been destroyed"); + CV_Assert(!has_result); + has_exception = true; + exception = e; + has_result = true; + } +#endif + + void setException(const cv::Exception e) + { + if (future_is_returned && refcount_future == 0) + CV_Error(Error::StsError, "Associated AsyncArray has been destroyed"); + CV_Assert(!has_result); + has_exception = true; + cv_exception = e; + has_result = true; + } +}; + +} + +#endif // OPENCV_DISABLE_THREAD_SUPPORT + +namespace cv { AsyncArray::AsyncArray() CV_NOEXCEPT : p(NULL) diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 7bb7e4633dcd..81ddd0c5ddce 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -72,7 +72,7 @@ #endif #endif -#if defined CV_CXX11 +#ifndef OPENCV_DISABLE_THREAD_SUPPORT #include #endif @@ -884,6 +884,7 @@ T minNonZero(const T& val_1, const T& val_2) return (val_1 != 0) ? val_1 : val_2; } +#ifndef OPENCV_DISABLE_THREAD_SUPPORT static int getNumberOfCPUs_() { @@ -986,6 +987,13 @@ int getNumberOfCPUs() return nCPUs; // cached value } +#else // OPENCV_DISABLE_THREAD_SUPPORT +int getNumberOfCPUs() +{ + return 1; +} +#endif // OPENCV_DISABLE_THREAD_SUPPORT + const char* currentParallelFramework() { std::shared_ptr& api = getCurrentParallelForAPI(); diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 441457d50fd2..777efceca021 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -216,7 +216,9 @@ std::wstring GetTempFileNameWinRT(std::wstring prefix) #endif #else +#ifndef OPENCV_DISABLE_THREAD_SUPPORT #include +#endif #include #include @@ -1366,6 +1368,8 @@ bool __termination = false; namespace details { +#ifndef OPENCV_DISABLE_THREAD_SUPPORT + #ifdef _WIN32 #ifdef _MSC_VER #pragma warning(disable:4505) // unreferenced local function has been removed @@ -1778,14 +1782,122 @@ static void WINAPI opencv_fls_destructor(void* pData) #endif // CV_USE_FLS #endif // _WIN32 +#else // OPENCV_DISABLE_THREAD_SUPPORT + 
+// no threading (OPENCV_DISABLE_THREAD_SUPPORT=ON) +class TlsStorage +{ +public: + TlsStorage() + { + slots.reserve(32); + } + ~TlsStorage() + { + for (size_t slotIdx = 0; slotIdx < slots.size(); slotIdx++) + { + SlotInfo& s = slots[slotIdx]; + TLSDataContainer* container = s.container; + if (container && s.data) + { + container->deleteDataInstance(s.data); // Can't use from SlotInfo destructor + s.data = nullptr; + } + } + } + + // Reserve TLS storage index + size_t reserveSlot(TLSDataContainer* container) + { + size_t slotsSize = slots.size(); + for (size_t slot = 0; slot < slotsSize; slot++) + { + SlotInfo& s = slots[slot]; + if (s.container == NULL) + { + CV_Assert(!s.data); + s.container = container; + return slot; + } + } + + // create new slot + slots.push_back(SlotInfo(container)); + return slotsSize; + } + + // Release TLS storage index and pass associated data to caller + void releaseSlot(size_t slotIdx, std::vector &dataVec, bool keepSlot = false) + { + CV_Assert(slotIdx < slots.size()); + SlotInfo& s = slots[slotIdx]; + void* data = s.data; + if (data) + { + dataVec.push_back(data); + s.data = nullptr; + } + if (!keepSlot) + { + s.container = NULL; // mark slot as free (see reserveSlot() implementation) + } + } + + // Get data by TLS storage index + void* getData(size_t slotIdx) const + { + CV_Assert(slotIdx < slots.size()); + const SlotInfo& s = slots[slotIdx]; + return s.data; + } + + // Gather data from threads by TLS storage index + void gather(size_t slotIdx, std::vector &dataVec) + { + CV_Assert(slotIdx < slots.size()); + SlotInfo& s = slots[slotIdx]; + void* data = s.data; + if (data) + dataVec.push_back(data); + return; + } + + // Set data to storage index + void setData(size_t slotIdx, void* pData) + { + CV_Assert(slotIdx < slots.size()); + SlotInfo& s = slots[slotIdx]; + s.data = pData; + } + +private: + struct SlotInfo + { + SlotInfo(TLSDataContainer* _container) : container(_container), data(nullptr) {} + TLSDataContainer* container; // 
attached container (to dispose data) + void* data; + }; + std::vector slots; +}; + +static TlsStorage& getTlsStorage() +{ + static TlsStorage g_storage; // no threading + return g_storage; +} + +#endif // OPENCV_DISABLE_THREAD_SUPPORT + } // namespace details using namespace details; void releaseTlsStorageThread() { +#ifndef OPENCV_DISABLE_THREAD_SUPPORT if (!g_isTlsStorageInitialized) return; // nothing to release, so prefer to avoid creation of new global structures getTlsStorage().releaseThread(); +#endif } TLSDataContainer::TLSDataContainer() diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index bf5dfb68a318..bbb34a725604 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -56,10 +56,6 @@ void setSize(UMat& m, int _dims, const int* _sz, const size_t* _steps, void updateContinuityFlag(UMat& m); void finalizeHdr(UMat& m); -// it should be a prime number for the best hash function -enum { UMAT_NLOCKS = 31 }; -static Mutex umatLocks[UMAT_NLOCKS]; - UMatData::UMatData(const MatAllocator* allocator) { prevAllocator = currAllocator = allocator; @@ -131,6 +127,12 @@ UMatData::~UMatData() } } +#ifndef OPENCV_DISABLE_THREAD_SUPPORT + +// it should be a prime number for the best hash function +enum { UMAT_NLOCKS = 31 }; +static Mutex umatLocks[UMAT_NLOCKS]; + static size_t getUMatDataLockIndex(const UMatData* u) { size_t idx = ((size_t)(void*)u) % UMAT_NLOCKS; @@ -228,6 +230,33 @@ UMatDataAutoLock::~UMatDataAutoLock() getUMatDataAutoLocker().release(u1, u2); } +#else + +void UMatData::lock() +{ + // nothing in OPENCV_DISABLE_THREAD_SUPPORT mode +} + +void UMatData::unlock() +{ + // nothing in OPENCV_DISABLE_THREAD_SUPPORT mode +} + +UMatDataAutoLock::UMatDataAutoLock(UMatData* u) : u1(u), u2(NULL) +{ + // nothing in OPENCV_DISABLE_THREAD_SUPPORT mode +} +UMatDataAutoLock::UMatDataAutoLock(UMatData* u1_, UMatData* u2_) : u1(u1_), u2(u2_) +{ + // nothing in OPENCV_DISABLE_THREAD_SUPPORT mode +} 
+UMatDataAutoLock::~UMatDataAutoLock() +{ + // nothing in OPENCV_DISABLE_THREAD_SUPPORT mode +} + +#endif // OPENCV_DISABLE_THREAD_SUPPORT + //////////////////////////////// UMat //////////////////////////////// UMat::UMat(UMatUsageFlags _usageFlags) CV_NOEXCEPT diff --git a/modules/core/src/utils/logtagmanager.hpp b/modules/core/src/utils/logtagmanager.hpp index 29a1776ada21..ab4bb9b7d3d4 100644 --- a/modules/core/src/utils/logtagmanager.hpp +++ b/modules/core/src/utils/logtagmanager.hpp @@ -37,8 +37,8 @@ class LogTagManager // also, extensible functions (accepting user-provided callback) are not allowed // to call LogTagManger (to prevent iterator invalidation), which needs enforced // with a non-recursive mutex. - using MutexType = std::mutex; - using LockType = std::lock_guard; + using MutexType = cv::Mutex; + using LockType = cv::AutoLock; enum class MatchingScope { diff --git a/modules/core/test/test_async.cpp b/modules/core/test/test_async.cpp index f898a22878d2..58bcfddcd769 100644 --- a/modules/core/test/test_async.cpp +++ b/modules/core/test/test_async.cpp @@ -7,7 +7,7 @@ #include -#ifdef CV_CXX11 +#if defined(CV_CXX11) && !defined(OPENCV_DISABLE_THREAD_SUPPORT) #include #include #endif @@ -85,7 +85,8 @@ TEST(Core_Async, LikePythonTest) } -#ifdef CV_CXX11 +#if defined(CV_CXX11) && !defined(OPENCV_DISABLE_THREAD_SUPPORT) + TEST(Core_Async, AsyncThread_Simple) { Mat m(3, 3, CV_32FC1, Scalar::all(5.0f)); diff --git a/modules/core/test/test_utils.cpp b/modules/core/test/test_utils.cpp index ed5f34603de5..c31ca75667e9 100644 --- a/modules/core/test/test_utils.cpp +++ b/modules/core/test/test_utils.cpp @@ -8,9 +8,12 @@ #include "opencv2/core/utils/logger.hpp" #include "opencv2/core/utils/buffer_area.private.hpp" -#include "test_utils_tls.impl.hpp" #include "opencv2/core/utils/filesystem.private.hpp" +#ifndef OPENCV_DISABLE_THREAD_SUPPORT +#include "test_utils_tls.impl.hpp" +#endif + namespace opencv_test { namespace { static const char * const keys = diff --git 
a/modules/ts/CMakeLists.txt b/modules/ts/CMakeLists.txt index f95bed079383..c1d249ea149a 100644 --- a/modules/ts/CMakeLists.txt +++ b/modules/ts/CMakeLists.txt @@ -41,3 +41,9 @@ endif() if(NOT OPENCV_TESTS_CONFIG_STR STREQUAL "${__content}") file(WRITE "${OPENCV_TESTS_CONFIG_FILE}" "${OPENCV_TESTS_CONFIG_STR}") endif() + +if(OPENCV_DISABLE_THREAD_SUPPORT) + # This is required to disable threads in the ts module, as + # described in `ts_gtest.h`. + ocv_target_compile_definitions(${the_module} PUBLIC GTEST_HAS_PTHREAD=0) +endif() From fd16222613f06201fa1c9b503aee3cdb1b13fa8b Mon Sep 17 00:00:00 2001 From: berak Date: Fri, 9 Jul 2021 13:21:44 +0200 Subject: [PATCH 053/128] dnn: update links for the colorization samples --- samples/dnn/colorization.cpp | 2 +- samples/dnn/colorization.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/dnn/colorization.cpp b/samples/dnn/colorization.cpp index b68e0ec4d8bf..6d751590d10d 100644 --- a/samples/dnn/colorization.cpp +++ b/samples/dnn/colorization.cpp @@ -50,7 +50,7 @@ int main(int argc, char **argv) " https://github.com/richzhang/colorization\n" "Download caffemodel and prototxt files:\n" " http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/colorization_release_v2.caffemodel\n" - " https://raw.githubusercontent.com/richzhang/colorization/master/colorization/models/colorization_deploy_v2.prototxt\n"; + " https://raw.githubusercontent.com/richzhang/colorization/caffe/models/colorization_deploy_v2.prototxt\n"; const string keys = "{ h help | | print this help message }" "{ proto | colorization_deploy_v2.prototxt | model configuration }" diff --git a/samples/dnn/colorization.py b/samples/dnn/colorization.py index c9eb2af3b668..5bdef9793e30 100644 --- a/samples/dnn/colorization.py +++ b/samples/dnn/colorization.py @@ -1,6 +1,6 @@ # Script is based on https://github.com/richzhang/colorization/blob/master/colorization/colorize.py -# To download the caffemodel and the prototxt, 
see: https://github.com/richzhang/colorization/tree/master/colorization/models -# To download pts_in_hull.npy, see: https://github.com/richzhang/colorization/blob/master/colorization/resources/pts_in_hull.npy +# To download the caffemodel and the prototxt, see: https://github.com/richzhang/colorization/tree/caffe/colorization/models +# To download pts_in_hull.npy, see: https://github.com/richzhang/colorization/tree/caffe/colorization/resources/pts_in_hull.npy import numpy as np import argparse import cv2 as cv From 34b65be44a265cac7921a63bb20b09786802de99 Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Fri, 9 Jul 2021 19:15:45 +0300 Subject: [PATCH 054/128] fix find_package cache pollution --- cmake/OpenCVDetectVTK.cmake | 60 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/cmake/OpenCVDetectVTK.cmake b/cmake/OpenCVDetectVTK.cmake index b8cf36007cf2..57c154475c67 100644 --- a/cmake/OpenCVDetectVTK.cmake +++ b/cmake/OpenCVDetectVTK.cmake @@ -1,34 +1,34 @@ -# VTK 9.0 if(NOT VTK_FOUND) - find_package(VTK 9 QUIET NAMES vtk COMPONENTS - FiltersExtraction - FiltersSources - FiltersTexture - IOExport - IOGeometry - IOPLY - InteractionStyle - RenderingCore - RenderingLOD - RenderingOpenGL2 - NO_MODULE) -endif() - -# VTK 6.x components -if(NOT VTK_FOUND) - find_package(VTK QUIET COMPONENTS vtkInteractionStyle vtkRenderingLOD vtkIOPLY vtkFiltersTexture vtkRenderingFreeType vtkIOExport NO_MODULE) - IF(VTK_FOUND) - IF(VTK_RENDERING_BACKEND) #in vtk 7, the rendering backend is exported as a var. 
- find_package(VTK QUIET COMPONENTS vtkRendering${VTK_RENDERING_BACKEND} vtkInteractionStyle vtkRenderingLOD vtkIOPLY vtkFiltersTexture vtkRenderingFreeType vtkIOExport vtkIOGeometry NO_MODULE) - ELSE(VTK_RENDERING_BACKEND) - find_package(VTK QUIET COMPONENTS vtkRenderingOpenGL vtkInteractionStyle vtkRenderingLOD vtkIOPLY vtkFiltersTexture vtkRenderingFreeType vtkIOExport NO_MODULE) - ENDIF(VTK_RENDERING_BACKEND) - ENDIF(VTK_FOUND) -endif() - -# VTK 5.x components -if(NOT VTK_FOUND) - find_package(VTK QUIET COMPONENTS vtkCommon NO_MODULE) + find_package(VTK QUIET NAMES vtk VTK) + if(VTK_FOUND) + if(VTK_VERSION VERSION_EQUAL "9") # VTK 9.0 + find_package(VTK 9 QUIET NAMES vtk COMPONENTS + FiltersExtraction + FiltersSources + FiltersTexture + IOExport + IOGeometry + IOPLY + InteractionStyle + RenderingCore + RenderingLOD + RenderingOpenGL2 + NO_MODULE) + elseif(VTK_VERSION VERSION_GREATER "5") # VTK 6.x components + find_package(VTK QUIET COMPONENTS vtkInteractionStyle vtkRenderingLOD vtkIOPLY vtkFiltersTexture vtkRenderingFreeType vtkIOExport NO_MODULE) + IF(VTK_FOUND) + IF(VTK_RENDERING_BACKEND) #in vtk 7, the rendering backend is exported as a var. 
+ find_package(VTK QUIET COMPONENTS vtkRendering${VTK_RENDERING_BACKEND} vtkInteractionStyle vtkRenderingLOD vtkIOPLY vtkFiltersTexture vtkRenderingFreeType vtkIOExport vtkIOGeometry NO_MODULE) + ELSE(VTK_RENDERING_BACKEND) + find_package(VTK QUIET COMPONENTS vtkRenderingOpenGL vtkInteractionStyle vtkRenderingLOD vtkIOPLY vtkFiltersTexture vtkRenderingFreeType vtkIOExport NO_MODULE) + ENDIF(VTK_RENDERING_BACKEND) + ENDIF(VTK_FOUND) + elseif(VTK_VERSION VERSION_EQUAL "5") # VTK 5.x components + find_package(VTK QUIET COMPONENTS vtkCommon NO_MODULE) + else() + set(VTK_FOUND FALSE) + endif() + endif() endif() if(NOT VTK_FOUND) From 167a12028daa7b8b0f52fc828b9693c161cc01f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9sar=20Gouveia?= <33461054+cesarpgouveia@users.noreply.github.com> Date: Fri, 9 Jul 2021 19:21:56 +0100 Subject: [PATCH 055/128] Merge pull request #20374 from cesarpgouveia:bugfix/fix_load_onnxModel_debug * Fix bug while loading onnx model in debug * dnn: fix other .at using Co-authored-by: Alexander Alekhin --- modules/dnn/src/onnx/onnx_importer.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 3668c9b51e5d..db16cfd56d8b 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -788,7 +788,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) int blob_total = blob.total(); if (blob_total == 1) { layerParams.type = "Power"; - layerParams.set("shift", (isSub ? -1 : 1) * blob.at(0)); + layerParams.set("shift", (isSub ? 
-1 : 1) * blob.ptr()[0]); } else { MatShape inpShape = outShapes[node_proto.input(1 - const_blob_id)]; @@ -871,7 +871,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) blob.convertTo(blob, CV_32F); layerParams.type = "Power"; - layerParams.set("power", blob.at(0)); + layerParams.set("power", blob.ptr()[0]); } else if (layer_type == "Max") { @@ -1150,7 +1150,8 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) Mat blob = getBlob(node_proto, constId); blob = blob.reshape(1, 1); if (blob.total() == 1) { - float coeff = isDiv ? 1.0 / blob.at(0) : blob.at(0); + float blob_value = blob.ptr()[0]; + float coeff = isDiv ? 1.0 / blob_value : blob_value; layerParams.set("scale", coeff); layerParams.type = "Power"; } @@ -1188,12 +1189,14 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) { if (inp0.total() == 1) { - float coeff = isDiv ? 1.0 / inp0.at(0) : inp0.at(0); + float inp0_value = inp0.ptr()[0]; + float coeff = isDiv ? 1.0 / inp0_value : inp0_value; multiply(inp1, coeff, out); } else { - float coeff = isDiv ? 1.0 / inp1.at(0) : inp1.at(0); + float inp1_value = inp1.ptr()[0]; + float coeff = isDiv ? 
1.0 / inp1_value : inp1_value; multiply(inp0, coeff, out); } @@ -1605,7 +1608,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) if (node_proto.input_size() == 3) { Mat value = getBlob(node_proto, 2); - layerParams.set("value", value.at(0)); + layerParams.set("value", value.ptr()[0]); } } } From 3f3c5de851dc8e28240a5fa292708f3bdcfe8851 Mon Sep 17 00:00:00 2001 From: Sergey Ivanov Date: Fri, 9 Jul 2021 21:46:38 +0300 Subject: [PATCH 056/128] Merge pull request #20372 from sivanov-work:serialize GAPI: Implement ConstValue serialize/deserialize * Implement ConstValue ser/deser * Fix MacOs compile issue * Fix Docs compile * Change uint32 -> uint64 for serialize tag --- .../src/backends/common/serialization.cpp | 48 +++++++++++++++++-- .../src/backends/common/serialization.hpp | 6 +++ .../test/s11n/gapi_sample_pipelines_s11n.cpp | 29 +++++++++++ 3 files changed, 79 insertions(+), 4 deletions(-) diff --git a/modules/gapi/src/backends/common/serialization.cpp b/modules/gapi/src/backends/common/serialization.cpp index 7389bacb02f0..f2c956874c1e 100644 --- a/modules/gapi/src/backends/common/serialization.cpp +++ b/modules/gapi/src/backends/common/serialization.cpp @@ -32,6 +32,14 @@ void putData(GSerialized& s, const cv::gimpl::GModel::ConstGraph& cg, const ade: }); if (s.m_datas.end() == it) { s.m_datas.push_back(gdata); + + if (cg.metadata(nh).contains()) { + size_t datas_num = s.m_datas.size() - 1; + GAPI_DbgAssert(datas_num <= static_cast(std::numeric_limits::max())); + GSerialized::data_tag_t tag = static_cast(datas_num); + s.m_const_datas.emplace(tag, + cg.metadata(nh).get()); + } } } @@ -42,11 +50,20 @@ void putOp(GSerialized& s, const cv::gimpl::GModel::ConstGraph& cg, const ade::N s.m_ops.push_back(op); } -void mkDataNode(ade::Graph& g, const cv::gimpl::Data& data) { +ade::NodeHandle mkDataNode(ade::Graph& g, const cv::gimpl::Data& data) { cv::gimpl::GModel::Graph gm(g); auto nh = gm.createNode(); 
gm.metadata(nh).set(cv::gimpl::NodeType{cv::gimpl::NodeType::DATA}); gm.metadata(nh).set(data); + return nh; +} + +ade::NodeHandle mkConstDataNode(ade::Graph& g, const cv::gimpl::Data& data, const cv::gimpl::ConstValue& const_data) { + auto nh = mkDataNode(g, data); + + cv::gimpl::GModel::Graph gm(g); + gm.metadata(nh).set(const_data); + return nh; } void mkOpNode(ade::Graph& g, const cv::gimpl::Op& op) { @@ -624,6 +641,10 @@ IOStream& operator<< (IOStream& os, const cv::gimpl::Data &d) { return os << d.shape << d.rc << d.meta << d.storage << d.kind; } +IOStream& operator<< (IOStream& os, const cv::gimpl::ConstValue &cd) { + return os << cd.arg; +} + namespace { template @@ -667,6 +688,9 @@ IIStream& operator>> (IIStream& is, cv::gimpl::Data &d) { return is; } +IIStream& operator>> (IIStream& is, cv::gimpl::ConstValue &cd) { + return is >> cd.arg; +} IOStream& operator<< (IOStream& os, const cv::gimpl::DataObjectCounter &c) { return os << c.m_next_data_id; @@ -709,18 +733,34 @@ void serialize( IOStream& os } s.m_counter = cg.metadata().get(); s.m_proto = p; - os << s.m_ops << s.m_datas << s.m_counter << s.m_proto; + os << s.m_ops << s.m_datas << s.m_counter << s.m_proto << s.m_const_datas; } GSerialized deserialize(IIStream &is) { GSerialized s; - is >> s.m_ops >> s.m_datas >> s.m_counter >> s.m_proto; + is >> s.m_ops >> s.m_datas >> s.m_counter >> s.m_proto >> s.m_const_datas; return s; } void reconstruct(const GSerialized &s, ade::Graph &g) { GAPI_Assert(g.nodes().empty()); - for (const auto& d : s.m_datas) cv::gapi::s11n::mkDataNode(g, d); + + GSerialized::data_tag_t tag = 0; + for (const auto& d : s.m_datas) { + if (d.storage == gimpl::Data::Storage::CONST_VAL) { + auto cit = s.m_const_datas.find(tag); + if (cit == s.m_const_datas.end()) { + util::throw_error(std::logic_error("Cannot reconstruct graph: Data::Storage::CONST_VAL by tag: " + + std::to_string(tag) + " requires ConstValue")); + } + + mkConstDataNode(g, d, cit->second); + } else { + 
cv::gapi::s11n::mkDataNode(g, d); + } + + tag ++; + } for (const auto& op : s.m_ops) cv::gapi::s11n::mkOpNode(g, op); cv::gapi::s11n::linkNodes(g); diff --git a/modules/gapi/src/backends/common/serialization.hpp b/modules/gapi/src/backends/common/serialization.hpp index b4204ca64e38..529fdc635d5e 100644 --- a/modules/gapi/src/backends/common/serialization.hpp +++ b/modules/gapi/src/backends/common/serialization.hpp @@ -31,6 +31,9 @@ struct GSerialized { std::vector m_datas; cv::gimpl::DataObjectCounter m_counter; cv::gimpl::Protocol m_proto; + + using data_tag_t = uint64_t; + std::map m_const_datas; }; //////////////////////////////////////////////////////////////////////////////// @@ -97,6 +100,9 @@ GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::gimpl::Op &op); GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::gimpl::Data &op); GAPI_EXPORTS IIStream& operator>> (IIStream& is, cv::gimpl::Data &op); +GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::gimpl::ConstValue &cd); +GAPI_EXPORTS IIStream& operator>> (IIStream& os, cv::gimpl::ConstValue &cd); + // Render types //////////////////////////////////////////////////////////////// GAPI_EXPORTS IOStream& operator<< (IOStream& os, const cv::gapi::wip::draw::Text &t); diff --git a/modules/gapi/test/s11n/gapi_sample_pipelines_s11n.cpp b/modules/gapi/test/s11n/gapi_sample_pipelines_s11n.cpp index 885457cd9063..c3d21a3f6f8c 100644 --- a/modules/gapi/test/s11n/gapi_sample_pipelines_s11n.cpp +++ b/modules/gapi/test/s11n/gapi_sample_pipelines_s11n.cpp @@ -806,4 +806,33 @@ TEST(S11N, Pipeline_Render_RGB) EXPECT_EQ(cv::norm(input, ref_mat), 0); } + +TEST(S11N, Pipeline_Const_GScalar) +{ + static constexpr auto in_scalar = 10; + + cv::GMat a; + cv::GScalar s; + + cv::GComputation computation(GIn(a), GOut(cv::gapi::addC(a, in_scalar))); + auto p = cv::gapi::serialize(computation); + auto deserialized_computation = cv::gapi::deserialize(p); + + cv::Mat in_mat = cv::Mat::eye(32, 32, CV_8UC1); + cv::Mat 
ref_mat; + cv::add(in_mat, in_scalar, ref_mat); + + cv::Mat out_mat; + computation.apply(cv::gin(in_mat/*, in_scalar*/), cv::gout(out_mat)); + EXPECT_EQ(0, cvtest::norm(out_mat, ref_mat, NORM_INF)); + + out_mat = cv::Mat(); + deserialized_computation.apply(cv::gin(in_mat/*, in_scalar*/), cv::gout(out_mat)); + EXPECT_EQ(0, cvtest::norm(out_mat, ref_mat, NORM_INF)); + + out_mat = cv::Mat(); + auto cc = deserialized_computation.compile(cv::descr_of(in_mat)); + cc(cv::gin(in_mat/*, in_scalar*/), cv::gout(out_mat)); + EXPECT_EQ(0, cvtest::norm(out_mat, ref_mat, NORM_INF)); +} } // namespace opencv_test From fd22e9829815860d25142f9e42aa68fb8f192b3f Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Mon, 12 Jul 2021 19:32:11 +0000 Subject: [PATCH 057/128] build(winpack_dldt): avoid stale sysroot contents --- platforms/winpack_dldt/build_package.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/platforms/winpack_dldt/build_package.py b/platforms/winpack_dldt/build_package.py index 6fde62241a5e..bd4355e1cdf4 100644 --- a/platforms/winpack_dldt/build_package.py +++ b/platforms/winpack_dldt/build_package.py @@ -189,7 +189,10 @@ def __init__(self, config): if self.srcdir is None: self.srcdir = prepare_dir(self.outdir / 'sources', clean=clean_src_dir) self.build_dir = prepare_dir(self.outdir / 'build', clean=self.config.clean_dldt) - self.sysrootdir = prepare_dir(self.outdir / 'sysroot', clean=self.config.clean_dldt) + self.sysrootdir = prepare_dir(self.outdir / 'sysroot', clean=self.config.clean_dldt or self.config.clean_dldt_sysroot) + if not (self.config.clean_dldt or self.config.clean_dldt_sysroot): + _ = prepare_dir(self.sysrootdir / 'bin', clean=True) # always clean sysroot/bin (package files) + _ = prepare_dir(self.sysrootdir / 'etc', clean=True) # always clean sysroot/etc (package files) if self.config.build_subst_drive: if os.path.exists(self.config.build_subst_drive + ':\\'): @@ -483,8 +486,9 @@ def main(): 
parser.add_argument('--cmake_option', action='append', help='Append OpenCV CMake option') parser.add_argument('--cmake_option_dldt', action='append', help='Append CMake option for DLDT project') - parser.add_argument('--clean_dldt', action='store_true', help='Clear DLDT build and sysroot directories') - parser.add_argument('--clean_opencv', action='store_true', help='Clear OpenCV build directory') + parser.add_argument('--clean_dldt', action='store_true', help='Clean DLDT build and sysroot directories') + parser.add_argument('--clean_dldt_sysroot', action='store_true', help='Clean DLDT sysroot directories') + parser.add_argument('--clean_opencv', action='store_true', help='Clean OpenCV build directory') parser.add_argument('--build_debug', action='store_true', help='Build debug binaries') parser.add_argument('--build_tests', action='store_true', help='Build OpenCV tests') From 4af1f31a3fb3530499826e516fdc5b5121cdf600 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 13 Jul 2021 09:15:03 +0000 Subject: [PATCH 058/128] cmake: use relative path for mode vars --- cmake/OpenCVUtils.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index 39445150a911..da0ee3b36bc6 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -1978,4 +1978,4 @@ endif() # # Include configuration override settings # -include(cmake/vars/EnableModeVars.cmake) +include("${CMAKE_CURRENT_LIST_DIR}/vars/EnableModeVars.cmake") From 5179e37bd1670e025735ee9ed9aa7e4cbe38ccd9 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Tue, 13 Jul 2021 22:31:46 +0300 Subject: [PATCH 059/128] Merge pull request #20329 from smirnov-alexey:as/mediaframe_serialization [G-API]: Add serialization mechanism for cv::MediaFrame * Stub initial interface * Fix templates for deserialization * Fix tests * Disable a warning on windows * Address review comments * Change enable_ifs to other template helpers * Resolve ambiguous template * 
Fix warnings in docs --- modules/gapi/include/opencv2/gapi/media.hpp | 28 +++++ modules/gapi/include/opencv2/gapi/rmat.hpp | 10 +- modules/gapi/include/opencv2/gapi/s11n.hpp | 67 +++++++--- .../gapi/include/opencv2/gapi/util/util.hpp | 24 +++- modules/gapi/src/api/media.cpp | 4 + modules/gapi/src/api/s11n.cpp | 14 ++- .../src/backends/common/serialization.cpp | 14 ++- modules/gapi/test/s11n/gapi_s11n_tests.cpp | 116 ++++++++++++++++++ 8 files changed, 243 insertions(+), 34 deletions(-) diff --git a/modules/gapi/include/opencv2/gapi/media.hpp b/modules/gapi/include/opencv2/gapi/media.hpp index aa7d6d6a1f4e..19aaef3fd1a5 100644 --- a/modules/gapi/include/opencv2/gapi/media.hpp +++ b/modules/gapi/include/opencv2/gapi/media.hpp @@ -15,6 +15,16 @@ #include #include +// Forward declaration +namespace cv { +namespace gapi { +namespace s11n { +struct IOStream; +struct IIStream; +} // namespace s11n +} // namespace gapi +} // namespace cv + namespace cv { /** \addtogroup gapi_data_structures @@ -125,6 +135,16 @@ class GAPI_EXPORTS MediaFrame { return dynamic_cast(adapter); } + /** + * @brief Serialize MediaFrame's data to a byte array. + * + * @note The actual logic is implemented by frame's adapter class. + * Does nothing by default. + * + * @param os Bytestream to store serialized MediaFrame data in. + */ + void serialize(cv::gapi::s11n::IOStream& os) const; + private: struct Priv; std::shared_ptr m; @@ -221,6 +241,14 @@ class GAPI_EXPORTS MediaFrame::IAdapter { // FIXME: design a better solution // The default implementation does nothing virtual cv::util::any blobParams() const; + virtual void serialize(cv::gapi::s11n::IOStream&) { + GAPI_Assert(false && "Generic serialize method of MediaFrame::IAdapter does nothing by default. " + "Please, implement it in derived class to properly serialize the object."); + } + virtual void deserialize(cv::gapi::s11n::IIStream&) { + GAPI_Assert(false && "Generic deserialize method of MediaFrame::IAdapter does nothing by default. 
" + "Please, implement it in derived class to properly deserialize the object."); + } }; /** @} */ diff --git a/modules/gapi/include/opencv2/gapi/rmat.hpp b/modules/gapi/include/opencv2/gapi/rmat.hpp index cc27f48664cd..6b289001e7f3 100644 --- a/modules/gapi/include/opencv2/gapi/rmat.hpp +++ b/modules/gapi/include/opencv2/gapi/rmat.hpp @@ -14,8 +14,8 @@ namespace cv { namespace gapi { namespace s11n { - struct IOStream; - struct IIStream; +struct IOStream; +struct IIStream; } // namespace s11n } // namespace gapi } // namespace cv @@ -111,10 +111,12 @@ class GAPI_EXPORTS RMat // is transferred to the device when the view is destroyed virtual View access(Access) = 0; virtual void serialize(cv::gapi::s11n::IOStream&) { - GAPI_Assert(false && "Generic serialize method should never be called for RMat adapter"); + GAPI_Assert(false && "Generic serialize method of RMat::Adapter does nothing by default. " + "Please, implement it in derived class to properly serialize the object."); } virtual void deserialize(cv::gapi::s11n::IIStream&) { - GAPI_Assert(false && "Generic deserialize method should never be called for RMat adapter"); + GAPI_Assert(false && "Generic deserialize method of RMat::Adapter does nothing by default. 
" + "Please, implement it in derived class to properly deserialize the object."); } }; using AdapterP = std::shared_ptr; diff --git a/modules/gapi/include/opencv2/gapi/s11n.hpp b/modules/gapi/include/opencv2/gapi/s11n.hpp index ca8e32c98bf9..53800970d1cb 100644 --- a/modules/gapi/include/opencv2/gapi/s11n.hpp +++ b/modules/gapi/include/opencv2/gapi/s11n.hpp @@ -13,6 +13,13 @@ #include #include #include +#include +#include + +// FIXME: caused by deserialize_runarg +#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER +#pragma warning(disable: 4702) +#endif namespace cv { namespace gapi { @@ -34,8 +41,8 @@ namespace detail { template cv::GCompileArgs getCompileArgs(const std::vector &bytes); - template - cv::GRunArgs getRunArgsWithRMats(const std::vector &bytes); + template + cv::GRunArgs getRunArgsWithAdapters(const std::vector &bytes); } // namespace detail /** @brief Serialize a graph represented by GComputation into an array of bytes. @@ -133,19 +140,18 @@ type deserialize(const std::vector &bytes) { } /** - * @brief Deserialize GRunArgs including RMat objects if any from a byte array. + * @brief Deserialize GRunArgs including RMat and MediaFrame objects if any from a byte array. * - * RMat adapter type is specified in the template. - * @note To be used properly specified adapter type must overload its serialize() and - * deserialize() methods. + * Adapter types are specified in the template. + * @note To be used properly specified adapter types must overload their deserialize() method. * @param bytes vector of bytes to deserialize GRunArgs object from. - * @return GRunArgs including RMat objects if any. - * @see RMat + * @return GRunArgs including RMat and MediaFrame objects if any. 
+ * @see RMat MediaFrame */ -template inline +template inline typename std::enable_if::value, GRunArgs>:: type deserialize(const std::vector &bytes) { - return detail::getRunArgsWithRMats(bytes); + return detail::getRunArgsWithAdapters(bytes); } } // namespace gapi } // namespace cv @@ -399,16 +405,39 @@ static cv::util::optional exec(const std::string& tag, cv::gapi::s1 } }; -template struct deserialize_runarg; +template +struct deserialize_arg_with_adapter; + +template +struct deserialize_arg_with_adapter { +static GRunArg exec(cv::gapi::s11n::IIStream& is) { + std::unique_ptr ptr(new TA); + ptr->deserialize(is); + return GRunArg { RA(std::move(ptr)) }; +} +}; + +template +struct deserialize_arg_with_adapter { +static GRunArg exec(cv::gapi::s11n::IIStream&) { + GAPI_Assert(false && "No suitable adapter class found during RMat/MediaFrame deserialization. " + "Please, make sure you've passed them in cv::gapi::deserialize() template"); + return GRunArg{}; +} +}; -template +template struct deserialize_runarg { static GRunArg exec(cv::gapi::s11n::IIStream& is, uint32_t idx) { if (idx == GRunArg::index_of()) { - auto ptr = std::make_shared(); - ptr->deserialize(is); - return GRunArg { RMat(std::move(ptr)) }; - } else { // non-RMat arg - use default deserialization + // Type or void (if not found) + using TA = typename cv::util::find_adapter_impl::type; + return deserialize_arg_with_adapter::exec(is); + } else if (idx == GRunArg::index_of()) { + // Type or void (if not found) + using TA = typename cv::util::find_adapter_impl::type; + return deserialize_arg_with_adapter::exec(is); + } else { // not an adapter holding type runarg - use default deserialization GRunArg arg; getRunArgByIdx(is, arg, idx); return arg; @@ -451,8 +480,8 @@ cv::GCompileArgs getCompileArgs(const std::vector &sArgs) { return args; } -template -cv::GRunArgs getRunArgsWithRMats(const std::vector &bytes) { +template +cv::GRunArgs getRunArgsWithAdapters(const std::vector &bytes) { std::unique_ptr pIs = 
cv::gapi::s11n::detail::getInStream(bytes); cv::gapi::s11n::IIStream& is = *pIs; cv::GRunArgs args; @@ -462,7 +491,7 @@ cv::GRunArgs getRunArgsWithRMats(const std::vector &bytes) { for (uint32_t i = 0; i < sz; ++i) { uint32_t idx = 0; is >> idx; - args.push_back(cv::gapi::detail::deserialize_runarg::exec(is, idx)); + args.push_back(cv::gapi::detail::deserialize_runarg::exec(is, idx)); } return args; diff --git a/modules/gapi/include/opencv2/gapi/util/util.hpp b/modules/gapi/include/opencv2/gapi/util/util.hpp index c6ad0632e268..eb435a3eeff0 100644 --- a/modules/gapi/include/opencv2/gapi/util/util.hpp +++ b/modules/gapi/include/opencv2/gapi/util/util.hpp @@ -153,7 +153,29 @@ overload_lamba_set overload_lambdas(L&& ...lambdas) { return overload_lamba_set(std::forward(lambdas)...); } -} + +template +struct find_adapter_impl; + +template +struct find_adapter_impl +{ + using type = typename std::conditional::value, + T, + void>::type; + static constexpr bool found = std::is_base_of::value; +}; + +template +struct find_adapter_impl +{ + using type = typename std::conditional::value, + T, + typename find_adapter_impl::type>::type; + static constexpr bool found = std::is_base_of::value || + find_adapter_impl::found; +}; +} // namespace util } // namespace cv // \endcond diff --git a/modules/gapi/src/api/media.cpp b/modules/gapi/src/api/media.cpp index 884fc9e83d79..b1c455d40aef 100644 --- a/modules/gapi/src/api/media.cpp +++ b/modules/gapi/src/api/media.cpp @@ -35,6 +35,10 @@ cv::MediaFrame::IAdapter* cv::MediaFrame::getAdapter() const { return m->adapter.get(); } +void cv::MediaFrame::serialize(cv::gapi::s11n::IOStream& os) const { + return m->adapter->serialize(os); +} + cv::MediaFrame::View::View(Ptrs&& ptrs, Strides&& strs, Callback &&cb) : ptr (std::move(ptrs)) , stride(std::move(strs)) diff --git a/modules/gapi/src/api/s11n.cpp b/modules/gapi/src/api/s11n.cpp index 97f5a95c42a6..bd7f46c88aec 100644 --- a/modules/gapi/src/api/s11n.cpp +++ 
b/modules/gapi/src/api/s11n.cpp @@ -76,14 +76,14 @@ cv::GRunArgsP cv::gapi::bind(cv::GRunArgs &out_args) { #if !defined(GAPI_STANDALONE) case T::index_of() : - outputs.emplace_back((cv::UMat*)(&(cv::util::get(res_obj)))); + outputs.emplace_back(&(cv::util::get(res_obj))); break; #endif case cv::GRunArg::index_of() : - outputs.emplace_back((cv::Mat*)(&(cv::util::get(res_obj)))); + outputs.emplace_back(&(cv::util::get(res_obj))); break; case cv::GRunArg::index_of() : - outputs.emplace_back((cv::Scalar*)(&(cv::util::get(res_obj)))); + outputs.emplace_back(&(cv::util::get(res_obj))); break; case T::index_of() : outputs.emplace_back(cv::util::get(res_obj)); @@ -92,7 +92,10 @@ cv::GRunArgsP cv::gapi::bind(cv::GRunArgs &out_args) outputs.emplace_back(cv::util::get(res_obj)); break; case cv::GRunArg::index_of() : - outputs.emplace_back((cv::RMat*)(&(cv::util::get(res_obj)))); + outputs.emplace_back(&(cv::util::get(res_obj))); + break; + case cv::GRunArg::index_of() : + outputs.emplace_back(&(cv::util::get(res_obj))); break; default: GAPI_Assert(false && "This value type is not supported!"); // ...maybe because of STANDALONE mode. 
@@ -130,6 +133,9 @@ cv::GRunArg cv::gapi::bind(cv::GRunArgP &out) case T::index_of() : return cv::GRunArg(*cv::util::get(out)); + case T::index_of() : + return cv::GRunArg(*cv::util::get(out)); + default: // ...maybe our types were extended GAPI_Assert(false && "This value type is UNKNOWN!"); diff --git a/modules/gapi/src/backends/common/serialization.cpp b/modules/gapi/src/backends/common/serialization.cpp index f2c956874c1e..619b2feb7417 100644 --- a/modules/gapi/src/backends/common/serialization.cpp +++ b/modules/gapi/src/backends/common/serialization.cpp @@ -201,18 +201,20 @@ IOStream& operator<< (IOStream& os, const cv::RMat& mat) { return os; } IIStream& operator>> (IIStream& is, cv::RMat&) { - util::throw_error(std::logic_error("operator>> for RMat should never be called")); + util::throw_error(std::logic_error("operator>> for RMat should never be called. " + "Instead, cv::gapi::deserialize() " + "should be used")); return is; } -IOStream& operator<< (IOStream& os, const cv::MediaFrame &) { - // Stub - GAPI_Assert(false && "cv::MediaFrame serialization is not supported!"); +IOStream& operator<< (IOStream& os, const cv::MediaFrame &frame) { + frame.serialize(os); return os; } IIStream& operator>> (IIStream& is, cv::MediaFrame &) { - // Stub - GAPI_Assert(false && "cv::MediaFrame serialization is not supported!"); + util::throw_error(std::logic_error("operator>> for MediaFrame should never be called. 
" + "Instead, cv::gapi::deserialize() " + "should be used")); return is; } diff --git a/modules/gapi/test/s11n/gapi_s11n_tests.cpp b/modules/gapi/test/s11n/gapi_s11n_tests.cpp index c2b17521d966..4c6e63b55204 100644 --- a/modules/gapi/test/s11n/gapi_s11n_tests.cpp +++ b/modules/gapi/test/s11n/gapi_s11n_tests.cpp @@ -2,6 +2,7 @@ #include "backends/common/serialization.hpp" #include +#include #include <../src/backends/common/gbackend.hpp> // asView namespace { @@ -148,6 +149,29 @@ class MyRMatAdapter : public cv::RMat::Adapter { int getVal() { return m_value; } std::string getStr() { return m_str; } }; + +class MyMediaFrameAdapter : public cv::MediaFrame::IAdapter { + cv::Mat m_mat; + int m_value; + std::string m_str; +public: + MyMediaFrameAdapter() = default; + MyMediaFrameAdapter(cv::Mat m, int value, const std::string& str) + : m_mat(m), m_value(value), m_str(str) + {} + virtual cv::MediaFrame::View access(cv::MediaFrame::Access) override { + return cv::MediaFrame::View({m_mat.data}, {m_mat.step}); + } + virtual cv::GFrameDesc meta() const override { return {cv::MediaFormat::BGR, m_mat.size()}; } + virtual void serialize(cv::gapi::s11n::IOStream& os) override { + os << m_value << m_str; + } + virtual void deserialize(cv::gapi::s11n::IIStream& is) override { + is >> m_value >> m_str; + } + int getVal() { return m_value; } + std::string getStr() { return m_str; } +}; } namespace opencv_test { @@ -581,6 +605,17 @@ TEST_F(S11N_Basic, Test_Vector_Of_Strings) { EXPECT_EQ("42", des[2]); } +TEST_F(S11N_Basic, Test_RunArg) { + cv::Mat mat = cv::Mat::eye(cv::Size(128, 64), CV_8UC3); + auto v = cv::GRunArgs{ cv::GRunArg{ mat } }; + + const std::vector sargsin = cv::gapi::serialize(v); + cv::GRunArgs out = cv::gapi::deserialize(sargsin); + cv::Mat out_mat = cv::util::get(out[0]); + + EXPECT_EQ(0, cv::norm(mat, out_mat)); +} + TEST_F(S11N_Basic, Test_RunArg_RMat) { cv::Mat mat = cv::Mat::eye(cv::Size(128, 64), CV_8UC3); cv::RMat rmat = cv::make_rmat(mat, 42, "It actually 
works"); @@ -614,6 +649,87 @@ TEST_F(S11N_Basic, Test_RunArg_RMat_Scalar_Mat) { EXPECT_EQ(0, cv::norm(mat, out_mat)); } +TEST_F(S11N_Basic, Test_RunArg_MediaFrame) { + cv::Mat mat = cv::Mat::eye(cv::Size(128, 64), CV_8UC3); + auto frame = cv::MediaFrame::Create(mat, 42, "It actually works"); + auto v = cv::GRunArgs{ cv::GRunArg{ frame } }; + + const std::vector sargsin = cv::gapi::serialize(v); + cv::GRunArgs out = cv::gapi::deserialize(sargsin); + cv::MediaFrame out_mat = cv::util::get(out[0]); + auto adapter = out_mat.get(); + EXPECT_EQ(42, adapter->getVal()); + EXPECT_EQ("It actually works", adapter->getStr()); +} + +TEST_F(S11N_Basic, Test_RunArg_MediaFrame_Scalar_Mat) { + cv::Mat mat = cv::Mat::eye(cv::Size(128, 64), CV_8UC3); + auto frame = cv::MediaFrame::Create(mat, 42, "It actually works"); + cv::Scalar sc(111); + auto v = cv::GRunArgs{ cv::GRunArg{ frame }, cv::GRunArg{ sc }, cv::GRunArg{ mat } }; + + const std::vector sargsin = cv::gapi::serialize(v); + cv::GRunArgs out = cv::gapi::deserialize(sargsin); + cv::MediaFrame out_frame = cv::util::get(out[0]); + auto adapter = out_frame.get(); + EXPECT_EQ(42, adapter->getVal()); + EXPECT_EQ("It actually works", adapter->getStr()); + + cv::Scalar out_sc = cv::util::get(out[1]); + EXPECT_EQ(sc, out_sc); + + cv::Mat out_mat = cv::util::get(out[2]); + EXPECT_EQ(0, cv::norm(mat, out_mat)); +} + +TEST_F(S11N_Basic, Test_RunArg_MediaFrame_RMat) { + cv::Mat mat = cv::Mat::eye(cv::Size(128, 64), CV_8UC3); + cv::Mat mat2 = cv::Mat::eye(cv::Size(128, 64), CV_8UC3); + + auto frame = cv::MediaFrame::Create(mat, 42, "It actually works"); + auto rmat = cv::make_rmat(mat2, 24, "Hello there"); + + auto v = cv::GRunArgs{ cv::GRunArg{ frame }, cv::GRunArg{ rmat } }; + + const std::vector sargsin = cv::gapi::serialize(v); + cv::GRunArgs out = cv::gapi::deserialize(sargsin); + + cv::MediaFrame out_frame = cv::util::get(out[0]); + cv::RMat out_rmat = cv::util::get(out[1]); + + auto adapter = out_frame.get(); + EXPECT_EQ(42, 
adapter->getVal()); + EXPECT_EQ("It actually works", adapter->getStr()); + + auto adapter2 = out_rmat.get(); + EXPECT_EQ(24, adapter2->getVal()); + EXPECT_EQ("Hello there", adapter2->getStr()); +} + +TEST_F(S11N_Basic, Test_RunArg_RMat_MediaFrame) { + cv::Mat mat = cv::Mat::eye(cv::Size(128, 64), CV_8UC3); + cv::Mat mat2 = cv::Mat::eye(cv::Size(128, 64), CV_8UC3); + + auto frame = cv::MediaFrame::Create(mat, 42, "It actually works"); + auto rmat = cv::make_rmat(mat2, 24, "Hello there"); + + auto v = cv::GRunArgs{ cv::GRunArg{ rmat }, cv::GRunArg{ frame } }; + + const std::vector sargsin = cv::gapi::serialize(v); + cv::GRunArgs out = cv::gapi::deserialize(sargsin); + + cv::RMat out_rmat = cv::util::get(out[0]); + cv::MediaFrame out_frame = cv::util::get(out[1]); + + auto adapter = out_frame.get(); + EXPECT_EQ(42, adapter->getVal()); + EXPECT_EQ("It actually works", adapter->getStr()); + + auto adapter2 = out_rmat.get(); + EXPECT_EQ(24, adapter2->getVal()); + EXPECT_EQ("Hello there", adapter2->getStr()); +} + namespace { template bool verifyOpaqueKind(T&& in) { From a7742d7d631b00346aed8a681e1bbc338d58651d Mon Sep 17 00:00:00 2001 From: Dmitry Budnikov Date: Tue, 13 Jul 2021 22:33:13 +0300 Subject: [PATCH 060/128] Merge pull request #20383 from dbudniko:dbudniko/mtcnn_1st_pnet_simplification MTCNN 1st pnet simplification to ensure single graph input * 1st pnet simplification to ensure single graph input * address comment from Dmitry M regarding unused variable --- modules/gapi/samples/face_detection_mtcnn.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/gapi/samples/face_detection_mtcnn.cpp b/modules/gapi/samples/face_detection_mtcnn.cpp index c437bdbba46c..d679ba0529b1 100644 --- a/modules/gapi/samples/face_detection_mtcnn.cpp +++ b/modules/gapi/samples/face_detection_mtcnn.cpp @@ -596,7 +596,6 @@ int main(int argc, char* argv[]) { cv::GMat scores[MAX_PYRAMID_LEVELS]; cv::GArray nms_p_faces[MAX_PYRAMID_LEVELS]; cv::GArray 
total_faces[MAX_PYRAMID_LEVELS]; - cv::GArray faces_init(std::vector{}); //The very first PNet pyramid layer to init total_faces[0] in_resized[0] = cv::gapi::resize(in_originalRGB, level_size[0]); @@ -605,8 +604,7 @@ int main(int argc, char* argv[]) { cv::GArray faces0 = custom::BuildFaces::on(scores[0], regressions[0], static_cast(scales[0]), conf_thresh_p); cv::GArray final_p_faces_for_bb2squares = custom::ApplyRegression::on(faces0, true); cv::GArray final_faces_pnet0 = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares); - nms_p_faces[0] = custom::RunNMS::on(final_faces_pnet0, 0.5f, false); - total_faces[0] = custom::AccumulatePyramidOutputs::on(faces_init, nms_p_faces[0]); + total_faces[0] = custom::RunNMS::on(final_faces_pnet0, 0.5f, false); //The rest PNet pyramid layers to accumlate all layers result in total_faces[PYRAMID_LEVELS - 1]] for (int i = 1; i < pyramid_levels; ++i) { From 6f417b57c1bbb3b113104b3023ae9f2361b4618d Mon Sep 17 00:00:00 2001 From: Pablo Romero Date: Tue, 13 Jul 2021 21:40:15 +0200 Subject: [PATCH 061/128] Merge pull request #20399 from pablorcum:3.4 Improves support for Unix non-Linux systems, including QNX * Fixes #20395. Improves support for Unix non-Linux systems. Focus on QNX Neutrino. 
Signed-off-by: promero * Update system.cpp --- CMakeLists.txt | 2 ++ modules/core/src/parallel.cpp | 2 +- modules/core/src/system.cpp | 22 +++++++++++++--------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f6a2da53103f..94ef43fcb46f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -648,6 +648,8 @@ if(UNIX) set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} m pthread) elseif(EMSCRIPTEN) # no need to link to system libs with emscripten + elseif(QNXNTO) + set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} m) else() set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m pthread rt) endif() diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 9ac7d3e4c093..cfff4cea4bce 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -53,7 +53,7 @@ #undef abs #endif -#if defined __linux__ || defined __APPLE__ || defined __GLIBC__ \ +#if defined __unix__ || defined __APPLE__ || defined __GLIBC__ \ || defined __HAIKU__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ \ || defined __OpenBSD__ #include diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index b6810fa9f5fa..d8b8f6755950 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -114,10 +114,14 @@ void* allocSingletonNewBuffer(size_t size) { return malloc(size); } #include // std::abort #endif -#if defined __ANDROID__ || defined __linux__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__ +#if defined __ANDROID__ || defined __unix__ || defined __FreeBSD__ || defined __OpenBSD__ || defined __HAIKU__ # include # include +#if defined __QNXNTO__ +# include +#else # include +#endif #if defined __ANDROID__ || defined __linux__ # include #endif @@ -128,7 +132,7 @@ void* allocSingletonNewBuffer(size_t size) { return malloc(size); } #endif -#if (defined __ppc64__ || defined __PPC64__) && defined __linux__ +#if (defined __ppc64__ || defined __PPC64__) && defined __unix__ # 
include "sys/auxv.h" # ifndef AT_HWCAP2 # define AT_HWCAP2 26 @@ -229,7 +233,7 @@ std::wstring GetTempFileNameWinRT(std::wstring prefix) #include "omp.h" #endif -#if defined __linux__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __GLIBC__ || defined __HAIKU__ +#if defined __unix__ || defined __APPLE__ || defined __EMSCRIPTEN__ || defined __FreeBSD__ || defined __GLIBC__ || defined __HAIKU__ #include #include #include @@ -591,7 +595,7 @@ struct HWFeatures have[CV_CPU_MSA] = true; #endif - #if (defined __ppc64__ || defined __PPC64__) && defined __linux__ + #if (defined __ppc64__ || defined __PPC64__) && defined __unix__ unsigned int hwcap = getauxval(AT_HWCAP); if (hwcap & PPC_FEATURE_HAS_VSX) { hwcap = getauxval(AT_HWCAP2); @@ -804,12 +808,12 @@ int64 getTickCount(void) LARGE_INTEGER counter; QueryPerformanceCounter( &counter ); return (int64)counter.QuadPart; -#elif defined __linux || defined __linux__ +#elif defined __MACH__ && defined __APPLE__ + return (int64)mach_absolute_time(); +#elif defined __unix__ struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return (int64)tp.tv_sec*1000000000 + tp.tv_nsec; -#elif defined __MACH__ && defined __APPLE__ - return (int64)mach_absolute_time(); #else struct timeval tv; gettimeofday(&tv, NULL); @@ -823,8 +827,6 @@ double getTickFrequency(void) LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); return (double)freq.QuadPart; -#elif defined __linux || defined __linux__ - return 1e9; #elif defined __MACH__ && defined __APPLE__ static double freq = 0; if( freq == 0 ) @@ -834,6 +836,8 @@ double getTickFrequency(void) freq = sTimebaseInfo.denom*1e9/sTimebaseInfo.numer; } return freq; +#elif defined __unix__ + return 1e9; #else return 1e6; #endif From 2113af9c52b4756809004dc932489e518dea4837 Mon Sep 17 00:00:00 2001 From: Roland Meertens Date: Thu, 3 Jun 2021 20:59:22 +0200 Subject: [PATCH 062/128] Updated grabcut example to show the background in a transparant way --- 
samples/cpp/grabcut.cpp | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/samples/cpp/grabcut.cpp b/samples/cpp/grabcut.cpp index d3e3db49f9d1..25492166a781 100644 --- a/samples/cpp/grabcut.cpp +++ b/samples/cpp/grabcut.cpp @@ -107,12 +107,14 @@ void GCApplication::showImage() const Mat res; Mat binMask; - if( !isInitialized ) - image->copyTo( res ); - else - { - getBinMask( mask, binMask ); - image->copyTo( res, binMask ); + image->copyTo( res ); + if( isInitialized ){ + getBinMask( mask, binMask); + + Mat black (binMask.rows, binMask.cols, CV_8UC3, cv::Scalar(0,0,0)); + black.setTo(Scalar::all(255), binMask); + + addWeighted(black, 0.5, res, 0.5, 0.0, res); } vector::const_iterator it; @@ -201,24 +203,39 @@ void GCApplication::mouseClick( int event, int x, int y, int flags, void* ) case EVENT_LBUTTONUP: if( rectState == IN_PROCESS ) { - rect = Rect( Point(rect.x, rect.y), Point(x,y) ); - rectState = SET; - setRectInMask(); - CV_Assert( bgdPxls.empty() && fgdPxls.empty() && prBgdPxls.empty() && prFgdPxls.empty() ); + if(rect.x == x || rect.y == y){ + rectState = NOT_SET; + } + else{ + rect = Rect( Point(rect.x, rect.y), Point(x,y) ); + rectState = SET; + setRectInMask(); + CV_Assert( bgdPxls.empty() && fgdPxls.empty() && prBgdPxls.empty() && prFgdPxls.empty() ); + } showImage(); } if( lblsState == IN_PROCESS ) { setLblsInMask(flags, Point(x,y), false); lblsState = SET; + nextIter(); showImage(); } + else{ + if(rectState == SET){ + nextIter(); + showImage(); + } + } break; case EVENT_RBUTTONUP: if( prLblsState == IN_PROCESS ) { setLblsInMask(flags, Point(x,y), true); prLblsState = SET; + } + if(rectState == SET){ + nextIter(); showImage(); } break; From bc210b292b7df85d0582211e85c8017aca165444 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Fri, 9 Jul 2021 16:22:13 +0000 Subject: [PATCH 063/128] dnn(test): backport test_ie_models.cpp from 4.5.3 --- modules/dnn/test/test_ie_models.cpp | 28 
++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index da6cbd6fbc2f..06d2e1776dd7 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -103,11 +103,15 @@ static const std::map& getOpenVINOTestMo #if INF_ENGINE_RELEASE >= 2020010000 // Downloaded using these parameters for Open Model Zoo downloader (2020.1): // ./downloader.py -o ${OPENCV_DNN_TEST_DATA_PATH}/omz_intel_models --cache_dir ${OPENCV_DNN_TEST_DATA_PATH}/.omz_cache/ \ - // --name person-detection-retail-0013 + // --name person-detection-retail-0013,age-gender-recognition-retail-0013 { "person-detection-retail-0013", { // IRv10 "intel/person-detection-retail-0013/FP32/person-detection-retail-0013", "intel/person-detection-retail-0013/FP16/person-detection-retail-0013" }}, + { "age-gender-recognition-retail-0013", { + "intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013", + "intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013" + }}, #endif }; @@ -123,6 +127,21 @@ static const std::vector getOpenVINOTestModelsList() return result; } +inline static std::string getOpenVINOModel(const std::string &modelName, bool isFP16) +{ + const std::map& models = getOpenVINOTestModels(); + const auto it = models.find(modelName); + if (it != models.end()) + { + OpenVINOModelTestCaseInfo modelInfo = it->second; + if (isFP16 && modelInfo.modelPathFP16) + return std::string(modelInfo.modelPathFP16); + else if (!isFP16 && modelInfo.modelPathFP32) + return std::string(modelInfo.modelPathFP32); + } + return std::string(); +} + static inline void genData(const InferenceEngine::TensorDesc& desc, Mat& m, Blob::Ptr& dataPtr) { const std::vector& dims = desc.getDims(); @@ -319,11 +338,8 @@ TEST_P(DNNTestOpenVINO, models) bool isFP16 = (targetId == DNN_TARGET_OPENCL_FP16 || targetId == DNN_TARGET_MYRIAD); - const std::map& 
models = getOpenVINOTestModels(); - const auto it = models.find(modelName); - ASSERT_TRUE(it != models.end()) << modelName; - OpenVINOModelTestCaseInfo modelInfo = it->second; - std::string modelPath = isFP16 ? modelInfo.modelPathFP16 : modelInfo.modelPathFP32; + const std::string modelPath = getOpenVINOModel(modelName, isFP16); + ASSERT_FALSE(modelPath.empty()) << modelName; std::string xmlPath = findDataFile(modelPath + ".xml", false); std::string binPath = findDataFile(modelPath + ".bin", false); From 9f2dcc3f13d8484d8f97df866f8bd51be4875c36 Mon Sep 17 00:00:00 2001 From: berak Date: Thu, 15 Jul 2021 17:02:23 +0200 Subject: [PATCH 064/128] python: fix trackbar warning --- modules/python/src2/cv2.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index 4bdb0fcc14bc..6c5e6463d2fb 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -1971,15 +1971,23 @@ static void OnChange(int pos, void *param) } #ifdef HAVE_OPENCV_HIGHGUI +// workaround for #20408, use nullptr, set value later +static int _createTrackbar(const String &trackbar_name, const String &window_name, int value, int count, + TrackbarCallback onChange, PyObject* py_callback_info) +{ + int n = createTrackbar(trackbar_name, window_name, NULL, count, onChange, py_callback_info); + setTrackbarPos(trackbar_name, window_name, value); + return n; +} static PyObject *pycvCreateTrackbar(PyObject*, PyObject *args) { PyObject *on_change; char* trackbar_name; char* window_name; - int *value = new int; + int value; int count; - if (!PyArg_ParseTuple(args, "ssiiO", &trackbar_name, &window_name, value, &count, &on_change)) + if (!PyArg_ParseTuple(args, "ssiiO", &trackbar_name, &window_name, &value, &count, &on_change)) return NULL; if (!PyCallable_Check(on_change)) { PyErr_SetString(PyExc_TypeError, "on_change must be callable"); @@ -1998,7 +2006,7 @@ static PyObject *pycvCreateTrackbar(PyObject*, 
PyObject *args) { registered_callbacks.insert(std::pair(name, py_callback_info)); } - ERRWRAP2(createTrackbar(trackbar_name, window_name, value, count, OnChange, py_callback_info)); + ERRWRAP2(_createTrackbar(trackbar_name, window_name, value, count, OnChange, py_callback_info)); Py_RETURN_NONE; } From 602e7c83e2ea3cf7d094e883c69f0911a37e6d1a Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 10 Jul 2021 13:06:33 +0000 Subject: [PATCH 065/128] dnn(test): add extra IR models, more checks in IE testing code --- modules/dnn/test/test_ie_models.cpp | 98 ++++++++++++++++++++++++++--- 1 file changed, 88 insertions(+), 10 deletions(-) diff --git a/modules/dnn/test/test_ie_models.cpp b/modules/dnn/test/test_ie_models.cpp index 06d2e1776dd7..3407e95e9bd2 100644 --- a/modules/dnn/test/test_ie_models.cpp +++ b/modules/dnn/test/test_ie_models.cpp @@ -112,6 +112,25 @@ static const std::map& getOpenVINOTestMo "intel/age-gender-recognition-retail-0013/FP16/age-gender-recognition-retail-0013", "intel/age-gender-recognition-retail-0013/FP32/age-gender-recognition-retail-0013" }}, +#endif +#if INF_ENGINE_RELEASE >= 2021020000 + // OMZ: 2020.2 + { "face-detection-0105", { + "intel/face-detection-0105/FP32/face-detection-0105", + "intel/face-detection-0105/FP16/face-detection-0105" + }}, + { "face-detection-0106", { + "intel/face-detection-0106/FP32/face-detection-0106", + "intel/face-detection-0106/FP16/face-detection-0106" + }}, +#endif +#if INF_ENGINE_RELEASE >= 2021040000 + // OMZ: 2021.4 + { "person-vehicle-bike-detection-2004", { + "intel/person-vehicle-bike-detection-2004/FP32/person-vehicle-bike-detection-2004", + "intel/person-vehicle-bike-detection-2004/FP16/person-vehicle-bike-detection-2004" + //"intel/person-vehicle-bike-detection-2004/FP16-INT8/person-vehicle-bike-detection-2004" + }}, #endif }; @@ -145,10 +164,22 @@ inline static std::string getOpenVINOModel(const std::string &modelName, bool is static inline void genData(const InferenceEngine::TensorDesc& 
desc, Mat& m, Blob::Ptr& dataPtr) { const std::vector& dims = desc.getDims(); - m.create(std::vector(dims.begin(), dims.end()), CV_32F); - randu(m, -1, 1); - - dataPtr = make_shared_blob(desc, (float*)m.data); + if (desc.getPrecision() == InferenceEngine::Precision::FP32) + { + m.create(std::vector(dims.begin(), dims.end()), CV_32F); + randu(m, -1, 1); + dataPtr = make_shared_blob(desc, (float*)m.data); + } + else if (desc.getPrecision() == InferenceEngine::Precision::I32) + { + m.create(std::vector(dims.begin(), dims.end()), CV_32S); + randu(m, -100, 100); + dataPtr = make_shared_blob(desc, (int*)m.data); + } + else + { + FAIL() << "Unsupported precision: " << desc.getPrecision(); + } } void runIE(Target target, const std::string& xmlPath, const std::string& binPath, @@ -254,7 +285,16 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath BlobMap inputBlobs; for (auto& it : net.getInputsInfo()) { - genData(it.second->getTensorDesc(), inputsMap[it.first], inputBlobs[it.first]); + const InferenceEngine::TensorDesc& desc = it.second->getTensorDesc(); + genData(desc, inputsMap[it.first], inputBlobs[it.first]); + if (cvtest::debugLevel > 0) + { + const std::vector& dims = desc.getDims(); + std::cout << "Input: '" << it.first << "' precison=" << desc.getPrecision() << " dims=" << dims.size() << " ["; + for (auto d : dims) + std::cout << " " << d; + std::cout << "] ocv_mat=" << inputsMap[it.first].size << " of " << typeToString(inputsMap[it.first].type()) << std::endl; + } } infRequest.SetInput(inputBlobs); @@ -263,7 +303,16 @@ void runIE(Target target, const std::string& xmlPath, const std::string& binPath BlobMap outputBlobs; for (auto& it : net.getOutputsInfo()) { - genData(it.second->getTensorDesc(), outputsMap[it.first], outputBlobs[it.first]); + const InferenceEngine::TensorDesc& desc = it.second->getTensorDesc(); + genData(desc, outputsMap[it.first], outputBlobs[it.first]); + if (cvtest::debugLevel > 0) + { + const std::vector& dims = 
desc.getDims(); + std::cout << "Output: '" << it.first << "' precison=" << desc.getPrecision() << " dims=" << dims.size() << " ["; + for (auto d : dims) + std::cout << " " << d; + std::cout << "] ocv_mat=" << outputsMap[it.first].size << " of " << typeToString(outputsMap[it.first].type()) << std::endl; + } } infRequest.SetOutput(outputBlobs); @@ -284,6 +333,12 @@ void runCV(Backend backendId, Target targetId, const std::string& xmlPath, const net.setPreferableTarget(targetId); std::vector outNames = net.getUnconnectedOutLayersNames(); + if (cvtest::debugLevel > 0) + { + std::cout << "OpenCV output names: " << outNames.size() << std::endl; + for (auto name : outNames) + std::cout << "- " << name << std::endl; + } std::vector outs; net.forward(outs, outNames); @@ -307,13 +362,26 @@ TEST_P(DNNTestOpenVINO, models) ASSERT_FALSE(backendId != DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && backendId != DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) << "Inference Engine backend is required"; -#if INF_ENGINE_VER_MAJOR_EQ(2021040000) - if (targetId == DNN_TARGET_MYRIAD && ( - modelName == "person-detection-retail-0013" || // ncDeviceOpen:1013 Failed to find booted device after boot - modelName == "age-gender-recognition-retail-0013" // ncDeviceOpen:1013 Failed to find booted device after boot +#if INF_ENGINE_VER_MAJOR_GE(2021030000) + if (targetId == DNN_TARGET_MYRIAD && (false + || modelName == "person-detection-retail-0013" // ncDeviceOpen:1013 Failed to find booted device after boot + || modelName == "age-gender-recognition-retail-0013" // ncDeviceOpen:1013 Failed to find booted device after boot + || modelName == "face-detection-0105" // get_element_type() must be called on a node with exactly one output + || modelName == "face-detection-0106" // get_element_type() must be called on a node with exactly one output + || modelName == "person-vehicle-bike-detection-2004" // 2021.4+: ncDeviceOpen:1013 Failed to find booted device after boot ) ) 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (targetId == DNN_TARGET_OPENCL && (false + || modelName == "face-detection-0106" // Operation: 2278 of type ExperimentalDetectronPriorGridGenerator(op::v6) is not supported + ) + ) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (targetId == DNN_TARGET_OPENCL_FP16 && (false + || modelName == "face-detection-0106" // Operation: 2278 of type ExperimentalDetectronPriorGridGenerator(op::v6) is not supported + ) + ) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); #endif #if INF_ENGINE_VER_MAJOR_GE(2020020000) @@ -350,6 +418,8 @@ TEST_P(DNNTestOpenVINO, models) if (targetId == DNN_TARGET_MYRIAD) resetMyriadDevice(); EXPECT_NO_THROW(runIE(targetId, xmlPath, binPath, inputsMap, ieOutputsMap)) << "runIE"; + if (targetId == DNN_TARGET_MYRIAD) + resetMyriadDevice(); EXPECT_NO_THROW(runCV(backendId, targetId, xmlPath, binPath, inputsMap, cvOutputsMap)) << "runCV"; double eps = 0; @@ -357,6 +427,14 @@ TEST_P(DNNTestOpenVINO, models) if (targetId == DNN_TARGET_CPU && checkHardwareSupport(CV_CPU_AVX_512F)) eps = 1e-5; #endif +#if INF_ENGINE_VER_MAJOR_GE(2021030000) + if (targetId == DNN_TARGET_CPU && modelName == "face-detection-0105") + eps = 2e-4; +#endif +#if INF_ENGINE_VER_MAJOR_GE(2021040000) + if (targetId == DNN_TARGET_CPU && modelName == "person-vehicle-bike-detection-2004") + eps = 1e-6; +#endif EXPECT_EQ(ieOutputsMap.size(), cvOutputsMap.size()); for (auto& srcIt : ieOutputsMap) From fbde0c6c961e631c4c91ce4c94d6c1a891e282dd Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 14 Jul 2021 23:31:41 +0000 Subject: [PATCH 066/128] dnn(ie): fix handling of 1D and non-32F outputs of InferenceEngine --- modules/dnn/src/dnn.cpp | 29 ++++++++--- modules/dnn/src/ie_ngraph.cpp | 97 
++++++++++++++++++++++++++++++++--- modules/dnn/src/ie_ngraph.hpp | 10 +++- 3 files changed, 120 insertions(+), 16 deletions(-) diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 45be6eb97ca9..8182394387ab 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1944,7 +1944,10 @@ struct Net::Impl : public detail::NetImplBase Ptr ieNode = node.dynamicCast(); CV_Assert(!ieNode.empty()); - ieNode->net->reset(); + + CV_Assert(ieNode->net); + InfEngineNgraphNet& ienet = *ieNode->net; + ienet.reset(); for (it = layers.begin(); it != layers.end(); ++it) { @@ -1961,16 +1964,26 @@ struct Net::Impl : public detail::NetImplBase { for (int i = 0; i < ld.outputBlobsWrappers.size(); ++i) { - InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); - dataPtr->setName(ld.name); + auto it = ienet.outputsDesc.find(ld.name); + if (it != ienet.outputsDesc.end()) + { + const InferenceEngine::TensorDesc& descriptor = it->second; + InferenceEngine::DataPtr dataPtr = ngraphDataOutputNode(ld.outputBlobsWrappers[i], descriptor, ld.name); + dataPtr->setName(ld.name); + } + else + { + InferenceEngine::DataPtr dataPtr = ngraphDataNode(ld.outputBlobsWrappers[i]); + dataPtr->setName(ld.name); + } } } - ieNode->net->addBlobs(ld.inputBlobsWrappers); - ieNode->net->addBlobs(ld.outputBlobsWrappers); + ienet.addBlobs(ld.inputBlobsWrappers); + ienet.addBlobs(ld.outputBlobsWrappers); ld.skip = true; } layers[lastLayerId].skip = false; - ieNode->net->init((Target)preferableTarget); + ienet.init((Target)preferableTarget); return; } @@ -3719,8 +3732,8 @@ void Net::forward(OutputArrayOfArrays outputBlobs, matvec.push_back(impl->getBlob(pins[i])); } - std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - outputvec = matvec; + outputBlobs.create((int)matvec.size(), 1, CV_32F/*FIXIT*/, -1); // allocate vector + outputBlobs.assign(matvec); } void Net::forward(std::vector >& outputBlobs, diff --git a/modules/dnn/src/ie_ngraph.cpp 
b/modules/dnn/src/ie_ngraph.cpp index e6c219f13e5a..6736590161c6 100644 --- a/modules/dnn/src/ie_ngraph.cpp +++ b/modules/dnn/src/ie_ngraph.cpp @@ -789,21 +789,32 @@ void NgraphBackendLayer::forward(InputArrayOfArrays inputs, OutputArrayOfArrays } -static InferenceEngine::Layout estimateLayout(const Mat& m) +static InferenceEngine::Layout estimateLayout(int dims) { - if (m.dims == 4) + if (dims == 4) return InferenceEngine::Layout::NCHW; - else if (m.dims == 3) + else if (dims == 3) return InferenceEngine::Layout::CHW; - else if (m.dims == 2) + else if (dims == 2) return InferenceEngine::Layout::NC; - else if (m.dims == 1) + else if (dims == 1) return InferenceEngine::Layout::C; - else if (m.dims == 5) + else if (dims == 5) return InferenceEngine::Layout::NCDHW; else return InferenceEngine::Layout::ANY; } +static inline +InferenceEngine::Layout estimateLayout(size_t dims) +{ + return estimateLayout((int)dims); +} + +static inline +InferenceEngine::Layout estimateLayout(const Mat& m) +{ + return estimateLayout(m.dims); +} static InferenceEngine::DataPtr wrapToInfEngineDataNode(const Mat& m, const std::string& name = "") { @@ -839,6 +850,7 @@ InferenceEngine::Blob::Ptr wrapToNgraphBlob(const Mat& m, InferenceEngine::Layou NgraphBackendWrapper::NgraphBackendWrapper(int targetId, const cv::Mat& m) : BackendWrapper(DNN_BACKEND_INFERENCE_ENGINE_NGRAPH, targetId) + , host((Mat*)&m) { dataPtr = wrapToInfEngineDataNode(m); blob = wrapToNgraphBlob(m, estimateLayout(m)); @@ -890,7 +902,11 @@ InferenceEngine::Blob::Ptr copyBlob(const InferenceEngine::Blob::Ptr& blob) copy = InferenceEngine::make_shared_blob(description); } else - CV_Error(Error::StsNotImplemented, "Unsupported blob precision"); + { + std::ostringstream msg; + msg << precision; + CV_Error_(Error::StsNotImplemented, ("Unsupported blob precision: %s", msg.str().c_str())); + } copy->allocate(); return copy; } @@ -903,6 +919,66 @@ InferenceEngine::DataPtr ngraphDataNode(const Ptr& ptr) return p->dataPtr; } +static 
+InferenceEngine::Blob::Ptr reallocateBlob(Mat &m, const InferenceEngine::TensorDesc& description) +{ + auto dims = description.getDims(); + auto layout = estimateLayout(dims.size()); + MatShape matShape(dims.begin(), dims.end()); + if (description.getPrecision() == InferenceEngine::Precision::FP32) + { + m.create(matShape, CV_32FC1); + return InferenceEngine::make_shared_blob( + {description.getPrecision(), dims, layout}, (float*)m.data); + } + else if (description.getPrecision() == InferenceEngine::Precision::I32) + { + m.create(matShape, CV_32SC1); + return InferenceEngine::make_shared_blob( + {description.getPrecision(), dims, layout}, (int*)m.data); + } + else if (description.getPrecision() == InferenceEngine::Precision::U8) + { + m.create(matShape, CV_8UC1); + return InferenceEngine::make_shared_blob( + {description.getPrecision(), dims, layout}, (uchar*)m.data); + } + std::ostringstream msg; + msg << "Unsupported IE precision: " << description.getPrecision(); + CV_Error(Error::StsNotImplemented, msg.str()); +} + +InferenceEngine::DataPtr ngraphDataOutputNode( + const Ptr& ptr, + const InferenceEngine::TensorDesc& description, + const std::string name) +{ + CV_Assert(!ptr.empty()); + Ptr p = ptr.dynamicCast(); + CV_Assert(!p.empty()); + NgraphBackendWrapper& w = *p; + const InferenceEngine::TensorDesc& blobDesc = w.blob.get()->getTensorDesc(); + auto dims = description.getDims(); + bool reallocate = false; + if (blobDesc.getPrecision() != description.getPrecision()) + { + reallocate = true; + CV_LOG_WARNING(NULL, "Reallocate output '" << name << "' blob due to wrong precision: " << blobDesc.getPrecision() << " => " << description.getPrecision() << " ndims=" << dims.size()); + } + if (dims.size() != blobDesc.getDims().size()) + { + reallocate = true; + CV_LOG_WARNING(NULL, "Reallocate output '" << name << "' blob due to wrong dims: " << blobDesc.getDims().size() << " => " << dims.size()); + } + if (reallocate) + { + auto layout = estimateLayout(dims.size()); + 
w.dataPtr = InferenceEngine::DataPtr(new InferenceEngine::Data(name, + {description.getPrecision(), dims, layout})); + w.blob = reallocateBlob(*w.host, description); + } + return w.dataPtr; +} void forwardNgraph(const std::vector >& outBlobsWrappers, Ptr& node, bool isAsync) @@ -918,6 +994,13 @@ void InfEngineNgraphNet::reset() allBlobs.clear(); infRequests.clear(); isInit = false; + + outputsDesc.clear(); + for (const auto& it : cnn.getOutputsInfo()) + { + const std::string& name = it.first; + outputsDesc.insert({name, it.second->getTensorDesc()}); + } } void InfEngineNgraphNet::addBlobs(const std::vector >& ptrs) diff --git a/modules/dnn/src/ie_ngraph.hpp b/modules/dnn/src/ie_ngraph.hpp index 7a8c4bef8d5c..617f1d454232 100644 --- a/modules/dnn/src/ie_ngraph.hpp +++ b/modules/dnn/src/ie_ngraph.hpp @@ -54,7 +54,8 @@ class InfEngineNgraphNet void setNodePtr(std::shared_ptr* ptr); void reset(); -private: + +//private: detail::NetImplBase& netImpl_; void release(); @@ -89,6 +90,8 @@ class InfEngineNgraphNet bool hasNetOwner; std::vector requestedOutputs; std::unordered_set> unconnectedNodes; + + std::map outputsDesc; }; class InfEngineNgraphNode : public BackendNode @@ -121,12 +124,17 @@ class NgraphBackendWrapper : public BackendWrapper virtual void copyToHost() CV_OVERRIDE; virtual void setHostDirty() CV_OVERRIDE; + Mat* host; InferenceEngine::DataPtr dataPtr; InferenceEngine::Blob::Ptr blob; AsyncArray futureMat; }; InferenceEngine::DataPtr ngraphDataNode(const Ptr& ptr); +InferenceEngine::DataPtr ngraphDataOutputNode( + const Ptr& ptr, + const InferenceEngine::TensorDesc& description, + const std::string name); // This is a fake class to run networks from Model Optimizer. 
Objects of that // class simulate responses of layers are imported by OpenCV and supported by From 96d35f7c54e6482e2c041c20dbc78bb3ef568a88 Mon Sep 17 00:00:00 2001 From: SamFC10 Date: Fri, 16 Jul 2021 09:39:41 +0530 Subject: [PATCH 067/128] Fix convolution asymmetric padding bug in onnx importer --- modules/dnn/src/onnx/onnx_importer.cpp | 39 +++++++++++++++++++++++++ modules/dnn/test/test_onnx_importer.cpp | 1 + 2 files changed, 40 insertions(+) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index db16cfd56d8b..ec61a9707eb9 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -1263,6 +1263,45 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) } int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0]; layerParams.set("num_output", outCn); + + // Check for asymmetric padding in Conv2D + if (layerParams.has("pad")) + { + bool asymmetricPadding = false; + DictValue pads = layerParams.get("pad"); + const int dims = pads.size() / 2; + for (int i = 0; i < dims; ++i) + { + if (pads.get(i) != pads.get(i + dims)) + { + asymmetricPadding = true; + break; + } + } + if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r] + { + layerParams.erase("pad"); + // No paddings required for N, C axis + std::vector paddings(4, 0); + // Add paddings for H, W axis + for (int i = 0; i < dims; ++i) + { + paddings.push_back(pads.get(i)); + paddings.push_back(pads.get(dims + i)); + } + LayerParams padLp; + padLp.name = layerParams.name + "/pad"; + padLp.type = "Padding"; + padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); + + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(padLp.name); + + addLayer(padLp, proto); + node_proto.set_input(0, padLp.name); + } + } } else if (layer_type == "ConvTranspose") { diff --git 
a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 600f727d7db4..3923068dbf17 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -109,6 +109,7 @@ TEST_P(Test_ONNX_layers, MaxPooling_2) TEST_P(Test_ONNX_layers, Convolution) { testONNXModels("convolution"); + testONNXModels("conv_asymmetric_pads"); } TEST_P(Test_ONNX_layers, Convolution_variable_weight) From c30078c5a3e852e6df7ec825280ecd8bc03e3107 Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Tue, 13 Jul 2021 12:20:35 +0300 Subject: [PATCH 068/128] add NotImplemented layer --- modules/dnn/src/dnn.cpp | 28 ++- modules/dnn/src/dnn_common.hpp | 9 + .../dnn/src/layers/not_implemented_layer.cpp | 194 ++++++++++++++++++ modules/dnn/src/tensorflow/tf_importer.cpp | 95 ++++++--- modules/dnn/test/test_tf_importer.cpp | 33 +++ 5 files changed, 326 insertions(+), 33 deletions(-) create mode 100644 modules/dnn/src/layers/not_implemented_layer.cpp diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 668cce8fa671..2d1a093ef47a 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -99,6 +99,15 @@ bool DNN_DIAGNOSTICS_RUN = false; void enableModelDiagnostics(bool isDiagnosticsMode) { DNN_DIAGNOSTICS_RUN = isDiagnosticsMode; + + if (DNN_DIAGNOSTICS_RUN) + { + detail::NotImplemented::Register(); + } + else + { + detail::NotImplemented::unRegister(); + } } using std::vector; @@ -4001,13 +4010,24 @@ int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) { CV_TRACE_FUNCTION(); - if (impl->getLayerId(name) >= 0) + int id = impl->getLayerId(name); + if (id >= 0) { - CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); - return -1; + if (!DNN_DIAGNOSTICS_RUN || type != "NotImplemented") + { + CV_Error(Error::StsBadArg, "Layer \"" + name + "\" already into net"); + return -1; + } + else + { + LayerData& ld = impl->layers.find(id)->second; + ld.type = type; + ld.params = params; + 
return -1; + } } - int id = ++impl->lastLayerId; + id = ++impl->lastLayerId; impl->layerNameToId.insert(std::make_pair(name, id)); impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params))); if (params.get("has_dynamic_shapes", false)) diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index ff8f5e846724..46fae41cc217 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -15,6 +15,15 @@ void initializeLayerFactory(); namespace detail { +class NotImplemented : public Layer +{ +public: + static Ptr create(const LayerParams ¶ms); + + static void Register(); + static void unRegister(); +}; + struct NetImplBase { const int networkId; // network global identifier diff --git a/modules/dnn/src/layers/not_implemented_layer.cpp b/modules/dnn/src/layers/not_implemented_layer.cpp new file mode 100644 index 000000000000..c4b134390222 --- /dev/null +++ b/modules/dnn/src/layers/not_implemented_layer.cpp @@ -0,0 +1,194 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include "../dnn_common.hpp" + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +namespace detail { + +class NotImplementedImpl CV_FINAL : public NotImplemented +{ +public: + NotImplementedImpl(const LayerParams& params) + { + setParamsFrom(params); + CV_Assert(params.has("type")); + std::stringstream ss; + ss << "Node for layer '" << params.name << "' of type '" << params.get("type") << "' wasn't initialized."; + msg = ss.str(); + } + + CV_DEPRECATED_EXTERNAL + virtual void finalize(const std::vector &input, std::vector &output) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual void finalize(InputArrayOfArrays inputs, OutputArrayOfArrays outputs) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + CV_DEPRECATED_EXTERNAL + virtual void forward(std::vector &input, std::vector &output, std::vector &internals) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + void forward_fallback(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals) + { + CV_Error(Error::StsNotImplemented, msg); + } + + CV_DEPRECATED_EXTERNAL + void finalize(const std::vector &inputs, CV_OUT std::vector &outputs) + { + CV_Error(Error::StsNotImplemented, msg); + } + + CV_DEPRECATED std::vector finalize(const std::vector &inputs) + { + CV_Error(Error::StsNotImplemented, msg); + } + + CV_DEPRECATED void run(const std::vector &inputs, + CV_OUT std::vector &outputs, + CV_IN_OUT std::vector &internals) + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual int inputNameToIndex(String inputName) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual int outputNameToIndex(const String& outputName) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + 
virtual bool supportBackend(int backendId) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual Ptr initHalide(const std::vector > &inputs) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual Ptr initInfEngine(const std::vector > &inputs) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual Ptr initNgraph(const std::vector > &inputs, + const std::vector >& nodes) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual Ptr initVkCom(const std::vector > &inputs) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual Ptr initCUDA( + void *context, + const std::vector>& inputs, + const std::vector>& outputs + ) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual void applyHalideScheduler(Ptr& node, + const std::vector &inputs, + const std::vector &outputs, + int targetId) const CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual Ptr tryAttach(const Ptr& node) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual bool setActivation(const Ptr& layer) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual bool tryFuse(Ptr& top) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual void unsetAttached() CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } + + virtual bool updateMemoryShapes(const std::vector &inputs) CV_OVERRIDE + { + CV_Error(Error::StsNotImplemented, msg); + } 
+ +private: + std::string msg; +}; + +Ptr NotImplemented::create(const LayerParams& params) +{ + return makePtr(params); +} + +Ptr notImplementedRegisterer(LayerParams &params) +{ + return detail::NotImplemented::create(params); +} + +void NotImplemented::Register() +{ + LayerFactory::registerLayer("NotImplemented", detail::notImplementedRegisterer); +} + +void NotImplemented::unRegister() +{ + LayerFactory::unregisterLayer("NotImplemented"); +} + +} // namespace detail + +CV__DNN_INLINE_NS_END +}} // namespace cv::dnn diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 15f88007b4d1..10670bfef9df 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -466,6 +466,8 @@ void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int in net.mutable_node()->DeleteSubrange(layer_index, 1); } +class LayerHandler; + class TFImporter { public: @@ -473,6 +475,7 @@ class TFImporter TFImporter(Net& net, const char *dataModel, size_t lenModel, const char *dataConfig = NULL, size_t lenConfig = 0); protected: + std::unique_ptr layerHandler; std::unique_ptr utilNet; Net& dstNet; void populateNet(); @@ -514,6 +517,7 @@ class TFImporter private: void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId); + friend class LayerHandler; typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&); typedef std::map DispatchMap; @@ -554,6 +558,20 @@ class TFImporter void parseCustomLayer (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); }; +class LayerHandler +{ +public: + LayerHandler(TFImporter* importer_); + ~LayerHandler() = default; + + bool handleMissing(const opencv_tensorflow::NodeDef& layer); + void handleFailed(const opencv_tensorflow::NodeDef& layer); + +private: + TFImporter* importer; + std::set layers; +}; + const TFImporter::DispatchMap
TFImporter::buildDispatchMap() { static DispatchMap dispatch; @@ -2340,7 +2358,8 @@ void TFImporter::parseCustomLayer(tensorflow::GraphDef& net, const tensorflow::N } TFImporter::TFImporter(Net& net, const char *model, const char *config) - : utilNet(DNN_DIAGNOSTICS_RUN ? new Net : nullptr), + : layerHandler(DNN_DIAGNOSTICS_RUN ? new LayerHandler(this) : nullptr), + utilNet(DNN_DIAGNOSTICS_RUN ? new Net : nullptr), dstNet(DNN_DIAGNOSTICS_RUN ? *utilNet : net), dispatch(buildDispatchMap()) { if (model && model[0]) @@ -2362,7 +2381,8 @@ TFImporter::TFImporter( const char *dataModel, size_t lenModel, const char *dataConfig, size_t lenConfig ) - : utilNet(DNN_DIAGNOSTICS_RUN ? new Net : nullptr), + : layerHandler(DNN_DIAGNOSTICS_RUN ? new LayerHandler(this) : nullptr), + utilNet(DNN_DIAGNOSTICS_RUN ? new Net : nullptr), dstNet(DNN_DIAGNOSTICS_RUN ? *utilNet : net), dispatch(buildDispatchMap()) { if (dataModel != NULL && lenModel > 0) @@ -2620,11 +2640,6 @@ DataLayout TFImporter::predictOutputDataLayout(const tensorflow::NodeDef& layer) return it->second; } -Ptr dummy_constructor(LayerParams & params) -{ - return new Layer(params); -} - void TFImporter::populateNet() { CV_Assert(netBin.ByteSize() || netTxt.ByteSize()); @@ -2727,7 +2742,6 @@ void TFImporter::populateNet() addConstNodes(netBin, value_id, layers_to_ignore); addConstNodes(netTxt, value_id, layers_to_ignore); - for (int li = 0; li < layersSize; li++) { const tensorflow::NodeDef& layer = net.node(li); @@ -2785,41 +2799,64 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer) { ((*this).*(iter->second))(net, layer, layerParams); } - else + else if (!DNN_DIAGNOSTICS_RUN || !layerHandler->handleMissing(layer)) { - if (DNN_DIAGNOSTICS_RUN && !LayerFactory::createLayerInstance(type, layerParams)) - { - CV_LOG_ERROR(NULL, "DNN/TF: Node='" << name << "' of type='"<< type - << "' is not supported. 
This error won't be displayed again."); - LayerFactory::registerLayer(type, dummy_constructor); - } - parseCustomLayer(net, layer, layerParams); } } catch (const std::exception& e) { - if (!DNN_DIAGNOSTICS_RUN) + CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "' of type='" << type + << "'. Exception: " << e.what()); + + if (DNN_DIAGNOSTICS_RUN) { - CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "' of type='" << type - << "'. Exception: " << e.what()); - throw; + layerHandler->handleFailed(layer); } else { - CV_LOG_ERROR(NULL, "DNN/TF: Can't parse layer for node='" << name << "' of type='" << type - << "'. Exception: " << e.what()); - - // internal layer failure (didnt call addLayer) - if (dstNet.getLayerId(name) == -1) - { - int id = dstNet.addLayer(name, type, layerParams); - layer_id[name] = id; - } + throw; } } } +LayerHandler::LayerHandler(TFImporter* importer_) : importer(importer_) {} + +void LayerHandler::handleFailed(const opencv_tensorflow::NodeDef& layer) +{ + LayerParams lp; + lp.name = layer.name(); + lp.type = "NotImplemented"; + lp.set("type", layer.op()); + + // the layer will be created or its params and type will be replaced + int id = importer->dstNet.addLayer(lp.name, "NotImplemented", lp); + if (id != -1) // internal layer failure before the call to addLayer() + { + importer->layer_id[lp.name] = id; + } +} + +bool LayerHandler::handleMissing(const opencv_tensorflow::NodeDef& layer) +{ + LayerParams lp; + // If we didn't add it, but can create it, it's custom and not missing. + if (layers.find(layer.op()) == layers.end() && LayerFactory::createLayerInstance(layer.op(), lp)) + { + return false; + } + + if (layers.insert(layer.op()).second) + { + CV_LOG_ERROR(NULL, "DNN/TF: Node='" << layer.name() << "' of type='"<< layer.op() + << "' is not supported. 
This error won't be displayed again."); + } + + handleFailed(layer); + + return true; +} + } // namespace #endif //HAVE_PROTOBUF diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 2c3613472451..35751b482467 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -568,6 +568,39 @@ TEST_P(Test_TensorFlow_layers, l2_normalize_3d) runTensorFlowNet("l2_normalize_3d"); } +class Test_TensorFlow_diagnostics : public DNNTestLayer { +public: + Test_TensorFlow_diagnostics() + { + enableModelDiagnostics(true); + } + + ~Test_TensorFlow_diagnostics() + { + enableModelDiagnostics(false); + } + + void runFailingTensorFlowNet(const std::string& prefix, bool hasText = false) + { + std::string netPath = path(prefix + "_net.pb"); + std::string netConfig = (hasText ? path(prefix + "_net.pbtxt") : ""); + + Net net = readNetFromTensorflow(netPath, netConfig); + } +}; + +TEST_P(Test_TensorFlow_diagnostics, not_implemented_layer) +{ + runFailingTensorFlowNet("not_implemented_layer"); +} + +TEST_P(Test_TensorFlow_diagnostics, broken_parameters) +{ + runFailingTensorFlowNet("broken_layer"); +} + +INSTANTIATE_TEST_CASE_P(/**/, Test_TensorFlow_diagnostics, dnnBackendsAndTargets()); + class Test_TensorFlow_nets : public DNNTestLayer {}; TEST_P(Test_TensorFlow_nets, MobileNet_SSD) From 2062a7ca8fd313af7ebcd510d8dca10b77858ec3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C5=A1a=20Bajtl?= Date: Sun, 18 Jul 2021 10:12:39 +0200 Subject: [PATCH 069/128] Bugfix on import script with web worker. 
--- modules/js/src/make_umd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/js/src/make_umd.py b/modules/js/src/make_umd.py index bed6ee9bcc0f..1096a8eb31b0 100644 --- a/modules/js/src/make_umd.py +++ b/modules/js/src/make_umd.py @@ -95,7 +95,7 @@ def make_umd(opencvjs, cvjs): root.cv = factory(); } else if (typeof importScripts === 'function') { // Web worker - root.cv = factory; + root.cv = factory(); } else { // Other shells, e.g. d8 root.cv = factory(); From 863ab0e72ee9aa530cbf936489be639837739b86 Mon Sep 17 00:00:00 2001 From: Lukas-Alexander Weber <32765578+lukasalexanderweber@users.noreply.github.com> Date: Tue, 20 Jul 2021 10:59:15 +0200 Subject: [PATCH 070/128] fix TypeError when specifying compose_megapix without rounding the composed image sizes (variable "sz") they will be odly fractions of a pixel (e.g. (5300.965, 3772.897)) and therefore cause a "TypeError: integer argument expected, got float" in line 456 roi = warper.warpRoi(sz, K, cameras[i].R) --- samples/python/stitching_detailed.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/python/stitching_detailed.py b/samples/python/stitching_detailed.py index a7e316105edd..4ee29048d118 100644 --- a/samples/python/stitching_detailed.py +++ b/samples/python/stitching_detailed.py @@ -450,7 +450,8 @@ def main(): cameras[i].focal *= compose_work_aspect cameras[i].ppx *= compose_work_aspect cameras[i].ppy *= compose_work_aspect - sz = (full_img_sizes[i][0] * compose_scale, full_img_sizes[i][1] * compose_scale) + sz = (int(round(full_img_sizes[i][0] * compose_scale)), + int(round(full_img_sizes[i][1] * compose_scale))) K = cameras[i].K().astype(np.float32) roi = warper.warpRoi(sz, K, cameras[i].R) corners.append(roi[0:2]) From 3817f3a89bec25347568dab775fdfa1eea4704cd Mon Sep 17 00:00:00 2001 From: Xiaoxiao Tian Date: Wed, 21 Jul 2021 13:32:50 +0800 Subject: [PATCH 071/128] fix: ocv_target_link_libraries could not handle the keyword rightly #20430 --- 
cmake/OpenCVUtils.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index d03dc9c55191..1e0ea947ef77 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -1488,8 +1488,8 @@ function(ocv_target_link_libraries target) if(NOT LINK_PENDING STREQUAL "") __ocv_push_target_link_libraries(${LINK_MODE} ${LINK_PENDING}) set(LINK_PENDING "") - set(LINK_MODE "${dep}") endif() + set(LINK_MODE "${dep}") else() if(BUILD_opencv_world) if(OPENCV_MODULE_${dep}_IS_PART_OF_WORLD) From d29c7e787159fb01ea31b4673a07a52d752aa66d Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli <25690309+fpetrogalli@users.noreply.github.com> Date: Wed, 21 Jul 2021 16:46:05 +0100 Subject: [PATCH 072/128] Merge pull request #20392 from fpetrogalli:aarch64-semihosting AArch64 semihosting * [ts] Disable filesystem support in the TS module. Because of this change, all the tests loading data will fail, but at least the core module can be tested with the following line: opencv_test_core --gtest_filter=-"*Core_InputOutput*:*Core_globbing.accuracy*" * [aarch64] Build OpenCV for AArch64 semihosting. This patch provides a toolchain file that allows to build the library for semihosting applications [1]. Minimal changes have been applied to the code to be able to compile with a baremetal toolchain. [1] https://developer.arm.com/documentation/100863/latest The option `CV_SEMIHOSTING` is used to guard the bits in the code that are specific to the target. To build the code: cmake ../opencv/ \ -DCMAKE_TOOLCHAIN_FILE=../opencv/platforms/semihosting/aarch64-semihosting.toolchain.cmake \ -DSEMIHOSTING_TOOLCHAIN_PATH=/path/to/baremetal-toolchain/bin/ \ -DBUILD_EXAMPLES=ON -GNinja A baremetal toolchain for targeting aarch64 semihosting can be found at [2], under `aarch64-none-elf`.
[2] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads The folder `samples/semihosting` provides two example semihosting applications. The two binaries can be executed on the host platform with: qemu-aarch64 ./bin/example_semihosting_histogram qemu-aarch64 ./bin/example_semihosting_norm Similarly, the test and perf executables of the modules can be run with: qemu-aarch64 ./bin/opencv_[test|perf]_ Notice that filesystem support is disabled by the toolchain file, hence some of the tests that depend on filesystem support will fail. * [semihosting] Remove blank line at the end of file. [NFC] The spurious blank line was reported by https://pullrequest.opencv.org/buildbot/builders/precommit_docs/builds/31158. * [semihosting] Make the raw pixel file generation OS independent. Use the facilities provided by CMake to generate the header file instead of a shell script, so that the build doesn't fail on systems that do not have a unix shell. * [semihosting] Rename variable for semihosting compilation. * [semihosting] Move the cmake configuration to a variable file. * [semihosting] Make the guard macro private for the core module. * [semihosting] Remove space. [NFC] * [semihosting] Improve comment with information about semihosting. [NFC] * [semihosting] Update license statement on top of source file. [NFC] * [semihosting] Replace BM_SUFFIX with SEMIHOSTING_SUFFIX. [NFC] * [semihosting] Remove double space. [NFC] * [semihosting] Add some text output to the sample applications. * [semihosting] Remove duplicate entry in cmake configuration. [NFCI] * [semihosting] Replace `long` with `int` in sample apps. [NFCI] * [semihosting] Use `configure_file` to create the random pixels. [NFCI] * [semihosting][bugfix] Fix name of cmakedefine variable. * [semihosting][samples] Use CV_8UC1 for grayscale images. [NFCI] * [semihosting] Add readme file. * [semihosting] Remove blank line at the end of README.
[NFC] This fixes the failure at https://pullrequest.opencv.org/buildbot/builders/precommit_docs/builds/31272. --- CMakeLists.txt | 1 + cmake/vars/EnableModeVars.cmake | 3 ++ cmake/vars/OPENCV_SEMIHOSTING.cmake | 10 +++++ modules/calib3d/src/ap3p.cpp | 18 ++++---- modules/core/CMakeLists.txt | 4 ++ modules/core/src/parallel.cpp | 4 ++ modules/ts/src/ts.cpp | 2 + modules/ts/src/ts_gtest.cpp | 10 +++++ .../aarch64-semihosting.toolchain.cmake | 40 +++++++++++++++++ .../include/aarch64_semihosting_port.hpp | 42 ++++++++++++++++++ samples/CMakeLists.txt | 7 ++- samples/semihosting/CMakeLists.txt | 10 +++++ samples/semihosting/README.md | 27 ++++++++++++ samples/semihosting/histogram/CMakeLists.txt | 26 +++++++++++ samples/semihosting/histogram/histogram.cpp | 43 +++++++++++++++++++ samples/semihosting/include/CMakeLists.txt | 16 +++++++ samples/semihosting/include/raw_pixels.hpp.in | 11 +++++ samples/semihosting/norm/CMakeLists.txt | 25 +++++++++++ samples/semihosting/norm/norm.cpp | 33 ++++++++++++++ 19 files changed, 321 insertions(+), 11 deletions(-) create mode 100644 cmake/vars/OPENCV_SEMIHOSTING.cmake create mode 100644 platforms/semihosting/aarch64-semihosting.toolchain.cmake create mode 100644 platforms/semihosting/include/aarch64_semihosting_port.hpp create mode 100644 samples/semihosting/CMakeLists.txt create mode 100644 samples/semihosting/README.md create mode 100644 samples/semihosting/histogram/CMakeLists.txt create mode 100644 samples/semihosting/histogram/histogram.cpp create mode 100644 samples/semihosting/include/CMakeLists.txt create mode 100644 samples/semihosting/include/raw_pixels.hpp.in create mode 100644 samples/semihosting/norm/CMakeLists.txt create mode 100644 samples/semihosting/norm/norm.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f4fe0385d12e..b7e5b58837bd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -513,6 +513,7 @@ OCV_OPTION(ENABLE_CONFIG_VERIFICATION "Fail build if actual configuration doesn' 
OCV_OPTION(OPENCV_ENABLE_MEMALIGN "Enable posix_memalign or memalign usage" ON) OCV_OPTION(OPENCV_DISABLE_FILESYSTEM_SUPPORT "Disable filesystem support" OFF) OCV_OPTION(OPENCV_DISABLE_THREAD_SUPPORT "Build the library without multi-threaded code." OFF) +OCV_OPTION(OPENCV_SEMIHOSTING "Build the library for semihosting target (Arm). See https://developer.arm.com/documentation/100863/latest." OFF) OCV_OPTION(ENABLE_PYLINT "Add target with Pylint checks" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) ) OCV_OPTION(ENABLE_FLAKE8 "Add target with Python flake8 checker" (BUILD_DOCS OR BUILD_EXAMPLES) IF (NOT CMAKE_CROSSCOMPILING AND NOT APPLE_FRAMEWORK) ) diff --git a/cmake/vars/EnableModeVars.cmake b/cmake/vars/EnableModeVars.cmake index b3c4e79c46d1..3f017af496f2 100644 --- a/cmake/vars/EnableModeVars.cmake +++ b/cmake/vars/EnableModeVars.cmake @@ -16,3 +16,6 @@ endmacro() variable_watch(OPENCV_DISABLE_THREAD_SUPPORT ocv_change_mode_var) set(OPENCV_DISABLE_THREAD_SUPPORT "${OPENCV_DISABLE_THREAD_SUPPORT}") + +variable_watch(OPENCV_SEMIHOSTING ocv_change_mode_var) +set(OPENCV_SEMIHOSTING "${OPENCV_SEMIHOSTING}") diff --git a/cmake/vars/OPENCV_SEMIHOSTING.cmake b/cmake/vars/OPENCV_SEMIHOSTING.cmake new file mode 100644 index 000000000000..66f21c7ebddc --- /dev/null +++ b/cmake/vars/OPENCV_SEMIHOSTING.cmake @@ -0,0 +1,10 @@ +set(CV_TRACE OFF) + +# These third parties libraries are incompatible with the semihosting +# toolchain. +set(WITH_JPEG OFF) +set(WITH_OPENEXR OFF) +set(WITH_TIFF OFF) + +# Turn off `libpng` for some linking issues. 
+set(WITH_PNG OFF) diff --git a/modules/calib3d/src/ap3p.cpp b/modules/calib3d/src/ap3p.cpp index 386a4499efbe..582b201b36a1 100644 --- a/modules/calib3d/src/ap3p.cpp +++ b/modules/calib3d/src/ap3p.cpp @@ -7,8 +7,6 @@ static inline double cbrt(double x) { return (double)cv::cubeRoot((float)x); }; #endif -using namespace std; - namespace { void solveQuartic(const double *factors, double *realRoots) { const double &a4 = factors[0]; @@ -30,29 +28,29 @@ void solveQuartic(const double *factors, double *realRoots) { double q3 = (72 * r4 * p4 - 2 * p4 * p4 * p4 - 27 * q4 * q4) / 432; // /=2 double t; // *=2 - complex w; + std::complex w; if (q3 >= 0) - w = -sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; + w = -std::sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; else - w = sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; + w = std::sqrt(static_cast >(q3 * q3 - p3 * p3 * p3)) - q3; if (w.imag() == 0.0) { - w.real(cbrt(w.real())); + w.real(std::cbrt(w.real())); t = 2.0 * (w.real() + p3 / w.real()); } else { w = pow(w, 1.0 / 3); t = 4.0 * w.real(); } - complex sqrt_2m = sqrt(static_cast >(-2 * p4 / 3 + t)); + std::complex sqrt_2m = sqrt(static_cast >(-2 * p4 / 3 + t)); double B_4A = -a3 / (4 * a4); double complex1 = 4 * p4 / 3 + t; #if defined(__clang__) && defined(__arm__) && (__clang_major__ == 3 || __clang_major__ == 4) && !defined(__ANDROID__) // details: https://github.com/opencv/opencv/issues/11135 // details: https://github.com/opencv/opencv/issues/11056 - complex complex2 = 2 * q4; - complex2 = complex(complex2.real() / sqrt_2m.real(), 0); + std::complex complex2 = 2 * q4; + complex2 = std::complex(complex2.real() / sqrt_2m.real(), 0); #else - complex complex2 = 2 * q4 / sqrt_2m; + std::complex complex2 = 2 * q4 / sqrt_2m; #endif double sqrt_2m_rh = sqrt_2m.real() / 2; double sqrt1 = sqrt(-(complex1 + complex2)).real() / 2; diff --git a/modules/core/CMakeLists.txt b/modules/core/CMakeLists.txt index 6a969e5fc358..13d0af4db82f 100644 --- 
a/modules/core/CMakeLists.txt +++ b/modules/core/CMakeLists.txt @@ -157,6 +157,10 @@ if(OPENCV_DISABLE_THREAD_SUPPORT) ocv_target_compile_definitions(${the_module} PUBLIC "OPENCV_DISABLE_THREAD_SUPPORT=1") endif() +if(OPENCV_SEMIHOSTING) + ocv_target_compile_definitions(${the_module} PRIVATE "-DOPENCV_SEMIHOSTING") +endif(OPENCV_SEMIHOSTING) + if(HAVE_HPX) ocv_target_link_libraries(${the_module} LINK_PRIVATE "${HPX_LIBRARIES}") endif() diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 1d4179b7b417..8fccd19798ae 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -888,6 +888,7 @@ T minNonZero(const T& val_1, const T& val_2) static int getNumberOfCPUs_() { +#ifndef OPENCV_SEMIHOSTING /* * Logic here is to try different methods of getting CPU counts and return * the minimum most value as it has high probablity of being right and safe. @@ -979,6 +980,9 @@ int getNumberOfCPUs_() #endif return ncpus != 0 ? ncpus : 1; +#else // OPENCV_SEMIHOSTING + return 1; +#endif //OPENCV_SEMIHOSTING } int getNumberOfCPUs() diff --git a/modules/ts/src/ts.cpp b/modules/ts/src/ts.cpp index 3aa403ad87e8..3af3a7b8d5af 100644 --- a/modules/ts/src/ts.cpp +++ b/modules/ts/src/ts.cpp @@ -72,7 +72,9 @@ #if defined _WIN32 || defined WINCE # include #else +#if OPENCV_HAVE_FILESYSTEM_SUPPORT # include +#endif # include #endif diff --git a/modules/ts/src/ts_gtest.cpp b/modules/ts/src/ts_gtest.cpp index a65ef721a2c6..b3debd54d2ed 100644 --- a/modules/ts/src/ts_gtest.cpp +++ b/modules/ts/src/ts_gtest.cpp @@ -1067,6 +1067,7 @@ class GTEST_API_ UnitTestImpl { void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc, Test::TearDownTestCaseFunc tear_down_tc, TestInfo* test_info) { +#if OPENCV_HAVE_FILESYSTEM_SUPPORT // In order to support thread-safe death tests, we need to // remember the original working directory when the test program // was first invoked. 
We cannot do this in RUN_ALL_TESTS(), as @@ -1079,6 +1080,7 @@ class GTEST_API_ UnitTestImpl { GTEST_CHECK_(!original_working_dir_.IsEmpty()) << "Failed to get the current working directory."; } +#endif GetTestCase(test_info->test_case_name(), test_info->type_param(), @@ -9165,6 +9167,7 @@ static bool IsPathSeparator(char c) { // Returns the current working directory, or "" if unsuccessful. FilePath FilePath::GetCurrentDir() { +#if OPENCV_HAVE_FILESYSTEM_SUPPORT #if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT // Windows CE doesn't have a current directory, so we just return // something reasonable. @@ -9183,6 +9186,9 @@ FilePath FilePath::GetCurrentDir() { # endif // GTEST_OS_NACL return FilePath(result == NULL ? "" : cwd); #endif // GTEST_OS_WINDOWS_MOBILE +#else // OPENCV_HAVE_FILESYSTEM_SUPPORT + return FilePath(""); +#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT } // Returns a copy of the FilePath with the case-insensitive extension removed. @@ -9391,6 +9397,7 @@ bool FilePath::CreateDirectoriesRecursively() const { // directory for any reason, including if the parent directory does not // exist. Not named "CreateDirectory" because that's a macro on Windows. bool FilePath::CreateFolder() const { +#if OPENCV_HAVE_FILESYSTEM_SUPPORT #if GTEST_OS_WINDOWS_MOBILE FilePath removed_sep(this->RemoveTrailingPathSeparator()); LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str()); @@ -9406,6 +9413,9 @@ bool FilePath::CreateFolder() const { return this->DirectoryExists(); // An error is OK if the directory exists. } return true; // No error. 
+#else // OPENCV_HAVE_FILESYSTEM_SUPPORT + return false; +#endif // OPENCV_HAVE_FILESYSTEM_SUPPORT } // If input name has a trailing separator character, remove it and return the diff --git a/platforms/semihosting/aarch64-semihosting.toolchain.cmake b/platforms/semihosting/aarch64-semihosting.toolchain.cmake new file mode 100644 index 000000000000..95bbda3bedba --- /dev/null +++ b/platforms/semihosting/aarch64-semihosting.toolchain.cmake @@ -0,0 +1,40 @@ +# This file is part of OpenCV project. +# It is subject to the license terms in the LICENSE file found in the top-level directory +# of this distribution and at http://opencv.org/license.html + +set(CMAKE_SYSTEM_NAME Generic) +set(CMAKE_SYSTEM_PROCESSOR AArch64) + +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set(PORT_FILE ${CMAKE_SOURCE_DIR}/platforms/semihosting/include/aarch64_semihosting_port.hpp) + +set(COMMON_FLAGS "--specs=rdimon.specs -DOPENCV_INCLUDE_PORT_FILE=\\\"${PORT_FILE}\\\"") + +set(CMAKE_AR ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-ar${CMAKE_EXECUTABLE_SUFFIX}) +set(CMAKE_ASM_COMPILER ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-gcc${CMAKE_EXECUTABLE_SUFFIX}) +set(CMAKE_C_COMPILER ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-gcc${CMAKE_EXECUTABLE_SUFFIX}) +set(CMAKE_CXX_COMPILER ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-g++${CMAKE_EXECUTABLE_SUFFIX}) +set(CMAKE_LINKER ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-ld${CMAKE_EXECUTABLE_SUFFIX}) +set(CMAKE_OBJCOPY ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-objcopy${CMAKE_EXECUTABLE_SUFFIX} CACHE INTERNAL "") +set(CMAKE_RANLIB ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-ranlib${CMAKE_EXECUTABLE_SUFFIX} CACHE INTERNAL "") +set(CMAKE_SIZE ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-size${CMAKE_EXECUTABLE_SUFFIX} CACHE INTERNAL "") +set(CMAKE_STRIP ${SEMIHOSTING_TOOLCHAIN_PATH}aarch64-none-elf-strip${CMAKE_EXECUTABLE_SUFFIX} CACHE INTERNAL "") +set(CMAKE_C_FLAGS ${COMMON_FLAGS} CACHE INTERNAL "") +set(CMAKE_CXX_FLAGS 
${COMMON_FLAGS} CACHE INTERNAL "") + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +set(OPENCV_SEMIHOSTING ON) +set(OPENCV_DISABLE_THREAD_SUPPORT ON) +set(OPENCV_DISABLE_FILESYSTEM_SUPPORT ON) +set(BUILD_SHARED_LIBS OFF) +set(OPENCV_FORCE_3RDPARTY_BUILD OFF) + + +# Enable newlib. +add_definitions(-D_GNU_SOURCE) + +add_definitions(-D_POSIX_PATH_MAX=0) diff --git a/platforms/semihosting/include/aarch64_semihosting_port.hpp b/platforms/semihosting/include/aarch64_semihosting_port.hpp new file mode 100644 index 000000000000..d3151c240a30 --- /dev/null +++ b/platforms/semihosting/include/aarch64_semihosting_port.hpp @@ -0,0 +1,42 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef AARCH64_BAREMETAL_PORT_HPP +#define AARCH64_BAREMETAL_PORT_HPP + +#include // Needed for `memalign`. +#include // Needed for `ENOMEM`. + +// -std=c++11 is missing the following definitions when targeting +// semihosting on aarch64. +#if __cplusplus == 201103L +#include +#define M_PI 3.14159265358979323846 +#define M_SQRT2 1.41421356237309504880 + +namespace std { +inline double cbrt(double x) { + return ::cbrt(x); +} +inline double copysign(double mag, double sgn) { + return ::copysign(mag, sgn); +} +} //namespace std +#endif // __cplusplus == 201103L + +extern "C" { +// Redirect the implementation of `posix_memalign` to `memalign` +// as the former is +// missing at link time. 
https://pubs.opengroup.org/onlinepubs/9699919799/functions/posix_memalign.html +__attribute__((weak)) int posix_memalign(void **memptr, size_t alignment, size_t size) { + void * ptr = memalign(alignment, size); + if (ptr != NULL) { + *memptr = ptr; + return 0; + } + return ENOMEM; +} +} // extern "C" + +#endif diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 0c70698ccbf6..9bfc2bf8ada4 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -45,7 +45,12 @@ endif() if(INSTALL_PYTHON_EXAMPLES) add_subdirectory(python) endif() - +# The examples in this folder will work with a semihosting version of +# OpenCV. For more information about semihosting, see +# https://developer.arm.com/documentation/100863/latest +if(OPENCV_SEMIHOSTING) + add_subdirectory(semihosting) +endif() ocv_install_example_src("." CMakeLists.txt samples_utils.cmake) if(INSTALL_C_EXAMPLES) install(DIRECTORY data DESTINATION "${OPENCV_SAMPLES_SRC_INSTALL_PATH}" COMPONENT samples_data) diff --git a/samples/semihosting/CMakeLists.txt b/samples/semihosting/CMakeLists.txt new file mode 100644 index 000000000000..9fddb0587b43 --- /dev/null +++ b/samples/semihosting/CMakeLists.txt @@ -0,0 +1,10 @@ +# This file is part of OpenCV project. +# It is subject to the license terms in the LICENSE file found in the top-level directory +# of this distribution and at http://opencv.org/license.html + +set(SEMIHOSTING_SUFFIX semihosting) + +add_subdirectory(include) +set(RAW_PIXEL_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) +add_subdirectory(histogram) +add_subdirectory(norm) diff --git a/samples/semihosting/README.md b/samples/semihosting/README.md new file mode 100644 index 000000000000..881b09b735b8 --- /dev/null +++ b/samples/semihosting/README.md @@ -0,0 +1,27 @@ +# Arm semihosting + +This folder contains a toolchain file and a couple of examples for +building OpenCV based applications that can run in an [Arm +semihosting](https://developer.arm.com/documentation/100863/latest) +setup.
+ +OpenCV can be compiled to target a semihosting platform as follows: + +``` +cmake ../opencv/ \ + -DCMAKE_TOOLCHAIN_FILE=../opencv/platforms/semihosting/aarch64-semihosting.toolchain.cmake \ + -DSEMIHOSTING_TOOLCHAIN_PATH=/path/to/baremetal-toolchain/bin/ \ + -DBUILD_EXAMPLES=ON -GNinja +``` + +A bare-metal toolchain for targeting aarch64 semihosting can be found +[here](https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-a/downloads), +under `aarch64-none-elf`. + +The code of the examples in the `norm` and `histogram` folders can be +executed with qemu in Linux userspace: + +``` + qemu-aarch64 ./bin/example_semihosting_histogram + qemu-aarch64 ./bin/example_semihosting_norm +``` diff --git a/samples/semihosting/histogram/CMakeLists.txt b/samples/semihosting/histogram/CMakeLists.txt new file mode 100644 index 000000000000..d2f065d1b9c8 --- /dev/null +++ b/samples/semihosting/histogram/CMakeLists.txt @@ -0,0 +1,26 @@ +# This file is part of OpenCV project.
+# It is subject to the license terms in the LICENSE file found in the top-level directory +# of this distribution and at http://opencv.org/license.html + +set(PROJECT_NAME histogram) +project(${PROJECT_NAME}) + +ocv_install_example_src(histogram *.cpp *.hpp CMakeLists.txt) + +set(LOCAL_DEPS + opencv_core + opencv_imgproc + ${OPENCV_MODULES_PUBLIC} + ${OpenCV_LIB_COMPONENTS}) +ocv_check_dependencies(${LOCAL_DEPS}) + +if(NOT OCV_DEPENDENCIES_FOUND) + return() +endif() + +ocv_define_sample(histogram histogram.cpp ${SEMIHOSTING_SUFFIX}) +ocv_include_modules_recurse(${LOCAL_DEPS}) +target_include_directories(${histogram} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(${histogram} PRIVATE ${RAW_PIXEL_INCLUDE}) +ocv_target_link_libraries(${histogram} PRIVATE ${OPENCV_LINKER_LIBS} + ${LOCAL_DEPS}) diff --git a/samples/semihosting/histogram/histogram.cpp b/samples/semihosting/histogram/histogram.cpp new file mode 100644 index 000000000000..daa568d0bbb0 --- /dev/null +++ b/samples/semihosting/histogram/histogram.cpp @@ -0,0 +1,43 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include +#include + +#include +#include +#include +#include "raw_pixels.hpp" + +#define IMG_ROWS 100 +#define IMG_COLS 100 + +static_assert(IMG_ROWS * IMG_COLS <= RAW_PIXELS_SIZE, "Incompatible size"); + +int main(void) +{ + // Number of experiment runs + int no_runs = 2; + + // https://docs.opencv.org/master/d3/d63/classcv_1_1Mat.html + cv::Mat src_new(IMG_ROWS, IMG_COLS, CV_8UC1, (void *)raw_pixels); + + // Set parameters + int imgCount = 1; + const int channels[] = {0}; + cv::Mat mask = cv::Mat(); + cv::Mat hist; + int dims = 1; + const int hist_sizes[] = {256}; + float Range[] = {0,256}; + const float *ranges[] = {Range}; + + // Run calc Hist + for(int i=0; i < no_runs; i++){ + std::cout << "Running iteration # "<< i << std::endl; + cv::calcHist(&src_new, imgCount, channels, mask, hist, dims, hist_sizes, ranges); + } + + return 0; +} diff --git a/samples/semihosting/include/CMakeLists.txt b/samples/semihosting/include/CMakeLists.txt new file mode 100644 index 000000000000..3c429b8adf6c --- /dev/null +++ b/samples/semihosting/include/CMakeLists.txt @@ -0,0 +1,16 @@ +# Populate a C array with random data. +set(RAW_PIXELS_SIZE 102400) +set(RAW_PIXELS_HEADER ${CMAKE_CURRENT_BINARY_DIR}/raw_pixels.hpp) +set(RAW_PIXELS_HEADER_IN ${CMAKE_CURRENT_SOURCE_DIR}/raw_pixels.hpp.in) + +set(RAW_PIXEL_VALUES "") +# Seed the random number generator. 
+string(RANDOM LENGTH 8 ALPHABET 0123456789abcdf RANDOM_SEED 314 number) +math(EXPR LOOP_RANGE "${RAW_PIXELS_SIZE} - 1") + +foreach(i RANGE ${LOOP_RANGE}) + string(RANDOM LENGTH 8 ALPHABET 0123456789abcdf number) + string(CONCAT RAW_PIXEL_VALUES ${RAW_PIXEL_VALUES} "0x${number}, \\\n") +endforeach() + +configure_file(${RAW_PIXELS_HEADER_IN} ${RAW_PIXELS_HEADER}) diff --git a/samples/semihosting/include/raw_pixels.hpp.in b/samples/semihosting/include/raw_pixels.hpp.in new file mode 100644 index 000000000000..6ee98222cc1b --- /dev/null +++ b/samples/semihosting/include/raw_pixels.hpp.in @@ -0,0 +1,11 @@ +#ifndef RAW_PIXELS_HPP +#define RAW_PIXELS_HPP +#include + +#cmakedefine RAW_PIXEL_VALUES @RAW_PIXEL_VALUES@ +#cmakedefine RAW_PIXELS_SIZE @RAW_PIXELS_SIZE@ + +static std::uint32_t raw_pixels[RAW_PIXELS_SIZE] = { + RAW_PIXEL_VALUES +}; +#endif //RAW_PIXELS_HPP diff --git a/samples/semihosting/norm/CMakeLists.txt b/samples/semihosting/norm/CMakeLists.txt new file mode 100644 index 000000000000..6f23d74627d2 --- /dev/null +++ b/samples/semihosting/norm/CMakeLists.txt @@ -0,0 +1,25 @@ +# This file is part of OpenCV project.
+# It is subject to the license terms in the LICENSE file found in the top-level directory +# of this distribution and at http://opencv.org/license.html + +set(PROJECT_NAME norm) +project(${PROJECT_NAME}) + +ocv_install_example_src(norm *.cpp *.hpp CMakeLists.txt) + +set(LOCAL_DEPS + opencv_core + ${OPENCV_MODULES_PUBLIC} + ${OpenCV_LIB_COMPONENTS}) +ocv_check_dependencies(${LOCAL_DEPS}) + +if(NOT OCV_DEPENDENCIES_FOUND) + return() +endif() + +ocv_define_sample(norm norm.cpp ${SEMIHOSTING_SUFFIX}) +ocv_include_modules_recurse(${LOCAL_DEPS}) +target_include_directories(${norm} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) +target_include_directories(${norm} PRIVATE ${RAW_PIXEL_INCLUDE}) +ocv_target_link_libraries(${norm} PRIVATE ${OPENCV_LINKER_LIBS} + ${LOCAL_DEPS}) diff --git a/samples/semihosting/norm/norm.cpp b/samples/semihosting/norm/norm.cpp new file mode 100644 index 000000000000..f911754be132 --- /dev/null +++ b/samples/semihosting/norm/norm.cpp @@ -0,0 +1,33 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#include +#include + +#include +#include +#include +#include "raw_pixels.hpp" + +#define IMG_ROWS 100 +#define IMG_COLS 100 + +static_assert(IMG_ROWS * IMG_COLS <= RAW_PIXELS_SIZE, "Incompatible size"); + +int main(void) +{ + // Number of experiment runs + int no_runs = 2; + + // https://docs.opencv.org/master/d3/d63/classcv_1_1Mat.html + cv::Mat src(IMG_ROWS, IMG_COLS, CV_8UC1, (void *)raw_pixels); + + // Run norm + for(int i=0; i < no_runs; i++){ + std::cout << "Running iteration # "<< i << std::endl; + cv::norm(src); + } + + return 0; +} From aae48e6fd77a30e45b56c60ceef20de21886d5a9 Mon Sep 17 00:00:00 2001 From: Dmitry Budnikov Date: Thu, 22 Jul 2021 16:52:11 +0300 Subject: [PATCH 073/128] single transpose MTCNN version --- modules/gapi/samples/face_detection_mtcnn.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/modules/gapi/samples/face_detection_mtcnn.cpp b/modules/gapi/samples/face_detection_mtcnn.cpp index d679ba0529b1..ad7c20c1175c 100644 --- a/modules/gapi/samples/face_detection_mtcnn.cpp +++ b/modules/gapi/samples/face_detection_mtcnn.cpp @@ -589,18 +589,18 @@ int main(int argc, char* argv[]) { //Preprocessing BGR2RGB + transpose (NCWH is expected instead of NCHW) cv::GMat in_original; cv::GMat in_originalRGB = cv::gapi::BGR2RGB(in_original); + cv::GMat in_transposedRGB = cv::gapi::transpose(in_originalRGB); cv::GOpaque in_sz = cv::gapi::streaming::size(in_original); cv::GMat in_resized[MAX_PYRAMID_LEVELS]; - cv::GMat in_transposed[MAX_PYRAMID_LEVELS]; cv::GMat regressions[MAX_PYRAMID_LEVELS]; cv::GMat scores[MAX_PYRAMID_LEVELS]; cv::GArray nms_p_faces[MAX_PYRAMID_LEVELS]; cv::GArray total_faces[MAX_PYRAMID_LEVELS]; //The very first PNet pyramid layer to init total_faces[0] - in_resized[0] = cv::gapi::resize(in_originalRGB, level_size[0]); - 
in_transposed[0] = cv::gapi::transpose(in_resized[0]); - std::tie(regressions[0], scores[0]) = run_mtcnn_p(in_transposed[0], get_pnet_level_name(level_size[0])); + cv::Size currentSize = cv::Size(level_size[0].height, level_size[0].width); + in_resized[0] = cv::gapi::resize(in_transposedRGB, currentSize); + std::tie(regressions[0], scores[0]) = run_mtcnn_p(in_resized[0], get_pnet_level_name(level_size[0])); cv::GArray faces0 = custom::BuildFaces::on(scores[0], regressions[0], static_cast(scales[0]), conf_thresh_p); cv::GArray final_p_faces_for_bb2squares = custom::ApplyRegression::on(faces0, true); cv::GArray final_faces_pnet0 = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares); @@ -608,9 +608,9 @@ int main(int argc, char* argv[]) { //The rest PNet pyramid layers to accumlate all layers result in total_faces[PYRAMID_LEVELS - 1]] for (int i = 1; i < pyramid_levels; ++i) { - in_resized[i] = cv::gapi::resize(in_originalRGB, level_size[i]); - in_transposed[i] = cv::gapi::transpose(in_resized[i]); - std::tie(regressions[i], scores[i]) = run_mtcnn_p(in_transposed[i], get_pnet_level_name(level_size[i])); + currentSize = cv::Size(level_size[i].height, level_size[i].width); + in_resized[i] = cv::gapi::resize(in_transposedRGB, currentSize); + std::tie(regressions[i], scores[i]) = run_mtcnn_p(in_resized[i], get_pnet_level_name(level_size[i])); cv::GArray faces = custom::BuildFaces::on(scores[i], regressions[i], static_cast(scales[i]), conf_thresh_p); cv::GArray final_p_faces_for_bb2squares_i = custom::ApplyRegression::on(faces, true); cv::GArray final_faces_pnet_i = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares_i); @@ -624,8 +624,7 @@ int main(int argc, char* argv[]) { //Refinement part of MTCNN graph cv::GArray faces_roi_pnet = custom::R_O_NetPreProcGetROIs::on(final_faces_pnet, in_sz); cv::GArray regressionsRNet, scoresRNet; - cv::GMat in_originalRGB_transposed = cv::gapi::transpose(in_originalRGB); - std::tie(regressionsRNet, scoresRNet) = 
cv::gapi::infer(faces_roi_pnet, in_originalRGB_transposed); + std::tie(regressionsRNet, scoresRNet) = cv::gapi::infer(faces_roi_pnet, in_transposedRGB); //Refinement post-processing cv::GArray rnet_post_proc_faces = custom::RNetPostProc::on(final_faces_pnet, scoresRNet, regressionsRNet, conf_thresh_r); @@ -636,7 +635,7 @@ int main(int argc, char* argv[]) { //Output part of MTCNN graph cv::GArray faces_roi_rnet = custom::R_O_NetPreProcGetROIs::on(final_faces_rnet, in_sz); cv::GArray regressionsONet, scoresONet, landmarksONet; - std::tie(regressionsONet, landmarksONet, scoresONet) = cv::gapi::infer(faces_roi_rnet, in_originalRGB_transposed); + std::tie(regressionsONet, landmarksONet, scoresONet) = cv::gapi::infer(faces_roi_rnet, in_transposedRGB); //Output post-processing cv::GArray onet_post_proc_faces = custom::ONetPostProc::on(final_faces_rnet, scoresONet, regressionsONet, landmarksONet, conf_thresh_o); From 024b43ca06dc7ab5e612aeee55fb44cdb534c49d Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Mon, 19 Jul 2021 18:24:15 +0300 Subject: [PATCH 074/128] implement asymmetric padding for conv2d, max_pool and conv2d_backprop_input --- modules/dnn/src/tensorflow/tf_importer.cpp | 112 +++++++++++++++++++-- modules/dnn/test/test_tf_importer.cpp | 10 ++ 2 files changed, 112 insertions(+), 10 deletions(-) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 39c230939474..426710989e48 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -404,12 +404,53 @@ void setKSize(LayerParams &layerParams, const tensorflow::NodeDef &layer) } } -void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer) +void setPadMode(LayerParams &layerParams, const tensorflow::NodeDef &layer) { if (hasLayerAttr(layer, "padding")) layerParams.set("pad_mode", getLayerAttr(layer, "padding").s()); } +bool getExplicitPadding(LayerParams &layerParams, const tensorflow::NodeDef 
&layer, int64_t (&pads)[8]) +{ + if (!layerParams.has("pad_mode") || + layerParams.get("pad_mode").getStringValue() != "EXPLICIT") + { + return false; + } + + CV_Assert(hasLayerAttr(layer, "explicit_paddings")); + + const tensorflow::AttrValue& protoPads = getLayerAttr(layer, "explicit_paddings"); + if (protoPads.list().i_size() != 8) + { + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding configuration."); + } + + int n = sizeof(pads) / sizeof(pads[0]); + for (int i = 0; i < n; ++i) + { + pads[i] = protoPads.list().i(i); + } + + if (getDataLayout(layer) != DATA_LAYOUT_NCHW) + { + CV_LOG_DEBUG(NULL, "DNN/TF: Data format " << getLayerAttr(layer, "data_format").s() << ", assuming NHWC."); + // Perhaps, we have NHWC padding dimensions order. + // N H W C + // 0 1 2 3 4 5 6 7 + std::swap(pads[2], pads[6]); + std::swap(pads[3], pads[7]); + // N C W H + // 0 1 2 3 4 5 6 7 + std::swap(pads[4], pads[6]); + std::swap(pads[5], pads[7]); + // N C H W + // 0 1 2 3 4 5 6 7 + } + + return true; +} + Pin parsePin(const std::string &name) { Pin pin(name); @@ -510,6 +551,7 @@ class TFImporter private: void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId); + void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value = 0.); typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&); typedef std::map DispatchMap; @@ -551,6 +593,31 @@ class TFImporter void parseCustomLayer (tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams); }; +void TFImporter::setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value) +{ + setPadMode(layerParams, layer); + int64_t pads[8]; + + if (!getExplicitPadding(layerParams, layer, pads)) + { + return; + } + + LayerParams padLp; + padLp.name = layer.name() + "/pad"; + padLp.type = "Padding"; + padLp.set("paddings", 
DictValue::arrayInt(pads, sizeof(pads) / sizeof(pads[0]))); + padLp.set("value", value); + + int id = dstNet.addLayer(padLp.name, padLp.type, padLp); + layer_id[padLp.name] = id; + + connect(layer_id, dstNet, parsePin(inputName), id, 0); + inputName = padLp.name; + + layerParams.set("pad_mode", "VALID"); +} + const TFImporter::DispatchMap TFImporter::buildDispatchMap() { static DispatchMap dispatch; @@ -787,7 +854,7 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N setStrides(layerParams, layer); if (!layerParams.has("pad_w") && !layerParams.has("pad_h")) - setPadding(layerParams, layer); + setPadding(layerParams, layer, input); // The final node of dilated convolution subgraph. next_layers = getNextLayers(net, name, "BatchToSpaceND"); @@ -1232,20 +1299,21 @@ void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeD { const std::string& name = layer.name(); const int num_inputs = layer.input_size(); CV_CheckGT(num_inputs, 0, ""); + std::string inputName = layer.input(0); layerParams.set("pool", "max"); setKSize(layerParams, layer); setStrides(layerParams, layer); - setPadding(layerParams, layer); + setPadding(layerParams, layer, inputName, -std::numeric_limits<float>::infinity()); // Test_TensorFlow_nets.EAST_text_detection/1, NGRAPH/CPU layerParams.set("ceil_mode", false); int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; - connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); + connectToAllBlobs(layer_id, dstNet, parsePin(inputName), id, num_inputs); } void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) @@ -1258,7 +1326,7 @@ void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeD layerParams.set("ave_pool_padded_area", false); setKSize(layerParams, layer); setStrides(layerParams, layer); - setPadding(layerParams, layer); + setPadMode(layerParams, layer); int id = 
dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; @@ -1673,7 +1741,7 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso // input: "weights" // input: "input" - const std::string& name = layer.name(); + std::string name = layer.name(); const int num_inputs = layer.input_size(); CV_CheckEQ(num_inputs, 3, "Expected output shape, weights and input nodes"); @@ -1704,7 +1772,21 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso layerParams.set("num_output", kshape[1]); setStrides(layerParams, layer); - setPadding(layerParams, layer); + setPadMode(layerParams, layer); + int64_t pads[8]; + bool explicit_pads = getExplicitPadding(layerParams, layer, pads); + int64_t begs[4] = {}; + int64_t ends[4] = {-1, -1, -1, -1}; + if (explicit_pads) + { + name += "/deconv"; + layerParams.set("pad_mode", "VALID"); + for (int i = 2; i < 4; ++i) // begins=[0, 0, a, b], ends=[-1, -1, c, d] + { + begs[i] = pads[2*i]; + ends[i] = -1 - pads[2*i + 1]; + } + } // For convolution layer, output shape computes as // o = 1 + (i - k + 2*p) / s @@ -1721,8 +1803,9 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso const int strideY = layerParams.get("stride_h"); const int strideX = layerParams.get("stride_w"); Mat outShape = getTensorContent(getConstBlob(layer, value_id, 0)); - const int outH = outShape.at(1); - const int outW = outShape.at(2); + int shift = (getDataLayout(layer) == DATA_LAYOUT_NCHW); + const int outH = outShape.at(1 + shift) + begs[2] - 1 - ends[2]; + const int outW = outShape.at(2 + shift) + begs[3] - 1 - ends[3]; if (layerParams.get("pad_mode") == "SAME") { layerParams.set("adj_w", (outW - 1) % strideX); @@ -1738,6 +1821,16 @@ void TFImporter::parseConv2DBackpropInput(tensorflow::GraphDef& net, const tenso // one input only connect(layer_id, dstNet, parsePin(layer.input(2)), id, 0); + if (explicit_pads) // If we have explicit paddings, remove extra data + { + 
layerParams.set("begin", DictValue::arrayInt(begs, sizeof(begs) / sizeof(begs[0]))); + layerParams.set("end", DictValue::arrayInt(ends, sizeof(ends) / sizeof(ends[0]))); + + int id = dstNet.addLayer(layer.name(), "Slice", layerParams); + layer_id[layer.name()] = id; + + connect(layer_id, dstNet, parsePin(name), id, 0); + } } void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) @@ -2717,7 +2810,6 @@ void TFImporter::populateNet() addConstNodes(netBin, value_id, layers_to_ignore); addConstNodes(netTxt, value_id, layers_to_ignore); - for (int li = 0; li < layersSize; li++) { const tensorflow::NodeDef& layer = net.node(li); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 4ba4f29322d1..4f7840f9e4eb 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -203,6 +203,16 @@ TEST_P(Test_TensorFlow_layers, padding) runTensorFlowNet("keras_pad_concat"); } +TEST_P(Test_TensorFlow_layers, padding_asymmetric) +{ + runTensorFlowNet("conv2d_asymmetric_pads_nchw"); + runTensorFlowNet("conv2d_asymmetric_pads_nhwc"); + runTensorFlowNet("max_pool2d_asymmetric_pads_nchw"); + runTensorFlowNet("max_pool2d_asymmetric_pads_nhwc"); + runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nchw"); + runTensorFlowNet("conv2d_backprop_input_asymmetric_pads_nhwc"); +} + TEST_P(Test_TensorFlow_layers, padding_same) { // Reference output values are in range [0.0006, 2.798] From 4015a5486c5cd831923c6d8019233228c06b99d1 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Wed, 21 Jul 2021 04:06:31 +0000 Subject: [PATCH 075/128] cmake: process modules in the same CMake scope --- CMakeLists.txt | 4 +- cmake/OpenCVModule.cmake | 39 +++++++++++++++++-- modules/CMakeLists.txt | 28 ------------- modules/highgui/CMakeLists.txt | 3 +- modules/highgui/cmake/detect_gtk.cmake | 11 ------ modules/highgui/cmake/detect_win32ui.cmake | 2 - 
modules/highgui/cmake/init.cmake | 19 +++------ .../videoio/cmake/detect_android_camera.cmake | 2 - .../cmake/detect_android_mediandk.cmake | 2 - modules/videoio/cmake/detect_aravis.cmake | 4 +- .../videoio/cmake/detect_avfoundation.cmake | 2 - modules/videoio/cmake/detect_dc1394.cmake | 5 +-- modules/videoio/cmake/detect_dshow.cmake | 2 - modules/videoio/cmake/detect_ffmpeg.cmake | 7 ---- modules/videoio/cmake/detect_gphoto.cmake | 2 - modules/videoio/cmake/detect_gstreamer.cmake | 6 +-- modules/videoio/cmake/detect_ios.cmake | 2 - modules/videoio/cmake/detect_msdk.cmake | 2 - modules/videoio/cmake/detect_msmf.cmake | 3 -- modules/videoio/cmake/detect_openni2.cmake | 4 +- modules/videoio/cmake/detect_pvapi.cmake | 2 - modules/videoio/cmake/detect_realsense.cmake | 4 +- modules/videoio/cmake/detect_ueye.cmake | 2 - modules/videoio/cmake/detect_v4l.cmake | 2 - modules/videoio/cmake/detect_ximea.cmake | 2 - modules/videoio/cmake/detect_xine.cmake | 2 - modules/videoio/cmake/init.cmake | 19 +++------ 27 files changed, 56 insertions(+), 126 deletions(-) delete mode 100644 modules/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index f4fe0385d12e..48bce581f468 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -913,7 +913,7 @@ add_subdirectory(include) ocv_add_modules_compiler_options() # OpenCV modules -add_subdirectory(modules) +ocv_register_modules() # Generate targets for documentation add_subdirectory(doc) @@ -1243,7 +1243,7 @@ endif(WIN32) # ========================== GUI ========================== status("") -status(" GUI: ") +status(" GUI: " "${OPENCV_HIGHGUI_BUILTIN_BACKEND}") if(WITH_QT OR HAVE_QT) if(HAVE_QT5) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index 7c48aad9c295..9981620f2560 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -254,7 +254,7 @@ function(_glob_locations out_paths out_names) list(LENGTH paths before) get_filename_component(path "${path}" ABSOLUTE) # Either module itself - if(NOT 
path STREQUAL CMAKE_CURRENT_SOURCE_DIR AND EXISTS "${path}/CMakeLists.txt") + if(NOT path STREQUAL "${OpenCV_SOURCE_DIR}/modules" AND EXISTS "${path}/CMakeLists.txt") get_filename_component(name "${path}" NAME) list(APPEND paths "${path}") list(APPEND names "${name}") @@ -296,7 +296,7 @@ macro(_add_modules_1 paths names) list(GET ${names} ${i} __name) #message(STATUS "First pass: ${__name} => ${__path}") include("${__path}/cmake/init.cmake" OPTIONAL) - add_subdirectory("${__path}" "${CMAKE_CURRENT_BINARY_DIR}/.firstpass/${__name}") + add_subdirectory("${__path}" "${OpenCV_BINARY_DIR}/modules/.firstpass/${__name}") endforeach() endif() endmacro() @@ -316,7 +316,7 @@ macro(_add_modules_2) endif() string(REGEX REPLACE "^opencv_" "" name "${m}") #message(STATUS "Second pass: ${name} => ${OPENCV_MODULE_${m}_LOCATION}") - add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${CMAKE_CURRENT_BINARY_DIR}/${name}") + add_subdirectory("${OPENCV_MODULE_${m}_LOCATION}" "${OpenCV_BINARY_DIR}/modules/${name}") endif() ocv_cmake_hook(POST_MODULES_CREATE_${the_module}) endforeach() @@ -369,7 +369,6 @@ macro(ocv_glob_modules main_root) __ocv_resolve_dependencies() # create modules - set(OPENCV_INITIAL_PASS OFF PARENT_SCOPE) set(OPENCV_INITIAL_PASS OFF) ocv_cmake_hook(PRE_MODULES_CREATE) _add_modules_2(${OPENCV_MODULES_BUILD}) @@ -377,6 +376,37 @@ macro(ocv_glob_modules main_root) endmacro() +# called by root CMakeLists.txt +macro(ocv_register_modules) + if(NOT OPENCV_MODULES_PATH) + set(OPENCV_MODULES_PATH "${OpenCV_SOURCE_DIR}/modules") + endif() + + ocv_glob_modules(${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) + + # build lists of modules to be documented + set(OPENCV_MODULES_MAIN "") + set(OPENCV_MODULES_EXTRA "") + + foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MODULES_DISABLED_AUTO} ${OPENCV_MODULES_DISABLED_FORCE}) + string(REGEX REPLACE "^opencv_" "" mod "${mod}") + if("${OPENCV_MODULE_opencv_${mod}_LOCATION}" STREQUAL 
"${OpenCV_SOURCE_DIR}/modules/${mod}") + list(APPEND OPENCV_MODULES_MAIN ${mod}) + else() + list(APPEND OPENCV_MODULES_EXTRA ${mod}) + endif() + endforeach() + ocv_list_sort(OPENCV_MODULES_MAIN) + ocv_list_sort(OPENCV_MODULES_EXTRA) + set(FIXED_ORDER_MODULES core imgproc imgcodecs videoio highgui video calib3d features2d objdetect dnn ml flann photo stitching) + list(REMOVE_ITEM OPENCV_MODULES_MAIN ${FIXED_ORDER_MODULES}) + set(OPENCV_MODULES_MAIN ${FIXED_ORDER_MODULES} ${OPENCV_MODULES_MAIN}) + + set(OPENCV_MODULES_MAIN ${OPENCV_MODULES_MAIN} CACHE INTERNAL "List of main modules" FORCE) + set(OPENCV_MODULES_EXTRA ${OPENCV_MODULES_EXTRA} CACHE INTERNAL "List of extra modules" FORCE) +endmacro() + + # disables OpenCV module with missing dependencies function(__ocv_module_turn_off the_module) list(REMOVE_ITEM OPENCV_MODULES_DISABLED_AUTO "${the_module}") @@ -877,6 +907,7 @@ macro(ocv_create_module) endmacro() macro(_ocv_create_module) + add_definitions(-D__OPENCV_BUILD=1) ocv_compiler_optimization_process_sources(OPENCV_MODULE_${the_module}_SOURCES OPENCV_MODULE_${the_module}_DEPS_EXT ${the_module}) set(__module_headers ${OPENCV_MODULE_${the_module}_HEADERS}) diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt deleted file mode 100644 index 6a8004036b28..000000000000 --- a/modules/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -add_definitions(-D__OPENCV_BUILD=1) - -if(NOT OPENCV_MODULES_PATH) - set(OPENCV_MODULES_PATH "${CMAKE_CURRENT_SOURCE_DIR}") -endif() - -ocv_glob_modules(${OPENCV_MODULES_PATH} ${OPENCV_EXTRA_MODULES_PATH}) - -# build lists of modules to be documented -set(OPENCV_MODULES_MAIN "") -set(OPENCV_MODULES_EXTRA "") - -foreach(mod ${OPENCV_MODULES_BUILD} ${OPENCV_MODULES_DISABLED_USER} ${OPENCV_MODULES_DISABLED_AUTO} ${OPENCV_MODULES_DISABLED_FORCE}) - string(REGEX REPLACE "^opencv_" "" mod "${mod}") - if("${OPENCV_MODULE_opencv_${mod}_LOCATION}" STREQUAL "${OpenCV_SOURCE_DIR}/modules/${mod}") - list(APPEND OPENCV_MODULES_MAIN ${mod}) - else() 
- list(APPEND OPENCV_MODULES_EXTRA ${mod}) - endif() -endforeach() -ocv_list_sort(OPENCV_MODULES_MAIN) -ocv_list_sort(OPENCV_MODULES_EXTRA) -set(FIXED_ORDER_MODULES core imgproc imgcodecs videoio highgui video calib3d features2d objdetect dnn ml flann photo stitching) -list(REMOVE_ITEM OPENCV_MODULES_MAIN ${FIXED_ORDER_MODULES}) -set(OPENCV_MODULES_MAIN ${FIXED_ORDER_MODULES} ${OPENCV_MODULES_MAIN}) - -set(OPENCV_MODULES_MAIN ${OPENCV_MODULES_MAIN} CACHE INTERNAL "List of main modules" FORCE) -set(OPENCV_MODULES_EXTRA ${OPENCV_MODULES_EXTRA} CACHE INTERNAL "List of extra modules" FORCE) diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index bc31b84c74e1..2b630bfed80d 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -218,7 +218,8 @@ endif() if(NOT OPENCV_HIGHGUI_BUILTIN_BACKEND) set(OPENCV_HIGHGUI_BUILTIN_BACKEND "NONE") endif() -message(STATUS "highgui: using builtin backend: ${OPENCV_HIGHGUI_BUILTIN_BACKEND}") # FIXIT: propagate to root CMake +message(STATUS "highgui: using builtin backend: ${OPENCV_HIGHGUI_BUILTIN_BACKEND}") +set(OPENCV_HIGHGUI_BUILTIN_BACKEND "${OPENCV_HIGHGUI_BUILTIN_BACKEND}" PARENT_SCOPE) # informational if(TRUE) # these variables are set by 'ocv_append_build_options(HIGHGUI ...)' diff --git a/modules/highgui/cmake/detect_gtk.cmake b/modules/highgui/cmake/detect_gtk.cmake index cdc054fad0c5..c58246ac5414 100644 --- a/modules/highgui/cmake/detect_gtk.cmake +++ b/modules/highgui/cmake/detect_gtk.cmake @@ -6,8 +6,6 @@ if(WITH_GTK) if(HAVE_GTK3) ocv_add_external_target(gtk3 "${GTK3_INCLUDE_DIRS}" "${GTK3_LIBRARIES}" "HAVE_GTK3;HAVE_GTK") set(HAVE_GTK TRUE) - set(HAVE_GTK3 ${HAVE_GTK3} PARENT_SCOPE) - set(GTK3_VERSION "${GTK3_VERSION}" PARENT_SCOPE) # informational endif() endif() if((PROJECT_NAME STREQUAL "OpenCV" AND HIGHGUI_ENABLE_PLUGINS) OR NOT HAVE_GTK3) @@ -19,8 +17,6 @@ if(WITH_GTK) else() ocv_add_external_target(gtk2 "${GTK2_INCLUDE_DIRS}" "${GTK2_LIBRARIES}" 
"HAVE_GTK2;HAVE_GTK") set(HAVE_GTK TRUE) - set(HAVE_GTK2 ${HAVE_GTK2} PARENT_SCOPE) - set(GTK2_VERSION "${GTK2_VERSION}" PARENT_SCOPE) # informational endif() endif() endif() @@ -29,15 +25,11 @@ if(WITH_GTK) message(FATAL_ERROR "gthread not found. This library is required when building with GTK support") else() ocv_add_external_target(gthread "${GTHREAD_INCLUDE_DIRS}" "${GTHREAD_LIBRARIES}" "HAVE_GTHREAD") - set(HAVE_GTHREAD "${HAVE_GTHREAD}" PARENT_SCOPE) # informational - set(GTHREAD_VERSION "${GTHREAD_VERSION}" PARENT_SCOPE) # informational endif() if((WITH_OPENGL OR HAVE_OPENGL) AND HAVE_GTK2) ocv_check_modules(GTKGLEXT gtkglext-1.0) if(HAVE_GTKGLEXT) ocv_add_external_target(gtkglext "${GTKGLEXT_INCLUDE_DIRS}" "${GTKGLEXT_LIBRARIES}" "HAVE_GTKGLEXT") - set(HAVE_GTKGLEXT "${HAVE_GTKGLEXT}" PARENT_SCOPE) # informational - set(GTKGLEXT_VERSION "${GTKGLEXT_VERSION}" PARENT_SCOPE) # informational endif() endif() elseif(HAVE_GTK) @@ -48,9 +40,6 @@ if(WITH_OPENGL AND HAVE_GTKGLEXT) find_package(OpenGL QUIET) if(OPENGL_FOUND) set(HAVE_OPENGL TRUE) - #set(HAVE_OPENGL ${HAVE_OPENGL} PARENT_SCOPE) ocv_add_external_target(gtk_opengl "${OPENGL_INCLUDE_DIRS}" "${OPENGL_LIBRARIES}" "HAVE_OPENGL") endif() endif() - -set(HAVE_GTK ${HAVE_GTK} PARENT_SCOPE) diff --git a/modules/highgui/cmake/detect_win32ui.cmake b/modules/highgui/cmake/detect_win32ui.cmake index 1d2fdc5d4654..c5e358ffa710 100644 --- a/modules/highgui/cmake/detect_win32ui.cmake +++ b/modules/highgui/cmake/detect_win32ui.cmake @@ -13,5 +13,3 @@ if(WITH_WIN32UI) ocv_add_external_target(win32ui "" "${__libs}" "HAVE_WIN32UI") endif() endif() - -set(HAVE_WIN32UI "${HAVE_WIN32UI}" PARENT_SCOPE) # informational diff --git a/modules/highgui/cmake/init.cmake b/modules/highgui/cmake/init.cmake index 1626d254daf9..a302c4d534a6 100644 --- a/modules/highgui/cmake/init.cmake +++ b/modules/highgui/cmake/init.cmake @@ -27,20 +27,11 @@ endif() include(FindPkgConfig) -# FIXIT: stop using PARENT_SCOPE in dependencies 
-if(PROJECT_NAME STREQUAL "OpenCV") - macro(add_backend backend_id cond_var) - if(${cond_var}) - include("${CMAKE_CURRENT_LIST_DIR}/detect_${backend_id}.cmake") - endif() - endmacro() -else() - function(add_backend backend_id cond_var) - if(${cond_var}) - include("${CMAKE_CURRENT_LIST_DIR}/detect_${backend_id}.cmake") - endif() - endfunction() -endif() +macro(add_backend backend_id cond_var) + if(${cond_var}) + include("${CMAKE_CURRENT_LIST_DIR}/detect_${backend_id}.cmake") + endif() +endmacro() add_backend("gtk" WITH_GTK) add_backend("win32ui" WITH_WIN32UI) diff --git a/modules/videoio/cmake/detect_android_camera.cmake b/modules/videoio/cmake/detect_android_camera.cmake index ded4c91ccf17..a465751334fd 100644 --- a/modules/videoio/cmake/detect_android_camera.cmake +++ b/modules/videoio/cmake/detect_android_camera.cmake @@ -4,5 +4,3 @@ if(ANDROID AND ANDROID_NATIVE_API_LEVEL GREATER 23) set(libs "-landroid -llog -lcamera2ndk") ocv_add_external_target(android_native_camera "" "${libs}" "HAVE_ANDROID_NATIVE_CAMERA") endif() - -set(HAVE_ANDROID_NATIVE_CAMERA ${HAVE_ANDROID_NATIVE_CAMERA} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_android_mediandk.cmake b/modules/videoio/cmake/detect_android_mediandk.cmake index edfb4bbbc5c3..cee64ab54991 100644 --- a/modules/videoio/cmake/detect_android_mediandk.cmake +++ b/modules/videoio/cmake/detect_android_mediandk.cmake @@ -4,5 +4,3 @@ if(ANDROID AND ANDROID_NATIVE_API_LEVEL GREATER 20) set(libs "-landroid -llog -lmediandk") ocv_add_external_target(android_mediandk "" "${libs}" "HAVE_ANDROID_MEDIANDK") endif() - -set(HAVE_ANDROID_MEDIANDK ${HAVE_ANDROID_MEDIANDK} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_aravis.cmake b/modules/videoio/cmake/detect_aravis.cmake index 79d6a217db2e..e7b382899343 100644 --- a/modules/videoio/cmake/detect_aravis.cmake +++ b/modules/videoio/cmake/detect_aravis.cmake @@ -21,7 +21,7 @@ if(NOT HAVE_ARAVIS_API) string(REGEX REPLACE ".*ARAVIS_MAJOR_VERSION[^0-9]+([0-9]+).*" "\\1" 
ver_major "${ver_strings}") string(REGEX REPLACE ".*ARAVIS_MINOR_VERSION[^0-9]+([0-9]+).*" "\\1" ver_minor "${ver_strings}") string(REGEX REPLACE ".*ARAVIS_MICRO_VERSION[^0-9]+([0-9]+).*" "\\1" ver_micro "${ver_strings}") - set(ARAVIS_VERSION "${ver_major}.${ver_minor}.${ver_micro}" PARENT_SCOPE) # informational + set(ARAVIS_VERSION "${ver_major}.${ver_minor}.${ver_micro}") # informational set(ARAVIS_INCLUDE_DIRS "${ARAVIS_INCLUDE}") set(ARAVIS_LIBRARIES "${ARAVIS_LIBRARY}") endif() @@ -30,5 +30,3 @@ endif() if(HAVE_ARAVIS_API) ocv_add_external_target(aravis "${ARAVIS_INCLUDE_DIRS}" "${ARAVIS_LIBRARIES}" "HAVE_ARAVIS_API") endif() - -set(HAVE_ARAVIS_API ${HAVE_ARAVIS_API} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_avfoundation.cmake b/modules/videoio/cmake/detect_avfoundation.cmake index a341f587a199..2da4fabfab44 100644 --- a/modules/videoio/cmake/detect_avfoundation.cmake +++ b/modules/videoio/cmake/detect_avfoundation.cmake @@ -14,5 +14,3 @@ if(APPLE) endif() ocv_add_external_target(avfoundation "" "${libs}" "HAVE_AVFOUNDATION") endif() - -set(HAVE_AVFOUNDATION ${HAVE_AVFOUNDATION} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_dc1394.cmake b/modules/videoio/cmake/detect_dc1394.cmake index 51ab2dd80eb4..8bcee4bf7098 100644 --- a/modules/videoio/cmake/detect_dc1394.cmake +++ b/modules/videoio/cmake/detect_dc1394.cmake @@ -2,7 +2,6 @@ if(NOT HAVE_DC1394_2 AND PKG_CONFIG_FOUND) ocv_check_modules(DC1394_2 libdc1394-2) if(DC1394_2_FOUND) - set(DC1394_2_VERSION "${DC1394_2_VERSION}" PARENT_SCOPE) # informational set(HAVE_DC1394_2 TRUE) endif() endif() @@ -20,12 +19,10 @@ if(NOT HAVE_DC1394_2) set(HAVE_DC1394_2 TRUE) set(DC1394_2_INCLUDE_DIRS "${DC1394_INCLUDE}") set(DC1394_2_LIBRARIES "${DC1394_LIBRARY}") - set(DC1394_2_VERSION "unknown" PARENT_SCOPE) # informational + set(DC1394_2_VERSION "unknown") # informational endif() endif() if(HAVE_DC1394_2) ocv_add_external_target(dc1394_2 "${DC1394_2_INCLUDE_DIRS}" "${DC1394_2_LIBRARIES}" 
"HAVE_DC1394_2") endif() - -set(HAVE_DC1394_2 ${HAVE_DC1394_2} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_dshow.cmake b/modules/videoio/cmake/detect_dshow.cmake index 3f41b3fd34e2..928134c08c54 100644 --- a/modules/videoio/cmake/detect_dshow.cmake +++ b/modules/videoio/cmake/detect_dshow.cmake @@ -10,5 +10,3 @@ endif() if(HAVE_DSHOW) ocv_add_external_target(dshow "" "" "HAVE_DSHOW") endif() - -set(HAVE_DSHOW ${HAVE_DSHOW} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_ffmpeg.cmake b/modules/videoio/cmake/detect_ffmpeg.cmake index 58de4b9515ac..c33eaf221b8a 100644 --- a/modules/videoio/cmake/detect_ffmpeg.cmake +++ b/modules/videoio/cmake/detect_ffmpeg.cmake @@ -14,11 +14,6 @@ if(NOT HAVE_FFMPEG AND WIN32 AND NOT ARM AND NOT OPENCV_FFMPEG_SKIP_DOWNLOAD) download_win_ffmpeg(FFMPEG_CMAKE_SCRIPT) if(FFMPEG_CMAKE_SCRIPT) include("${FFMPEG_CMAKE_SCRIPT}") - set(FFMPEG_libavcodec_VERSION ${FFMPEG_libavcodec_VERSION} PARENT_SCOPE) # info - set(FFMPEG_libavformat_VERSION ${FFMPEG_libavformat_VERSION} PARENT_SCOPE) # info - set(FFMPEG_libavutil_VERSION ${FFMPEG_libavutil_VERSION} PARENT_SCOPE) # info - set(FFMPEG_libswscale_VERSION ${FFMPEG_libswscale_VERSION} PARENT_SCOPE) # info - set(FFMPEG_libavresample_VERSION ${FFMPEG_libavresample_VERSION} PARENT_SCOPE) # info set(HAVE_FFMPEG TRUE) set(HAVE_FFMPEG_WRAPPER TRUE) endif() @@ -132,5 +127,3 @@ elseif(HAVE_FFMPEG) ocv_add_external_target(ffmpeg.plugin_deps "${__plugin_include_dirs}" "${__plugin_include_libs}" "${__plugin_defines}") endif() endif() - -set(HAVE_FFMPEG ${HAVE_FFMPEG} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_gphoto.cmake b/modules/videoio/cmake/detect_gphoto.cmake index 0d6f1212eb37..2cb23c00335f 100644 --- a/modules/videoio/cmake/detect_gphoto.cmake +++ b/modules/videoio/cmake/detect_gphoto.cmake @@ -9,5 +9,3 @@ endif() if(HAVE_GPHOTO2) ocv_add_external_target(gphoto2 "${GPHOTO2_INCLUDE_DIRS}" "${GPHOTO2_LIBRARIES}" "HAVE_GPHOTO2") endif() - -set(HAVE_GPHOTO2 
${HAVE_GPHOTO2} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_gstreamer.cmake b/modules/videoio/cmake/detect_gstreamer.cmake index 219878616175..47ea7a0b3071 100644 --- a/modules/videoio/cmake/detect_gstreamer.cmake +++ b/modules/videoio/cmake/detect_gstreamer.cmake @@ -69,7 +69,7 @@ if(NOT HAVE_GSTREAMER AND WIN32) string(REGEX REPLACE ".*GST_VERSION_MAJOR[^0-9]+([0-9]+).*" "\\1" ver_major "${ver_strings}") string(REGEX REPLACE ".*GST_VERSION_MINOR[^0-9]+([0-9]+).*" "\\1" ver_minor "${ver_strings}") string(REGEX REPLACE ".*GST_VERSION_MICRO[^0-9]+([0-9]+).*" "\\1" ver_micro "${ver_strings}") - set(GSTREAMER_VERSION "${ver_major}.${ver_minor}.${ver_micro}" PARENT_SCOPE) # informational + set(GSTREAMER_VERSION "${ver_major}.${ver_minor}.${ver_micro}") # informational set(HAVE_GSTREAMER TRUE) set(GSTREAMER_LIBRARIES ${GSTREAMER_gstreamer_LIBRARY} @@ -95,7 +95,7 @@ if(NOT HAVE_GSTREAMER AND PKG_CONFIG_FOUND) ocv_check_modules(GSTREAMER_video gstreamer-video-1.0) if(GSTREAMER_base_FOUND AND GSTREAMER_app_FOUND AND GSTREAMER_riff_FOUND AND GSTREAMER_pbutils_FOUND AND GSTREAMER_video_FOUND) set(HAVE_GSTREAMER TRUE) - set(GSTREAMER_VERSION ${GSTREAMER_base_VERSION} PARENT_SCOPE) # informational + set(GSTREAMER_VERSION ${GSTREAMER_base_VERSION}) # informational set(GSTREAMER_LIBRARIES ${GSTREAMER_base_LIBRARIES} ${GSTREAMER_app_LIBRARIES} ${GSTREAMER_riff_LIBRARIES} ${GSTREAMER_pbutils_LIBRARIES} ${GSTREAMER_video_LIBRARIES}) set(GSTREAMER_INCLUDE_DIRS ${GSTREAMER_base_INCLUDE_DIRS} ${GSTREAMER_app_INCLUDE_DIRS} ${GSTREAMER_riff_INCLUDE_DIRS} ${GSTREAMER_pbutils_INCLUDE_DIRS} ${GSTREAMER_video_INCLUDE_DIRS}) endif() @@ -104,5 +104,3 @@ endif() if(HAVE_GSTREAMER) ocv_add_external_target(gstreamer "${GSTREAMER_INCLUDE_DIRS}" "${GSTREAMER_LIBRARIES}" "HAVE_GSTREAMER") endif() - -set(HAVE_GSTREAMER ${HAVE_GSTREAMER} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_ios.cmake b/modules/videoio/cmake/detect_ios.cmake index c75426060b0b..8d48dd6f3bea 100644 --- 
a/modules/videoio/cmake/detect_ios.cmake +++ b/modules/videoio/cmake/detect_ios.cmake @@ -11,5 +11,3 @@ if(APPLE AND IOS) "-framework UIKit") ocv_add_external_target(cap_ios "" "${libs}" "HAVE_CAP_IOS") endif() - -set(HAVE_CAP_IOS ${HAVE_CAP_IOS} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_msdk.cmake b/modules/videoio/cmake/detect_msdk.cmake index d035c3f5cc11..83701425e1f8 100644 --- a/modules/videoio/cmake/detect_msdk.cmake +++ b/modules/videoio/cmake/detect_msdk.cmake @@ -70,5 +70,3 @@ if(HAVE_MFX) list(APPEND MFX_DEFS "HAVE_MFX") ocv_add_external_target(mediasdk "${MFX_INCLUDE_DIRS}" "${MFX_LIBRARIES}" "${MFX_DEFS}") endif() - -set(HAVE_MFX ${HAVE_MFX} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_msmf.cmake b/modules/videoio/cmake/detect_msmf.cmake index a1c91dab670a..aebc226bcfc9 100644 --- a/modules/videoio/cmake/detect_msmf.cmake +++ b/modules/videoio/cmake/detect_msmf.cmake @@ -20,6 +20,3 @@ if(HAVE_MSMF) endif() ocv_add_external_target(msmf "" "" "${defs}") endif() - -set(HAVE_MSMF ${HAVE_MSMF} PARENT_SCOPE) -set(HAVE_MSMF_DXVA ${HAVE_MSMF_DXVA} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_openni2.cmake b/modules/videoio/cmake/detect_openni2.cmake index 76c31454da81..54a5c62beddc 100644 --- a/modules/videoio/cmake/detect_openni2.cmake +++ b/modules/videoio/cmake/detect_openni2.cmake @@ -42,8 +42,6 @@ if(HAVE_OPENNI2) string(REGEX REPLACE ".*ONI_VERSION_MAJOR[^0-9]+([0-9]+).*" "\\1" ver_major "${ver_strings}") string(REGEX REPLACE ".*ONI_VERSION_MINOR[^0-9]+([0-9]+).*" "\\1" ver_minor "${ver_strings}") string(REGEX REPLACE ".*ONI_VERSION_MAINTENANCE[^0-9]+([0-9]+).*" "\\1" ver_maint "${ver_strings}") - set(OPENNI2_VERSION "${ver_major}.${ver_minor}.${ver_maint}" PARENT_SCOPE) # informational + set(OPENNI2_VERSION "${ver_major}.${ver_minor}.${ver_maint}") # informational ocv_add_external_target(openni2 "${OPENNI2_INCLUDE_DIRS}" "${OPENNI2_LIBRARIES}" "HAVE_OPENNI2") endif() - -set(HAVE_OPENNI2 ${HAVE_OPENNI2} PARENT_SCOPE) 
diff --git a/modules/videoio/cmake/detect_pvapi.cmake b/modules/videoio/cmake/detect_pvapi.cmake index a0f4673fdc1d..f2c6d4bceaa5 100644 --- a/modules/videoio/cmake/detect_pvapi.cmake +++ b/modules/videoio/cmake/detect_pvapi.cmake @@ -19,5 +19,3 @@ endif() if(HAVE_PVAPI) ocv_add_external_target(pvapi "${PVAPI_INCLUDE}" "${PVAPI_LIBRARY}" "HAVE_PVAPI") endif() - -set(HAVE_PVAPI ${HAVE_PVAPI} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_realsense.cmake b/modules/videoio/cmake/detect_realsense.cmake index 32e5e02c9e7b..065f5488301f 100644 --- a/modules/videoio/cmake/detect_realsense.cmake +++ b/modules/videoio/cmake/detect_realsense.cmake @@ -4,7 +4,7 @@ if(NOT HAVE_LIBREALSENSE) find_package(realsense2 QUIET) if(realsense2_FOUND) set(HAVE_LIBREALSENSE TRUE) - set(LIBREALSENSE_VERSION "${realsense2_VERSION}" PARENT_SCOPE) # informational + set(LIBREALSENSE_VERSION "${realsense2_VERSION}") # informational ocv_add_external_target(librealsense "" "${realsense2_LIBRARY}" "HAVE_LIBREALSENSE") endif() endif() @@ -20,7 +20,7 @@ if(NOT HAVE_LIBREALSENSE) string(REGEX REPLACE ".*RS2_API_MAJOR_VERSION[^0-9]+([0-9]+).*" "\\1" ver_major "${ver_strings}") string(REGEX REPLACE ".*RS2_API_MINOR_VERSION[^0-9]+([0-9]+).*" "\\1" ver_minor "${ver_strings}") string(REGEX REPLACE ".*RS2_API_PATCH_VERSION[^0-9]+([0-9]+).*" "\\1" ver_patch "${ver_strings}") - set(LIBREALSENSE_VERSION "${ver_major}.${ver_minor}.${ver_patch}" PARENT_SCOPE) # informational + set(LIBREALSENSE_VERSION "${ver_major}.${ver_minor}.${ver_patch}") # informational ocv_add_external_target(librealsense "${LIBREALSENSE_INCLUDE_DIR}" "${LIBREALSENSE_LIBRARIES}" "HAVE_LIBREALSENSE") endif() endif() diff --git a/modules/videoio/cmake/detect_ueye.cmake b/modules/videoio/cmake/detect_ueye.cmake index 495e9c245023..9428f9e59647 100644 --- a/modules/videoio/cmake/detect_ueye.cmake +++ b/modules/videoio/cmake/detect_ueye.cmake @@ -21,5 +21,3 @@ unset(_WIN_LIB_SUFFIX) if(HAVE_UEYE) ocv_add_external_target(ueye 
"${UEYE_INCLUDE}" "${UEYE_LIBRARY}" "HAVE_UEYE") endif() - -set(HAVE_UEYE ${HAVE_UEYE} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_v4l.cmake b/modules/videoio/cmake/detect_v4l.cmake index 05b73b003c4f..e413dae9ca4e 100644 --- a/modules/videoio/cmake/detect_v4l.cmake +++ b/modules/videoio/cmake/detect_v4l.cmake @@ -15,5 +15,3 @@ if(NOT HAVE_V4L) ocv_add_external_target(v4l "" "" "${defs}") endif() endif() - -set(HAVE_V4L ${HAVE_V4L} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_ximea.cmake b/modules/videoio/cmake/detect_ximea.cmake index 9cf295e3529b..7521e619b036 100644 --- a/modules/videoio/cmake/detect_ximea.cmake +++ b/modules/videoio/cmake/detect_ximea.cmake @@ -28,5 +28,3 @@ endif() if(HAVE_XIMEA) ocv_add_external_target(ximea "${XIMEA_INCLUDE}" "${XIMEA_LIBRARY}" "HAVE_XIMEA") endif() - -set(HAVE_XIMEA ${HAVE_XIMEA} PARENT_SCOPE) diff --git a/modules/videoio/cmake/detect_xine.cmake b/modules/videoio/cmake/detect_xine.cmake index 3e1f3010a431..0a6f64235349 100644 --- a/modules/videoio/cmake/detect_xine.cmake +++ b/modules/videoio/cmake/detect_xine.cmake @@ -5,5 +5,3 @@ endif() if(HAVE_XINE) ocv_add_external_target(xine "${XINE_INCLUDE_DIRS}" "${XINE_LIBRARIES}" "HAVE_XINE") endif() - -set(HAVE_XINE ${HAVE_XINE} PARENT_SCOPE) diff --git a/modules/videoio/cmake/init.cmake b/modules/videoio/cmake/init.cmake index 310df2d249e3..68838790b8a2 100644 --- a/modules/videoio/cmake/init.cmake +++ b/modules/videoio/cmake/init.cmake @@ -1,19 +1,10 @@ include(FindPkgConfig) -# FIXIT: stop using PARENT_SCOPE in dependencies -if(PROJECT_NAME STREQUAL "OpenCV") - macro(add_backend backend_id cond_var) - if(${cond_var}) - include("${CMAKE_CURRENT_LIST_DIR}/detect_${backend_id}.cmake") - endif() - endmacro() -else() - function(add_backend backend_id cond_var) - if(${cond_var}) - include("${CMAKE_CURRENT_LIST_DIR}/detect_${backend_id}.cmake") - endif() - endfunction() -endif() +macro(add_backend backend_id cond_var) + if(${cond_var}) + 
include("${CMAKE_CURRENT_LIST_DIR}/detect_${backend_id}.cmake") + endif() +endmacro() add_backend("ffmpeg" WITH_FFMPEG) add_backend("gstreamer" WITH_GSTREAMER) From 803ff8ebb9ed7960c28e28c6e11e77a783f77d0a Mon Sep 17 00:00:00 2001 From: Shreyas Taware Date: Wed, 7 Jul 2021 14:40:08 +0530 Subject: [PATCH 076/128] Update py_canny.markdown Fixed a word that was previously written as third argument but it is instead the fourth argument of cv.Canny() function --- doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown b/doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown index cbc2a72eecc7..d36e5784ebc8 100644 --- a/doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown +++ b/doc/py_tutorials/py_imgproc/py_canny/py_canny.markdown @@ -74,7 +74,7 @@ Canny Edge Detection in OpenCV OpenCV puts all the above in single function, **cv.Canny()**. We will see how to use it. First argument is our input image. Second and third arguments are our minVal and maxVal respectively. -Third argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By +Fourth argument is aperture_size. It is the size of Sobel kernel used for find image gradients. By default it is 3. Last argument is L2gradient which specifies the equation for finding gradient magnitude. If it is True, it uses the equation mentioned above which is more accurate, otherwise it uses this function: \f$Edge\_Gradient \; (G) = |G_x| + |G_y|\f$. By default, it is False. 
From a76274b549632464987805a29472a9520fb45389 Mon Sep 17 00:00:00 2001 From: Scott Noyes Date: Thu, 22 Jul 2021 14:58:20 -0500 Subject: [PATCH 077/128] minor grammar edits --- .../mat_the_basic_image_container.markdown | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown index f6a1a0a4fb2c..aafa9687d930 100644 --- a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown +++ b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.markdown @@ -84,8 +84,8 @@ a new header with the new boundaries: Mat D (A, Rect(10, 10, 100, 100) ); // using a rectangle Mat E = A(Range::all(), Range(1,3)); // using row and column boundaries @endcode -Now you may ask -- if the matrix itself may belong to multiple *Mat* objects who takes responsibility -for cleaning it up when it's no longer needed. The short answer is: the last object that used it. +Now you may ask -- if the matrix itself may belong to multiple *Mat* objects, who takes responsibility +for cleaning it up when it's no longer needed? The short answer is: the last object that used it. This is handled by using a reference counting mechanism. Whenever somebody copies a header of a *Mat* object, a counter is increased for the matrix. Whenever a header is cleaned, this counter is decreased. When the counter reaches zero the matrix is freed. Sometimes you will want to copy @@ -95,12 +95,12 @@ Mat F = A.clone(); Mat G; A.copyTo(G); @endcode -Now modifying *F* or *G* will not affect the matrix pointed by the *A*'s header. What you need to +Now modifying *F* or *G* will not affect the matrix pointed to by the *A*'s header. What you need to remember from all this is that: - Output image allocation for OpenCV functions is automatic (unless specified otherwise). 
- You do not need to think about memory management with OpenCV's C++ interface. -- The assignment operator and the copy constructor only copies the header. +- The assignment operator and the copy constructor only copy the header. - The underlying matrix of an image may be copied using the @ref cv::Mat::clone() and @ref cv::Mat::copyTo() functions. @@ -115,10 +115,10 @@ of these allows us to create many shades of gray. For *colorful* ways we have a lot more methods to choose from. Each of them breaks it down to three or four basic components and we can use the combination of these to create the others. The most popular one is RGB, mainly because this is also how our eye builds up colors. Its base colors are -red, green and blue. To code the transparency of a color sometimes a fourth element: alpha (A) is +red, green and blue. To code the transparency of a color sometimes a fourth element, alpha (A), is added. -There are, however, many other color systems each with their own advantages: +There are, however, many other color systems, each with their own advantages: - RGB is the most common as our eyes use something similar, however keep in mind that OpenCV standard display system composes colors using the BGR color space (red and blue channels are swapped places). @@ -132,11 +132,11 @@ There are, however, many other color systems each with their own advantages: Each of the building components has its own valid domains. This leads to the data type used. How we store a component defines the control we have over its domain. The smallest data type possible is *char*, which means one byte or 8 bits. This may be unsigned (so can store values from 0 to 255) or -signed (values from -127 to +127). Although in case of three components this already gives 16 -million possible colors to represent (like in case of RGB) we may acquire an even finer control by +signed (values from -127 to +127). 
Although this width, in the case of three components (like RGB), already gives 16 +million possible colors to represent, we may acquire an even finer control by using the float (4 byte = 32 bit) or double (8 byte = 64 bit) data types for each component. Nevertheless, remember that increasing the size of a component also increases the size of the whole -picture in the memory. +picture in memory. Creating a Mat object explicitly ---------------------------------- From acc576658ad628d46fd4e79c68c1419d438ce716 Mon Sep 17 00:00:00 2001 From: ZhangYin Date: Fri, 23 Jul 2021 22:08:43 +0800 Subject: [PATCH 078/128] Merge pull request #20412 from joy2myself:rvv-0.10 bug fixes for universal intrinsics of RISC-V back-end * Align universal intrinsic comparator behaviour with other platforms Set all bits to one for return value of int and fp comparators. * fix v_pack_triplets, v_pack_store and v_pack_u_store * Remove redundant CV_DECL_ALIGNED statements Co-authored-by: Alexander Smorkalov --- .../include/opencv2/core/hal/intrin_rvv.hpp | 299 +++++++++--------- 1 file changed, 151 insertions(+), 148 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index 4a3455b07385..51433cdbae72 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -737,7 +737,7 @@ OPENCV_HAL_IMPL_RVV_LOADSTORE_OP(v_float64x2, vfloat64m1_t, double, 1, 2, 64, f6 inline v_int8x16 v_load_halves(const schar* ptr0, const schar* ptr1) { - schar CV_DECL_ALIGNED(32) elems[16] = + schar elems[16] = { ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr0[4], ptr0[5], ptr0[6], ptr0[7], ptr1[0], ptr1[1], ptr1[2], ptr1[3], ptr1[4], ptr1[5], ptr1[6], ptr1[7] @@ -748,7 +748,7 @@ inline v_uint8x16 v_load_halves(const uchar* ptr0, const uchar* ptr1) { return v inline v_int16x8 v_load_halves(const short* ptr0, const short* ptr1) { - short CV_DECL_ALIGNED(32) elems[8] = + short elems[8] 
= { ptr0[0], ptr0[1], ptr0[2], ptr0[3], ptr1[0], ptr1[1], ptr1[2], ptr1[3] }; @@ -758,7 +758,7 @@ inline v_uint16x8 v_load_halves(const ushort* ptr0, const ushort* ptr1) { return inline v_int32x4 v_load_halves(const int* ptr0, const int* ptr1) { - int CV_DECL_ALIGNED(32) elems[4] = + int elems[4] = { ptr0[0], ptr0[1], ptr1[0], ptr1[1] }; @@ -766,7 +766,7 @@ inline v_int32x4 v_load_halves(const int* ptr0, const int* ptr1) } inline v_float32x4 v_load_halves(const float* ptr0, const float* ptr1) { - float CV_DECL_ALIGNED(32) elems[4] = + float elems[4] = { ptr0[0], ptr0[1], ptr1[0], ptr1[1] }; @@ -776,7 +776,7 @@ inline v_uint32x4 v_load_halves(const unsigned* ptr0, const unsigned* ptr1) { re inline v_int64x2 v_load_halves(const int64* ptr0, const int64* ptr1) { - int64 CV_DECL_ALIGNED(32) elems[2] = + int64 elems[2] = { ptr0[0], ptr1[0] }; @@ -787,7 +787,7 @@ inline v_uint64x2 v_load_halves(const uint64* ptr0, const uint64* ptr1) { return #if CV_SIMD128_64F inline v_float64x2 v_load_halves(const double* ptr0, const double* ptr1) { - double CV_DECL_ALIGNED(32) elems[2] = + double elems[2] = { ptr0[0], ptr1[0] }; @@ -800,7 +800,7 @@ inline v_float64x2 v_load_halves(const double* ptr0, const double* ptr1) inline v_int8x16 v_lut(const schar* tab, const int* idx) { - schar CV_DECL_ALIGNED(32) elems[16] = + schar elems[16] = { tab[idx[ 0]], tab[idx[ 1]], @@ -823,7 +823,7 @@ inline v_int8x16 v_lut(const schar* tab, const int* idx) } inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) { - schar CV_DECL_ALIGNED(32) elems[16] = + schar elems[16] = { tab[idx[0]], tab[idx[0] + 1], @@ -846,7 +846,7 @@ inline v_int8x16 v_lut_pairs(const schar* tab, const int* idx) } inline v_int8x16 v_lut_quads(const schar* tab, const int* idx) { - schar CV_DECL_ALIGNED(32) elems[16] = + schar elems[16] = { tab[idx[0]], tab[idx[0] + 1], @@ -873,7 +873,7 @@ inline v_uint8x16 v_lut_quads(const uchar* tab, const int* idx) { return v_reint inline v_int16x8 v_lut(const short* tab, const int* 
idx) { - short CV_DECL_ALIGNED(32) elems[8] = + short elems[8] = { tab[idx[0]], tab[idx[1]], @@ -888,7 +888,7 @@ inline v_int16x8 v_lut(const short* tab, const int* idx) } inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) { - short CV_DECL_ALIGNED(32) elems[8] = + short elems[8] = { tab[idx[0]], tab[idx[0] + 1], @@ -903,7 +903,7 @@ inline v_int16x8 v_lut_pairs(const short* tab, const int* idx) } inline v_int16x8 v_lut_quads(const short* tab, const int* idx) { - short CV_DECL_ALIGNED(32) elems[8] = + short elems[8] = { tab[idx[0]], tab[idx[0] + 1], @@ -922,7 +922,7 @@ inline v_uint16x8 v_lut_quads(const ushort* tab, const int* idx) { return v_rein inline v_int32x4 v_lut(const int* tab, const int* idx) { - int CV_DECL_ALIGNED(32) elems[4] = + int elems[4] = { tab[idx[0]], tab[idx[1]], @@ -933,7 +933,7 @@ inline v_int32x4 v_lut(const int* tab, const int* idx) } inline v_int32x4 v_lut_pairs(const int* tab, const int* idx) { - int CV_DECL_ALIGNED(32) elems[4] = + int elems[4] = { tab[idx[0]], tab[idx[0] + 1], @@ -953,7 +953,7 @@ inline v_uint32x4 v_lut_quads(const unsigned* tab, const int* idx) { return v_re inline v_int64x2 v_lut(const int64_t* tab, const int* idx) { - int64_t CV_DECL_ALIGNED(32) elems[2] = + int64_t elems[2] = { tab[idx[0]], tab[idx[1]] @@ -969,7 +969,7 @@ inline v_uint64x2 v_lut_pairs(const uint64* tab, const int* idx) { return v_rein inline v_float32x4 v_lut(const float* tab, const int* idx) { - float CV_DECL_ALIGNED(32) elems[4] = + float elems[4] = { tab[idx[0]], tab[idx[1]], @@ -980,7 +980,7 @@ inline v_float32x4 v_lut(const float* tab, const int* idx) } inline v_float32x4 v_lut_pairs(const float* tab, const int* idx) { - float CV_DECL_ALIGNED(32) elems[4] = + float elems[4] = { tab[idx[0]], tab[idx[0] + 1], @@ -996,7 +996,7 @@ inline v_float32x4 v_lut_quads(const float* tab, const int* idx) inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) { - int CV_DECL_ALIGNED(32) elems[4] = + int elems[4] = { 
tab[v_extract_n<0>(idxvec)], tab[v_extract_n<1>(idxvec)], @@ -1008,7 +1008,7 @@ inline v_int32x4 v_lut(const int* tab, const v_int32x4& idxvec) inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) { - unsigned CV_DECL_ALIGNED(32) elems[4] = + unsigned elems[4] = { tab[v_extract_n<0>(idxvec)], tab[v_extract_n<1>(idxvec)], @@ -1020,7 +1020,7 @@ inline v_uint32x4 v_lut(const unsigned* tab, const v_int32x4& idxvec) inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) { - float CV_DECL_ALIGNED(32) elems[4] = + float elems[4] = { tab[v_extract_n<0>(idxvec)], tab[v_extract_n<1>(idxvec)], @@ -1032,7 +1032,7 @@ inline v_float32x4 v_lut(const float* tab, const v_int32x4& idxvec) inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_float32x4& x, v_float32x4& y) { - int CV_DECL_ALIGNED(32) idx[4]; + int idx[4]; v_store_aligned(idx, idxvec); x = v_float32x4(tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]]); @@ -1042,7 +1042,7 @@ inline void v_lut_deinterleave(const float* tab, const v_int32x4& idxvec, v_floa #if CV_SIMD128_64F inline v_float64x2 v_lut(const double* tab, const int* idx) { - double CV_DECL_ALIGNED(32) elems[2] = + double elems[2] = { tab[idx[0]], tab[idx[1]] @@ -1057,7 +1057,7 @@ inline v_float64x2 v_lut_pairs(const double* tab, const int* idx) inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) { - double CV_DECL_ALIGNED(32) elems[2] = + double elems[2] = { tab[v_extract_n<0>(idxvec)], tab[v_extract_n<1>(idxvec)] @@ -1067,7 +1067,7 @@ inline v_float64x2 v_lut(const double* tab, const v_int32x4& idxvec) inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_float64x2& x, v_float64x2& y) { - int CV_DECL_ALIGNED(32) idx[4] = {0}; + int idx[4] = {0}; v_store_aligned(idx, idxvec); x = v_float64x2(tab[idx[0]], tab[idx[1]]); @@ -1079,7 +1079,7 @@ inline void v_lut_deinterleave(const double* tab, const v_int32x4& idxvec, v_flo inline v_uint8x16 v_pack_b(const v_uint16x8& a, 
const v_uint16x8& b) { - ushort CV_DECL_ALIGNED(32) ptr[16] = {0}; + ushort ptr[16] = {0}; v_store(ptr, a); v_store(ptr + 8, b); return v_uint8x16(vnsrl_wx_u8m1(vle16_v_u16m2(ptr, 16), 0, 16)); @@ -1088,7 +1088,7 @@ inline v_uint8x16 v_pack_b(const v_uint16x8& a, const v_uint16x8& b) inline v_uint8x16 v_pack_b(const v_uint32x4& a, const v_uint32x4& b, const v_uint32x4& c, const v_uint32x4& d) { - unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + unsigned ptr[16] = {0}; v_store(ptr, a); v_store(ptr + 4, b); v_store(ptr + 8, c); @@ -1100,7 +1100,7 @@ inline v_uint8x16 v_pack_b(const v_uint64x2& a, const v_uint64x2& b, const v_uin const v_uint64x2& d, const v_uint64x2& e, const v_uint64x2& f, const v_uint64x2& g, const v_uint64x2& h) { - uint64 CV_DECL_ALIGNED(32) ptr[16] = {0}; + uint64 ptr[16] = {0}; v_store(ptr, a); v_store(ptr + 2, b); v_store(ptr + 4, c); @@ -1279,13 +1279,15 @@ OPENCV_HAL_IMPL_RVV_SIGNED_SHIFT_OP(v_int64x2, i64, 2) #define OPENCV_HAL_IMPL_RVV_INT_CMP_OP(_Tpvec, op, intrin, suffix, vl) \ inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ { \ - return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b, vl), vmv_v_x_##suffix##m1(0, vl), 1, vl)); \ + uint64_t ones = -1; \ + return _Tpvec(vmerge_vxm_##suffix##m1(intrin(a, b, vl), vmv_v_x_##suffix##m1(0, vl), ones, vl)); \ } #define OPENCV_HAL_IMPL_RVV_FLOAT_CMP_OP(_Tpvec, op, intrin, suffix, vl) \ inline _Tpvec operator op (const _Tpvec& a, const _Tpvec& b) \ { \ - return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b, vl), vfmv_v_f_##suffix##m1(0, vl), 1, vl)); \ + union { uint64 u; double d; } ones; ones.u = -1; \ + return _Tpvec(vfmerge_vfm_##suffix##m1(intrin(a, b, vl), vfmv_v_f_##suffix##m1(0, vl), ones.d, vl)); \ } #define OPENCV_HAL_IMPL_RVV_UNSIGNED_CMP(_Tpvec, suffix, width, vl) \ @@ -1441,7 +1443,7 @@ OPENCV_HAL_IMPL_RVV_REDUCE(v_float32x4, max, float, f32, 4, fredmax) inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c, const v_float32x4& d) { - 
float CV_DECL_ALIGNED(32) elems[4] = + float elems[4] = { v_reduce_sum(a), v_reduce_sum(b), @@ -1746,9 +1748,9 @@ inline v_float32x4 v_cvt_f32(const v_float64x2& a, const v_float64x2& b) inline v_float64x2 v_cvt_f64(const v_int32x4& a) { - double CV_DECL_ALIGNED(32) ptr[4] = {0}; + double ptr[4] = {0}; vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a, 4), 4); - double CV_DECL_ALIGNED(32) elems[2] = + double elems[2] = { ptr[0], ptr[1] }; @@ -1757,9 +1759,9 @@ inline v_float64x2 v_cvt_f64(const v_int32x4& a) inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) { - double CV_DECL_ALIGNED(32) ptr[4] = {0}; + double ptr[4] = {0}; vse64_v_f64m2(ptr, vfwcvt_f_x_v_f64m2(a, 4), 4); - double CV_DECL_ALIGNED(32) elems[2] = + double elems[2] = { ptr[2], ptr[3] }; @@ -1768,9 +1770,9 @@ inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) inline v_float64x2 v_cvt_f64(const v_float32x4& a) { - double CV_DECL_ALIGNED(32) ptr[4] = {0}; + double ptr[4] = {0}; vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a, 4), 4); - double CV_DECL_ALIGNED(32) elems[2] = + double elems[2] = { ptr[0], ptr[1] }; @@ -1779,9 +1781,9 @@ inline v_float64x2 v_cvt_f64(const v_float32x4& a) inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) { - double CV_DECL_ALIGNED(32) ptr[4] = {0}; + double ptr[4] = {0}; vse64_v_f64m2(ptr, vfwcvt_f_f_v_f64m2(a, 4), 4); - double CV_DECL_ALIGNED(32) elems[2] = + double elems[2] = { ptr[2], ptr[3] }; @@ -1823,7 +1825,7 @@ inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ v_##_Tpvec& b0, v_##_Tpvec& b1, \ v_##_Tpvec& b2, v_##_Tpvec& b3) \ { \ - _Tp CV_DECL_ALIGNED(32) elems0[4] = \ + _Tp elems0[4] = \ { \ v_extract_n<0>(a0), \ v_extract_n<0>(a1), \ @@ -1831,7 +1833,7 @@ inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ v_extract_n<0>(a3) \ }; \ b0 = v_load(elems0); \ - _Tp CV_DECL_ALIGNED(32) elems1[4] = \ + _Tp elems1[4] = \ { \ v_extract_n<1>(a0), \ v_extract_n<1>(a1), \ @@ -1839,7 +1841,7 @@ inline void v_transpose4x4(const v_##_Tpvec& 
a0, const v_##_Tpvec& a1, \ v_extract_n<1>(a3) \ }; \ b1 = v_load(elems1); \ - _Tp CV_DECL_ALIGNED(32) elems2[4] = \ + _Tp elems2[4] = \ { \ v_extract_n<2>(a0), \ v_extract_n<2>(a1), \ @@ -1847,7 +1849,7 @@ inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \ v_extract_n<2>(a3) \ }; \ b2 = v_load(elems2); \ - _Tp CV_DECL_ALIGNED(32) elems3[4] = \ + _Tp elems3[4] = \ { \ v_extract_n<3>(a0), \ v_extract_n<3>(a1), \ @@ -1866,8 +1868,8 @@ OPENCV_HAL_IMPL_RVV_TRANSPOSE4x4(float32x4, float, f32) #define OPENCV_HAL_IMPL_RVV_REVERSE(_Tpvec, _Tp, suffix) \ inline _Tpvec v_reverse(const _Tpvec& a) \ { \ - _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptra[_Tpvec::nlanes] = {0}; \ + _Tp ptr[_Tpvec::nlanes] = {0}; \ + _Tp ptra[_Tpvec::nlanes] = {0}; \ v_store(ptra, a); \ for (int i = 0; i < _Tpvec::nlanes; i++) \ { \ @@ -1894,8 +1896,8 @@ OPENCV_HAL_IMPL_RVV_REVERSE(v_float64x2, double, f64) #define OPENCV_HAL_IMPL_RVV_EXPAND(_Tpwvec, _Tp, _Tpvec, width, suffix, wcvt, vl) \ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ { \ - _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \ - _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \ + _Tp lptr[_Tpvec::nlanes/2] = {0}; \ + _Tp hptr[_Tpvec::nlanes/2] = {0}; \ v_store_low(lptr, a); \ v_store_high(hptr, a); \ b0 = _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr, vl), vl)); \ @@ -1903,13 +1905,13 @@ inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \ } \ inline _Tpwvec v_expand_low(const _Tpvec& a) \ { \ - _Tp CV_DECL_ALIGNED(32) lptr[_Tpvec::nlanes/2] = {0}; \ + _Tp lptr[_Tpvec::nlanes/2] = {0}; \ v_store_low(lptr, a); \ return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(lptr, vl), vl)); \ } \ inline _Tpwvec v_expand_high(const _Tpvec& a) \ { \ - _Tp CV_DECL_ALIGNED(32) hptr[_Tpvec::nlanes/2] = {0}; \ + _Tp hptr[_Tpvec::nlanes/2] = {0}; \ v_store_high(hptr, a); \ return _Tpwvec(wcvt(vle##width##_v_##suffix##mf2(hptr, vl), vl)); \ } \ @@ 
-1936,25 +1938,25 @@ inline v_int32x4 v_load_expand_q(const schar* ptr) } -#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, shr, hvl, vl) \ +#define OPENCV_HAL_IMPL_RVV_PACK(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, hsuffix, suffix, rshr, shr, hvl, vl) \ inline _Tpvec v_pack(const _wTpvec& a, const _wTpvec& b) \ { \ - _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, b); \ return _Tpvec(shr(vle##width##_v_##suffix##m2(arr, vl), 0, vl)); \ } \ inline void v_pack_store(_Tp* ptr, const _wTpvec& a) \ { \ - _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ - v_store(ptr, _Tpvec(shr(vle##width##_v_##suffix##m2(arr, vl), 0, vl))); \ + vse##hwidth##_v_##hsuffix##m1(ptr, shr(vle##width##_v_##suffix##m2(arr, vl), 0, vl), hvl); \ } \ template inline \ _Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b) \ { \ - _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, b); \ return _Tpvec(rshr(vle##width##_v_##suffix##m2(arr, vl), n, vl)); \ @@ -1962,39 +1964,39 @@ _Tpvec v_rshr_pack(const _wTpvec& a, const _wTpvec& b) \ template inline \ void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a) \ { \ - _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ v_store(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr, vl), n, vl))); \ } -OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 16, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1, 8, 16) -OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, schar, v_int16x8, short, 16, i16, vnclip_wx_i8m1, vnclip_wx_i8m1, 8, 16) -OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 32, 
u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1, 4, 8) -OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 32, i32, vnclip_wx_i16m1, vnclip_wx_i16m1, 4, 8) -OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 64, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1, 2, 4) -OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 64, i64, vnclip_wx_i32m1, vnsra_wx_i32m1, 2, 4) +OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 8, 16, u8, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1, 8, 16) +OPENCV_HAL_IMPL_RVV_PACK(v_int8x16, schar, v_int16x8, short, 8, 16, i8, i16, vnclip_wx_i8m1, vnclip_wx_i8m1, 8, 16) +OPENCV_HAL_IMPL_RVV_PACK(v_uint16x8, ushort, v_uint32x4, unsigned, 16, 32, u16, u32, vnclipu_wx_u16m1, vnclipu_wx_u16m1, 4, 8) +OPENCV_HAL_IMPL_RVV_PACK(v_int16x8, short, v_int32x4, int, 16, 32, i16, i32, vnclip_wx_i16m1, vnclip_wx_i16m1, 4, 8) +OPENCV_HAL_IMPL_RVV_PACK(v_uint32x4, unsigned, v_uint64x2, uint64, 32, 64, u32, u64, vnclipu_wx_u32m1, vnsrl_wx_u32m1, 2, 4) +OPENCV_HAL_IMPL_RVV_PACK(v_int32x4, int, v_int64x2, int64, 32, 64, i32, i64, vnclip_wx_i32m1, vnsra_wx_i32m1, 2, 4) -#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, width, suffix, rshr, cast, vl) \ +#define OPENCV_HAL_IMPL_RVV_PACK_U(_Tpvec, _Tp, _wTpvec, _wTp, hwidth, width, hsuffix, suffix, rshr, cast, hvl, vl) \ inline _Tpvec v_pack_u(const _wTpvec& a, const _wTpvec& b) \ { \ - _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, b); \ return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), 0, vl)); \ } \ inline void v_pack_u_store(_Tp* ptr, const _wTpvec& a) \ { \ - _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ - v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, vl))); \ - v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), 0, 
vl))); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ + vse##hwidth##_v_##hsuffix##m1(ptr, rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), 0, vl), hvl); \ } \ template inline \ _Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b) \ { \ - _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, b); \ return _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), n, vl)); \ @@ -2002,23 +2004,23 @@ _Tpvec v_rshr_pack_u(const _wTpvec& a, const _wTpvec& b) \ template inline \ void v_rshr_pack_u_store(_Tp* ptr, const _wTpvec& a) \ { \ - _wTp CV_DECL_ALIGNED(32) arr[_Tpvec::nlanes] = {0}; \ + _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ - v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, vl))); \ + v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ v_store(ptr, _Tpvec(rshr(cast(vmax_vx_##suffix##m2(vle##width##_v_##suffix##m2(arr, vl), 0, vl)), n, vl))); \ } -OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 16, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2, 16) -OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 32, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2, 8) +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint8x16, uchar, v_int16x8, short, 8, 16, u8, i16, vnclipu_wx_u8m1, vreinterpret_v_i16m2_u16m2, 8, 16) +OPENCV_HAL_IMPL_RVV_PACK_U(v_uint16x8, ushort, v_int32x4, int, 16, 32, u16, i32, vnclipu_wx_u16m1, vreinterpret_v_i32m2_u32m2, 4, 8) #define OPENCV_HAL_IMPL_RVV_UNPACKS(_Tpvec, _Tp, suffix) \ inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \ { \ - _Tp CV_DECL_ALIGNED(32) ptra0[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptra1[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb0[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb1[v_##_Tpvec::nlanes] = {0}; \ + 
_Tp ptra0[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptra1[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb0[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb1[v_##_Tpvec::nlanes] = {0}; \ v_store(ptra0, a0); \ v_store(ptra1, a1); \ int i; \ @@ -2037,16 +2039,16 @@ inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_ } \ inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \ { \ - _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes/2] = {0}; \ v_store_low(ptra, a); \ v_store_low(ptrb, b); \ return v_load_halves(ptra, ptrb); \ } \ inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \ { \ - _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes/2] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp ptra[v_##_Tpvec::nlanes/2] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes/2] = {0}; \ v_store_high(ptra, a); \ v_store_high(ptrb, b); \ return v_load_halves(ptra, ptrb); \ @@ -2072,8 +2074,8 @@ OPENCV_HAL_IMPL_RVV_UNPACKS(float64x2, double, f64) #define OPENCV_HAL_IMPL_RVV_INTERLEAVED(_Tpvec, _Tp) \ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ { \ - _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ int i, i2; \ for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ { \ @@ -2085,9 +2087,9 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \ } \ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \ { \ - _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp 
ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrc[v_##_Tpvec::nlanes] = {0}; \ int i, i3; \ for( i = i3 = 0; i < v_##_Tpvec::nlanes; i++, i3 += 3 ) \ { \ @@ -2102,10 +2104,10 @@ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_ inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \ v_##_Tpvec& c, v_##_Tpvec& d) \ { \ - _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrd[v_##_Tpvec::nlanes] = {0}; \ int i, i4; \ for( i = i4 = 0; i < v_##_Tpvec::nlanes; i++, i4 += 4 ) \ { \ @@ -2123,8 +2125,8 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { \ int i, i2; \ - _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ v_store(ptra, a); \ v_store(ptrb, b); \ for( i = i2 = 0; i < v_##_Tpvec::nlanes; i++, i2 += 2 ) \ @@ -2137,9 +2139,9 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& const v_##_Tpvec& c, hal::StoreMode /*mode*/=hal::STORE_UNALIGNED) \ { \ int i, i3; \ - _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrc[v_##_Tpvec::nlanes] = {0}; \ v_store(ptra, a); \ v_store(ptrb, b); \ v_store(ptrc, c); \ @@ -2155,10 +2157,10 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& hal::StoreMode 
/*mode*/=hal::STORE_UNALIGNED ) \ { \ int i, i4; \ - _Tp CV_DECL_ALIGNED(32) ptra[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrb[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrc[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrd[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptra[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrb[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrc[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrd[v_##_Tpvec::nlanes] = {0}; \ v_store(ptra, a); \ v_store(ptrb, b); \ v_store(ptrc, c); \ @@ -2173,8 +2175,8 @@ inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& } \ inline v_##_Tpvec v_interleave_pairs(const v_##_Tpvec& vec) \ { \ - _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptr[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrvec[v_##_Tpvec::nlanes] = {0}; \ v_store(ptrvec, vec); \ for (int i = 0; i < v_##_Tpvec::nlanes/4; i++) \ { \ @@ -2187,8 +2189,8 @@ inline v_##_Tpvec v_interleave_pairs(const v_##_Tpvec& vec) \ } \ inline v_##_Tpvec v_interleave_quads(const v_##_Tpvec& vec) \ { \ - _Tp CV_DECL_ALIGNED(32) ptr[v_##_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrvec[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptr[v_##_Tpvec::nlanes] = {0}; \ + _Tp ptrvec[v_##_Tpvec::nlanes] = {0}; \ v_store(ptrvec, vec); \ for (int i = 0; i < v_##_Tpvec::nlanes/8; i++) \ { \ @@ -2242,9 +2244,9 @@ static const unsigned char popCountTable[] = #define OPENCV_HAL_IMPL_RVV_POPCOUNT_OP(_rTpvec, _Tpvec, _rTp, _Tp, suffix) \ inline _rTpvec v_popcount(const _Tpvec& a) \ { \ - uchar CV_DECL_ALIGNED(32) ptra[16] = {0}; \ + uchar ptra[16] = {0}; \ v_store(ptra, v_reinterpret_as_u8(a)); \ - _rTp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + _rTp ptr[_Tpvec::nlanes] = {0}; \ v_store(ptr, v_setzero_##suffix()); \ for (int i = 0; i < _Tpvec::nlanes*(int)sizeof(_Tp); i++) \ ptr[i/sizeof(_Tp)] += popCountTable[ptra[i]]; \ @@ -2298,7 +2300,7 @@ inline int v_signmask(const 
v_float64x2& a) #define OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(_Tpvec, _Tp, suffix) \ inline int v_scan_forward(const _Tpvec& a) \ { \ - _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ + _Tp ptr[_Tpvec::nlanes] = {0}; \ v_store(ptr, v_reinterpret_as_##suffix(a)); \ for (int i = 0; i < _Tpvec::nlanes; i++) \ if(int(ptr[i]) < 0) \ @@ -2321,28 +2323,29 @@ OPENCV_HAL_IMPL_RVV_SCAN_FORWOARD_OP(v_float64x2, double, f64) //////////// Pack triplets //////////// -#define OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(_Tpvec, _Tp) \ -inline _Tpvec v_pack_triplets(const _Tpvec& vec) \ -{ \ - _Tp CV_DECL_ALIGNED(32) ptr[_Tpvec::nlanes] = {0}; \ - _Tp CV_DECL_ALIGNED(32) ptrvec[_Tpvec::nlanes] = {0}; \ - v_store(ptrvec, vec); \ - for (int i = 0; i < _Tpvec::nlanes/4; i++) \ - { \ - ptr[3*i ] = ptrvec[4*i ]; \ - ptr[3*i+1] = ptrvec[4*i+2]; \ - ptr[3*i+2] = ptrvec[4*i+2]; \ - } \ - return v_load(ptr); \ +inline v_int8x16 v_pack_triplets(const v_int8x16& vec) +{ + uint64 ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A}; + return v_int8x16((vint8m1_t)vrgather_vv_u8m1((vuint8m1_t)vint8m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16)); +} +inline v_uint8x16 v_pack_triplets(const v_uint8x16& vec) +{ + return v_reinterpret_as_u8(v_pack_triplets(v_reinterpret_as_s8(vec))); +} + +inline v_int16x8 v_pack_triplets(const v_int16x8& vec) +{ + uint64 ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A}; + return v_int16x8((vint16m1_t)vrgather_vv_u8m1((vuint8m1_t)vint16m1_t(vec), (vuint8m1_t)vle64_v_u64m1(ptr, 2), 16)); +} +inline v_uint16x8 v_pack_triplets(const v_uint16x8& vec) +{ + return v_reinterpret_as_u16(v_pack_triplets(v_reinterpret_as_s16(vec))); } -OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint8x16, uchar) -OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int8x16, schar) -OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint16x8, ushort) -OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int16x8, short) -OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_uint32x4, unsigned) -OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_int32x4, int) 
-OPENCV_HAL_IMPL_RVV_PACK_TRIPLETS(v_float32x4, float) +inline v_int32x4 v_pack_triplets(const v_int32x4& vec) { return vec; } +inline v_uint32x4 v_pack_triplets(const v_uint32x4& vec) { return vec; } +inline v_float32x4 v_pack_triplets(const v_float32x4& vec) { return vec; } ////// FP16 support /////// @@ -2443,7 +2446,7 @@ inline v_int32x4 v_trunc(const v_float64x2& a) // 16 >> 32 inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) { - int CV_DECL_ALIGNED(32) ptr[8] = {0}; + int ptr[8] = {0}; v_int32x4 t1, t2; vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); v_load_deinterleave(ptr, t1, t2); @@ -2451,7 +2454,7 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b) } inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) { - int CV_DECL_ALIGNED(32) ptr[8] = {0}; + int ptr[8] = {0}; v_int32x4 t1, t2; vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); v_load_deinterleave(ptr, t1, t2); @@ -2461,7 +2464,7 @@ inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b, const v_int32 // 32 >> 64 inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) { - int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + int64 ptr[4] = {0}; v_int64x2 t1, t2; vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); v_load_deinterleave(ptr, t1, t2); @@ -2469,7 +2472,7 @@ inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b) } inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) { - int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + int64 ptr[4] = {0}; v_int64x2 t1, t2; vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); v_load_deinterleave(ptr, t1, t2); @@ -2479,7 +2482,7 @@ inline v_int64x2 v_dotprod(const v_int32x4& a, const v_int32x4& b, const v_int64 // 8 >> 32 inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) { - unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + unsigned ptr[16] = {0}; v_uint32x4 t1, t2, t3, t4; vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 
16), 16), 16); v_load_deinterleave(ptr, t1, t2, t3, t4); @@ -2488,7 +2491,7 @@ inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b) inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) { - unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + unsigned ptr[16] = {0}; v_uint32x4 t1, t2, t3, t4; vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); v_load_deinterleave(ptr, t1, t2, t3, t4); @@ -2497,7 +2500,7 @@ inline v_uint32x4 v_dotprod_expand(const v_uint8x16& a, const v_uint8x16& b, inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) { - int CV_DECL_ALIGNED(32) ptr[16] = {0}; + int ptr[16] = {0}; v_int32x4 t1, t2, t3, t4; vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); v_load_deinterleave(ptr, t1, t2, t3, t4); @@ -2506,7 +2509,7 @@ inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b) inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) { - int CV_DECL_ALIGNED(32) ptr[16] = {0}; + int ptr[16] = {0}; v_int32x4 t1, t2, t3, t4; vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); v_load_deinterleave(ptr, t1, t2, t3, t4); @@ -2516,7 +2519,7 @@ inline v_int32x4 v_dotprod_expand(const v_int8x16& a, const v_int8x16& b, // 16 >> 64 inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) { - uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + uint64 ptr[8] = {0}; v_uint64x2 t1, t2, t3, t4; vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); v_load_deinterleave(ptr, t1, t2, t3, t4); @@ -2524,7 +2527,7 @@ inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b) } inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) { - uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + uint64 ptr[8] = {0}; v_uint64x2 t1, t2, t3, t4; vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 
8), 8); v_load_deinterleave(ptr, t1, t2, t3, t4); @@ -2533,7 +2536,7 @@ inline v_uint64x2 v_dotprod_expand(const v_uint16x8& a, const v_uint16x8& b, con inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) { - int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + int64 ptr[8] = {0}; v_int64x2 t1, t2, t3, t4; vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); v_load_deinterleave(ptr, t1, t2, t3, t4); @@ -2542,7 +2545,7 @@ inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b) inline v_int64x2 v_dotprod_expand(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) { - int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + int64 ptr[8] = {0}; v_int64x2 t1, t2, t3, t4; vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); v_load_deinterleave(ptr, t1, t2, t3, t4); @@ -2563,7 +2566,7 @@ inline v_float64x2 v_dotprod_expand(const v_int32x4& a, const v_int32x4& b, // 16 >> 32 inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) { - int CV_DECL_ALIGNED(32) ptr[8] = {0}; + int ptr[8] = {0}; vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); v_int32x4 t1 = v_load(ptr); v_int32x4 t2 = v_load(ptr+4); @@ -2571,7 +2574,7 @@ inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b) } inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_int32x4& c) { - int CV_DECL_ALIGNED(32) ptr[8] = {0}; + int ptr[8] = {0}; vse32_v_i32m2(ptr, vwmul_vv_i32m2(a, b, 8), 8); v_int32x4 t1 = v_load(ptr); v_int32x4 t2 = v_load(ptr+4); @@ -2581,7 +2584,7 @@ inline v_int32x4 v_dotprod_fast(const v_int16x8& a, const v_int16x8& b, const v_ // 32 >> 64 inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) { - int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + int64 ptr[4] = {0}; vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); v_int64x2 t1 = v_load(ptr); v_int64x2 t2 = v_load(ptr+2); @@ -2589,7 +2592,7 @@ inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b) } inline 
v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_int64x2& c) { - int64 CV_DECL_ALIGNED(32) ptr[4] = {0}; + int64 ptr[4] = {0}; vse64_v_i64m2(ptr, vwmul_vv_i64m2(a, b, 4), 4); v_int64x2 t1 = v_load(ptr); v_int64x2 t2 = v_load(ptr+2); @@ -2600,7 +2603,7 @@ inline v_int64x2 v_dotprod_fast(const v_int32x4& a, const v_int32x4& b, const v_ // 8 >> 32 inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b) { - unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + unsigned ptr[16] = {0}; vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); v_uint32x4 t1 = v_load(ptr); v_uint32x4 t2 = v_load(ptr+4); @@ -2610,7 +2613,7 @@ inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b } inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b, const v_uint32x4& c) { - unsigned CV_DECL_ALIGNED(32) ptr[16] = {0}; + unsigned ptr[16] = {0}; vse32_v_u32m4(ptr, vwcvtu_x_x_v_u32m4(vwmulu_vv_u16m2(a, b, 16), 16), 16); v_uint32x4 t1 = v_load(ptr); v_uint32x4 t2 = v_load(ptr+4); @@ -2620,7 +2623,7 @@ inline v_uint32x4 v_dotprod_expand_fast(const v_uint8x16& a, const v_uint8x16& b } inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) { - int CV_DECL_ALIGNED(32) ptr[16] = {0}; + int ptr[16] = {0}; vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); v_int32x4 t1 = v_load(ptr); v_int32x4 t2 = v_load(ptr+4); @@ -2630,7 +2633,7 @@ inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b) } inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, const v_int32x4& c) { - int CV_DECL_ALIGNED(32) ptr[16] = {0}; + int ptr[16] = {0}; vse32_v_i32m4(ptr, vwcvt_x_x_v_i32m4(vwmul_vv_i16m2(a, b, 16), 16), 16); v_int32x4 t1 = v_load(ptr); v_int32x4 t2 = v_load(ptr+4); @@ -2642,7 +2645,7 @@ inline v_int32x4 v_dotprod_expand_fast(const v_int8x16& a, const v_int8x16& b, c // 16 >> 64 inline v_uint64x2 
v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b) { - uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + uint64 ptr[8] = {0}; vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); v_uint64x2 t1 = v_load(ptr); v_uint64x2 t2 = v_load(ptr+2); @@ -2652,7 +2655,7 @@ inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b } inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b, const v_uint64x2& c) { - uint64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + uint64 ptr[8] = {0}; vse64_v_u64m4(ptr, vwcvtu_x_x_v_u64m4(vwmulu_vv_u32m2(a, b, 8), 8), 8); v_uint64x2 t1 = v_load(ptr); v_uint64x2 t2 = v_load(ptr+2); @@ -2662,7 +2665,7 @@ inline v_uint64x2 v_dotprod_expand_fast(const v_uint16x8& a, const v_uint16x8& b } inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) { - int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + int64 ptr[8] = {0}; vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); v_int64x2 t1 = v_load(ptr); v_int64x2 t2 = v_load(ptr+2); @@ -2672,7 +2675,7 @@ inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b) } inline v_int64x2 v_dotprod_expand_fast(const v_int16x8& a, const v_int16x8& b, const v_int64x2& c) { - int64 CV_DECL_ALIGNED(32) ptr[8] = {0}; + int64 ptr[8] = {0}; vse64_v_i64m4(ptr, vwcvt_x_x_v_i64m4(vwmul_vv_i32m2(a, b, 8), 8), 8); v_int64x2 t1 = v_load(ptr); v_int64x2 t2 = v_load(ptr+2); @@ -2714,7 +2717,7 @@ inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0, #define OPENCV_HAL_IMPL_RVV_MUL_EXPAND(_Tpvec, _Tpwvec, _Tpw, suffix, wmul, width, vl, hvl) \ inline void v_mul_expand(const _Tpvec& a, const _Tpvec& b, _Tpwvec& c, _Tpwvec& d) \ { \ - _Tpw CV_DECL_ALIGNED(32) ptr[_Tpwvec::nlanes*2] = {0}; \ + _Tpw ptr[_Tpwvec::nlanes*2] = {0}; \ vse##width##_v_##suffix##m2(ptr, wmul(a, b, vl), vl); \ c = _Tpwvec(vle##width##_v_##suffix##m1(ptr, hvl)); \ d = _Tpwvec(vle##width##_v_##suffix##m1(ptr+_Tpwvec::nlanes, 
hvl)); \ From 4e5699fa716a3e5b0faddf1d6a00213aeb5c60cc Mon Sep 17 00:00:00 2001 From: Julia Bareeva <34717687+JulieBar@users.noreply.github.com> Date: Fri, 23 Jul 2021 17:11:50 +0300 Subject: [PATCH 079/128] Merge pull request #20450 from JulieBar:lstm_inside Support non-zero hidden state for LSTM * fully support non-zero hidden state for LSTM * check dims of hidden state for LSTM * fix failed test Test_Model.TextRecognition * add new tests for LSTM w/ non-zero hidden params Co-authored-by: Julie Bareeva --- modules/dnn/src/layers/recurrent_layers.cpp | 28 +++++--- modules/dnn/src/onnx/onnx_importer.cpp | 11 ++- modules/dnn/src/tensorflow/tf_importer.cpp | 16 +++-- modules/dnn/test/test_layers.cpp | 80 ++++++++++++++++++++- modules/dnn/test/test_onnx_importer.cpp | 10 +++ 5 files changed, 122 insertions(+), 23 deletions(-) diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index 69606a6b4ef5..a6715aefca92 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -112,19 +112,24 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer const Mat& Wh = blobs[0]; const Mat& Wx = blobs[1]; const Mat& bias = blobs[2]; + const Mat& hInternal = blobs[3]; + const Mat& cInternal = blobs[4]; CV_CheckEQ(Wh.dims, 2, ""); CV_CheckEQ(Wx.dims, 2, ""); CV_CheckEQ(Wh.rows, Wx.rows, ""); CV_CheckEQ(Wh.rows, (1 + static_cast(bidirectional))*4*Wh.cols, ""); CV_CheckEQ(Wh.rows, (int)bias.total(), ""); + CV_CheckEQ(hInternal.cols, Wh.cols, ""); + CV_CheckEQ(hInternal.cols, cInternal.cols, ""); + CV_CheckEQ(hInternal.rows, cInternal.rows, ""); CV_Assert(Wh.type() == Wx.type() && Wx.type() == bias.type()); // Peephole weights. 
- if (blobs.size() > 3) + if (blobs.size() > 5) { - CV_Assert(blobs.size() == 6); + CV_Assert(blobs.size() == 8); const int N = Wh.cols; - for (int i = 3; i < 6; ++i) + for (int i = 5; i < 8; ++i) { CV_Assert(blobs[i].rows == N && blobs[i].cols == N); CV_Assert(blobs[i].type() == bias.type()); @@ -181,7 +186,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer std::vector &outputs, std::vector &internals) const CV_OVERRIDE { - CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6)); + CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8)); CV_Assert(inputs.size() == 1); const MatShape& inp0 = inputs[0]; @@ -228,7 +233,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer std::vector input; inputs_arr.getMatVector(input); - CV_Assert((!usePeephole && blobs.size() == 3) || (usePeephole && blobs.size() == 6)); + CV_Assert((!usePeephole && blobs.size() == 5) || (usePeephole && blobs.size() == 8)); CV_Assert(input.size() == 1); const Mat& inp0 = input[0]; @@ -284,13 +289,14 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs); const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs); const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs); + const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs); + const Mat &c_0 = blobs[4].rowRange(i * blobs[4].rows / numDirs, (i + 1) * blobs[4].rows / numDirs); int numOut = Wh.size[1]; - Mat hInternal = internals[0], cInternal = internals[1], dummyOnes = internals[2], gates = internals[3]; - hInternal.setTo(0.); - cInternal.setTo(0.); + h_0.copyTo(hInternal); + c_0.copyTo(cInternal); dummyOnes.setTo(1.); int numSamplesTotal = numTimeStamps*numSamples; @@ -331,8 +337,8 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer if (usePeephole) { Mat gatesIF = 
gates.colRange(0, 2*numOut); - gemm(cInternal, blobs[3], 1, gateI, 1, gateI); - gemm(cInternal, blobs[4], 1, gateF, 1, gateF); + gemm(cInternal, blobs[5], 1, gateI, 1, gateI); + gemm(cInternal, blobs[6], 1, gateF, 1, gateF); sigmoid(gatesIF, gatesIF); } else @@ -355,7 +361,7 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer } if (usePeephole) { - gemm(cInternal, blobs[5], 1, gateO, 1, gateO); + gemm(cInternal, blobs[7], 1, gateO, 1, gateO); sigmoid(gateO, gateO); } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index ec61a9707eb9..4ad0fd496e37 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -900,8 +900,9 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) Mat Wx = getBlob(node_proto, 1); Mat Wh = getBlob(node_proto, 2); Mat b = getBlob(node_proto, 3); - CV_CheckEQ(countNonZero(getBlob(node_proto, 5)), 0, "Unsupported non zero initial_h"); - CV_CheckEQ(countNonZero(getBlob(node_proto, 6)), 0, "Unsupported non zero initial_c"); + Mat h0 = getBlob(node_proto, 5); + Mat c0 = getBlob(node_proto, 6); + b = b.reshape(1, b.size[0]); const int numHidden = lstmParams.get("hidden_size"); @@ -934,11 +935,15 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) } Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + c0 = c0.reshape(1, c0.size[0] * c0.size[1]); - lstmParams.blobs.resize(3); + lstmParams.blobs.resize(5); lstmParams.blobs[0] = Wh; lstmParams.blobs[1] = Wx; lstmParams.blobs[2] = b; + lstmParams.blobs[3] = h0; + lstmParams.blobs[4] = c0; lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 
426710989e48..01fa0df985b7 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -1838,8 +1838,8 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod // op: "BlockLSTM" // input: "lstm_block_wrapper/ToInt64/x" (ignore, number of time stamps) // input: "input" - // input: "lstm_block_wrapper/zeros" (ignore) - // input: "lstm_block_wrapper/zeros" (ignore) + // input: "lstm_block_wrapper/zeros" + // input: "lstm_block_wrapper/zeros" // input: "lstm_block_wrapper/kernel" // input: "lstm_block_wrapper/w_i_diag" // input: "lstm_block_wrapper/w_f_diag" @@ -1865,9 +1865,11 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod } } - Mat W, Wh, Wx, b; + Mat W, Wh, Wx, b, cs_prev, h_prev; blobFromTensor(getConstBlob(layer, value_id, 4), W); blobFromTensor(getConstBlob(layer, value_id, 8), b); + blobFromTensor(getConstBlob(layer, value_id, 2), cs_prev); + blobFromTensor(getConstBlob(layer, value_id, 3), h_prev); const int outSize = W.cols / 4; // IGFO->IFOG @@ -1883,10 +1885,12 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod Wx = W.rowRange(0, W.rows - outSize).t(); Wh = W.rowRange(W.rows - outSize, W.rows).t(); - layerParams.blobs.resize(3); + layerParams.blobs.resize(5); layerParams.blobs[0] = Wh; layerParams.blobs[1] = Wx; layerParams.blobs[2] = b; + layerParams.blobs[3] = h_prev; + layerParams.blobs[4] = cs_prev; if (hasLayerAttr(layer, "use_peephole")) { @@ -1894,14 +1898,14 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod if (usePeephole) { layerParams.set("use_peephole", true); - layerParams.blobs.resize(6); + layerParams.blobs.resize(8); for (int i = 0; i < 3; ++i) { Mat w; blobFromTensor(getConstBlob(layer, value_id, 5 + i), w); w = w.reshape(1, w.total()); // Single column. w = Mat::diag(w); // Make a diagonal matrix. 
- layerParams.blobs[3 + i] = w; + layerParams.blobs[5 + i] = w; } } } diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 897603d274ca..fbe9605e7f9c 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -434,7 +434,7 @@ class Layer_LSTM_Test : public ::testing::Test { public: int numInp, numOut; - Mat Wh, Wx, b; + Mat Wh, Wx, b, h, c; Ptr layer; std::vector inputs, outputs; @@ -449,12 +449,17 @@ class Layer_LSTM_Test : public ::testing::Test Wh = Mat::ones(4 * numOut, numOut, CV_32F); Wx = Mat::ones(4 * numOut, numInp, CV_32F); b = Mat::ones(4 * numOut, 1, CV_32F); + h = Mat::ones(4, numOut, CV_32F); + c = Mat::ones(4, numOut, CV_32F); LayerParams lp; - lp.blobs.resize(3); + lp.blobs.resize(5); lp.blobs[0] = Wh; lp.blobs[1] = Wx; lp.blobs[2] = b; + lp.blobs[3] = h; + lp.blobs[4] = c; + lp.set("produce_cell_output", produceCellOutput); lp.set("use_timestamp_dim", useTimestampDim); @@ -502,10 +507,12 @@ TEST_F(Layer_LSTM_Test, get_set_test) TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent) { LayerParams lp; - lp.blobs.resize(3); + lp.blobs.resize(5); lp.blobs[0] = blobFromNPY(_tf("lstm.prototxt.w_2.npy")); // Wh lp.blobs[1] = blobFromNPY(_tf("lstm.prototxt.w_0.npy")); // Wx lp.blobs[2] = blobFromNPY(_tf("lstm.prototxt.w_1.npy")); // bias + lp.blobs[3] = Mat::zeros(2, 17, CV_32F); // h_0 + lp.blobs[4] = Mat::zeros(2, 17, CV_32F); // c_0 Ptr layer = LSTMLayer::create(lp); Mat inp = blobFromNPY(_tf("recurrent.input.npy")); @@ -516,6 +523,68 @@ TEST(Layer_LSTM_Test_Accuracy_with_, CaffeRecurrent) normAssert(h_t_reference, outputs[0]); } +TEST(Layer_LSTM_Test_Accuracy_with_, HiddenParams) +{ + Mat Wx = blobFromNPY(_tf("lstm.hidden.W.npy")); + Mat Wh = blobFromNPY(_tf("lstm.hidden.R.npy")); + Mat b = blobFromNPY(_tf("lstm.hidden.B.npy")); + Mat h0 = blobFromNPY(_tf("lstm.hidden.h0.npy")); + Mat c0 = blobFromNPY(_tf("lstm.hidden.c0.npy")); + + const int numHidden = 3; + const int numDirs = Wx.size[0]; + 
const int numFeatures = Wx.size[2]; + + b = b.reshape(1, b.size[0]); + Mat bx = b.colRange(0, b.cols / 2); + Mat bh = b.colRange(b.cols / 2, b.cols); + b = bx + bh; + + // IFGO->IGFO + for (int k = 0; k < numDirs; ++k) + { + float* WxData = Wx.ptr(k); + float* WhData = Wh.ptr(k); + float* biasData = b.ptr(k); + for (int j = 0; j < numHidden; ++j) + { + for (int i = 0; i < numFeatures; ++i) + { + std::swap(WxData[(numHidden + j) * numFeatures + i], + WxData[(numHidden * 2 + j) * numFeatures + i]); + } + for (int i = 0; i < numHidden; ++i) + { + std::swap(WhData[(numHidden + j) * numHidden + i], + WhData[(numHidden * 2 + j) * numHidden + i]); + } + std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); + } + } + + Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); + Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + + LayerParams lstmParams; + lstmParams.blobs.resize(5); + lstmParams.blobs[0] = Wh; + lstmParams.blobs[1] = Wx; + lstmParams.blobs[2] = b; + lstmParams.blobs[3] = h0; + lstmParams.blobs[4] = c0; + lstmParams.set("bidirectional", false); + Ptr layer = LSTMLayer::create(lstmParams); + + Mat inp = blobFromNPY(_tf("lstm.hidden.input.npy")); + std::vector inputs(1, inp), outputs; + runLayer(layer, inputs, outputs); + + Mat h_t_reference = blobFromNPY(_tf("lstm.hidden.output.npy")); + normAssert(h_t_reference, outputs[0]); +} + TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent) { Ptr layer = RNNLayer::create(LayerParams()); @@ -560,6 +629,9 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse) bias.at(2, 0) = 1e10f; // Output gate - always output everything bias.at(3, 0) = 0.f; // Update signal + cv::Mat hInternal = cv::Mat::zeros(1, 1, CV_32FC1); + cv::Mat cInternal = cv::Mat::zeros(1, 1, CV_32FC1); + LayerParams lp; lp.set("reverse", true); lp.set("use_timestamp_dim", true); @@ -567,6 +639,8 @@ TEST(Layer_LSTM_Test_Accuracy_, Reverse) lp.blobs.push_back(Wh); 
lp.blobs.push_back(Wx); lp.blobs.push_back(bias); + lp.blobs.push_back(hInternal); + lp.blobs.push_back(cInternal); cv::Ptr layer = LSTMLayer::create(lp); std::vector outputs; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 3923068dbf17..05f77730af07 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -675,6 +675,16 @@ TEST_P(Test_ONNX_layers, LSTM_bidirectional) testONNXModels("lstm_bidirectional", npy, 0, 0, false, false); } +TEST_P(Test_ONNX_layers, LSTM_hidden) +{ + testONNXModels("hidden_lstm", npy, 0, 0, false, false); +} + +TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional) +{ + testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false); +} + TEST_P(Test_ONNX_layers, Pad2d_Unfused) { testONNXModels("ReflectionPad2d"); From 7aa922ceac7d61c0a2b6c8c76debb70e256f6e2f Mon Sep 17 00:00:00 2001 From: Parsa Date: Sat, 24 Jul 2021 19:59:24 +0430 Subject: [PATCH 080/128] Merge pull request #20440 from parsa-ra:patch-1 * Update config_reference.markdown Added description for `WITH_CLP` build option. 
* Added extra description Can't cross-reference with anchors to other sections of the markdown file due to the presence of markdown link extension in the form of `## Header {#id-of-header}` * Fixed trailing space issue --- .../config_reference/config_reference.markdown | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/tutorials/introduction/config_reference/config_reference.markdown b/doc/tutorials/introduction/config_reference/config_reference.markdown index 58b4ed55ca41..0ba5627249ec 100644 --- a/doc/tutorials/introduction/config_reference/config_reference.markdown +++ b/doc/tutorials/introduction/config_reference/config_reference.markdown @@ -589,6 +589,14 @@ Some features have been added specifically for automated build environments, lik | `OPENCV_CMAKE_HOOKS_DIR` | _empty_ | OpenCV allows to customize configuration process by adding custom hook scripts at each stage and substage. cmake scripts with predefined names located in the directory set by this variable will be included before and after various configuration stages. Examples of file names: _CMAKE_INIT.cmake_, _PRE_CMAKE_BOOTSTRAP.cmake_, _POST_CMAKE_BOOTSTRAP.cmake_, etc.. Other names are not documented and can be found in the project cmake files by searching for the _ocv_cmake_hook_ macro calls. | | `OPENCV_DUMP_HOOKS_FLOW` | _OFF_ | Enables a debug message print on each cmake hook script call. | +## Contrib Modules + +Following build options are utilized in `opencv_contrib` modules, as stated [previously](#tutorial_config_reference_general_contrib), these extra modules can be added to your final build by setting `DOPENCV_EXTRA_MODULES_PATH` option. + +| Option | Default | Description | +| ------ | ------- | ----------- | +| `WITH_CLP` | _OFF_ | Will add [coinor](https://projects.coin-or.org/Clp) linear programming library build support which is required in `videostab` module. Make sure to install the development libraries of coinor-clp. 
| + # Other non-documented options @@ -605,7 +613,6 @@ Some features have been added specifically for automated build environments, lik `WITH_CPUFEATURES` `WITH_EIGEN` `WITH_OPENVX` -`WITH_CLP` `WITH_DIRECTX` `WITH_VA` `WITH_LAPACK` From 2f180cea7f6e2519aed9665c37dafdf4ab0e80ba Mon Sep 17 00:00:00 2001 From: Giles Payne Date: Sun, 25 Jul 2021 14:23:52 +0900 Subject: [PATCH 081/128] Add Quicklook for Mat on iOS and macOS --- .../imgcodecs/misc/objc/ios/Mat+Converters.h | 2 +- .../imgcodecs/misc/objc/ios/Mat+Converters.mm | 2 +- .../imgcodecs/misc/objc/ios/Mat+QuickLook.h | 27 +++ .../imgcodecs/misc/objc/ios/Mat+QuickLook.mm | 155 ++++++++++++++++++ .../misc/objc/macosx/Mat+QuickLook.h | 27 +++ .../misc/objc/macosx/Mat+QuickLook.mm | 154 +++++++++++++++++ 6 files changed, 365 insertions(+), 2 deletions(-) create mode 100644 modules/imgcodecs/misc/objc/ios/Mat+QuickLook.h create mode 100644 modules/imgcodecs/misc/objc/ios/Mat+QuickLook.mm create mode 100644 modules/imgcodecs/misc/objc/macosx/Mat+QuickLook.h create mode 100644 modules/imgcodecs/misc/objc/macosx/Mat+QuickLook.mm diff --git a/modules/imgcodecs/misc/objc/ios/Mat+Converters.h b/modules/imgcodecs/misc/objc/ios/Mat+Converters.h index a3ee005c18be..0f74bb2f5dc7 100644 --- a/modules/imgcodecs/misc/objc/ios/Mat+Converters.h +++ b/modules/imgcodecs/misc/objc/ios/Mat+Converters.h @@ -1,5 +1,5 @@ // -// Mat+UIImage.h +// Mat+Converters.h // // Created by Giles Payne on 2020/03/03. // diff --git a/modules/imgcodecs/misc/objc/ios/Mat+Converters.mm b/modules/imgcodecs/misc/objc/ios/Mat+Converters.mm index 69250eb99415..79358cb6de7f 100644 --- a/modules/imgcodecs/misc/objc/ios/Mat+Converters.mm +++ b/modules/imgcodecs/misc/objc/ios/Mat+Converters.mm @@ -1,5 +1,5 @@ // -// Mat+UIImage.mm +// Mat+Converters.mm // // Created by Giles Payne on 2020/03/03. 
// diff --git a/modules/imgcodecs/misc/objc/ios/Mat+QuickLook.h b/modules/imgcodecs/misc/objc/ios/Mat+QuickLook.h new file mode 100644 index 000000000000..341172798ed4 --- /dev/null +++ b/modules/imgcodecs/misc/objc/ios/Mat+QuickLook.h @@ -0,0 +1,27 @@ +// +// Mat+QuickLook.h +// +// Created by Giles Payne on 2021/07/18. +// + +#pragma once + +#ifdef __cplusplus +#import "opencv2/core.hpp" +#else +#define CV_EXPORTS +#endif + +#import "Mat.h" +#import +#import + +NS_ASSUME_NONNULL_BEGIN + +CV_EXPORTS @interface Mat (QuickLook) + +- (id)debugQuickLookObject; + +@end + +NS_ASSUME_NONNULL_END diff --git a/modules/imgcodecs/misc/objc/ios/Mat+QuickLook.mm b/modules/imgcodecs/misc/objc/ios/Mat+QuickLook.mm new file mode 100644 index 000000000000..7bfee07eb131 --- /dev/null +++ b/modules/imgcodecs/misc/objc/ios/Mat+QuickLook.mm @@ -0,0 +1,155 @@ +// +// Mat+QuickLook.mm +// +// Created by Giles Payne on 2021/07/18. +// + +#import "Mat+QuickLook.h" +#import "Mat+Converters.h" +#import "Rect2i.h" +#import "Core.h" +#import "Imgproc.h" +#import + +#define SIZE 20 + +static UIFont* getCMU() { + return [UIFont fontWithName:@"CMU Serif" size:SIZE]; +} + +static UIFont* getBodoni72() { + return [UIFont fontWithName:@"Bodoni 72" size:SIZE]; +} + +static UIFont* getAnySerif() { + if (@available(iOS 13.0, *)) { + return [UIFont fontWithDescriptor:[[UIFontDescriptor preferredFontDescriptorWithTextStyle:UIFontTextStyleBody] fontDescriptorWithDesign:UIFontDescriptorSystemDesignSerif] size:SIZE]; + } else { + return nil; + } +} + +static UIFont* getSystemFont() { + return [UIFont systemFontOfSize:SIZE]; +} + +typedef UIFont* (*FontGetter)(); + +@implementation Mat (QuickLook) + +- (NSString*)makeLabel:(BOOL)isIntType val:(NSNumber*)num { + if (isIntType) { + return [NSString stringWithFormat:@"%d", num.intValue]; + } else { + int exponent = 1 + (int)log10(abs(num.doubleValue)); + if (num.doubleValue == (double)num.intValue && num.doubleValue < 10000 && num.doubleValue > -10000) { + 
return [NSString stringWithFormat:@"%d", num.intValue];; + } else if (exponent <= 5 && exponent >= -1) { + return [NSString stringWithFormat:[NSString stringWithFormat:@"%%%d.%df", 6, MIN(5 - exponent, 4)], num.doubleValue]; + } else { + return [[[NSString stringWithFormat:@"%.2e", num.doubleValue] stringByReplacingOccurrencesOfString:@"e+0" withString:@"e"] stringByReplacingOccurrencesOfString:@"e-0" withString:@"e-"]; + } + } +} + +- (void)relativeLine:(UIBezierPath*)path relX:(CGFloat)x relY:(CGFloat)y { + CGPoint curr = path.currentPoint; + [path addLineToPoint:CGPointMake(curr.x + x, curr.y + y)]; +} + +- (id)debugQuickLookObject { + if ([self dims] == 2 && [self rows] <= 10 && [self cols] <= 10) { + FontGetter fontGetters[] = { getCMU, getBodoni72, getAnySerif, getSystemFont }; + UIFont* font = nil; + for (int fontGetterIndex = 0; font==nil && fontGetterIndex < (sizeof(fontGetters)) / (sizeof(fontGetters[0])); fontGetterIndex++) { + font = fontGetters[fontGetterIndex](); + } + int elements = [self rows] * [self cols]; + NSDictionary* textFontAttributes = @{ NSFontAttributeName: font, NSForegroundColorAttributeName: UIColor.blackColor }; + NSMutableArray* rawData = [NSMutableArray new]; + for (int dataIndex = 0; dataIndex < elements; dataIndex++) { + [rawData addObject:[NSNumber numberWithDouble:0]]; + } + [self get:0 col: 0 data: rawData]; + BOOL isIntType = [self depth] <= CV_32S; + NSMutableArray* labels = [NSMutableArray new]; + NSMutableDictionary* boundingRects = [NSMutableDictionary dictionaryWithCapacity:elements]; + int maxWidth = 0, maxHeight = 0; + for (NSNumber* number in rawData) { + NSString* label = [self makeLabel:isIntType val:number]; + [labels addObject:label]; + CGRect boundingRect = [label boundingRectWithSize:CGSizeMake(CGFLOAT_MAX, CGFLOAT_MAX) options:NSStringDrawingUsesLineFragmentOrigin attributes:textFontAttributes context:nil]; + if (boundingRect.size.width > maxWidth) { + maxWidth = boundingRect.size.width; + } + if 
(boundingRect.size.height > maxHeight) { + maxHeight = boundingRect.size.height; + } + boundingRects[label] = [NSValue valueWithCGRect:boundingRect]; + } + + int rowGap = 6; + int colGap = 6; + int borderGap = 8; + int lineThickness = 3; + int lipWidth = 6; + int imageWidth = 2 * (borderGap + lipWidth) + maxWidth * [self cols] + colGap * ([self cols] - 1); + int imageHeight = 2 * (borderGap + lipWidth) + maxHeight * [self rows] + rowGap * ([self rows] - 1); + + UIBezierPath* leftBracket = [UIBezierPath new]; + [leftBracket moveToPoint:CGPointMake(borderGap, borderGap)]; + [self relativeLine:leftBracket relX:0 relY:imageHeight - 2 * borderGap]; + [self relativeLine:leftBracket relX:lineThickness + lipWidth relY:0]; + [self relativeLine:leftBracket relX:0 relY:-lineThickness]; + [self relativeLine:leftBracket relX:-lipWidth relY:0]; + [self relativeLine:leftBracket relX:0 relY:-(imageHeight - 2 * (borderGap + lineThickness))]; + [self relativeLine:leftBracket relX:lipWidth relY:0]; + [self relativeLine:leftBracket relX:0 relY:-lineThickness]; + [leftBracket closePath]; + CGAffineTransform reflect = CGAffineTransformConcat(CGAffineTransformMakeTranslation(-imageWidth, 0), CGAffineTransformMakeScale(-1, 1)); + UIBezierPath* rightBracket = [leftBracket copy]; + [rightBracket applyTransform:reflect]; + + CGRect rect = CGRectMake(0, 0, imageWidth, imageHeight); + UIGraphicsBeginImageContextWithOptions(rect.size, false, 0.0); + [UIColor.whiteColor setFill]; + UIRectFill(rect); + [UIColor.blackColor setFill]; + [leftBracket fill]; + [rightBracket fill]; + [labels enumerateObjectsUsingBlock:^(id label, NSUInteger index, BOOL *stop) + { + CGRect boundingRect = boundingRects[label].CGRectValue; + int row = (int)index / [self cols]; + int col = (int)index % [self cols]; + int x = borderGap + lipWidth + col * (maxWidth + colGap) + (maxWidth - boundingRect.size.width) / 2; + int y = borderGap + lipWidth + row * (maxHeight + rowGap) + (maxHeight - boundingRect.size.height) / 2; + 
CGRect textRect = CGRectMake(x, y, boundingRect.size.width, boundingRect.size.height); + [label drawInRect:textRect withAttributes:textFontAttributes]; + }]; + UIImage* image = UIGraphicsGetImageFromCurrentImageContext(); + UIGraphicsEndImageContext(); + return image; + } else if (([self dims] == 2) && ([self type] == CV_8U || [self type] == CV_8UC3 || [self type] == CV_8UC4)) { + return [self toUIImage]; + } else if ([self dims] == 2 && [self channels] == 1) { + Mat* normalized = [Mat new]; + [Core normalize:self dst:normalized alpha:0 beta:255 norm_type:NORM_MINMAX dtype:CV_8U]; + Mat* normalizedKey = [[Mat alloc] initWithRows:[self rows] + 10 cols:[self cols] type:CV_8U]; + std::vector key; + for (int index = 0; index < [self cols]; index++) { + key.push_back((char)(index * 256 / [self cols])); + } + for (int index = 0; index < 10; index++) { + [normalizedKey put:@[[NSNumber numberWithInt:index], [NSNumber numberWithInt:0]] count:[self cols] byteBuffer:key.data()]; + } + [normalized copyTo:[normalizedKey submatRoi:[[Rect2i alloc] initWithX:0 y:10 width:[self cols] height:[self rows]]]]; + Mat* colorMap = [Mat new]; + [Imgproc applyColorMap:normalizedKey dst:colorMap colormap:COLORMAP_JET]; + [Imgproc cvtColor:colorMap dst:colorMap code:COLOR_BGR2RGB]; + return [colorMap toUIImage]; + } + return [self description]; +} + +@end diff --git a/modules/imgcodecs/misc/objc/macosx/Mat+QuickLook.h b/modules/imgcodecs/misc/objc/macosx/Mat+QuickLook.h new file mode 100644 index 000000000000..9fa31aba399e --- /dev/null +++ b/modules/imgcodecs/misc/objc/macosx/Mat+QuickLook.h @@ -0,0 +1,27 @@ +// +// Mat+QuickLook.h +// +// Created by Giles Payne on 2021/07/18. 
+// + +#pragma once + +#ifdef __cplusplus +#import "opencv2/core.hpp" +#else +#define CV_EXPORTS +#endif + +#import "Mat.h" +#import +#import + +NS_ASSUME_NONNULL_BEGIN + +CV_EXPORTS @interface Mat (QuickLook) + +- (id)debugQuickLookObject; + +@end + +NS_ASSUME_NONNULL_END diff --git a/modules/imgcodecs/misc/objc/macosx/Mat+QuickLook.mm b/modules/imgcodecs/misc/objc/macosx/Mat+QuickLook.mm new file mode 100644 index 000000000000..6775f817806c --- /dev/null +++ b/modules/imgcodecs/misc/objc/macosx/Mat+QuickLook.mm @@ -0,0 +1,154 @@ +// +// Mat+QuickLook.mm +// +// Created by Giles Payne on 2021/07/18. +// + +#import "Mat+QuickLook.h" +#import "Mat+Converters.h" +#import "Rect2i.h" +#import "Core.h" +#import "Imgproc.h" +#import + +#define SIZE 20 + +static NSFont* getCMU() { + return [NSFont fontWithName:@"CMU Serif" size:SIZE]; +} + +static NSFont* getBodoni72() { + return [NSFont fontWithName:@"Bodoni 72" size:SIZE]; +} + +static NSFont* getAnySerif() { + if (@available(macOS 11.0, *)) { + return [NSFont fontWithDescriptor:[[NSFontDescriptor preferredFontDescriptorForTextStyle:NSFontTextStyleBody options:@{}] fontDescriptorWithDesign:NSFontDescriptorSystemDesignSerif] size:SIZE]; + } else { + return nil; + } +} + +static NSFont* getSystemFont() { + return [NSFont systemFontOfSize:SIZE]; +} + +typedef NSFont* (*FontGetter)(); + +@implementation Mat (QuickLook) + +- (NSString*)makeLabel:(BOOL)isIntType val:(NSNumber*)num { + if (isIntType) { + return [NSString stringWithFormat:@"%d", num.intValue]; + } else { + int exponent = 1 + (int)log10(abs(num.doubleValue)); + if (num.doubleValue == (double)num.intValue && num.doubleValue < 10000 && num.doubleValue > -10000) { + return [NSString stringWithFormat:@"%d", num.intValue];; + } else if (exponent <= 5 && exponent >= -1) { + return [NSString stringWithFormat:[NSString stringWithFormat:@"%%%d.%df", 6, MIN(5 - exponent, 4)], num.doubleValue]; + } else { + return [[[NSString stringWithFormat:@"%.2e", num.doubleValue] 
stringByReplacingOccurrencesOfString:@"e+0" withString:@"e"] stringByReplacingOccurrencesOfString:@"e-0" withString:@"e-"]; + } + } +} + +- (id)debugQuickLookObject { + // for smallish Mat objects display as a matrix + if ([self dims] == 2 && [self rows] <= 10 && [self cols] <= 10) { + FontGetter fontGetters[] = { getCMU, getBodoni72, getAnySerif, getSystemFont }; + NSFont* font = nil; + for (int fontGetterIndex = 0; font==nil && fontGetterIndex < (sizeof(fontGetters)) / (sizeof(fontGetters[0])); fontGetterIndex++) { + font = fontGetters[fontGetterIndex](); + } + int elements = [self rows] * [self cols]; + NSDictionary* textFontAttributes = @{ NSFontAttributeName: font, NSForegroundColorAttributeName: NSColor.blackColor }; + NSMutableArray* rawData = [NSMutableArray new]; + for (int dataIndex = 0; dataIndex < elements; dataIndex++) { + [rawData addObject:[NSNumber numberWithDouble:0]]; + } + [self get:0 col: 0 data: rawData]; + BOOL isIntType = [self depth] <= CV_32S; + NSMutableArray* labels = [NSMutableArray new]; + NSMutableDictionary* boundingRects = [NSMutableDictionary dictionaryWithCapacity:elements]; + int maxWidth = 0, maxHeight = 0; + for (NSNumber* number in rawData) { + NSString* label = [self makeLabel:isIntType val:number]; + [labels addObject:label]; + NSRect boundingRect = [label boundingRectWithSize:NSMakeSize(CGFLOAT_MAX, CGFLOAT_MAX) options:NSStringDrawingUsesLineFragmentOrigin attributes:textFontAttributes]; + if (boundingRect.size.width > maxWidth) { + maxWidth = boundingRect.size.width; + } + if (boundingRect.size.height > maxHeight) { + maxHeight = boundingRect.size.height; + } + boundingRects[label] = [NSValue valueWithRect:boundingRect]; + } + + int rowGap = 8; + int colGap = 8; + int borderGap = 9; + int lineThickness = 4; + int lipWidth = 8; + int imageWidth = 2 * (borderGap + lipWidth) + maxWidth * [self cols] + colGap * ([self cols] - 1); + int imageHeight = 2 * (borderGap + lipWidth) + maxHeight * [self rows] + rowGap * ([self rows] - 
1); + NSImage* image = [[NSImage alloc] initWithSize:NSMakeSize(imageWidth, imageHeight)]; + NSBezierPath* leftBracket = [NSBezierPath new]; + [leftBracket moveToPoint:NSMakePoint(borderGap, borderGap)]; + [leftBracket relativeLineToPoint:NSMakePoint(0, imageHeight - 2 * borderGap)]; + [leftBracket relativeLineToPoint:NSMakePoint(lineThickness + lipWidth, 0)]; + [leftBracket relativeLineToPoint:NSMakePoint(0, -lineThickness)]; + [leftBracket relativeLineToPoint:NSMakePoint(-lipWidth, 0)]; + [leftBracket relativeLineToPoint:NSMakePoint(0, -(imageHeight - 2 * (borderGap + lineThickness)))]; + [leftBracket relativeLineToPoint:NSMakePoint(lipWidth, 0)]; + [leftBracket relativeLineToPoint:NSMakePoint(0, -lineThickness)]; + [leftBracket relativeLineToPoint:NSMakePoint(-(lineThickness + lipWidth), 0)]; + NSAffineTransform* reflect = [NSAffineTransform new]; + [reflect scaleXBy:-1 yBy:1]; + [reflect translateXBy:-imageWidth yBy:0]; + NSBezierPath* rightBracket = [leftBracket copy]; + [rightBracket transformUsingAffineTransform:reflect]; + + [image lockFocus]; + [NSColor.whiteColor drawSwatchInRect:NSMakeRect(0, 0, imageWidth, imageHeight)]; + [NSColor.blackColor set]; + [leftBracket fill]; + [rightBracket fill]; + + [labels enumerateObjectsUsingBlock:^(id label, NSUInteger index, BOOL *stop) + { + NSRect boundingRect = boundingRects[label].rectValue; + int row = [self rows] - 1 - ((int)index / [self cols]); + int col = (int)index % [self cols]; + int x = borderGap + lipWidth + col * (maxWidth + colGap) + (maxWidth - boundingRect.size.width) / 2; + int y = borderGap + lipWidth + row * (maxHeight + rowGap) + (maxHeight - boundingRect.size.height) / 2; + NSRect textRect = NSMakeRect(x, y, boundingRect.size.width, boundingRect.size.height); + [label drawInRect:textRect withAttributes:textFontAttributes]; + }]; + [image unlockFocus]; + return image; + } else if (([self dims] == 2) && ([self type] == CV_8U || [self type] == CV_8UC3 || [self type] == CV_8UC4)) { + // convert to 
NSImage if the Mats has 2 dimensions and a type and number of channels consistent with it being a image + return [self toNSImage]; + } else if ([self dims] == 2 && [self channels] == 1) { + // for other Mats with 2 dimensions and one channel - generate heat map + Mat* normalized = [Mat new]; + [Core normalize:self dst:normalized alpha:0 beta:255 norm_type:NORM_MINMAX dtype:CV_8U]; + Mat* normalizedKey = [[Mat alloc] initWithRows:[self rows] + 10 cols:[self cols] type:CV_8U]; + std::vector key; + for (int index = 0; index < [self cols]; index++) { + key.push_back((char)(index * 256 / [self cols])); + } + for (int index = 0; index < 10; index++) { + [normalizedKey put:@[[NSNumber numberWithInt:index], [NSNumber numberWithInt:0]] count:[self cols] byteBuffer:key.data()]; + } + [normalized copyTo:[normalizedKey submatRoi:[[Rect2i alloc] initWithX:0 y:10 width:[self cols] height:[self rows]]]]; + Mat* colorMap = [Mat new]; + [Imgproc applyColorMap:normalizedKey dst:colorMap colormap:COLORMAP_JET]; + [Imgproc cvtColor:colorMap dst:colorMap code:COLOR_BGR2RGB]; + return [colorMap toNSImage]; + } + //everything just return the Mat description + return [self description]; +} + +@end From 1e1984a586028ab233d26f0a4a5668653fc410ba Mon Sep 17 00:00:00 2001 From: Xerxes Battiwalla Date: Mon, 26 Jul 2021 14:54:27 +1000 Subject: [PATCH 082/128] Fixed typo in error message in OpenCVDetectCUDA.cmake There was a minor typo in the FATAL error message when the specified CUDA generation does not match any known generation --- cmake/OpenCVDetectCUDA.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index c7cfebe50f51..ac29e600d379 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -102,7 +102,7 @@ if(CUDA_FOUND) if(CUDA_GENERATION) if(NOT ";${_generations};" MATCHES ";${CUDA_GENERATION};") string(REPLACE ";" ", " _generations "${_generations}") - message(FATAL_ERROR "ERROR: 
${_generations} Generations are suppered.") + message(FATAL_ERROR "ERROR: ${_generations} Generations are supported.") endif() unset(CUDA_ARCH_BIN CACHE) unset(CUDA_ARCH_PTX CACHE) From cff0168f3a136b86ac4f415c8332abcd39212f67 Mon Sep 17 00:00:00 2001 From: rogday Date: Wed, 28 Jul 2021 18:06:24 +0300 Subject: [PATCH 083/128] Merge pull request #20453 from rogday:onnx_importer_fix Split layer dispatch into functions in ONNXImporter * split layer dispatch into functions * fixes * identation and comment fixes * fix constness --- modules/dnn/src/dnn_common.hpp | 1 + modules/dnn/src/onnx/onnx_importer.cpp | 2922 +++++++++++--------- modules/dnn/src/tensorflow/tf_importer.cpp | 2 +- 3 files changed, 1566 insertions(+), 1359 deletions(-) diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index cd6cea0c6b09..0f3feda91b4a 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -14,6 +14,7 @@ Mutex& getInitializationMutex(); void initializeLayerFactory(); namespace detail { +#define CALL_MEMBER_FN(object, ptrToMemFn) ((object).*(ptrToMemFn)) struct NetImplBase { diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 4ad0fd496e37..b833b2ea443f 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -62,7 +62,7 @@ class ONNXImporter public: ONNXImporter(Net& net, const char *onnxFile) - : dstNet(net) + : dstNet(net), dispatch(buildDispatchMap()) { hasDynamicShapes = false; CV_Assert(onnxFile); @@ -83,7 +83,7 @@ class ONNXImporter } ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) - : dstNet(net) + : dstNet(net), dispatch(buildDispatchMap()) { hasDynamicShapes = false; CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); @@ -124,6 +124,57 @@ class ONNXImporter typedef std::map::iterator IterLayerId_t; void handleNode(const opencv_onnx::NodeProto& node_proto); + +private: + typedef 
void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + typedef std::map DispatchMap; + + void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSlice (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseBias (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parsePow (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseLSTM (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseImageScaler (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseClip (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseElu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseTanh (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parsePRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseLRN (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseBatchNormalization (LayerParams& layerParams, const 
opencv_onnx::NodeProto& node_proto); + void parseGemm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConvTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseFlatten (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseUnsqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseExpand (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseReshape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parsePad (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseShape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConstantFill (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConcat (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseCustom (LayerParams& layerParams, const 
opencv_onnx::NodeProto& node_proto); + + const DispatchMap dispatch; + static const DispatchMap buildDispatchMap(); }; inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey) @@ -448,13 +499,11 @@ void ONNXImporter::populateNet() CV_LOG_DEBUG(NULL, "DNN/ONNX: import completed!"); } -void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) +void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) { - opencv_onnx::NodeProto node_proto = node_proto_; // TODO FIXIT - CV_Assert(node_proto.output_size() >= 1); std::string name = node_proto.output(0); - std::string layer_type = node_proto.op_type(); + const std::string& layer_type = node_proto.op_type(); CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) ); @@ -468,1537 +517,1694 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) layerParams.type = layer_type; layerParams.set("has_dynamic_shapes", hasDynamicShapes); - if (layer_type == "MaxPool") + DispatchMap::const_iterator iter = dispatch.find(layer_type); + if (iter != dispatch.end()) { - layerParams.type = "Pooling"; - layerParams.set("pool", "MAX"); - layerParams.set("ceil_mode", layerParams.has("pad_mode")); + CALL_MEMBER_FN(*this, iter->second)(layerParams, node_proto); } - else if (layer_type == "AveragePool") + else { - layerParams.type = "Pooling"; - layerParams.set("pool", "AVE"); - layerParams.set("ceil_mode", layerParams.has("pad_mode")); - layerParams.set("ave_pool_padded_area", framework_name == "pytorch"); + parseCustom(layerParams, node_proto); } - else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool" || - layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax") + } + catch (const cv::Exception& e) + { + CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing 
node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " + << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) + ); + for (int i = 0; i < node_proto.input_size(); i++) { - CV_Assert(node_proto.input_size() == 1); - layerParams.type = "Pooling"; - String pool; - if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax") - pool = "MAX"; - else if (layer_type == "ReduceSum") - pool = "SUM"; - else - pool = "AVE"; - layerParams.set("pool", pool); - layerParams.set("global_pooling", !layerParams.has("axes")); - if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) - { - MatShape inpShape = outShapes[node_proto.input(0)]; - DictValue axes = layerParams.get("axes"); - bool keepdims = layerParams.get("keepdims"); - MatShape targetShape; - std::vector shouldDelete(inpShape.size(), false); - for (int i = 0; i < axes.size(); i++) { - int axis = normalize_axis(axes.get(i), inpShape.size()); - shouldDelete[axis] = true; - } - for (int axis = 0; axis < inpShape.size(); ++axis){ - if (!shouldDelete[axis]) - targetShape.push_back(inpShape[axis]); - else if (keepdims) - targetShape.push_back(1); - } - - if (inpShape.size() == 3 && axes.size() <= 2) - { - int axis = normalize_axis(axes.get(0), inpShape.size()); - CV_CheckNE(axis, 0, ""); - - LayerParams reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("axis", 0); - reshapeLp.set("num_axes", 1); - int newShape[] = {1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2)); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); - - LayerParams avgLp; - avgLp.name = layerParams.name + "/avg"; - avgLp.type = "Pooling"; - CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); - avgLp.set("pool", pool); - 
if (axes.size() == 2) - { - CV_CheckEQ(normalize_axis(axes.get(0), inpShape.size()), 1, "Unsupported mode"); - CV_CheckEQ(normalize_axis(axes.get(1), inpShape.size()), 2, "Unsupported mode"); - avgLp.set("global_pooling", true); - } - else - { - avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true); - avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1); - } - - node_proto.set_input(0, reshapeLp.name); - node_proto.set_output(0, avgLp.name); - addLayer(avgLp, node_proto); - } - else - { - if (inpShape.size() != 4 && inpShape.size() != 5) - CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation."); + CV_LOG_INFO(NULL, " Input[" << i << "] = '" << node_proto.input(i) << "'"); + } + for (int i = 0; i < node_proto.output_size(); i++) + { + CV_LOG_INFO(NULL, " Output[" << i << "] = '" << node_proto.output(i) << "'"); + } + CV_Error(Error::StsError, cv::format("Node [%s]:(%s) parse error: %s", layer_type.c_str(), name.c_str(), e.what())); + } +} - CV_Assert(axes.size() <= inpShape.size() - 2); - std::vector kernel_size(inpShape.size() - 2, 1); - if (axes.size() == 1 && (normalize_axis(axes.get(0), inpShape.size()) <= 1)) - { - int axis = normalize_axis(axes.get(0), inpShape.size()); - MatShape newShape = inpShape; - newShape[axis + 1] = total(newShape, axis + 1); - newShape.resize(axis + 2); - newShape.insert(newShape.begin(), 2 - axis, 1); - - LayerParams reshapeLp; - reshapeLp.type = "Reshape"; - reshapeLp.name = layerParams.name + "/reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); - - node_proto.set_output(0, reshapeLp.name); - addLayer(reshapeLp, node_proto); - - kernel_size.resize(2); - kernel_size[0] = inpShape[axis]; - node_proto.set_input(0, node_proto.output(0)); - } - else - { - for (int i = 0; i < axes.size(); i++) { - int axis = normalize_axis(axes.get(i), inpShape.size()); - CV_Assert_N(axis >= 2 + i, axis < 
inpShape.size()); - kernel_size[axis - 2] = inpShape[axis]; - } - } +void ONNXImporter::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Pooling"; + layerParams.set("pool", "MAX"); + layerParams.set("ceil_mode", layerParams.has("pad_mode")); + addLayer(layerParams, node_proto); +} - LayerParams poolLp = layerParams; - poolLp.name = layerParams.name + "/avg"; - CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); - poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); +void ONNXImporter::parseAveragePool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Pooling"; + layerParams.set("pool", "AVE"); + layerParams.set("ceil_mode", layerParams.has("pad_mode")); + layerParams.set("ave_pool_padded_area", framework_name == "pytorch"); + addLayer(layerParams, node_proto); +} - node_proto.set_output(0, poolLp.name); - addLayer(poolLp, node_proto); - } +// "GlobalAveragePool" "GlobalMaxPool" "ReduceMean" "ReduceSum" "ReduceMax" +void ONNXImporter::parseReduce(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + const std::string& layer_type = node_proto.op_type(); + + CV_Assert(node_proto.input_size() == 1); + layerParams.type = "Pooling"; + String pool; + if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax") + pool = "MAX"; + else if (layer_type == "ReduceSum") + pool = "SUM"; + else + pool = "AVE"; + layerParams.set("pool", pool); + layerParams.set("global_pooling", !layerParams.has("axes")); + if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + { + MatShape inpShape = outShapes[node_proto.input(0)]; + DictValue axes = layerParams.get("axes"); + bool keepdims = layerParams.get("keepdims"); + MatShape targetShape; + std::vector shouldDelete(inpShape.size(), false); + for (int i = 0; i < 
axes.size(); i++) { + int axis = normalize_axis(axes.get(i), inpShape.size()); + shouldDelete[axis] = true; + } + for (int axis = 0; axis < inpShape.size(); ++axis){ + if (!shouldDelete[axis]) + targetShape.push_back(inpShape[axis]); + else if (keepdims) + targetShape.push_back(1); + } + + if (inpShape.size() == 3 && axes.size() <= 2) + { + int axis = normalize_axis(axes.get(0), inpShape.size()); + CV_CheckNE(axis, 0, ""); + + LayerParams reshapeLp; + reshapeLp.name = layerParams.name + "/reshape"; + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + reshapeLp.set("axis", 0); + reshapeLp.set("num_axes", 1); + int newShape[] = {1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2)); + + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(reshapeLp.name); + addLayer(reshapeLp, proto); + + LayerParams avgLp; + avgLp.name = layerParams.name + "/avg"; + avgLp.type = "Pooling"; + CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); + avgLp.set("pool", pool); + if (axes.size() == 2) + { + CV_CheckEQ(normalize_axis(axes.get(0), inpShape.size()), 1, "Unsupported mode"); + CV_CheckEQ(normalize_axis(axes.get(1), inpShape.size()), 2, "Unsupported mode"); + avgLp.set("global_pooling", true); + } + else + { + avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true); + avgLp.set(axis == 2 ? 
"kernel_h" : "kernel_w", 1); + } - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); + node_proto.set_input(0, reshapeLp.name); + node_proto.set_output(0, avgLp.name); + addLayer(avgLp, node_proto); + } + else + { + if (inpShape.size() != 4 && inpShape.size() != 5) + CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation."); - node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, layerParams.name); - } - else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + CV_Assert(axes.size() <= inpShape.size() - 2); + std::vector kernel_size(inpShape.size() - 2, 1); + if (axes.size() == 1 && (normalize_axis(axes.get(0), inpShape.size()) <= 1)) { - CV_CheckEQ(layerParams.get("keepdims"), 0, "layer only supports keepdims = false"); + int axis = normalize_axis(axes.get(0), inpShape.size()); + MatShape newShape = inpShape; + newShape[axis + 1] = total(newShape, axis + 1); + newShape.resize(axis + 2); + newShape.insert(newShape.begin(), 2 - axis, 1); + LayerParams reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; reshapeLp.type = "Reshape"; + reshapeLp.name = layerParams.name + "/reshape"; CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - int newShape[] = {1, 1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4)); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); - LayerParams poolLp = layerParams; - poolLp.name = layerParams.name + "/pool"; - CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); - - node_proto.set_input(0, reshapeLp.name); - node_proto.set_output(0, poolLp.name); - addLayer(poolLp, node_proto); - - layerParams.type = "Reshape"; - int targetShape[] = {1}; - 
layerParams.set("dim", DictValue::arrayInt(&targetShape[0], 1)); + node_proto.set_output(0, reshapeLp.name); + addLayer(reshapeLp, node_proto); + kernel_size.resize(2); + kernel_size[0] = inpShape[axis]; node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, layerParams.name); } - } - else if (layer_type == "Slice") - { - int axis = 0; - std::vector begin; - std::vector end; - std::vector steps; - int inp_size = node_proto.input_size(); - - if (inp_size == 1) + else { - if (layerParams.has("axes")) { - DictValue axes = layerParams.get("axes"); - for (int i = 1; i < axes.size(); ++i) { - CV_Assert(axes.get(i - 1) == axes.get(i) - 1); - } - axis = axes.get(0); + for (int i = 0; i < axes.size(); i++) { + int axis = normalize_axis(axes.get(i), inpShape.size()); + CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); + kernel_size[axis - 2] = inpShape[axis]; } + } - DictValue starts = layerParams.get("starts"); - DictValue ends = layerParams.get("ends"); - CV_Assert(starts.size() == ends.size()); - - if (axis > 0) { - begin.resize(axis, 0); - end.resize(axis, -1); - } - for (int i = 0; i < starts.size(); ++i) - { - begin.push_back(starts.get(i)); - int finish = ends.get(i); - end.push_back((finish < 0) ? 
--finish : finish); // numpy doesn't include last dim - } - } else { // inp_size > 1 - CV_Assert(inp_size >= 3); - for (int i = 1; i < inp_size; i++) { - CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end()); - } - Mat start_blob = getBlob(node_proto, 1); - Mat end_blob = getBlob(node_proto, 2); - CV_Assert(start_blob.total() == end_blob.total()); - - if (inp_size > 3) { - Mat axes_blob = getBlob(node_proto, 3); - const int* axes = (int*)axes_blob.data; - for (int i = 1; i < axes_blob.total(); ++i) { - CV_Assert(axes[i - 1] == axes[i] - 1); - } - axis = axes[0]; - } + LayerParams poolLp = layerParams; + poolLp.name = layerParams.name + "/avg"; + CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); + poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); - const int* starts = start_blob.ptr(); - const int* ends = end_blob.ptr(); - if (axis > 0) { - begin.resize(axis, 0); - end.resize(axis, -1); - } - std::copy(starts, starts + start_blob.total(), std::back_inserter(begin)); - for (int i = 0; i < end_blob.total(); ++i) - { - int finish = ends[i]; - end.push_back((finish < 0) ? 
--finish : finish); // numpy doesn't include last dim - } + node_proto.set_output(0, poolLp.name); + addLayer(poolLp, node_proto); + } - if (inp_size == 5) { - CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end()); - Mat step_blob = getBlob(node_proto, 4); - const int* steps_ptr = step_blob.ptr(); + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); - if (axis > 0) - steps.resize(axis, 1); + node_proto.set_input(0, node_proto.output(0)); + node_proto.set_output(0, layerParams.name); + } + else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + { + CV_CheckEQ(layerParams.get("keepdims"), 0, "layer only supports keepdims = false"); + LayerParams reshapeLp; + reshapeLp.name = layerParams.name + "/reshape"; + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + int newShape[] = {1, 1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4)); + + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(reshapeLp.name); + addLayer(reshapeLp, proto); + + LayerParams poolLp = layerParams; + poolLp.name = layerParams.name + "/pool"; + CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); + + node_proto.set_input(0, reshapeLp.name); + node_proto.set_output(0, poolLp.name); + addLayer(poolLp, node_proto); + + layerParams.type = "Reshape"; + int targetShape[] = {1}; + layerParams.set("dim", DictValue::arrayInt(&targetShape[0], 1)); + + node_proto.set_input(0, node_proto.output(0)); + node_proto.set_output(0, layerParams.name); + } + addLayer(layerParams, node_proto); +} - std::copy(steps_ptr, steps_ptr + step_blob.total(), std::back_inserter(steps)); +void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + int axis = 0; + std::vector begin; + std::vector end; + std::vector steps; + int inp_size = 
node_proto.input_size(); - // Very strange application for Slice op with tensor reversing. - // We just workaround it for 2d constants. - if (constBlobs.find(node_proto.input(0)) != constBlobs.end() && - axis == 0 && - start_blob.at(0) == -1 && step_blob.at(0) == -1 && - end_blob.at(0) == std::numeric_limits::min()) - { - Mat inp = getBlob(node_proto, 0); - if (inp.dims == 2) - { - Mat flipped; - flip(inp, flipped, 0); - addConstant(layerParams.name, flipped); - return; - } - } - } + if (inp_size == 1) + { + if (layerParams.has("axes")) { + DictValue axes = layerParams.get("axes"); + for (int i = 1; i < axes.size(); ++i) { + CV_Assert(axes.get(i - 1) == axes.get(i) - 1); } - layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size())); - layerParams.set("end", DictValue::arrayInt(&end[0], end.size())); - layerParams.set("axis", axis); + axis = axes.get(0); + } - if (!steps.empty()) - layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size())); + DictValue starts = layerParams.get("starts"); + DictValue ends = layerParams.get("ends"); + CV_Assert(starts.size() == ends.size()); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - Mat inp = getBlob(node_proto, 0); - std::vector inputs, sliced; - inputs.push_back(inp); - runLayer(layerParams, inputs, sliced); - CV_Assert(sliced.size() == 1); - addConstant(layerParams.name, sliced[0]); - return; - } + if (axis > 0) { + begin.resize(axis, 0); + end.resize(axis, -1); } - else if (layer_type == "Split") + for (int i = 0; i < starts.size(); ++i) { - if (layerParams.has("split")) - { - DictValue splits = layerParams.get("split"); - const int numSplits = splits.size(); - CV_Assert(numSplits > 1); - - std::vector slicePoints(numSplits - 1, splits.get(0)); - for (int i = 1; i < splits.size() - 1; ++i) - { - slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); - } - layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); - } - else - { - 
layerParams.set("num_split", node_proto.output_size()); - } - layerParams.type = "Slice"; + begin.push_back(starts.get(i)); + int finish = ends.get(i); + end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim } - else if (layer_type == "Add" || layer_type == "Sum" || layer_type == "Sub") - { - bool isSub = layer_type == "Sub"; - CV_CheckEQ(node_proto.input_size(), 2, ""); - bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end(); - bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end(); - if (is_const_0 && is_const_1) - { - Mat blob_0 = getBlob(node_proto, 0); - Mat blob_1 = getBlob(node_proto, 1); - CV_Assert(blob_0.size == blob_1.size); - Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1); - addConstant(layerParams.name, output); - return; - } - else if (is_const_0 || is_const_1) - { - int const_blob_id = is_const_0 ? 0 : 1; - Mat blob = getBlob(node_proto, const_blob_id); - int blob_total = blob.total(); - if (blob_total == 1) { - layerParams.type = "Power"; - layerParams.set("shift", (isSub ? -1 : 1) * blob.ptr()[0]); - } - else { - MatShape inpShape = outShapes[node_proto.input(1 - const_blob_id)]; - if (shape(blob) == inpShape) - { - LayerParams constParams; - constParams.name = layerParams.name + "/const"; - constParams.type = "Const"; - constParams.blobs.push_back((isSub ? 
-1 : 1) * blob); - int id = dstNet.addLayer(constParams.name, constParams.type, constParams); - layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0))); - outShapes[constParams.name] = shape(blob); - - layerParams.type = "Eltwise"; - node_proto.set_input(const_blob_id, constParams.name); - } - else - { - layerParams.type = "Scale"; - layerParams.set("bias_term", true); - int axis = 1; - for (int i = 0; i < graph_proto.initializer_size(); i++) - { - opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i); - if (tensor_proto.name() == node_proto.input(const_blob_id)) - { - axis = inpShape.size() - tensor_proto.dims_size(); - break; - } - } - layerParams.set("axis", axis); - blob = blob.reshape(1, 1); - layerParams.blobs.push_back((isSub ? -1 : 1) * blob); - } - } - } - else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) - { - layerParams.type = "Eltwise"; - if (isSub) - { - static float subCoeffs[] = {1.f, -1.f}; - layerParams.set("coeff", DictValue::arrayReal(subCoeffs, 2)); - } - } - else - { - if (isSub) - { - LayerParams powerParams; - powerParams.name = layerParams.name + "/neg"; - powerParams.type = "Power"; - powerParams.set("scale", -1); - - //Create Power layer - int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); - //Connect to input - IterLayerId_t layerId = layer_id.find(node_proto.input(1)); - CV_Assert(layerId != layer_id.end()); - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); - //Add shape - layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); - outShapes[powerParams.name] = outShapes[node_proto.input(1)]; - - //Replace input to Power - node_proto.set_input(1, powerParams.name); - } - layerParams.type = "Scale"; - layerParams.set("bias_term", true); - } + } else { // inp_size > 1 + CV_Assert(inp_size >= 3); + for (int i = 1; i < inp_size; i++) { + CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end()); } - else if (layer_type == 
"Pow") - { - if (layer_id.find(node_proto.input(1)) != layer_id.end()) - CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power"); - - Mat blob = getBlob(node_proto, 1); - if (blob.total() != 1) - CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power"); + Mat start_blob = getBlob(node_proto, 1); + Mat end_blob = getBlob(node_proto, 2); + CV_Assert(start_blob.total() == end_blob.total()); - blob.convertTo(blob, CV_32F); - layerParams.type = "Power"; - layerParams.set("power", blob.ptr()[0]); - } - else if (layer_type == "Max") - { - layerParams.type = "Eltwise"; - layerParams.set("operation", "max"); + if (inp_size > 3) { + Mat axes_blob = getBlob(node_proto, 3); + const int* axes = (int*)axes_blob.data; + for (int i = 1; i < axes_blob.total(); ++i) { + CV_Assert(axes[i - 1] == axes[i] - 1); + } + axis = axes[0]; } - else if (layer_type == "Neg") - { - layerParams.type = "Power"; - layerParams.set("scale", -1); + + const int* starts = start_blob.ptr(); + const int* ends = end_blob.ptr(); + if (axis > 0) { + begin.resize(axis, 0); + end.resize(axis, -1); } - else if (layer_type == "Constant") + std::copy(starts, starts + start_blob.total(), std::back_inserter(begin)); + for (int i = 0; i < end_blob.total(); ++i) { - CV_Assert(node_proto.input_size() == 0); - CV_Assert(layerParams.blobs.size() == 1); - addConstant(layerParams.name, layerParams.blobs[0]); - return; + int finish = ends[i]; + end.push_back((finish < 0) ? 
--finish : finish); // numpy doesn't include last dim } - else if (layer_type == "LSTM") - { - LayerParams lstmParams = layerParams; - lstmParams.name += "/lstm"; - - // https://pytorch.org/docs/stable/nn.html#lstm - CV_Assert(node_proto.input_size() == 7); - Mat Wx = getBlob(node_proto, 1); - Mat Wh = getBlob(node_proto, 2); - Mat b = getBlob(node_proto, 3); - Mat h0 = getBlob(node_proto, 5); - Mat c0 = getBlob(node_proto, 6); - - b = b.reshape(1, b.size[0]); - - const int numHidden = lstmParams.get("hidden_size"); - const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. - const int numFeatures = Wx.size[2]; - Mat bx = b.colRange(0, b.cols / 2); - Mat bh = b.colRange(b.cols / 2, b.cols); - b = bx + bh; - - // IFGO->IGFO - for (int k = 0; k < numDirs; ++k) + + if (inp_size == 5) { + CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end()); + Mat step_blob = getBlob(node_proto, 4); + const int* steps_ptr = step_blob.ptr(); + + if (axis > 0) + steps.resize(axis, 1); + + std::copy(steps_ptr, steps_ptr + step_blob.total(), std::back_inserter(steps)); + + // Very strange application for Slice op with tensor reversing. + // We just workaround it for 2d constants. 
+ if (constBlobs.find(node_proto.input(0)) != constBlobs.end() && + axis == 0 && + start_blob.at(0) == -1 && step_blob.at(0) == -1 && + end_blob.at(0) == std::numeric_limits::min()) { - float* WxData = Wx.ptr(k); - float* WhData = Wh.ptr(k); - float* biasData = b.ptr(k); - for (int j = 0; j < numHidden; ++j) + Mat inp = getBlob(node_proto, 0); + if (inp.dims == 2) { - for (int i = 0; i < numFeatures; ++i) - { - std::swap(WxData[(numHidden + j) * numFeatures + i], - WxData[(numHidden * 2 + j) * numFeatures + i]); - } - for (int i = 0; i < numHidden; ++i) - { - std::swap(WhData[(numHidden + j) * numHidden + i], - WhData[(numHidden * 2 + j) * numHidden + i]); - } - std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); + Mat flipped; + flip(inp, flipped, 0); + addConstant(layerParams.name, flipped); + return; } } - Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); - Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); - h0 = h0.reshape(1, h0.size[0] * h0.size[1]); - c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + } + } + layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size())); + layerParams.set("end", DictValue::arrayInt(&end[0], end.size())); + layerParams.set("axis", axis); - lstmParams.blobs.resize(5); - lstmParams.blobs[0] = Wh; - lstmParams.blobs[1] = Wx; - lstmParams.blobs[2] = b; - lstmParams.blobs[3] = h0; - lstmParams.blobs[4] = c0; - lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); + if (!steps.empty()) + layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size())); - node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name - addLayer(lstmParams, node_proto); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat inp = getBlob(node_proto, 0); + std::vector inputs, sliced; + inputs.push_back(inp); + runLayer(layerParams, inputs, sliced); + CV_Assert(sliced.size() == 1); + addConstant(layerParams.name, sliced[0]); + return; + } + 
addLayer(layerParams, node_proto); +} - MatShape lstmShape = outShapes[node_proto.output(0)]; +void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + if (layerParams.has("split")) + { + DictValue splits = layerParams.get("split"); + const int numSplits = splits.size(); + CV_Assert(numSplits > 1); - // Add fake 1 as it is done in ONNX - lstmShape.insert(lstmShape.begin() + 1, 1); + std::vector slicePoints(numSplits - 1, splits.get(0)); + for (int i = 1; i < splits.size() - 1; ++i) + { + slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); + } + layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); + } + else + { + layerParams.set("num_split", node_proto.output_size()); + } + layerParams.type = "Slice"; + addLayer(layerParams, node_proto); +} - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size())); - node_proto.set_input(0, lstmParams.name); // redirect input to LSTM - node_proto.set_output(0, layerParams.name); // keep origin LSTM's name +// "Add" "Sum" "Sub" +void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + const std::string& layer_type = node_proto.op_type(); + bool isSub = layer_type == "Sub"; + CV_CheckEQ(node_proto.input_size(), 2, ""); + bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end(); + bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end(); + if (is_const_0 && is_const_1) + { + Mat blob_0 = getBlob(node_proto, 0); + Mat blob_1 = getBlob(node_proto, 1); + CV_Assert(blob_0.size == blob_1.size); + Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1); + addConstant(layerParams.name, output); + return; + } + else if (is_const_0 || is_const_1) + { + int const_blob_id = is_const_0 ? 
0 : 1; + Mat blob = getBlob(node_proto, const_blob_id); + int blob_total = blob.total(); + if (blob_total == 1) { + layerParams.type = "Power"; + layerParams.set("shift", (isSub ? -1 : 1) * blob.ptr()[0]); } - else if (layer_type == "ImageScaler") - { - const float scale = layerParams.has("scale") ? layerParams.get("scale") : 1.0f; - layerParams.erase("scale"); + else { + MatShape inpShape = outShapes[node_proto.input(1 - const_blob_id)]; + if (shape(blob) == inpShape) + { + LayerParams constParams; + constParams.name = layerParams.name + "/const"; + constParams.type = "Const"; + constParams.blobs.push_back((isSub ? -1 : 1) * blob); + int id = dstNet.addLayer(constParams.name, constParams.type, constParams); + layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0))); + outShapes[constParams.name] = shape(blob); - if (layerParams.has("bias")) + layerParams.type = "Eltwise"; + node_proto.set_input(const_blob_id, constParams.name); + } + else { layerParams.type = "Scale"; - layerParams.blobs.push_back( - Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale)); - layerParams.set("bias_term", true); - Mat bias(1, layerParams.get("bias").size(), CV_32FC1); - for (int j = 0; j < bias.total(); j++) { - bias.at(0, j) = layerParams.get("bias").getRealValue(j); + int axis = 1; + for (int i = 0; i < graph_proto.initializer_size(); i++) + { + opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i); + if (tensor_proto.name() == node_proto.input(const_blob_id)) + { + axis = inpShape.size() - tensor_proto.dims_size(); + break; + } } - layerParams.blobs.push_back(bias); - layerParams.erase("bias"); - } - else { - layerParams.set("scale", scale); - layerParams.type = "Power"; + layerParams.set("axis", axis); + blob = blob.reshape(1, 1); + layerParams.blobs.push_back((isSub ? 
-1 : 1) * blob); } } - else if (layer_type == "Clip") - { - layerParams.type = "ReLU6"; - replaceLayerParam(layerParams, "min", "min_value"); - replaceLayerParam(layerParams, "max", "max_value"); - - } - else if (layer_type == "LeakyRelu") - { - layerParams.type = "ReLU"; - replaceLayerParam(layerParams, "alpha", "negative_slope"); - } - else if (layer_type == "Relu") - { - layerParams.type = "ReLU"; - } - else if (layer_type == "Elu") - { - layerParams.type = "ELU"; - } - else if (layer_type == "Tanh") - { - layerParams.type = "TanH"; - } - else if (layer_type == "PRelu") - { - layerParams.type = "PReLU"; - layerParams.blobs.push_back(getBlob(node_proto, 1)); - } - else if (layer_type == "LRN") + } + else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) + { + layerParams.type = "Eltwise"; + if (isSub) { - replaceLayerParam(layerParams, "size", "local_size"); + static float subCoeffs[] = {1.f, -1.f}; + layerParams.set("coeff", DictValue::arrayReal(subCoeffs, 2)); } - else if (layer_type == "InstanceNormalization") + } + else + { + if (isSub) { - if (node_proto.input_size() != 3) - CV_Error(Error::StsNotImplemented, - "Expected input, scale, bias"); - - layerParams.blobs.resize(4); - layerParams.blobs[2] = getBlob(node_proto, 1); // weightData - layerParams.blobs[3] = getBlob(node_proto, 2); // biasData - layerParams.set("has_bias", true); - layerParams.set("has_weight", true); - - // Get number of channels in input - int size = layerParams.blobs[2].total(); - layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F); // mean - layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std - - LayerParams mvnParams; - mvnParams.name = layerParams.name + "/MVN"; - mvnParams.type = "MVN"; - mvnParams.set("eps", layerParams.get("epsilon")); - layerParams.erase("epsilon"); - - //Create MVN layer - int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + LayerParams powerParams; + powerParams.name = layerParams.name + "/neg"; + powerParams.type = 
"Power"; + powerParams.set("scale", -1); + + //Create Power layer + int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); //Connect to input - IterLayerId_t layerId = layer_id.find(node_proto.input(0)); + IterLayerId_t layerId = layer_id.find(node_proto.input(1)); CV_Assert(layerId != layer_id.end()); dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape - layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0))); - outShapes[mvnParams.name] = outShapes[node_proto.input(0)]; + layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); + outShapes[powerParams.name] = outShapes[node_proto.input(1)]; - //Replace Batch Norm's input to MVN - node_proto.set_input(0, mvnParams.name); - layerParams.type = "BatchNorm"; + //Replace input to Power + node_proto.set_input(1, powerParams.name); } - else if (layer_type == "BatchNormalization") - { - if (node_proto.input_size() != 5) - CV_Error(Error::StsNotImplemented, - "Expected input, scale, bias, mean and var"); - - layerParams.type = "BatchNorm"; - replaceLayerParam(layerParams, "epsilon", "eps"); - replaceLayerParam(layerParams, "spatial", "use_global_stats"); - - Mat meanData = getBlob(node_proto, 3); - Mat stdData = getBlob(node_proto, 4); + layerParams.type = "Scale"; + layerParams.set("bias_term", true); + } + addLayer(layerParams, node_proto); +} - layerParams.blobs.push_back(meanData); - layerParams.blobs.push_back(stdData); +void ONNXImporter::parsePow(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + if (layer_id.find(node_proto.input(1)) != layer_id.end()) + CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power"); - if (!node_proto.input(1).empty()) { - layerParams.set("has_weight", true); - layerParams.blobs.push_back(getBlob(node_proto, 1)); // weightData - } else { - layerParams.set("has_weight", false); - } + Mat blob = getBlob(node_proto, 1); + if (blob.total() != 1) + 
CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power"); - if (!node_proto.input(2).empty()) { - layerParams.set("has_bias", true); - layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData - } else { - layerParams.set("has_bias", false); - } - } - else if (layer_type == "Gemm") - { - CV_Assert(node_proto.input_size() >= 2); - layerParams.type = "InnerProduct"; - Mat weights = getBlob(node_proto, 1); - int ind_num_out = 0; - if (layerParams.has("transB") && !layerParams.get("transB")) { - transpose(weights, weights); - ind_num_out = 1; - } - layerParams.blobs.push_back(weights); + blob.convertTo(blob, CV_32F); + layerParams.type = "Power"; + layerParams.set("power", blob.ptr()[0]); + addLayer(layerParams, node_proto); +} - if (node_proto.input_size() == 3) { - Mat bias = getBlob(node_proto, 2); - layerParams.blobs.push_back(bias); - } - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - Mat inputBuf = getBlob(node_proto, 0); +void ONNXImporter::parseMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Eltwise"; + layerParams.set("operation", "max"); + addLayer(layerParams, node_proto); +} - LayerParams constParams; - constParams.name = node_proto.input(0); - constParams.type = "Const"; - constParams.blobs.push_back(inputBuf); +void ONNXImporter::parseNeg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Power"; + layerParams.set("scale", -1); + addLayer(layerParams, node_proto); +} - opencv_onnx::NodeProto proto; - proto.add_output(constParams.name); - addLayer(constParams, proto); - } +void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 0); + CV_Assert(layerParams.blobs.size() == 1); + addConstant(layerParams.name, layerParams.blobs[0]); +} - layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]); - layerParams.set("bias_term", 
node_proto.input_size() == 3); - } - else if (layer_type == "MatMul") - { - CV_Assert(node_proto.input_size() == 2); - layerParams.type = "InnerProduct"; - layerParams.set("bias_term", false); - CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end()); - int firstInpDims = outShapes[node_proto.input(0)].size(); - int secondInpDims; - - if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) - { - Mat blob = getBlob(node_proto, 1); - secondInpDims = blob.dims; - layerParams.blobs.push_back(blob.t()); - layerParams.set("num_output", layerParams.blobs[0].size[0]); - } else { - secondInpDims = outShapes[node_proto.input(1)].size(); - } - layerParams.set("axis", firstInpDims - secondInpDims + 1); - } - else if (layer_type == "Mul" || layer_type == "Div") +void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + LayerParams lstmParams = layerParams; + lstmParams.name += "/lstm"; + + // https://pytorch.org/docs/stable/nn.html#lstm + CV_Assert(node_proto.input_size() == 7); + Mat Wx = getBlob(node_proto, 1); + Mat Wh = getBlob(node_proto, 2); + Mat b = getBlob(node_proto, 3); + Mat h0 = getBlob(node_proto, 5); + Mat c0 = getBlob(node_proto, 6); + + b = b.reshape(1, b.size[0]); + + const int numHidden = lstmParams.get("hidden_size"); + const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. 
+ const int numFeatures = Wx.size[2]; + Mat bx = b.colRange(0, b.cols / 2); + Mat bh = b.colRange(b.cols / 2, b.cols); + b = bx + bh; + + // IFGO->IGFO + for (int k = 0; k < numDirs; ++k) + { + float* WxData = Wx.ptr(k); + float* WhData = Wh.ptr(k); + float* biasData = b.ptr(k); + for (int j = 0; j < numHidden; ++j) { - CV_Assert(node_proto.input_size() == 2); - - bool isDiv = layer_type == "Div"; - int constId = -1; - bool haveVariables = false; - for (int i = 0; i < 2; ++i) + for (int i = 0; i < numFeatures; ++i) { - if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) - constId = i; - else - haveVariables = true; + std::swap(WxData[(numHidden + j) * numFeatures + i], + WxData[(numHidden * 2 + j) * numFeatures + i]); } - if (constId != -1 && haveVariables) + for (int i = 0; i < numHidden; ++i) { - Mat blob = getBlob(node_proto, constId); - blob = blob.reshape(1, 1); - if (blob.total() == 1) { - float blob_value = blob.ptr()[0]; - float coeff = isDiv ? 1.0 / blob_value : blob_value; - layerParams.set("scale", coeff); - layerParams.type = "Power"; - } - else { - if (isDiv) - divide(1.0, blob, blob); - layerParams.blobs.push_back(blob); - layerParams.type = "Scale"; - } + std::swap(WhData[(numHidden + j) * numHidden + i], + WhData[(numHidden * 2 + j) * numHidden + i]); } - else if (!haveVariables) - { - Mat inp0 = getBlob(node_proto, 0); - Mat inp1 = getBlob(node_proto, 1); - - if (inp0.size != inp1.size && (inp0.total() != 1 || inp1.total() != 1)) - CV_Error_(Error::StsNotImplemented, ("Different shapes case is not supported with constant inputs: %s", layer_type.c_str())); + std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); + } + } + Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); + Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + + lstmParams.blobs.resize(5); + lstmParams.blobs[0] = Wh; + lstmParams.blobs[1] = Wx; + lstmParams.blobs[2] = b; + 
lstmParams.blobs[3] = h0; + lstmParams.blobs[4] = c0; + lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); + + node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name + addLayer(lstmParams, node_proto); + + MatShape lstmShape = outShapes[node_proto.output(0)]; + + // Add fake 1 as it is done in ONNX + lstmShape.insert(lstmShape.begin() + 1, 1); + + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size())); + node_proto.set_input(0, lstmParams.name); // redirect input to LSTM + node_proto.set_output(0, layerParams.name); // keep origin LSTM's name + addLayer(layerParams, node_proto); +} - if (inp0.total() == 1 && inp1.total() == 1 && inp0.dims != inp1.dims) - { - if (inp0.dims < inp1.dims) - { - inp0 = inp0.reshape(1, inp1.dims, inp1.size); - inp0.dims = inp1.dims; - } - else - { - inp1 = inp1.reshape(1, inp0.dims, inp0.size); - inp1.dims = inp0.dims; - } - } +void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + const float scale = layerParams.has("scale") ? layerParams.get("scale") : 1.0f; + layerParams.erase("scale"); - Mat out; - if (inp0.total() != inp1.total()) - { - if (inp0.total() == 1) - { - float inp0_value = inp0.ptr()[0]; - float coeff = isDiv ? 1.0 / inp0_value : inp0_value; - multiply(inp1, coeff, out); - } - else - { - float inp1_value = inp1.ptr()[0]; - float coeff = isDiv ? 1.0 / inp1_value : inp1_value; - multiply(inp0, coeff, out); - } + if (layerParams.has("bias")) + { + layerParams.type = "Scale"; + layerParams.blobs.push_back( + Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale)); - } - else - { - out = isDiv ? 
inp0 / inp1 : inp0.mul(inp1); - } + layerParams.set("bias_term", true); + Mat bias(1, layerParams.get("bias").size(), CV_32FC1); + for (int j = 0; j < bias.total(); j++) { + bias.at(0, j) = layerParams.get("bias").getRealValue(j); + } + layerParams.blobs.push_back(bias); + layerParams.erase("bias"); + } + else { + layerParams.set("scale", scale); + layerParams.type = "Power"; + } + addLayer(layerParams, node_proto); +} - if (inp0.dims == 1 && inp1.dims == 1) - out.dims = 1; // to workaround dims == 1 - addConstant(layerParams.name, out); - return; - } - else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) - { - layerParams.type = "Eltwise"; - layerParams.set("operation", isDiv ? "div" : "prod"); - } - else - { - // Scale layer allocate output with the first input shape - if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)])) - { - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(1)); - proto.add_input(node_proto.input(0)); - proto.add_output(layerParams.name); - node_proto = proto; - } +void ONNXImporter::parseClip(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "ReLU6"; + replaceLayerParam(layerParams, "min", "min_value"); + replaceLayerParam(layerParams, "max", "max_value"); + addLayer(layerParams, node_proto); +} - if (isDiv) - { - LayerParams powerParams; - powerParams.name = layerParams.name + "/inv"; - powerParams.type = "Power"; - powerParams.set("power", -1); - - //Create Power layer - int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); - //Connect to input - IterLayerId_t layerId = layer_id.find(node_proto.input(1)); - CV_Assert(layerId != layer_id.end()); - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); - //Add shape - layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); - outShapes[powerParams.name] = outShapes[node_proto.input(1)]; - - //Replace input to Power - 
node_proto.set_input(1, powerParams.name); - } - layerParams.type = "Scale"; - } - } - else if (layer_type == "Conv") - { - CV_Assert(node_proto.input_size() >= 2); - layerParams.type = "Convolution"; - for (int j = 1; j < node_proto.input_size(); j++) { - if (constBlobs.find(node_proto.input(j)) != constBlobs.end()) - { - layerParams.blobs.push_back(getBlob(node_proto, j)); - } - } - int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0]; - layerParams.set("num_output", outCn); +void ONNXImporter::parseLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "ReLU"; + replaceLayerParam(layerParams, "alpha", "negative_slope"); + addLayer(layerParams, node_proto); +} - // Check for asymmetric padding in Conv2D - if (layerParams.has("pad")) - { - bool asymmetricPadding = false; - DictValue pads = layerParams.get("pad"); - const int dims = pads.size() / 2; - for (int i = 0; i < dims; ++i) - { - if (pads.get(i) != pads.get(i + dims)) - { - asymmetricPadding = true; - break; - } - } - if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r] - { - layerParams.erase("pad"); - // No paddings required for N, C axis - std::vector paddings(4, 0); - // Add paddings for H, W axis - for (int i = 0; i < dims; ++i) - { - paddings.push_back(pads.get(i)); - paddings.push_back(pads.get(dims + i)); - } - LayerParams padLp; - padLp.name = layerParams.name + "/pad"; - padLp.type = "Padding"; - padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); +void ONNXImporter::parseRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "ReLU"; + addLayer(layerParams, node_proto); +} - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(padLp.name); +void ONNXImporter::parseElu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "ELU"; + 
addLayer(layerParams, node_proto); +} - addLayer(padLp, proto); - node_proto.set_input(0, padLp.name); - } - } - } - else if (layer_type == "ConvTranspose") - { - CV_Assert(node_proto.input_size() >= 2); - layerParams.type = "Deconvolution"; - for (int j = 1; j < node_proto.input_size(); j++) { - layerParams.blobs.push_back(getBlob(node_proto, j)); - } - layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get("group", 1)); - layerParams.set("bias_term", node_proto.input_size() == 3); +void ONNXImporter::parseTanh(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "TanH"; + addLayer(layerParams, node_proto); +} - if (!layerParams.has("kernel_size")) - CV_Error(Error::StsNotImplemented, - "Required attribute 'kernel_size' is not present."); +void ONNXImporter::parsePRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "PReLU"; + layerParams.blobs.push_back(getBlob(node_proto, 1)); + addLayer(layerParams, node_proto); +} - if (layerParams.has("output_shape")) - { - const DictValue& outShape = layerParams.get("output_shape"); - DictValue strides = layerParams.get("stride"); - DictValue kernel = layerParams.get("kernel_size"); +void ONNXImporter::parseLRN(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + replaceLayerParam(layerParams, "size", "local_size"); + addLayer(layerParams, node_proto); +} - String padMode; - std::vector adjust_pads; - if (layerParams.has("pad_mode")) - { - padMode = toUpperCase(layerParams.get("pad_mode")); - if (padMode != "SAME" && padMode != "VALID") - CV_Error(Error::StsError, "Unsupported padding mode " + padMode); +void ONNXImporter::parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + if (node_proto.input_size() != 3) + CV_Error(Error::StsNotImplemented, + "Expected input, scale, bias"); + + 
layerParams.blobs.resize(4); + layerParams.blobs[2] = getBlob(node_proto, 1); // weightData + layerParams.blobs[3] = getBlob(node_proto, 2); // biasData + layerParams.set("has_bias", true); + layerParams.set("has_weight", true); + + // Get number of channels in input + int size = layerParams.blobs[2].total(); + layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F); // mean + layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std + + LayerParams mvnParams; + mvnParams.name = layerParams.name + "/MVN"; + mvnParams.type = "MVN"; + mvnParams.set("eps", layerParams.get("epsilon")); + layerParams.erase("epsilon"); + + //Create MVN layer + int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + //Connect to input + IterLayerId_t layerId = layer_id.find(node_proto.input(0)); + CV_Assert(layerId != layer_id.end()); + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + //Add shape + layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0))); + outShapes[mvnParams.name] = outShapes[node_proto.input(0)]; + + //Replace Batch Norm's input to MVN + node_proto.set_input(0, mvnParams.name); + layerParams.type = "BatchNorm"; + addLayer(layerParams, node_proto); +} - for (int i = 0; i < strides.size(); i++) - { - int sz = outShape.get(2 + i); - int stride = strides.get(i); - adjust_pads.push_back(padMode == "SAME"? 
(sz - 1) % stride : - (sz - kernel.get(i)) % stride); - } - layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size())); - } - } - else if (layerParams.has("output_padding")) - { - replaceLayerParam(layerParams, "output_padding", "adj"); - } - } - else if (layer_type == "Transpose") - { - layerParams.type = "Permute"; - replaceLayerParam(layerParams, "perm", "order"); +void ONNXImporter::parseBatchNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + if (node_proto.input_size() != 5) + CV_Error(Error::StsNotImplemented, + "Expected input, scale, bias, mean and var"); - CV_Assert(node_proto.input_size() == 1); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - std::vector inputs(1, getBlob(node_proto, 0)), transposed; - runLayer(layerParams, inputs, transposed); - CV_Assert(transposed.size() == 1); - addConstant(layerParams.name, transposed[0]); - return; - } + layerParams.type = "BatchNorm"; + replaceLayerParam(layerParams, "epsilon", "eps"); + replaceLayerParam(layerParams, "spatial", "use_global_stats"); + + Mat meanData = getBlob(node_proto, 3); + Mat stdData = getBlob(node_proto, 4); + + layerParams.blobs.push_back(meanData); + layerParams.blobs.push_back(stdData); + + if (!node_proto.input(1).empty()) { + layerParams.set("has_weight", true); + layerParams.blobs.push_back(getBlob(node_proto, 1)); // weightData + } else { + layerParams.set("has_weight", false); + } + + if (!node_proto.input(2).empty()) { + layerParams.set("has_bias", true); + layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData + } else { + layerParams.set("has_bias", false); + } + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() >= 2); + layerParams.type = "InnerProduct"; + Mat weights = getBlob(node_proto, 1); + int ind_num_out = 0; + if (layerParams.has("transB") && 
!layerParams.get("transB")) { + transpose(weights, weights); + ind_num_out = 1; + } + layerParams.blobs.push_back(weights); + + if (node_proto.input_size() == 3) { + Mat bias = getBlob(node_proto, 2); + layerParams.blobs.push_back(bias); + } + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat inputBuf = getBlob(node_proto, 0); + + LayerParams constParams; + constParams.name = node_proto.input(0); + constParams.type = "Const"; + constParams.blobs.push_back(inputBuf); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(constParams, proto); + } + + layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]); + layerParams.set("bias_term", node_proto.input_size() == 3); + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 2); + layerParams.type = "InnerProduct"; + layerParams.set("bias_term", false); + CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end()); + int firstInpDims = outShapes[node_proto.input(0)].size(); + int secondInpDims; + + if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) + { + Mat blob = getBlob(node_proto, 1); + secondInpDims = blob.dims; + layerParams.blobs.push_back(blob.t()); + layerParams.set("num_output", layerParams.blobs[0].size[0]); + } else { + secondInpDims = outShapes[node_proto.input(1)].size(); + } + layerParams.set("axis", firstInpDims - secondInpDims + 1); + addLayer(layerParams, node_proto); +} + +// "Mul" "Div" +void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + const std::string& layer_type = node_proto.op_type(); + CV_Assert(node_proto.input_size() == 2); + + bool isDiv = layer_type == "Div"; + int constId = -1; + bool haveVariables = false; + for (int i = 0; i < 2; ++i) + { + if 
(constBlobs.find(node_proto.input(i)) != constBlobs.end()) + constId = i; + else + haveVariables = true; + } + if (constId != -1 && haveVariables) + { + Mat blob = getBlob(node_proto, constId); + blob = blob.reshape(1, 1); + if (blob.total() == 1) { + float blob_value = blob.ptr()[0]; + float coeff = isDiv ? 1.0 / blob_value : blob_value; + layerParams.set("scale", coeff); + layerParams.type = "Power"; } - else if (layer_type == "Squeeze") - { - CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); - DictValue axes_dict = layerParams.get("axes"); - MatShape inpShape = outShapes[node_proto.input(0)]; + else { + if (isDiv) + divide(1.0, blob, blob); + layerParams.blobs.push_back(blob); + layerParams.type = "Scale"; + } + } + else if (!haveVariables) + { + Mat inp0 = getBlob(node_proto, 0); + Mat inp1 = getBlob(node_proto, 1); - std::vector maskedAxes(inpShape.size(), false); - for (int i = 0; i < axes_dict.size(); ++i) + if (inp0.size != inp1.size && (inp0.total() != 1 || inp1.total() != 1)) + CV_Error_(Error::StsNotImplemented, ("Different shapes case is not supported with constant inputs: %s", layer_type.c_str())); + + if (inp0.total() == 1 && inp1.total() == 1 && inp0.dims != inp1.dims) + { + if (inp0.dims < inp1.dims) { - int axis = axes_dict.getIntValue(i); - CV_CheckLE(axis, static_cast(inpShape.size()), "Squeeze axis"); - maskedAxes[axis] = inpShape[axis] == 1; + inp0 = inp0.reshape(1, inp1.dims, inp1.size); + inp0.dims = inp1.dims; } - MatShape outShape; - for (int i = 0; i < inpShape.size(); ++i) + else { - if (!maskedAxes[i]) - outShape.push_back(inpShape[i]); + inp1 = inp1.reshape(1, inp0.dims, inp0.size); + inp1.dims = inp0.dims; } - if (outShape.size() != inpShape.size()) + } + + Mat out; + if (inp0.total() != inp1.total()) + { + if (inp0.total() == 1) { - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); - if (hasDynamicShapes) - { - std::vector dynamicAxes; - std::vector 
inputIndices; - for (int index = 0; index < inpShape.size(); ++index) - { - if (!maskedAxes[index]) - inputIndices.push_back(index); - } - for (int index = 0; index < outShape.size(); ++index) - dynamicAxes.push_back(index); - layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); - layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); - } + float inp0_value = inp0.ptr()[0]; + float coeff = isDiv ? 1.0 / inp0_value : inp0_value; + multiply(inp1, coeff, out); } else - layerParams.type = "Identity"; - - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) { - Mat inp = getBlob(node_proto, 0); - Mat out = inp.reshape(1, outShape); - out.dims = outShape.size(); // to workaround dims == 1 - addConstant(layerParams.name, out); - return; + float inp1_value = inp1.ptr()[0]; + float coeff = isDiv ? 1.0 / inp1_value : inp1_value; + multiply(inp0, coeff, out); } + } - else if (layer_type == "Flatten") + else { - CV_CheckEQ(node_proto.input_size(), 1, ""); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - Mat input = getBlob(node_proto, 0); - int axis = normalize_axis(layerParams.get("axis", 1), input.dims); - - std::vector out_size(&input.size[0], &input.size[0] + axis); - out_size.push_back(input.total(axis)); - Mat output = input.reshape(1, out_size); - addConstant(layerParams.name, output); - return; - } + out = isDiv ? inp0 / inp1 : inp0.mul(inp1); } - else if (layer_type == "Unsqueeze") + + if (inp0.dims == 1 && inp1.dims == 1) + out.dims = 1; // to workaround dims == 1 + addConstant(layerParams.name, out); + return; + } + else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) + { + layerParams.type = "Eltwise"; + layerParams.set("operation", isDiv ? 
"div" : "prod"); + } + else + { + // Scale layer allocate output with the first input shape + if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)])) { - CV_Assert(node_proto.input_size() == 1); - DictValue axes = layerParams.get("axes"); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - // Constant input. - Mat input = getBlob(node_proto, 0); + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(1)); + proto.add_input(node_proto.input(0)); + proto.add_output(layerParams.name); + node_proto = proto; + } - std::vector dims; - for (int j = 0; j < input.dims; j++) { - dims.push_back(input.size[j]); - } - CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size()); - for (int j = 0; j < axes.size(); j++) { - dims.insert(dims.begin() + axes.getIntValue(j), 1); - } + if (isDiv) + { + LayerParams powerParams; + powerParams.name = layerParams.name + "/inv"; + powerParams.type = "Power"; + powerParams.set("power", -1); - Mat out = input.reshape(0, dims); - addConstant(layerParams.name, out); - return; - } + //Create Power layer + int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); + //Connect to input + IterLayerId_t layerId = layer_id.find(node_proto.input(1)); + CV_Assert(layerId != layer_id.end()); + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + //Add shape + layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); + outShapes[powerParams.name] = outShapes[node_proto.input(1)]; - // Variable input. 
- if (axes.size() != 1) - CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze"); + //Replace input to Power + node_proto.set_input(1, powerParams.name); + } + layerParams.type = "Scale"; + } + addLayer(layerParams, node_proto); +} - MatShape inpShape = outShapes[node_proto.input(0)]; - int axis = axes.getIntValue(0); - CV_Assert(0 <= axis && axis <= inpShape.size()); - std::vector outShape = inpShape; - outShape.insert(outShape.begin() + axis, 1); - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); - if (hasDynamicShapes) - { - std::vector dynamicAxes; - std::vector inputIndices; - for (int index = 0; index < outShape.size(); ++index) { - if (index != axis) - dynamicAxes.push_back(index); - } - for (int index = 0; index < inpShape.size(); ++index) - inputIndices.push_back(index); - layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); - layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); - } +void ONNXImporter::parseConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + CV_Assert(node_proto.input_size() >= 2); + layerParams.type = "Convolution"; + for (int j = 1; j < node_proto.input_size(); j++) { + if (constBlobs.find(node_proto.input(j)) != constBlobs.end()) + { + layerParams.blobs.push_back(getBlob(node_proto, j)); } - else if (layer_type == "Expand") + } + int outCn = layerParams.blobs.empty() ? 
outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0]; + layerParams.set("num_output", outCn); + + // Check for asymmetric padding in Conv2D + if (layerParams.has("pad")) + { + bool asymmetricPadding = false; + DictValue pads = layerParams.get("pad"); + const int dims = pads.size() / 2; + for (int i = 0; i < dims; ++i) { - CV_CheckEQ(node_proto.input_size(), 2, ""); - const std::string& input0 = node_proto.input(0); - const std::string& input1 = node_proto.input(1); - Mat newShapeMat = getBlob(input1); - MatShape targetShape(newShapeMat.ptr(), newShapeMat.ptr() + newShapeMat.total()); - - MatShape inpShape; - bool haveVariables = constBlobs.find(input0) == constBlobs.end(); - if (haveVariables) + if (pads.get(i) != pads.get(i + dims)) { - IterShape_t shapeIt = outShapes.find(input0); - CV_Assert(shapeIt != outShapes.end()); - inpShape = shapeIt->second; + asymmetricPadding = true; + break; } - else + } + if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r] + { + layerParams.erase("pad"); + // No paddings required for N, C axis + std::vector paddings(4, 0); + // Add paddings for H, W axis + for (int i = 0; i < dims; ++i) { - inpShape = shape(getBlob(input0)); + paddings.push_back(pads.get(i)); + paddings.push_back(pads.get(dims + i)); } + LayerParams padLp; + padLp.name = layerParams.name + "/pad"; + padLp.type = "Padding"; + padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); - String srcName = input0; - // Unsqueeze and repeat along new axis - if (targetShape.size() == inpShape.size() + 1) - { - for (int i = 0; i < targetShape.size(); i++) - { - if (targetShape[i] == -1 && i < inpShape.size()) - targetShape[i] = inpShape[i]; - else if (i < inpShape.size() && targetShape[i] != inpShape[i]) - inpShape.insert(inpShape.begin() + i, 1); - } - if (haveVariables) - { - LayerParams reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == 
layer_id.end()); - reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); - srcName = reshapeLp.name; - } - } - CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims"); + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(padLp.name); - std::vector broadcast_axes; - for (int i = 0; i < targetShape.size(); i++) - { - if (targetShape[i] != inpShape[i]) - { - if (inpShape[i] == 1) - broadcast_axes.push_back(i); - else - CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i)); - } - } + addLayer(padLp, proto); + node_proto.set_input(0, padLp.name); + } + } + addLayer(layerParams, node_proto); +} - if (!haveVariables) - { - if (broadcast_axes.size() != 1) - CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input"); - - Mat input = getBlob(node_proto, 0); - input = input.reshape(0, total(inpShape, 0, broadcast_axes[0])); - Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]); - output = output.reshape(0, targetShape); - addConstant(layerParams.name, output); - return; - } +void ONNXImporter::parseConvTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() >= 2); + layerParams.type = "Deconvolution"; + for (int j = 1; j < node_proto.input_size(); j++) { + layerParams.blobs.push_back(getBlob(node_proto, j)); + } + layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get("group", 1)); + layerParams.set("bias_term", node_proto.input_size() == 3); - if (broadcast_axes.size() == 2 && - broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1) - { - LayerParams constParams; - constParams.name = layerParams.name + "/const"; - CV_Assert(layer_id.find(constParams.name) == 
layer_id.end()); - constParams.type = "Const"; + if (!layerParams.has("kernel_size")) + CV_Error(Error::StsNotImplemented, + "Required attribute 'kernel_size' is not present."); - Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr(), CV_32F); - constParams.blobs.push_back(inp); + if (layerParams.has("output_shape")) + { + const DictValue& outShape = layerParams.get("output_shape"); + DictValue strides = layerParams.get("stride"); + DictValue kernel = layerParams.get("kernel_size"); - opencv_onnx::NodeProto proto; - proto.add_output(constParams.name); - addLayer(constParams, proto); + String padMode; + std::vector adjust_pads; + if (layerParams.has("pad_mode")) + { + padMode = toUpperCase(layerParams.get("pad_mode")); + if (padMode != "SAME" && padMode != "VALID") + CV_Error(Error::StsError, "Unsupported padding mode " + padMode); - layerParams.type = "Scale"; - layerParams.set("bias_term", false); - node_proto.set_input(0, constParams.name); - node_proto.set_input(1, srcName); - } - else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1) + for (int i = 0; i < strides.size(); i++) { - String base_name = layerParams.name + "/copy_"; - std::vector input_names; - for (int j = 0; j < targetShape[broadcast_axes[0]]; j++) - { - std::ostringstream ss; - ss << j; - LayerParams copyLP; - copyLP.name = base_name + ss.str(); - copyLP.type = "Identity"; - CV_Assert(layer_id.find(copyLP.name) == layer_id.end()); - input_names.push_back(copyLP.name); - - node_proto.set_input(0, srcName); - node_proto.set_output(0, copyLP.name); - addLayer(copyLP, node_proto); - } - node_proto.clear_input(); - for (int i = 0; i < input_names.size(); i++) - { - node_proto.add_input(input_names[i]); - } - layerParams.set("axis", broadcast_axes[0]); - layerParams.type = "Concat"; - node_proto.set_output(0, layerParams.name); + int sz = outShape.get(2 + i); + int stride = strides.get(i); + adjust_pads.push_back(padMode == "SAME"? 
(sz - 1) % stride : + (sz - kernel.get(i)) % stride); } - else - CV_Error(Error::StsNotImplemented, "Unsupported Expand op"); + layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size())); } - else if (layer_type == "Reshape") - { - CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape")); + } + else if (layerParams.has("output_padding")) + { + replaceLayerParam(layerParams, "output_padding", "adj"); + } + addLayer(layerParams, node_proto); +} - if (node_proto.input_size() == 2) { - Mat blob = getBlob(node_proto, 1); - CV_Assert(blob.type() == CV_32SC1); +void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Permute"; + replaceLayerParam(layerParams, "perm", "order"); - layerParams.set("dim", DictValue::arrayInt( - blob.ptr(), blob.total() )); + CV_Assert(node_proto.input_size() == 1); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + std::vector inputs(1, getBlob(node_proto, 0)), transposed; + runLayer(layerParams, inputs, transposed); + CV_Assert(transposed.size() == 1); + addConstant(layerParams.name, transposed[0]); + return; + } + addLayer(layerParams, node_proto); +} - if (layer_id.find(node_proto.input(0)) == layer_id.end()) { - std::vector inputs(1, getBlob(node_proto, 0)), outputs; - runLayer(layerParams, inputs, outputs); - addConstant(layerParams.name, outputs[0]); - return; - } - } - else { - DictValue shape = layerParams.get("shape"); - std::vector dim; - for (int j = 0; j < shape.size(); j++) { - dim.push_back(shape.getIntValue(j)); - } +void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); + DictValue axes_dict = layerParams.get("axes"); + MatShape inpShape = outShapes[node_proto.input(0)]; - if (layer_id.find(node_proto.input(0)) == layer_id.end()) { - Mat input = getBlob(node_proto, 0); - Mat out = 
input.reshape(0, dim); - addConstant(layerParams.name, out); - return; - } - replaceLayerParam(layerParams, "shape", "dim"); - } - } - else if (layer_type == "Pad") + std::vector maskedAxes(inpShape.size(), false); + for (int i = 0; i < axes_dict.size(); ++i) + { + int axis = axes_dict.getIntValue(i); + CV_CheckLE(axis, static_cast(inpShape.size()), "Squeeze axis"); + maskedAxes[axis] = inpShape[axis] == 1; + } + MatShape outShape; + for (int i = 0; i < inpShape.size(); ++i) + { + if (!maskedAxes[i]) + outShape.push_back(inpShape[i]); + } + if (outShape.size() != inpShape.size()) + { + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + if (hasDynamicShapes) { - layerParams.type = "Padding"; - replaceLayerParam(layerParams, "mode", "type"); - if (node_proto.input_size() == 3 || node_proto.input_size() == 2) + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < inpShape.size(); ++index) { - // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN. - // We need to shuffle it to begin0, end0, begin1, end1, ... 
- Mat paddings = getBlob(node_proto, 1).reshape(1, 2); - paddings = paddings.t(); - layerParams.set("paddings", DictValue::arrayInt(paddings.ptr(), paddings.total())); - - if (node_proto.input_size() == 3) - { - Mat value = getBlob(node_proto, 2); - layerParams.set("value", value.ptr()[0]); - } + if (!maskedAxes[index]) + inputIndices.push_back(index); } + for (int index = 0; index < outShape.size(); ++index) + dynamicAxes.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); } - else if (layer_type == "Shape") - { - CV_Assert(node_proto.input_size() == 1); - IterShape_t shapeIt = outShapes.find(node_proto.input(0)); - CV_Assert(shapeIt != outShapes.end()); - const MatShape& inpShape = shapeIt->second; + } + else + layerParams.type = "Identity"; - Mat shapeMat(inpShape.size(), 1, CV_32S); - for (int j = 0; j < inpShape.size(); ++j) - shapeMat.at(j) = inpShape[j]; - shapeMat.dims = 1; + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat inp = getBlob(node_proto, 0); + Mat out = inp.reshape(1, outShape); + out.dims = outShape.size(); // to workaround dims == 1 + addConstant(layerParams.name, out); + return; + } + addLayer(layerParams, node_proto); +} - addConstant(layerParams.name, shapeMat); - return; +void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_CheckEQ(node_proto.input_size(), 1, ""); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat input = getBlob(node_proto, 0); + int axis = normalize_axis(layerParams.get("axis", 1), input.dims); + + std::vector out_size(&input.size[0], &input.size[0] + axis); + out_size.push_back(input.total(axis)); + Mat output = input.reshape(1, out_size); + addConstant(layerParams.name, output); + return; + } + addLayer(layerParams, node_proto); +} + +void 
ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 1); + DictValue axes = layerParams.get("axes"); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + // Constant input. + Mat input = getBlob(node_proto, 0); + + std::vector dims; + for (int j = 0; j < input.dims; j++) { + dims.push_back(input.size[j]); + } + CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size()); + for (int j = 0; j < axes.size(); j++) { + dims.insert(dims.begin() + axes.getIntValue(j), 1); } - else if (layer_type == "Cast") + + Mat out = input.reshape(0, dims); + addConstant(layerParams.name, out); + return; + } + + // Variable input. + if (axes.size() != 1) + CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze"); + + MatShape inpShape = outShapes[node_proto.input(0)]; + int axis = axes.getIntValue(0); + CV_Assert(0 <= axis && axis <= inpShape.size()); + std::vector outShape = inpShape; + outShape.insert(outShape.begin() + axis, 1); + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + if (hasDynamicShapes) + { + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < outShape.size(); ++index) { + if (index != axis) + dynamicAxes.push_back(index); + } + for (int index = 0; index < inpShape.size(); ++index) + inputIndices.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); + } + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + CV_CheckEQ(node_proto.input_size(), 2, ""); + const std::string& input0 = node_proto.input(0); + const std::string& input1 = node_proto.input(1); + Mat newShapeMat = 
getBlob(input1); + MatShape targetShape(newShapeMat.ptr(), newShapeMat.ptr() + newShapeMat.total()); + + MatShape inpShape; + bool haveVariables = constBlobs.find(input0) == constBlobs.end(); + if (haveVariables) + { + IterShape_t shapeIt = outShapes.find(input0); + CV_Assert(shapeIt != outShapes.end()); + inpShape = shapeIt->second; + } + else + { + inpShape = shape(getBlob(input0)); + } + + String srcName = input0; + // Unsqueeze and repeat along new axis + if (targetShape.size() == inpShape.size() + 1) + { + for (int i = 0; i < targetShape.size(); i++) { - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - Mat blob = getBlob(node_proto, 0); - int type; - switch (layerParams.get("to")) - { - case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break; - case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break; - case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break; - case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break; - case opencv_onnx::TensorProto_DataType_INT8: - case opencv_onnx::TensorProto_DataType_INT16: - case opencv_onnx::TensorProto_DataType_INT32: - case opencv_onnx::TensorProto_DataType_INT64: type = CV_32S; break; - default: type = blob.type(); - } - Mat dst; - blob.convertTo(dst, type); - dst.dims = blob.dims; - addConstant(layerParams.name, dst); - return; - } - else - layerParams.type = "Identity"; + if (targetShape[i] == -1 && i < inpShape.size()) + targetShape[i] = inpShape[i]; + else if (i < inpShape.size() && targetShape[i] != inpShape[i]) + inpShape.insert(inpShape.begin() + i, 1); } - else if (layer_type == "ConstantOfShape" || layer_type == "ConstantFill") + if (haveVariables) { - int depth = CV_32F; - float fill_value; - if (!layerParams.blobs.empty()) - { - CV_Assert(!layerParams.has("value")); - depth = layerParams.blobs[0].depth(); - Mat floats; - layerParams.blobs[0].convertTo(floats, CV_32F); - fill_value = floats.at(0, 0); - } - else - fill_value = 
layerParams.get("value", 0); + LayerParams reshapeLp; + reshapeLp.name = layerParams.name + "/reshape"; + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); - MatShape inpShape = getBlob(node_proto, 0); - for (int i = 0; i < inpShape.size(); i++) - CV_CheckGT(inpShape[i], 0, ""); - Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value)); - addConstant(layerParams.name, tensor); - return; + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(reshapeLp.name); + addLayer(reshapeLp, proto); + srcName = reshapeLp.name; } - else if (layer_type == "Gather") - { - CV_Assert(node_proto.input_size() == 2); - Mat indexMat = getBlob(node_proto, 1); - CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1); - int index = indexMat.at(0); - int axis = layerParams.get("axis", 0); + } + CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims"); - if ((constBlobs.find(node_proto.input(0)) != constBlobs.end())) - { - Mat input = getBlob(node_proto, 0); - Mat out; - std::vector ranges(input.dims, Range::all()); - ranges[axis] = Range(index, index + 1); - - out = input(ranges); - MatShape outShape = shape(out); - if (outShape.size() > 1) - { - outShape.erase(outShape.begin() + axis); - out.reshape(0, outShape); - } else { - out.dims = 1; - } - addConstant(layerParams.name, out); - return; - } + std::vector broadcast_axes; + for (int i = 0; i < targetShape.size(); i++) + { + if (targetShape[i] != inpShape[i]) + { + if (inpShape[i] == 1) + broadcast_axes.push_back(i); else - { - IterShape_t shapeIt = outShapes.find(node_proto.input(0)); - CV_Assert(shapeIt != outShapes.end()); - MatShape inpShape = shapeIt->second; - - LayerParams sliceLp; - sliceLp.type = "Slice"; - sliceLp.name = inpShape.size() > 1 ? 
layerParams.name + "/slice" : layerParams.name; - std::vector begin(inpShape.size(), 0); - std::vector end(inpShape.size(), -1); - begin[axis] = index; - end[axis] = index + 1; - - cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size()); - cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size()); - sliceLp.set("begin", paramBegin); - sliceLp.set("end", paramEnd); - sliceLp.set("has_dynamic_shapes", hasDynamicShapes); - - if (inpShape.size() > 1) - { - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(sliceLp.name); - addLayer(sliceLp, proto); - - inpShape.erase(inpShape.begin() + axis); - layerParams.type = "Reshape"; - layerParams.set("axis", 0); - layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); - if (hasDynamicShapes) - { - std::vector dynamicAxes; - std::vector inputIndices; - for (int index = 0; index < inpShape.size(); ++index) - dynamicAxes.push_back(index); - for (int index = 0; index < inpShape.size(); ++index) - inputIndices.push_back(index); - layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); - layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); - } - node_proto.set_input(0, sliceLp.name); - } - else - { - layerParams = sliceLp; - } - } + CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i)); } - else if (layer_type == "Concat") - { - bool hasVariableInps = false; - for (int i = 0; i < node_proto.input_size(); ++i) - { - if (layer_id.find(node_proto.input(i)) != layer_id.end()) - { - hasVariableInps = true; - break; - } - } + } - if (!hasVariableInps) - { - std::vector inputs(node_proto.input_size()), concatenated; - // Due constant folding we can get inputs with different number of dimensions - // Insert the missing dimension to inputs - MatShape inputShape; - for (size_t i = 0; i < inputs.size(); ++i) - { - inputs[i] = 
getBlob(node_proto, i); - if (inputs[i].size.dims() > inputShape.size()) - { - inputShape = shape(inputs[i]); - } - } + if (!haveVariables) + { + if (broadcast_axes.size() != 1) + CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input"); + + Mat input = getBlob(node_proto, 0); + input = input.reshape(0, total(inpShape, 0, broadcast_axes[0])); + Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]); + output = output.reshape(0, targetShape); + addConstant(layerParams.name, output); + return; + } - // Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1 - int axis = layerParams.get("axis", 1); - for (size_t i = 0; i < inputs.size(); ++i) - { - MatShape targetShape = inputShape; - targetShape[axis] = shape(inputs[i])[axis]; - CV_CheckEQ(total(targetShape), total(shape(inputs[i])), ""); - inputs[i] = inputs[i].reshape(0, targetShape); - } - runLayer(layerParams, inputs, concatenated); + if (broadcast_axes.size() == 2 && + broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1) + { + LayerParams constParams; + constParams.name = layerParams.name + "/const"; + CV_Assert(layer_id.find(constParams.name) == layer_id.end()); + constParams.type = "Const"; + + Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr(), CV_32F); + constParams.blobs.push_back(inp); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(constParams, proto); + + layerParams.type = "Scale"; + layerParams.set("bias_term", false); + node_proto.set_input(0, constParams.name); + node_proto.set_input(1, srcName); + } + else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1) + { + String base_name = layerParams.name + "/copy_"; + std::vector input_names; + for (int j = 0; j < targetShape[broadcast_axes[0]]; j++) + { + std::ostringstream ss; + ss << j; + LayerParams copyLP; + copyLP.name = base_name + ss.str(); + copyLP.type = 
"Identity"; + CV_Assert(layer_id.find(copyLP.name) == layer_id.end()); + input_names.push_back(copyLP.name); + + node_proto.set_input(0, srcName); + node_proto.set_output(0, copyLP.name); + addLayer(copyLP, node_proto); + } + node_proto.clear_input(); + for (int i = 0; i < input_names.size(); i++) + { + node_proto.add_input(input_names[i]); + } + layerParams.set("axis", broadcast_axes[0]); + layerParams.type = "Concat"; + node_proto.set_output(0, layerParams.name); + } + else + CV_Error(Error::StsNotImplemented, "Unsupported Expand op"); + addLayer(layerParams, node_proto); +} - CV_Assert(concatenated.size() == 1); - addConstant(layerParams.name, concatenated[0]); - return; - } - else - { - for (int i = 0; i < node_proto.input_size(); ++i) - { - if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) - { - LayerParams constParams; - constParams.name = node_proto.input(i); - constParams.type = "Const"; - constParams.blobs.push_back(getBlob(node_proto, i)); - - opencv_onnx::NodeProto proto; - proto.add_output(constParams.name); - addLayer(constParams, proto); - } - } - } +void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape")); + + if (node_proto.input_size() == 2) { + Mat blob = getBlob(node_proto, 1); + CV_Assert(blob.type() == CV_32SC1); + + layerParams.set("dim", DictValue::arrayInt( + blob.ptr(), blob.total() )); + + if (layer_id.find(node_proto.input(0)) == layer_id.end()) { + std::vector inputs(1, getBlob(node_proto, 0)), outputs; + runLayer(layerParams, inputs, outputs); + addConstant(layerParams.name, outputs[0]); + return; + } + } + else { + DictValue shape = layerParams.get("shape"); + std::vector dim; + for (int j = 0; j < shape.size(); j++) { + dim.push_back(shape.getIntValue(j)); + } + + if (layer_id.find(node_proto.input(0)) == layer_id.end()) { + Mat input = getBlob(node_proto, 0); + Mat out = input.reshape(0, dim); + 
addConstant(layerParams.name, out); + return; } - else if (layer_type == "Resize") + replaceLayerParam(layerParams, "shape", "dim"); + } + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Padding"; + replaceLayerParam(layerParams, "mode", "type"); + if (node_proto.input_size() == 3 || node_proto.input_size() == 2) + { + // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN. + // We need to shuffle it to begin0, end0, begin1, end1, ... + Mat paddings = getBlob(node_proto, 1).reshape(1, 2); + paddings = paddings.t(); + layerParams.set("paddings", DictValue::arrayInt(paddings.ptr(), paddings.total())); + + if (node_proto.input_size() == 3) { - for (int i = 1; i < node_proto.input_size(); i++) - CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end()); + Mat value = getBlob(node_proto, 2); + layerParams.set("value", value.ptr()[0]); + } + } + addLayer(layerParams, node_proto); +} - if (layerParams.has("coordinate_transformation_mode")) - { - String interp_mode = layerParams.get("coordinate_transformation_mode"); - CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); +void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 1); + IterShape_t shapeIt = outShapes.find(node_proto.input(0)); + CV_Assert(shapeIt != outShapes.end()); + const MatShape& inpShape = shapeIt->second; - layerParams.set("align_corners", interp_mode == "align_corners"); - if (layerParams.get("mode") == "linear") - { - layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? 
- "opencv_linear" : "bilinear"); - } - } - if (layerParams.get("mode") == "linear" && framework_name == "pytorch") - layerParams.set("mode", "opencv_linear"); + Mat shapeMat(inpShape.size(), 1, CV_32S); + for (int j = 0; j < inpShape.size(); ++j) + shapeMat.at(j) = inpShape[j]; + shapeMat.dims = 1; - // input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes] - int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1 - : node_proto.input_size() > 2 ? 2 : 1; + addConstant(layerParams.name, shapeMat); +} - Mat scales = getBlob(node_proto, foundScaleId); - if (scales.total() == 4) - { - layerParams.set("zoom_factor_y", scales.at(2)); - layerParams.set("zoom_factor_x", scales.at(3)); - } - else +void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat blob = getBlob(node_proto, 0); + int type; + switch (layerParams.get("to")) + { + case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break; + case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break; + case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break; + case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break; + case opencv_onnx::TensorProto_DataType_INT8: + case opencv_onnx::TensorProto_DataType_INT16: + case opencv_onnx::TensorProto_DataType_INT32: + case opencv_onnx::TensorProto_DataType_INT64: type = CV_32S; break; + default: type = blob.type(); + } + Mat dst; + blob.convertTo(dst, type); + dst.dims = blob.dims; + addConstant(layerParams.name, dst); + return; + } + else + layerParams.type = "Identity"; + addLayer(layerParams, node_proto); +} + +// "ConstantOfShape" "ConstantFill" +void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + int depth = CV_32F; + float fill_value; + if (!layerParams.blobs.empty()) + { + CV_Assert(!layerParams.has("value")); + depth = 
layerParams.blobs[0].depth(); + Mat floats; + layerParams.blobs[0].convertTo(floats, CV_32F); + fill_value = floats.at(0, 0); + } + else + fill_value = layerParams.get("value", 0); + + MatShape inpShape = getBlob(node_proto, 0); + for (int i = 0; i < inpShape.size(); i++) + CV_CheckGT(inpShape[i], 0, ""); + Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value)); + addConstant(layerParams.name, tensor); +} + +void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + CV_Assert(node_proto.input_size() == 2); + Mat indexMat = getBlob(node_proto, 1); + CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1); + int index = indexMat.at(0); + int axis = layerParams.get("axis", 0); + + if ((constBlobs.find(node_proto.input(0)) != constBlobs.end())) + { + Mat input = getBlob(node_proto, 0); + Mat out; + std::vector ranges(input.dims, Range::all()); + ranges[axis] = Range(index, index + 1); + + out = input(ranges); + MatShape outShape = shape(out); + if (outShape.size() > 1) + { + outShape.erase(outShape.begin() + axis); + out.reshape(0, outShape); + } else { + out.dims = 1; + } + addConstant(layerParams.name, out); + return; + } + else + { + IterShape_t shapeIt = outShapes.find(node_proto.input(0)); + CV_Assert(shapeIt != outShapes.end()); + MatShape inpShape = shapeIt->second; + + LayerParams sliceLp; + sliceLp.type = "Slice"; + sliceLp.name = inpShape.size() > 1 ? 
layerParams.name + "/slice" : layerParams.name; + std::vector begin(inpShape.size(), 0); + std::vector end(inpShape.size(), -1); + begin[axis] = index; + end[axis] = index + 1; + + cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size()); + cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size()); + sliceLp.set("begin", paramBegin); + sliceLp.set("end", paramEnd); + sliceLp.set("has_dynamic_shapes", hasDynamicShapes); + + if (inpShape.size() > 1) + { + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(sliceLp.name); + addLayer(sliceLp, proto); + + inpShape.erase(inpShape.begin() + axis); + layerParams.type = "Reshape"; + layerParams.set("axis", 0); + layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); + if (hasDynamicShapes) { - const std::string& inputLast = node_proto.input(node_proto.input_size() - 1); - if (constBlobs.find(inputLast) != constBlobs.end()) - { - Mat shapes = getBlob(inputLast); - CV_CheckEQ(shapes.size[0], 4, ""); - CV_CheckEQ(shapes.size[1], 1, ""); - CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, ""); - if (shapes.depth() == CV_32F) - shapes.convertTo(shapes, CV_32S); - layerParams.set("width", shapes.at(3)); - layerParams.set("height", shapes.at(2)); - } + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < inpShape.size(); ++index) + dynamicAxes.push_back(index); + for (int index = 0; index < inpShape.size(); ++index) + inputIndices.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); } - replaceLayerParam(layerParams, "mode", "interpolation"); + node_proto.set_input(0, sliceLp.name); } - else if (layer_type == "Upsample") + else { - //fused from Resize Subgraph - if 
(layerParams.has("coordinate_transformation_mode")) - { - String interp_mode = layerParams.get("coordinate_transformation_mode"); - CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); + layerParams = sliceLp; + } + } + addLayer(layerParams, node_proto); +} - layerParams.set("align_corners", interp_mode == "align_corners"); - if (layerParams.get("mode") == "linear") - { - layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? - "opencv_linear" : "bilinear"); - } - } - if (layerParams.get("mode") == "linear" && framework_name == "pytorch") - layerParams.set("mode", "opencv_linear"); +void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + bool hasVariableInps = false; + for (int i = 0; i < node_proto.input_size(); ++i) + { + if (layer_id.find(node_proto.input(i)) != layer_id.end()) + { + hasVariableInps = true; + break; + } + } - layerParams.type = "Resize"; - if (layerParams.has("scales")) - { - // Pytorch layer - DictValue scales = layerParams.get("scales"); - CV_Assert(scales.size() == 4); - layerParams.set("zoom_factor_y", scales.getIntValue(2)); - layerParams.set("zoom_factor_x", scales.getIntValue(3)); - } - else if (layerParams.has("height_scale") && layerParams.has("width_scale")) - { - // Caffe2 layer - replaceLayerParam(layerParams, "height_scale", "zoom_factor_y"); - replaceLayerParam(layerParams, "width_scale", "zoom_factor_x"); - } - else + if (!hasVariableInps) + { + std::vector inputs(node_proto.input_size()), concatenated; + // Due constant folding we can get inputs with different number of dimensions + // Insert the missing dimension to inputs + MatShape inputShape; + for (size_t i = 0; i < inputs.size(); ++i) + { + inputs[i] = getBlob(node_proto, i); + if (inputs[i].size.dims() > inputShape.size()) { - // scales as input - const std::string& input1 = node_proto.input(1); - if (constBlobs.find(input1) != constBlobs.end()) - { - Mat scales = getBlob(input1); - 
CV_Assert(scales.total() == 4); - layerParams.set("zoom_factor_y", scales.at(2)); - layerParams.set("zoom_factor_x", scales.at(3)); - } + inputShape = shape(inputs[i]); } - replaceLayerParam(layerParams, "mode", "interpolation"); } - else if (layer_type == "SoftMax" || layer_type == "LogSoftmax") + + // Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1 + int axis = layerParams.get("axis", 1); + for (size_t i = 0; i < inputs.size(); ++i) { - layerParams.type = "Softmax"; - layerParams.set("log_softmax", layer_type == "LogSoftmax"); + MatShape targetShape = inputShape; + targetShape[axis] = shape(inputs[i])[axis]; + CV_CheckEQ(total(targetShape), total(shape(inputs[i])), ""); + inputs[i] = inputs[i].reshape(0, targetShape); } - else if (layer_type == "DetectionOutput") + runLayer(layerParams, inputs, concatenated); + + CV_Assert(concatenated.size() == 1); + addConstant(layerParams.name, concatenated[0]); + return; + } + else + { + for (int i = 0; i < node_proto.input_size(); ++i) { - CV_CheckEQ(node_proto.input_size(), 3, ""); - if (constBlobs.find(node_proto.input(2)) != constBlobs.end()) + if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) { - Mat priors = getBlob(node_proto, 2); - LayerParams constParams; - constParams.name = layerParams.name + "/priors"; + constParams.name = node_proto.input(i); constParams.type = "Const"; - constParams.blobs.push_back(priors); - - opencv_onnx::NodeProto priorsProto; - priorsProto.add_output(constParams.name); - addLayer(constParams, priorsProto); + constParams.blobs.push_back(getBlob(node_proto, i)); - node_proto.set_input(2, constParams.name); + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(constParams, proto); } } - else + } + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + for (int i = 1; i < node_proto.input_size(); i++) + 
CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end()); + + if (layerParams.has("coordinate_transformation_mode")) + { + String interp_mode = layerParams.get("coordinate_transformation_mode"); + CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); + + layerParams.set("align_corners", interp_mode == "align_corners"); + if (layerParams.get("mode") == "linear") { - for (int j = 0; j < node_proto.input_size(); j++) { - if (layer_id.find(node_proto.input(j)) == layer_id.end()) - layerParams.blobs.push_back(getBlob(node_proto, j)); - } + layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? + "opencv_linear" : "bilinear"); } - addLayer(layerParams, node_proto); } - catch (const cv::Exception& e) + if (layerParams.get("mode") == "linear" && framework_name == "pytorch") + layerParams.set("mode", "opencv_linear"); + + // input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes] + int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1 + : node_proto.input_size() > 2 ? 
2 : 1; + + Mat scales = getBlob(node_proto, foundScaleId); + if (scales.total() == 4) { - CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " - << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) - ); - for (int i = 0; i < node_proto.input_size(); i++) + layerParams.set("zoom_factor_y", scales.at(2)); + layerParams.set("zoom_factor_x", scales.at(3)); + } + else + { + const std::string& inputLast = node_proto.input(node_proto.input_size() - 1); + if (constBlobs.find(inputLast) != constBlobs.end()) + { + Mat shapes = getBlob(inputLast); + CV_CheckEQ(shapes.size[0], 4, ""); + CV_CheckEQ(shapes.size[1], 1, ""); + CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, ""); + if (shapes.depth() == CV_32F) + shapes.convertTo(shapes, CV_32S); + layerParams.set("width", shapes.at(3)); + layerParams.set("height", shapes.at(2)); + } + } + replaceLayerParam(layerParams, "mode", "interpolation"); + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + //fused from Resize Subgraph + if (layerParams.has("coordinate_transformation_mode")) + { + String interp_mode = layerParams.get("coordinate_transformation_mode"); + CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); + + layerParams.set("align_corners", interp_mode == "align_corners"); + if (layerParams.get("mode") == "linear") { - CV_LOG_INFO(NULL, " Input[" << i << "] = '" << node_proto.input(i) << "'"); + layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? 
+ "opencv_linear" : "bilinear"); } - for (int i = 0; i < node_proto.output_size(); i++) + } + if (layerParams.get("mode") == "linear" && framework_name == "pytorch") + layerParams.set("mode", "opencv_linear"); + + layerParams.type = "Resize"; + if (layerParams.has("scales")) + { + // Pytorch layer + DictValue scales = layerParams.get("scales"); + CV_Assert(scales.size() == 4); + layerParams.set("zoom_factor_y", scales.getIntValue(2)); + layerParams.set("zoom_factor_x", scales.getIntValue(3)); + } + else if (layerParams.has("height_scale") && layerParams.has("width_scale")) + { + // Caffe2 layer + replaceLayerParam(layerParams, "height_scale", "zoom_factor_y"); + replaceLayerParam(layerParams, "width_scale", "zoom_factor_x"); + } + else + { + // scales as input + const std::string& input1 = node_proto.input(1); + if (constBlobs.find(input1) != constBlobs.end()) { - CV_LOG_INFO(NULL, " Output[" << i << "] = '" << node_proto.output(i) << "'"); + Mat scales = getBlob(input1); + CV_Assert(scales.total() == 4); + layerParams.set("zoom_factor_y", scales.at(2)); + layerParams.set("zoom_factor_x", scales.at(3)); } - CV_Error(Error::StsError, cv::format("Node [%s]:(%s) parse error: %s", layer_type.c_str(), name.c_str(), e.what())); } + replaceLayerParam(layerParams, "mode", "interpolation"); + addLayer(layerParams, node_proto); +} + +// "SoftMax" "LogSoftmax" +void ONNXImporter::parseSoftMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + const std::string& layer_type = node_proto.op_type(); + layerParams.type = "Softmax"; + layerParams.set("log_softmax", layer_type == "LogSoftmax"); + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseDetectionOutput(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + CV_CheckEQ(node_proto.input_size(), 3, ""); + if (constBlobs.find(node_proto.input(2)) != constBlobs.end()) + { + Mat priors = getBlob(node_proto, 2); + + 
LayerParams constParams; + constParams.name = layerParams.name + "/priors"; + constParams.type = "Const"; + constParams.blobs.push_back(priors); + + opencv_onnx::NodeProto priorsProto; + priorsProto.add_output(constParams.name); + addLayer(constParams, priorsProto); + + node_proto.set_input(2, constParams.name); + } + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseCustom(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + for (int j = 0; j < node_proto.input_size(); j++) { + if (layer_id.find(node_proto.input(j)) == layer_id.end()) + layerParams.blobs.push_back(getBlob(node_proto, j)); + } + addLayer(layerParams, node_proto); +} + +const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap() +{ + DispatchMap dispatch; + + dispatch["MaxPool"] = &ONNXImporter::parseMaxPool; + dispatch["AveragePool"] = &ONNXImporter::parseAveragePool; + dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] = + dispatch["ReduceMax"] = &ONNXImporter::parseReduce; + dispatch["Slice"] = &ONNXImporter::parseSlice; + dispatch["Split"] = &ONNXImporter::parseSplit; + dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias; + dispatch["Pow"] = &ONNXImporter::parsePow; + dispatch["Max"] = &ONNXImporter::parseMax; + dispatch["Neg"] = &ONNXImporter::parseNeg; + dispatch["Constant"] = &ONNXImporter::parseConstant; + dispatch["LSTM"] = &ONNXImporter::parseLSTM; + dispatch["ImageScaler"] = &ONNXImporter::parseImageScaler; + dispatch["Clip"] = &ONNXImporter::parseClip; + dispatch["LeakyRelu"] = &ONNXImporter::parseLeakyRelu; + dispatch["Relu"] = &ONNXImporter::parseRelu; + dispatch["Elu"] = &ONNXImporter::parseElu; + dispatch["Tanh"] = &ONNXImporter::parseTanh; + dispatch["PRelu"] = &ONNXImporter::parsePRelu; + dispatch["LRN"] = &ONNXImporter::parseLRN; + dispatch["InstanceNormalization"] = &ONNXImporter::parseInstanceNormalization; + dispatch["BatchNormalization"] = 
&ONNXImporter::parseBatchNormalization; + dispatch["Gemm"] = &ONNXImporter::parseGemm; + dispatch["MatMul"] = &ONNXImporter::parseMatMul; + dispatch["Mul"] = dispatch["Div"] = &ONNXImporter::parseMul; + dispatch["Conv"] = &ONNXImporter::parseConv; + dispatch["ConvTranspose"] = &ONNXImporter::parseConvTranspose; + dispatch["Transpose"] = &ONNXImporter::parseTranspose; + dispatch["Squeeze"] = &ONNXImporter::parseSqueeze; + dispatch["Flatten"] = &ONNXImporter::parseFlatten; + dispatch["Unsqueeze"] = &ONNXImporter::parseUnsqueeze; + dispatch["Expand"] = &ONNXImporter::parseExpand; + dispatch["Reshape"] = &ONNXImporter::parseReshape; + dispatch["Pad"] = &ONNXImporter::parsePad; + dispatch["Shape"] = &ONNXImporter::parseShape; + dispatch["Cast"] = &ONNXImporter::parseCast; + dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill; + dispatch["Gather"] = &ONNXImporter::parseGather; + dispatch["Concat"] = &ONNXImporter::parseConcat; + dispatch["Resize"] = &ONNXImporter::parseResize; + dispatch["Upsample"] = &ONNXImporter::parseUpsample; + dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax; + dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput; + dispatch["Custom"] = &ONNXImporter::parseCustom; + + return dispatch; } Net readNetFromONNX(const String& onnxFile) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 01fa0df985b7..fa33211a50e1 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -2869,7 +2869,7 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer) DispatchMap::const_iterator iter = dispatch.find(type); if (iter != dispatch.end()) { - ((*this).*(iter->second))(net, layer, layerParams); + CALL_MEMBER_FN(*this, iter->second)(net, layer, layerParams); } else { From bdd3930855c06cf9e48b7b836837414c36660c9a Mon Sep 17 00:00:00 2001 From: Zhuo Zhang Date: Thu, 29 Jul 2021 09:34:09 
+0800 Subject: [PATCH 084/128] Fix typo in comment, OpenMP => TBB --- .../include/opencv2/core/parallel/backend/parallel_for.tbb.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp b/modules/core/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp index 264def5f508b..04b0c4c6cb59 100644 --- a/modules/core/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp +++ b/modules/core/include/opencv2/core/parallel/backend/parallel_for.tbb.hpp @@ -38,7 +38,7 @@ static tbb::task_scheduler_init& getScheduler() } #endif -/** OpenMP parallel_for API implementation +/** TBB parallel_for API implementation * * @sa setParallelForBackend * @ingroup core_parallel_backend From 27392f832d99714e5134f877b7b8f72716c916f0 Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Fri, 30 Jul 2021 13:00:13 +0300 Subject: [PATCH 085/128] reimplement onnx refactor for master --- modules/dnn/src/dnn_common.hpp | 1 + modules/dnn/src/onnx/onnx_importer.cpp | 2999 +++++++++++--------- modules/dnn/src/tensorflow/tf_importer.cpp | 2 +- 3 files changed, 1602 insertions(+), 1400 deletions(-) diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index 46fae41cc217..591be88079f3 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -14,6 +14,7 @@ Mutex& getInitializationMutex(); void initializeLayerFactory(); namespace detail { +#define CALL_MEMBER_FN(object, ptrToMemFn) ((object).*(ptrToMemFn)) class NotImplemented : public Layer { diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index d33fb68ac17f..33dc648b2c8d 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -66,7 +66,7 @@ class ONNXImporter public: ONNXImporter(Net& net, const char *onnxFile) - : dstNet(net), utilNet() + : dstNet(net), utilNet(), dispatch(buildDispatchMap()) { hasDynamicShapes = false; 
CV_Assert(onnxFile); @@ -87,7 +87,7 @@ class ONNXImporter } ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) - : dstNet(net), utilNet() + : dstNet(net), utilNet(), dispatch(buildDispatchMap()) { hasDynamicShapes = false; CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); @@ -129,6 +129,57 @@ class ONNXImporter typedef std::map::iterator IterLayerId_t; void handleNode(const opencv_onnx::NodeProto& node_proto); + +private: + typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + typedef std::map DispatchMap; + + void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSlice (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSplit (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseBias (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parsePow (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseLSTM (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseImageScaler (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseClip (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseElu 
(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseTanh (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parsePRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseLRN (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseBatchNormalization (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseGemm (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseMatMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseMul (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConv (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConvTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseTranspose (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseFlatten (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseUnsqueeze (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseExpand (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseReshape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parsePad (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseShape (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseCast (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConstantFill (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseGather (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseConcat 
(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseResize (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseCustom (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + + const DispatchMap dispatch; + static const DispatchMap buildDispatchMap(); }; inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey) @@ -585,13 +636,11 @@ const std::set& ONNXImporter::getSupportedTypes() return layerTypes; } -void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) +void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) { - opencv_onnx::NodeProto node_proto = node_proto_; // TODO FIXIT - CV_Assert(node_proto.output_size() >= 1); std::string name = node_proto.output(0); - std::string layer_type = node_proto.op_type(); + const std::string& layer_type = node_proto.op_type(); CV_LOG_DEBUG(NULL, "DNN/ONNX: processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) ); @@ -605,1593 +654,1745 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto_) layerParams.type = layer_type; layerParams.set("has_dynamic_shapes", hasDynamicShapes); - if (layer_type == "MaxPool") + DispatchMap::const_iterator iter = dispatch.find(layer_type); + if (iter != dispatch.end()) { - layerParams.type = "Pooling"; - layerParams.set("pool", "MAX"); - layerParams.set("ceil_mode", layerParams.has("pad_mode")); + CALL_MEMBER_FN(*this, iter->second)(layerParams, node_proto); } - else if (layer_type == "AveragePool") + else { - layerParams.type = "Pooling"; 
- layerParams.set("pool", "AVE"); - layerParams.set("ceil_mode", layerParams.has("pad_mode")); - layerParams.set("ave_pool_padded_area", framework_name == "pytorch"); + parseCustom(layerParams, node_proto); } - else if (layer_type == "GlobalAveragePool" || layer_type == "GlobalMaxPool" || - layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax") + } + catch (const cv::Exception& e) + { + if (DNN_DIAGNOSTICS_RUN) { - CV_Assert(node_proto.input_size() == 1); - layerParams.type = "Pooling"; - String pool; - if (layer_type == "GlobalMaxPool" || layer_type == "ReduceMax") - pool = "MAX"; - else if (layer_type == "ReduceSum") - pool = "SUM"; - else - pool = "AVE"; - layerParams.set("pool", pool); - layerParams.set("global_pooling", !layerParams.has("axes")); - if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " + << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) << "\n" << e.msg + ); + auto registeredLayers = getLayerFactoryImpl(); + if (registeredLayers.find(layerParams.type) != registeredLayers.end()) { - MatShape inpShape = outShapes[node_proto.input(0)]; - DictValue axes = layerParams.get("axes"); - bool keepdims = layerParams.get("keepdims"); - MatShape targetShape; - std::vector shouldDelete(inpShape.size(), false); - for (int i = 0; i < axes.size(); i++) { - int axis = normalize_axis(axes.get(i), inpShape.size()); - shouldDelete[axis] = true; - } - for (int axis = 0; axis < inpShape.size(); ++axis){ - if (!shouldDelete[axis]) - targetShape.push_back(inpShape[axis]); - else if (keepdims) - targetShape.push_back(1); - } - - if (inpShape.size() == 3 && axes.size() <= 2) + try { - int axis = normalize_axis(axes.get(0), inpShape.size()); - CV_CheckNE(axis, 0, ""); - - LayerParams reshapeLp; - 
reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("axis", 0); - reshapeLp.set("num_axes", 1); - int newShape[] = {1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2)); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); - - LayerParams avgLp; - avgLp.name = layerParams.name + "/avg"; - avgLp.type = "Pooling"; - CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); - avgLp.set("pool", pool); - if (axes.size() == 2) - { - CV_CheckEQ(normalize_axis(axes.get(0), inpShape.size()), 1, "Unsupported mode"); - CV_CheckEQ(normalize_axis(axes.get(1), inpShape.size()), 2, "Unsupported mode"); - avgLp.set("global_pooling", true); - } - else - { - avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true); - avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1); - } - - node_proto.set_input(0, reshapeLp.name); - node_proto.set_output(0, avgLp.name); - addLayer(avgLp, node_proto); + Ptr layer = LayerFactory::createLayerInstance(layerParams.type, layerParams); } - else + catch (const std::exception& e) { - if (inpShape.size() != 4 && inpShape.size() != 5) - CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation."); - - CV_Assert(axes.size() <= inpShape.size() - 2); - std::vector kernel_size(inpShape.size() - 2, 1); - if (axes.size() == 1 && (normalize_axis(axes.get(0), inpShape.size()) <= 1)) - { - int axis = normalize_axis(axes.get(0), inpShape.size()); - MatShape newShape = inpShape; - newShape[axis + 1] = total(newShape, axis + 1); - newShape.resize(axis + 2); - newShape.insert(newShape.begin(), 2 - axis, 1); - - LayerParams reshapeLp; - reshapeLp.type = "Reshape"; - reshapeLp.name = layerParams.name + "/reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("dim", 
DictValue::arrayInt(&newShape[0], newShape.size())); - - node_proto.set_output(0, reshapeLp.name); - addLayer(reshapeLp, node_proto); - - kernel_size.resize(2); - kernel_size[0] = inpShape[axis]; - node_proto.set_input(0, node_proto.output(0)); - } - else - { - for (int i = 0; i < axes.size(); i++) { - int axis = normalize_axis(axes.get(i), inpShape.size()); - CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); - kernel_size[axis - 2] = inpShape[axis]; - } - } - - LayerParams poolLp = layerParams; - poolLp.name = layerParams.name + "/avg"; - CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); - poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); - - node_proto.set_output(0, poolLp.name); - addLayer(poolLp, node_proto); + CV_LOG_ERROR(NULL, "DNN/ONNX: Layer of type " << layerParams.type << "(" << layer_type << ") cannot be created with parameters " << layerParams << ". Error: " << e.what() + ); } - - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); - - node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, layerParams.name); } - else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + } + else + { + CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " + << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) + ); + } + for (int i = 0; i < node_proto.input_size(); i++) + { + CV_LOG_INFO(NULL, " Input[" << i << "] = '" << node_proto.input(i) << "'"); + } + for (int i = 0; i < node_proto.output_size(); i++) + { + CV_LOG_INFO(NULL, " Output[" << i << "] = '" << node_proto.output(i) << "'"); + } + if (DNN_DIAGNOSTICS_RUN) + { + for (int i = 0; i < node_proto.output_size(); ++i) { - CV_CheckEQ(layerParams.get("keepdims"), 0, "layer only supports keepdims = false"); - LayerParams 
reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - int newShape[] = {1, 1, 1, -1}; - reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4)); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); + layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(0, i))); + outShapes[node_proto.output(i)] = outShapes[node_proto.input(0)]; + } + } + else + CV_Error(Error::StsError, cv::format("Node [%s]:(%s) parse error: %s", layer_type.c_str(), name.c_str(), e.what())); + } +} - LayerParams poolLp = layerParams; - poolLp.name = layerParams.name + "/pool"; - CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); +void ONNXImporter::parseMaxPool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Pooling"; + layerParams.set("pool", "MAX"); + layerParams.set("ceil_mode", layerParams.has("pad_mode")); + addLayer(layerParams, node_proto); +} - node_proto.set_input(0, reshapeLp.name); - node_proto.set_output(0, poolLp.name); - addLayer(poolLp, node_proto); +void ONNXImporter::parseAveragePool(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Pooling"; + layerParams.set("pool", "AVE"); + layerParams.set("ceil_mode", layerParams.has("pad_mode")); + layerParams.set("ave_pool_padded_area", framework_name == "pytorch"); + addLayer(layerParams, node_proto); +} - layerParams.type = "Reshape"; - int targetShape[] = {1}; - layerParams.set("dim", DictValue::arrayInt(&targetShape[0], 1)); +void ONNXImporter::parseReduce(LayerParams &layerParams, const opencv_onnx::NodeProto &node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + const std::string& layer_type = node_proto.op_type(); + + CV_Assert(node_proto.input_size() == 1); + layerParams.type = "Pooling"; + String pool; + if (layer_type == 
"GlobalMaxPool" || layer_type == "ReduceMax") + pool = "MAX"; + else if (layer_type == "ReduceSum") + pool = "SUM"; + else + pool = "AVE"; + layerParams.set("pool", pool); + layerParams.set("global_pooling", !layerParams.has("axes")); + if (layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + { + MatShape inpShape = outShapes[node_proto.input(0)]; + DictValue axes = layerParams.get("axes"); + bool keepdims = layerParams.get("keepdims"); + MatShape targetShape; + std::vector shouldDelete(inpShape.size(), false); + for (int i = 0; i < axes.size(); i++) { + int axis = normalize_axis(axes.get(i), inpShape.size()); + shouldDelete[axis] = true; + } + for (int axis = 0; axis < inpShape.size(); ++axis){ + if (!shouldDelete[axis]) + targetShape.push_back(inpShape[axis]); + else if (keepdims) + targetShape.push_back(1); + } - node_proto.set_input(0, node_proto.output(0)); - node_proto.set_output(0, layerParams.name); + if (inpShape.size() == 3 && axes.size() <= 2) + { + int axis = normalize_axis(axes.get(0), inpShape.size()); + CV_CheckNE(axis, 0, ""); + + LayerParams reshapeLp; + reshapeLp.name = layerParams.name + "/reshape"; + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + reshapeLp.set("axis", 0); + reshapeLp.set("num_axes", 1); + int newShape[] = {1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 2)); + + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(reshapeLp.name); + addLayer(reshapeLp, proto); + + LayerParams avgLp; + avgLp.name = layerParams.name + "/avg"; + avgLp.type = "Pooling"; + CV_Assert(layer_id.find(avgLp.name) == layer_id.end()); + avgLp.set("pool", pool); + if (axes.size() == 2) + { + CV_CheckEQ(normalize_axis(axes.get(0), inpShape.size()), 1, "Unsupported mode"); + CV_CheckEQ(normalize_axis(axes.get(1), inpShape.size()), 2, "Unsupported mode"); + avgLp.set("global_pooling", true); + } + else 
+ { + avgLp.set(axis == 2 ? "global_pooling_w" : "global_pooling_h", true); + avgLp.set(axis == 2 ? "kernel_h" : "kernel_w", 1); } + + node_proto.set_input(0, reshapeLp.name); + node_proto.set_output(0, avgLp.name); + addLayer(avgLp, node_proto); } - else if (layer_type == "Slice") + else { - int axis = 0; - std::vector begin; - std::vector end; - std::vector steps; - int inp_size = node_proto.input_size(); + if (inpShape.size() != 4 && inpShape.size() != 5) + CV_Error(Error::StsNotImplemented, "Unsupported input shape of " + layer_type + " operation."); - if (inp_size == 1) + CV_Assert(axes.size() <= inpShape.size() - 2); + std::vector kernel_size(inpShape.size() - 2, 1); + if (axes.size() == 1 && (normalize_axis(axes.get(0), inpShape.size()) <= 1)) { - if (layerParams.has("axes")) { - DictValue axes = layerParams.get("axes"); - for (int i = 1; i < axes.size(); ++i) { - CV_Assert(axes.get(i - 1) == axes.get(i) - 1); - } - axis = axes.get(0); - } + int axis = normalize_axis(axes.get(0), inpShape.size()); + MatShape newShape = inpShape; + newShape[axis + 1] = total(newShape, axis + 1); + newShape.resize(axis + 2); + newShape.insert(newShape.begin(), 2 - axis, 1); - DictValue starts = layerParams.get("starts"); - DictValue ends = layerParams.get("ends"); - CV_Assert(starts.size() == ends.size()); + LayerParams reshapeLp; + reshapeLp.type = "Reshape"; + reshapeLp.name = layerParams.name + "/reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], newShape.size())); - if (axis > 0) { - begin.resize(axis, 0); - end.resize(axis, -1); - } - for (int i = 0; i < starts.size(); ++i) - { - begin.push_back(starts.get(i)); - int finish = ends.get(i); - end.push_back((finish < 0) ? 
--finish : finish); // numpy doesn't include last dim - } - } else { // inp_size > 1 - CV_Assert(inp_size >= 3); - for (int i = 1; i < inp_size; i++) { - CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end()); - } - Mat start_blob = getBlob(node_proto, 1); - Mat end_blob = getBlob(node_proto, 2); - CV_Assert(start_blob.total() == end_blob.total()); - - if (inp_size > 3) { - Mat axes_blob = getBlob(node_proto, 3); - const int* axes = (int*)axes_blob.data; - for (int i = 1; i < axes_blob.total(); ++i) { - CV_Assert(axes[i - 1] == axes[i] - 1); - } - axis = axes[0]; - } + node_proto.set_output(0, reshapeLp.name); + addLayer(reshapeLp, node_proto); - const int* starts = start_blob.ptr(); - const int* ends = end_blob.ptr(); - if (axis > 0) { - begin.resize(axis, 0); - end.resize(axis, -1); - } - std::copy(starts, starts + start_blob.total(), std::back_inserter(begin)); - for (int i = 0; i < end_blob.total(); ++i) - { - int finish = ends[i]; - end.push_back((finish < 0) ? --finish : finish); // numpy doesn't include last dim + kernel_size.resize(2); + kernel_size[0] = inpShape[axis]; + node_proto.set_input(0, node_proto.output(0)); + } + else + { + for (int i = 0; i < axes.size(); i++) { + int axis = normalize_axis(axes.get(i), inpShape.size()); + CV_Assert_N(axis >= 2 + i, axis < inpShape.size()); + kernel_size[axis - 2] = inpShape[axis]; } + } - if (inp_size == 5) { - CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end()); - Mat step_blob = getBlob(node_proto, 4); - const int* steps_ptr = step_blob.ptr(); + LayerParams poolLp = layerParams; + poolLp.name = layerParams.name + "/avg"; + CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); + poolLp.set("kernel_size", DictValue::arrayInt(&kernel_size[0], kernel_size.size())); - if (axis > 0) - steps.resize(axis, 1); + node_proto.set_output(0, poolLp.name); + addLayer(poolLp, node_proto); + } - std::copy(steps_ptr, steps_ptr + step_blob.total(), std::back_inserter(steps)); + layerParams.type = 
"Reshape"; + layerParams.set("dim", DictValue::arrayInt(&targetShape[0], targetShape.size())); - // Very strange application for Slice op with tensor reversing. - // We just workaround it for 2d constants. - if (constBlobs.find(node_proto.input(0)) != constBlobs.end() && - axis == 0 && - start_blob.at(0) == -1 && step_blob.at(0) == -1 && - end_blob.at(0) == std::numeric_limits::min()) - { - Mat inp = getBlob(node_proto, 0); - if (inp.dims == 2) - { - Mat flipped; - flip(inp, flipped, 0); - addConstant(layerParams.name, flipped); - return; - } - } - } - } - layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size())); - layerParams.set("end", DictValue::arrayInt(&end[0], end.size())); - layerParams.set("axis", axis); + node_proto.set_input(0, node_proto.output(0)); + node_proto.set_output(0, layerParams.name); + } + else if (!layerParams.has("axes") && (layer_type == "ReduceMean" || layer_type == "ReduceSum" || layer_type == "ReduceMax")) + { + CV_CheckEQ(layerParams.get("keepdims"), 0, "layer only supports keepdims = false"); + LayerParams reshapeLp; + reshapeLp.name = layerParams.name + "/reshape"; + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + int newShape[] = {1, 1, 1, -1}; + reshapeLp.set("dim", DictValue::arrayInt(&newShape[0], 4)); + + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(reshapeLp.name); + addLayer(reshapeLp, proto); + + LayerParams poolLp = layerParams; + poolLp.name = layerParams.name + "/pool"; + CV_Assert(layer_id.find(poolLp.name) == layer_id.end()); + + node_proto.set_input(0, reshapeLp.name); + node_proto.set_output(0, poolLp.name); + addLayer(poolLp, node_proto); + + layerParams.type = "Reshape"; + int targetShape[] = {1}; + layerParams.set("dim", DictValue::arrayInt(&targetShape[0], 1)); + + node_proto.set_input(0, node_proto.output(0)); + node_proto.set_output(0, layerParams.name); + } + addLayer(layerParams, node_proto); +} - if 
(!steps.empty()) - layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size())); +void ONNXImporter::parseSlice(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + int axis = 0; + std::vector begin; + std::vector end; + std::vector steps; + int inp_size = node_proto.input_size(); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - Mat inp = getBlob(node_proto, 0); - std::vector inputs, sliced; - inputs.push_back(inp); - runLayer(layerParams, inputs, sliced); - CV_Assert(sliced.size() == 1); - addConstant(layerParams.name, sliced[0]); - return; + if (inp_size == 1) + { + if (layerParams.has("axes")) { + DictValue axes = layerParams.get("axes"); + for (int i = 1; i < axes.size(); ++i) { + CV_Assert(axes.get(i - 1) == axes.get(i) - 1); } + axis = axes.get(0); } - else if (layer_type == "Split") - { - if (layerParams.has("split")) - { - DictValue splits = layerParams.get("split"); - const int numSplits = splits.size(); - CV_Assert(numSplits > 1); - std::vector slicePoints(numSplits - 1, splits.get(0)); - for (int i = 1; i < splits.size() - 1; ++i) - { - slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); - } - layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); - } - else - { - layerParams.set("num_split", node_proto.output_size()); - } - layerParams.type = "Slice"; + DictValue starts = layerParams.get("starts"); + DictValue ends = layerParams.get("ends"); + CV_Assert(starts.size() == ends.size()); + + if (axis > 0) { + begin.resize(axis, 0); + end.resize(axis, -1); } - else if (layer_type == "Add" || layer_type == "Sum" || layer_type == "Sub") + for (int i = 0; i < starts.size(); ++i) { - bool isSub = layer_type == "Sub"; - CV_CheckEQ(node_proto.input_size(), 2, ""); - bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end(); - bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end(); - if (is_const_0 && is_const_1) - { - Mat blob_0 = 
getBlob(node_proto, 0); - Mat blob_1 = getBlob(node_proto, 1); - CV_Assert(blob_0.size == blob_1.size); - Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1); - addConstant(layerParams.name, output); - return; - } - else if (is_const_0 || is_const_1) - { - int const_blob_id = is_const_0 ? 0 : 1; - Mat blob = getBlob(node_proto, const_blob_id); - int blob_total = blob.total(); - if (blob_total == 1) { - layerParams.type = "Power"; - layerParams.set("shift", (isSub ? -1 : 1) * blob.ptr()[0]); - } - else { - MatShape inpShape = outShapes[node_proto.input(1 - const_blob_id)]; - if (shape(blob) == inpShape) - { - LayerParams constParams; - constParams.name = layerParams.name + "/const"; - constParams.type = "Const"; - constParams.blobs.push_back((isSub ? -1 : 1) * blob); - int id; - if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(constParams.name, constParams.type, constParams); - else - id = dstNet.addLayer(constParams.name, constParams.type, constParams); - layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0))); - outShapes[constParams.name] = shape(blob); - - layerParams.type = "Eltwise"; - node_proto.set_input(const_blob_id, constParams.name); - } - else - { - layerParams.type = "Scale"; - layerParams.set("bias_term", true); - int axis = 1; - for (int i = 0; i < graph_proto.initializer_size(); i++) - { - opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i); - if (tensor_proto.name() == node_proto.input(const_blob_id)) - { - axis = inpShape.size() - tensor_proto.dims_size(); - break; - } - } - layerParams.set("axis", axis); - blob = blob.reshape(1, 1); - layerParams.blobs.push_back((isSub ? 
-1 : 1) * blob); - } - } - } - else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) - { - layerParams.type = "Eltwise"; - if (isSub) - { - static float subCoeffs[] = {1.f, -1.f}; - layerParams.set("coeff", DictValue::arrayReal(subCoeffs, 2)); - } - } - else - { - if (isSub) - { - LayerParams powerParams; - powerParams.name = layerParams.name + "/neg"; - powerParams.type = "Power"; - powerParams.set("scale", -1); - - int id; - //Create Power layer - if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(powerParams.name, powerParams.type, powerParams); - else - id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); - //Connect to input - IterLayerId_t layerId = layer_id.find(node_proto.input(1)); - CV_Assert(layerId != layer_id.end()); - if (DNN_DIAGNOSTICS_RUN) - utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); - else - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); - //Add shape - layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); - outShapes[powerParams.name] = outShapes[node_proto.input(1)]; - - //Replace input to Power - node_proto.set_input(1, powerParams.name); - } - layerParams.type = "Scale"; - layerParams.set("bias_term", true); - } + begin.push_back(starts.get(i)); + int finish = ends.get(i); + end.push_back((finish < 0) ? 
--finish : finish); // numpy doesn't include last dim } - else if (layer_type == "Pow") - { - if (layer_id.find(node_proto.input(1)) != layer_id.end()) - CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power"); - - Mat blob = getBlob(node_proto, 1); - if (blob.total() != 1) - CV_Error(Error::StsNotImplemented, "Pow op supports only scalar power"); - - blob.convertTo(blob, CV_32F); - layerParams.type = "Power"; - layerParams.set("power", blob.ptr()[0]); + } else { // inp_size > 1 + CV_Assert(inp_size >= 3); + for (int i = 1; i < inp_size; i++) { + CV_Assert(constBlobs.find(node_proto.input(i)) != constBlobs.end()); } - else if (layer_type == "Max") - { - layerParams.type = "Eltwise"; - layerParams.set("operation", "max"); + Mat start_blob = getBlob(node_proto, 1); + Mat end_blob = getBlob(node_proto, 2); + CV_Assert(start_blob.total() == end_blob.total()); + + if (inp_size > 3) { + Mat axes_blob = getBlob(node_proto, 3); + const int* axes = (int*)axes_blob.data; + for (int i = 1; i < axes_blob.total(); ++i) { + CV_Assert(axes[i - 1] == axes[i] - 1); + } + axis = axes[0]; } - else if (layer_type == "Neg") - { - layerParams.type = "Power"; - layerParams.set("scale", -1); + + const int* starts = start_blob.ptr(); + const int* ends = end_blob.ptr(); + if (axis > 0) { + begin.resize(axis, 0); + end.resize(axis, -1); } - else if (layer_type == "Constant") + std::copy(starts, starts + start_blob.total(), std::back_inserter(begin)); + for (int i = 0; i < end_blob.total(); ++i) { - CV_Assert(node_proto.input_size() == 0); - CV_Assert(layerParams.blobs.size() == 1); - addConstant(layerParams.name, layerParams.blobs[0]); - return; + int finish = ends[i]; + end.push_back((finish < 0) ? 
--finish : finish); // numpy doesn't include last dim } - else if (layer_type == "LSTM") - { - LayerParams lstmParams = layerParams; - lstmParams.name += "/lstm"; - - // https://pytorch.org/docs/stable/nn.html#lstm - CV_Assert(node_proto.input_size() == 7); - Mat Wx = getBlob(node_proto, 1); - Mat Wh = getBlob(node_proto, 2); - Mat b = getBlob(node_proto, 3); - Mat h0 = getBlob(node_proto, 5); - Mat c0 = getBlob(node_proto, 6); - - b = b.reshape(1, b.size[0]); - - const int numHidden = lstmParams.get("hidden_size"); - const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. - const int numFeatures = Wx.size[2]; - Mat bx = b.colRange(0, b.cols / 2); - Mat bh = b.colRange(b.cols / 2, b.cols); - b = bx + bh; - - // IFGO->IGFO - for (int k = 0; k < numDirs; ++k) + + if (inp_size == 5) { + CV_Assert(constBlobs.find(node_proto.input(4)) != constBlobs.end()); + Mat step_blob = getBlob(node_proto, 4); + const int* steps_ptr = step_blob.ptr(); + + if (axis > 0) + steps.resize(axis, 1); + + std::copy(steps_ptr, steps_ptr + step_blob.total(), std::back_inserter(steps)); + + // Very strange application for Slice op with tensor reversing. + // We just workaround it for 2d constants. 
+ if (constBlobs.find(node_proto.input(0)) != constBlobs.end() && + axis == 0 && + start_blob.at(0) == -1 && step_blob.at(0) == -1 && + end_blob.at(0) == std::numeric_limits::min()) { - float* WxData = Wx.ptr(k); - float* WhData = Wh.ptr(k); - float* biasData = b.ptr(k); - for (int j = 0; j < numHidden; ++j) + Mat inp = getBlob(node_proto, 0); + if (inp.dims == 2) { - for (int i = 0; i < numFeatures; ++i) - { - std::swap(WxData[(numHidden + j) * numFeatures + i], - WxData[(numHidden * 2 + j) * numFeatures + i]); - } - for (int i = 0; i < numHidden; ++i) - { - std::swap(WhData[(numHidden + j) * numHidden + i], - WhData[(numHidden * 2 + j) * numHidden + i]); - } - std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); + Mat flipped; + flip(inp, flipped, 0); + addConstant(layerParams.name, flipped); + return; } } - Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); - Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); - h0 = h0.reshape(1, h0.size[0] * h0.size[1]); - c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + } + } + layerParams.set("begin", DictValue::arrayInt(&begin[0], begin.size())); + layerParams.set("end", DictValue::arrayInt(&end[0], end.size())); + layerParams.set("axis", axis); - lstmParams.blobs.resize(5); - lstmParams.blobs[0] = Wh; - lstmParams.blobs[1] = Wx; - lstmParams.blobs[2] = b; - lstmParams.blobs[3] = h0; - lstmParams.blobs[4] = c0; - lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); + if (!steps.empty()) + layerParams.set("steps", DictValue::arrayInt(&steps[0], steps.size())); - node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name - addLayer(lstmParams, node_proto); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat inp = getBlob(node_proto, 0); + std::vector inputs, sliced; + inputs.push_back(inp); + runLayer(layerParams, inputs, sliced); + CV_Assert(sliced.size() == 1); + addConstant(layerParams.name, sliced[0]); + return; + } + 
addLayer(layerParams, node_proto); +} - MatShape lstmShape = outShapes[node_proto.output(0)]; +void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + if (layerParams.has("split")) + { + DictValue splits = layerParams.get("split"); + const int numSplits = splits.size(); + CV_Assert(numSplits > 1); - // Add fake 1 as it is done in ONNX - lstmShape.insert(lstmShape.begin() + 1, 1); + std::vector slicePoints(numSplits - 1, splits.get(0)); + for (int i = 1; i < splits.size() - 1; ++i) + { + slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); + } + layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); + } + else + { + layerParams.set("num_split", node_proto.output_size()); + } + layerParams.type = "Slice"; + addLayer(layerParams, node_proto); +} - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size())); - node_proto.set_input(0, lstmParams.name); // redirect input to LSTM - node_proto.set_output(0, layerParams.name); // keep origin LSTM's name +void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + const std::string& layer_type = node_proto.op_type(); + bool isSub = layer_type == "Sub"; + CV_CheckEQ(node_proto.input_size(), 2, ""); + bool is_const_0 = layer_id.find(node_proto.input(0)) == layer_id.end(); + bool is_const_1 = layer_id.find(node_proto.input(1)) == layer_id.end(); + if (is_const_0 && is_const_1) + { + Mat blob_0 = getBlob(node_proto, 0); + Mat blob_1 = getBlob(node_proto, 1); + CV_Assert(blob_0.size == blob_1.size); + Mat output = isSub ? (blob_0 - blob_1) : (blob_0 + blob_1); + addConstant(layerParams.name, output); + return; + } + else if (is_const_0 || is_const_1) + { + int const_blob_id = is_const_0 ? 
0 : 1; + Mat blob = getBlob(node_proto, const_blob_id); + int blob_total = blob.total(); + if (blob_total == 1) { + layerParams.type = "Power"; + layerParams.set("shift", (isSub ? -1 : 1) * blob.ptr()[0]); } - else if (layer_type == "ImageScaler") - { - const float scale = layerParams.has("scale") ? layerParams.get("scale") : 1.0f; - layerParams.erase("scale"); + else { + MatShape inpShape = outShapes[node_proto.input(1 - const_blob_id)]; + if (shape(blob) == inpShape) + { + LayerParams constParams; + constParams.name = layerParams.name + "/const"; + constParams.type = "Const"; + constParams.blobs.push_back((isSub ? -1 : 1) * blob); + int id; + if (DNN_DIAGNOSTICS_RUN) + id = utilNet.addLayer(constParams.name, constParams.type, constParams); + else + id = dstNet.addLayer(constParams.name, constParams.type, constParams); + layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0))); + outShapes[constParams.name] = shape(blob); - if (layerParams.has("bias")) + layerParams.type = "Eltwise"; + node_proto.set_input(const_blob_id, constParams.name); + } + else { layerParams.type = "Scale"; - layerParams.blobs.push_back( - Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale)); - layerParams.set("bias_term", true); - Mat bias(1, layerParams.get("bias").size(), CV_32FC1); - for (int j = 0; j < bias.total(); j++) { - bias.at(0, j) = layerParams.get("bias").getRealValue(j); + int axis = 1; + for (int i = 0; i < graph_proto.initializer_size(); i++) + { + opencv_onnx::TensorProto tensor_proto = graph_proto.initializer(i); + if (tensor_proto.name() == node_proto.input(const_blob_id)) + { + axis = inpShape.size() - tensor_proto.dims_size(); + break; + } } - layerParams.blobs.push_back(bias); - layerParams.erase("bias"); - } - else { - layerParams.set("scale", scale); - layerParams.type = "Power"; + layerParams.set("axis", axis); + blob = blob.reshape(1, 1); + layerParams.blobs.push_back((isSub ? 
-1 : 1) * blob); } } - else if (layer_type == "Clip") - { - layerParams.type = "ReLU6"; - replaceLayerParam(layerParams, "min", "min_value"); - replaceLayerParam(layerParams, "max", "max_value"); - - } - else if (layer_type == "LeakyRelu") - { - layerParams.type = "ReLU"; - replaceLayerParam(layerParams, "alpha", "negative_slope"); - } - else if (layer_type == "Relu") - { - layerParams.type = "ReLU"; - } - else if (layer_type == "Elu") - { - layerParams.type = "ELU"; - } - else if (layer_type == "Tanh") - { - layerParams.type = "TanH"; - } - else if (layer_type == "PRelu") - { - layerParams.type = "PReLU"; - layerParams.blobs.push_back(getBlob(node_proto, 1)); - } - else if (layer_type == "LRN") + } + else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) + { + layerParams.type = "Eltwise"; + if (isSub) { - replaceLayerParam(layerParams, "size", "local_size"); + static float subCoeffs[] = {1.f, -1.f}; + layerParams.set("coeff", DictValue::arrayReal(subCoeffs, 2)); } - else if (layer_type == "InstanceNormalization") + } + else + { + if (isSub) { - if (node_proto.input_size() != 3) - CV_Error(Error::StsNotImplemented, - "Expected input, scale, bias"); - - layerParams.blobs.resize(4); - layerParams.blobs[2] = getBlob(node_proto, 1); // weightData - layerParams.blobs[3] = getBlob(node_proto, 2); // biasData - layerParams.set("has_bias", true); - layerParams.set("has_weight", true); - - // Get number of channels in input - int size = layerParams.blobs[2].total(); - layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F); // mean - layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std - - LayerParams mvnParams; - mvnParams.name = layerParams.name + "/MVN"; - mvnParams.type = "MVN"; - mvnParams.set("eps", layerParams.get("epsilon")); - layerParams.erase("epsilon"); - - //Create MVN layer + LayerParams powerParams; + powerParams.name = layerParams.name + "/neg"; + powerParams.type = "Power"; + powerParams.set("scale", -1); + int id; + //Create Power 
layer if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + id = utilNet.addLayer(powerParams.name, powerParams.type, powerParams); else - id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); //Connect to input - IterLayerId_t layerId = layer_id.find(node_proto.input(0)); + IterLayerId_t layerId = layer_id.find(node_proto.input(1)); CV_Assert(layerId != layer_id.end()); if (DNN_DIAGNOSTICS_RUN) utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); else dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape - layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0))); - outShapes[mvnParams.name] = outShapes[node_proto.input(0)]; + layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); + outShapes[powerParams.name] = outShapes[node_proto.input(1)]; - //Replace Batch Norm's input to MVN - node_proto.set_input(0, mvnParams.name); - layerParams.type = "BatchNorm"; + //Replace input to Power + node_proto.set_input(1, powerParams.name); } - else if (layer_type == "BatchNormalization") - { - if (node_proto.input_size() != 5) - CV_Error(Error::StsNotImplemented, - "Expected input, scale, bias, mean and var"); + layerParams.type = "Scale"; + layerParams.set("bias_term", true); + } + addLayer(layerParams, node_proto); +} - layerParams.type = "BatchNorm"; - replaceLayerParam(layerParams, "epsilon", "eps"); - replaceLayerParam(layerParams, "spatial", "use_global_stats"); +void ONNXImporter::parsePow(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + if (layer_id.find(node_proto.input(1)) != layer_id.end()) + CV_Error(Error::StsNotImplemented, "Unsupported Pow op with variable power"); - Mat meanData = getBlob(node_proto, 3); - Mat stdData = getBlob(node_proto, 4); + Mat blob = getBlob(node_proto, 1); + if (blob.total() != 1) + CV_Error(Error::StsNotImplemented, 
"Pow op supports only scalar power"); - layerParams.blobs.push_back(meanData); - layerParams.blobs.push_back(stdData); + blob.convertTo(blob, CV_32F); + layerParams.type = "Power"; + layerParams.set("power", blob.ptr()[0]); + addLayer(layerParams, node_proto); +} - if (!node_proto.input(1).empty()) { - layerParams.set("has_weight", true); - layerParams.blobs.push_back(getBlob(node_proto, 1)); // weightData - } else { - layerParams.set("has_weight", false); - } +void ONNXImporter::parseMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Eltwise"; + layerParams.set("operation", "max"); + addLayer(layerParams, node_proto); +} - if (!node_proto.input(2).empty()) { - layerParams.set("has_bias", true); - layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData - } else { - layerParams.set("has_bias", false); - } - } - else if (layer_type == "Gemm") - { - CV_Assert(node_proto.input_size() >= 2); - layerParams.type = "InnerProduct"; - Mat weights = getBlob(node_proto, 1); - int ind_num_out = 0; - if (layerParams.has("transB") && !layerParams.get("transB")) { - transpose(weights, weights); - ind_num_out = 1; - } - layerParams.blobs.push_back(weights); +void ONNXImporter::parseNeg(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Power"; + layerParams.set("scale", -1); + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseConstant(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 0); + CV_Assert(layerParams.blobs.size() == 1); + addConstant(layerParams.name, layerParams.blobs[0]); +} - if (node_proto.input_size() == 3) { - Mat bias = getBlob(node_proto, 2); - layerParams.blobs.push_back(bias); +void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + LayerParams lstmParams = layerParams; + lstmParams.name += 
"/lstm"; + + // https://pytorch.org/docs/stable/nn.html#lstm + CV_Assert(node_proto.input_size() == 7); + Mat Wx = getBlob(node_proto, 1); + Mat Wh = getBlob(node_proto, 2); + Mat b = getBlob(node_proto, 3); + Mat h0 = getBlob(node_proto, 5); + Mat c0 = getBlob(node_proto, 6); + + b = b.reshape(1, b.size[0]); + + const int numHidden = lstmParams.get("hidden_size"); + const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. + const int numFeatures = Wx.size[2]; + Mat bx = b.colRange(0, b.cols / 2); + Mat bh = b.colRange(b.cols / 2, b.cols); + b = bx + bh; + + // IFGO->IGFO + for (int k = 0; k < numDirs; ++k) + { + float* WxData = Wx.ptr(k); + float* WhData = Wh.ptr(k); + float* biasData = b.ptr(k); + for (int j = 0; j < numHidden; ++j) + { + for (int i = 0; i < numFeatures; ++i) + { + std::swap(WxData[(numHidden + j) * numFeatures + i], + WxData[(numHidden * 2 + j) * numFeatures + i]); } - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + for (int i = 0; i < numHidden; ++i) { - Mat inputBuf = getBlob(node_proto, 0); - - LayerParams constParams; - constParams.name = node_proto.input(0); - constParams.type = "Const"; - constParams.blobs.push_back(inputBuf); - - opencv_onnx::NodeProto proto; - proto.add_output(constParams.name); - addLayer(constParams, proto); + std::swap(WhData[(numHidden + j) * numHidden + i], + WhData[(numHidden * 2 + j) * numHidden + i]); } - - layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]); - layerParams.set("bias_term", node_proto.input_size() == 3); + std::swap(biasData[numHidden + j], biasData[numHidden * 2 + j]); } - else if (layer_type == "MatMul") - { - CV_Assert(node_proto.input_size() == 2); - layerParams.type = "InnerProduct"; - layerParams.set("bias_term", false); - CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end()); - int firstInpDims = outShapes[node_proto.input(0)].size(); - int secondInpDims; - - if (constBlobs.find(node_proto.input(1)) != 
constBlobs.end()) - { - Mat blob = getBlob(node_proto, 1); - secondInpDims = blob.dims; - layerParams.blobs.push_back(blob.t()); - layerParams.set("num_output", layerParams.blobs[0].size[0]); - } else { - secondInpDims = outShapes[node_proto.input(1)].size(); - } - layerParams.set("axis", firstInpDims - secondInpDims + 1); + } + Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); + Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + + lstmParams.blobs.resize(5); + lstmParams.blobs[0] = Wh; + lstmParams.blobs[1] = Wx; + lstmParams.blobs[2] = b; + lstmParams.blobs[3] = h0; + lstmParams.blobs[4] = c0; + lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); + + node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name + addLayer(lstmParams, node_proto); + + MatShape lstmShape = outShapes[node_proto.output(0)]; + + // Add fake 1 as it is done in ONNX + lstmShape.insert(lstmShape.begin() + 1, 1); + + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&lstmShape[0], lstmShape.size())); + node_proto.set_input(0, lstmParams.name); // redirect input to LSTM + node_proto.set_output(0, layerParams.name); // keep origin LSTM's name + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + const float scale = layerParams.has("scale") ? 
layerParams.get("scale") : 1.0f; + layerParams.erase("scale"); + + if (layerParams.has("bias")) + { + layerParams.type = "Scale"; + layerParams.blobs.push_back( + Mat(Size(1, layerParams.get("bias").size()), CV_32FC1, scale)); + + layerParams.set("bias_term", true); + Mat bias(1, layerParams.get("bias").size(), CV_32FC1); + for (int j = 0; j < bias.total(); j++) { + bias.at(0, j) = layerParams.get("bias").getRealValue(j); } - else if (layer_type == "Mul" || layer_type == "Div") - { - CV_Assert(node_proto.input_size() == 2); + layerParams.blobs.push_back(bias); + layerParams.erase("bias"); + } + else { + layerParams.set("scale", scale); + layerParams.type = "Power"; + } + addLayer(layerParams, node_proto); +} - bool isDiv = layer_type == "Div"; - int constId = -1; - bool haveVariables = false; - for (int i = 0; i < 2; ++i) - { - if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) - constId = i; - else - haveVariables = true; - } - if (constId != -1 && haveVariables) - { - Mat blob = getBlob(node_proto, constId); - blob = blob.reshape(1, 1); - if (blob.total() == 1) { - float blob_value = blob.ptr()[0]; - float coeff = isDiv ? 
1.0 / blob_value : blob_value; - layerParams.set("scale", coeff); - layerParams.type = "Power"; - } - else { - if (isDiv) - divide(1.0, blob, blob); - layerParams.blobs.push_back(blob); - layerParams.type = "Scale"; - } - } - else if (!haveVariables) - { - Mat inp0 = getBlob(node_proto, 0); - Mat inp1 = getBlob(node_proto, 1); +void ONNXImporter::parseClip(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "ReLU6"; + replaceLayerParam(layerParams, "min", "min_value"); + replaceLayerParam(layerParams, "max", "max_value"); + addLayer(layerParams, node_proto); +} - if (inp0.size != inp1.size && (inp0.total() != 1 || inp1.total() != 1)) - CV_Error_(Error::StsNotImplemented, ("Different shapes case is not supported with constant inputs: %s", layer_type.c_str())); +void ONNXImporter::parseLeakyRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "ReLU"; + replaceLayerParam(layerParams, "alpha", "negative_slope"); + addLayer(layerParams, node_proto); +} - if (inp0.total() == 1 && inp1.total() == 1 && inp0.dims != inp1.dims) - { - if (inp0.dims < inp1.dims) - { - inp0 = inp0.reshape(1, inp1.dims, inp1.size); - inp0.dims = inp1.dims; - } - else - { - inp1 = inp1.reshape(1, inp0.dims, inp0.size); - inp1.dims = inp0.dims; - } - } +void ONNXImporter::parseRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "ReLU"; + addLayer(layerParams, node_proto); +} - Mat out; - if (inp0.total() != inp1.total()) - { - if (inp0.total() == 1) - { - float inp0_value = inp0.ptr()[0]; - float coeff = isDiv ? 1.0 / inp0_value : inp0_value; - multiply(inp1, coeff, out); - } - else - { - float inp1_value = inp1.ptr()[0]; - float coeff = isDiv ? 
1.0 / inp1_value : inp1_value; - multiply(inp0, coeff, out); - } +void ONNXImporter::parseElu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "ELU"; + addLayer(layerParams, node_proto); +} - } - else - { - out = isDiv ? inp0 / inp1 : inp0.mul(inp1); - } +void ONNXImporter::parseTanh(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "TanH"; + addLayer(layerParams, node_proto); +} - if (inp0.dims == 1 && inp1.dims == 1) - out.dims = 1; // to workaround dims == 1 - addConstant(layerParams.name, out); - return; - } - else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) - { - layerParams.type = "Eltwise"; - layerParams.set("operation", isDiv ? "div" : "prod"); - } - else - { - // Scale layer allocate output with the first input shape - if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)])) - { - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(1)); - proto.add_input(node_proto.input(0)); - proto.add_output(layerParams.name); - node_proto = proto; - } +void ONNXImporter::parsePRelu(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "PReLU"; + layerParams.blobs.push_back(getBlob(node_proto, 1)); + addLayer(layerParams, node_proto); +} - if (isDiv) - { - LayerParams powerParams; - powerParams.name = layerParams.name + "/inv"; - powerParams.type = "Power"; - powerParams.set("power", -1); - - int id; - //Create Power layer - if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(powerParams.name, powerParams.type, powerParams); - else - id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); - //Connect to input - IterLayerId_t layerId = layer_id.find(node_proto.input(1)); - CV_Assert(layerId != layer_id.end()); - if (DNN_DIAGNOSTICS_RUN) - utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); - else - dstNet.connect(layerId->second.layerId, 
layerId->second.outputId, id, 0); - //Add shape - layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); - outShapes[powerParams.name] = outShapes[node_proto.input(1)]; - - //Replace input to Power - node_proto.set_input(1, powerParams.name); - } - layerParams.type = "Scale"; - } - } - else if (layer_type == "Conv") - { - CV_Assert(node_proto.input_size() >= 2); - layerParams.type = "Convolution"; - for (int j = 1; j < node_proto.input_size(); j++) { - if (constBlobs.find(node_proto.input(j)) != constBlobs.end()) - { - layerParams.blobs.push_back(getBlob(node_proto, j)); - } - } - int outCn = layerParams.blobs.empty() ? outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0]; - layerParams.set("num_output", outCn); +void ONNXImporter::parseLRN(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + replaceLayerParam(layerParams, "size", "local_size"); + addLayer(layerParams, node_proto); +} - // Check for asymmetric padding in Conv2D - if (layerParams.has("pad")) - { - bool asymmetricPadding = false; - DictValue pads = layerParams.get("pad"); - const int dims = pads.size() / 2; - for (int i = 0; i < dims; ++i) - { - if (pads.get(i) != pads.get(i + dims)) - { - asymmetricPadding = true; - break; - } - } - if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r] - { - layerParams.erase("pad"); - // No paddings required for N, C axis - std::vector paddings(4, 0); - // Add paddings for H, W axis - for (int i = 0; i < dims; ++i) - { - paddings.push_back(pads.get(i)); - paddings.push_back(pads.get(dims + i)); - } - LayerParams padLp; - padLp.name = layerParams.name + "/pad"; - padLp.type = "Padding"; - padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); +void ONNXImporter::parseInstanceNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + if (node_proto.input_size() != 3) + CV_Error(Error::StsNotImplemented, 
+ "Expected input, scale, bias"); + + layerParams.blobs.resize(4); + layerParams.blobs[2] = getBlob(node_proto, 1); // weightData + layerParams.blobs[3] = getBlob(node_proto, 2); // biasData + layerParams.set("has_bias", true); + layerParams.set("has_weight", true); + + // Get number of channels in input + int size = layerParams.blobs[2].total(); + layerParams.blobs[0] = Mat::zeros(size, 1, CV_32F); // mean + layerParams.blobs[1] = Mat::ones(size, 1, CV_32F); // std + + LayerParams mvnParams; + mvnParams.name = layerParams.name + "/MVN"; + mvnParams.type = "MVN"; + mvnParams.set("eps", layerParams.get("epsilon")); + layerParams.erase("epsilon"); + + //Create MVN layer + int id; + if (DNN_DIAGNOSTICS_RUN) + id = utilNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + else + id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + //Connect to input + IterLayerId_t layerId = layer_id.find(node_proto.input(0)); + CV_Assert(layerId != layer_id.end()); + if (DNN_DIAGNOSTICS_RUN) + utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + else + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + //Add shape + layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0))); + outShapes[mvnParams.name] = outShapes[node_proto.input(0)]; + + //Replace Batch Norm's input to MVN + node_proto.set_input(0, mvnParams.name); + layerParams.type = "BatchNorm"; + addLayer(layerParams, node_proto); +} - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(padLp.name); +void ONNXImporter::parseBatchNormalization(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + if (node_proto.input_size() != 5) + CV_Error(Error::StsNotImplemented, + "Expected input, scale, bias, mean and var"); - addLayer(padLp, proto); - node_proto.set_input(0, padLp.name); - } - } - } - else if (layer_type == "ConvTranspose") - { - CV_Assert(node_proto.input_size() >= 2); - layerParams.type = 
"Deconvolution"; - for (int j = 1; j < node_proto.input_size(); j++) { - layerParams.blobs.push_back(getBlob(node_proto, j)); - } - layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get("group", 1)); - layerParams.set("bias_term", node_proto.input_size() == 3); + layerParams.type = "BatchNorm"; + replaceLayerParam(layerParams, "epsilon", "eps"); + replaceLayerParam(layerParams, "spatial", "use_global_stats"); - if (!layerParams.has("kernel_size")) - CV_Error(Error::StsNotImplemented, - "Required attribute 'kernel_size' is not present."); + Mat meanData = getBlob(node_proto, 3); + Mat stdData = getBlob(node_proto, 4); - if (layerParams.has("output_shape")) - { - const DictValue& outShape = layerParams.get("output_shape"); - DictValue strides = layerParams.get("stride"); - DictValue kernel = layerParams.get("kernel_size"); + layerParams.blobs.push_back(meanData); + layerParams.blobs.push_back(stdData); - String padMode; - std::vector adjust_pads; - if (layerParams.has("pad_mode")) - { - padMode = toUpperCase(layerParams.get("pad_mode")); - if (padMode != "SAME" && padMode != "VALID") - CV_Error(Error::StsError, "Unsupported padding mode " + padMode); + if (!node_proto.input(1).empty()) { + layerParams.set("has_weight", true); + layerParams.blobs.push_back(getBlob(node_proto, 1)); // weightData + } else { + layerParams.set("has_weight", false); + } - for (int i = 0; i < strides.size(); i++) - { - int sz = outShape.get(2 + i); - int stride = strides.get(i); - adjust_pads.push_back(padMode == "SAME"? 
(sz - 1) % stride : - (sz - kernel.get(i)) % stride); - } - layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size())); - } - } - else if (layerParams.has("output_padding")) - { - replaceLayerParam(layerParams, "output_padding", "adj"); - } - } - else if (layer_type == "Transpose") - { - layerParams.type = "Permute"; - replaceLayerParam(layerParams, "perm", "order"); + if (!node_proto.input(2).empty()) { + layerParams.set("has_bias", true); + layerParams.blobs.push_back(getBlob(node_proto, 2)); // biasData + } else { + layerParams.set("has_bias", false); + } + addLayer(layerParams, node_proto); +} - CV_Assert(node_proto.input_size() == 1); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - std::vector inputs(1, getBlob(node_proto, 0)), transposed; - runLayer(layerParams, inputs, transposed); - CV_Assert(transposed.size() == 1); - addConstant(layerParams.name, transposed[0]); - return; - } +void ONNXImporter::parseGemm(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() >= 2); + layerParams.type = "InnerProduct"; + Mat weights = getBlob(node_proto, 1); + int ind_num_out = 0; + if (layerParams.has("transB") && !layerParams.get("transB")) { + transpose(weights, weights); + ind_num_out = 1; + } + layerParams.blobs.push_back(weights); + + if (node_proto.input_size() == 3) { + Mat bias = getBlob(node_proto, 2); + layerParams.blobs.push_back(bias); + } + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat inputBuf = getBlob(node_proto, 0); + + LayerParams constParams; + constParams.name = node_proto.input(0); + constParams.type = "Const"; + constParams.blobs.push_back(inputBuf); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(constParams, proto); + } + + layerParams.set("num_output", layerParams.blobs[0].size[ind_num_out]); + layerParams.set("bias_term", node_proto.input_size() == 3); + addLayer(layerParams, node_proto); +} 
+ +void ONNXImporter::parseMatMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 2); + layerParams.type = "InnerProduct"; + layerParams.set("bias_term", false); + CV_Assert(constBlobs.find(node_proto.input(0)) == constBlobs.end()); + int firstInpDims = outShapes[node_proto.input(0)].size(); + int secondInpDims; + + if (constBlobs.find(node_proto.input(1)) != constBlobs.end()) + { + Mat blob = getBlob(node_proto, 1); + secondInpDims = blob.dims; + layerParams.blobs.push_back(blob.t()); + layerParams.set("num_output", layerParams.blobs[0].size[0]); + } else { + secondInpDims = outShapes[node_proto.input(1)].size(); + } + layerParams.set("axis", firstInpDims - secondInpDims + 1); + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + const std::string& layer_type = node_proto.op_type(); + CV_Assert(node_proto.input_size() == 2); + + bool isDiv = layer_type == "Div"; + int constId = -1; + bool haveVariables = false; + for (int i = 0; i < 2; ++i) + { + if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) + constId = i; + else + haveVariables = true; + } + if (constId != -1 && haveVariables) + { + Mat blob = getBlob(node_proto, constId); + blob = blob.reshape(1, 1); + if (blob.total() == 1) { + float blob_value = blob.ptr()[0]; + float coeff = isDiv ? 
1.0 / blob_value : blob_value; + layerParams.set("scale", coeff); + layerParams.type = "Power"; } - else if (layer_type == "Squeeze") - { - CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); - DictValue axes_dict = layerParams.get("axes"); - MatShape inpShape = outShapes[node_proto.input(0)]; + else { + if (isDiv) + divide(1.0, blob, blob); + layerParams.blobs.push_back(blob); + layerParams.type = "Scale"; + } + } + else if (!haveVariables) + { + Mat inp0 = getBlob(node_proto, 0); + Mat inp1 = getBlob(node_proto, 1); - std::vector maskedAxes(inpShape.size(), false); - for (int i = 0; i < axes_dict.size(); ++i) + if (inp0.size != inp1.size && (inp0.total() != 1 || inp1.total() != 1)) + CV_Error_(Error::StsNotImplemented, ("Different shapes case is not supported with constant inputs: %s", layer_type.c_str())); + + if (inp0.total() == 1 && inp1.total() == 1 && inp0.dims != inp1.dims) + { + if (inp0.dims < inp1.dims) { - int axis = axes_dict.getIntValue(i); - CV_CheckLE(axis, static_cast(inpShape.size()), "Squeeze axis"); - maskedAxes[axis] = inpShape[axis] == 1; + inp0 = inp0.reshape(1, inp1.dims, inp1.size); + inp0.dims = inp1.dims; } - MatShape outShape; - for (int i = 0; i < inpShape.size(); ++i) + else { - if (!maskedAxes[i]) - outShape.push_back(inpShape[i]); + inp1 = inp1.reshape(1, inp0.dims, inp0.size); + inp1.dims = inp0.dims; } - if (outShape.size() != inpShape.size()) + } + + Mat out; + if (inp0.total() != inp1.total()) + { + if (inp0.total() == 1) { - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); - if (hasDynamicShapes) - { - std::vector dynamicAxes; - std::vector inputIndices; - for (int index = 0; index < inpShape.size(); ++index) - { - if (!maskedAxes[index]) - inputIndices.push_back(index); - } - for (int index = 0; index < outShape.size(); ++index) - dynamicAxes.push_back(index); - layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); 
- layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); - } + float inp0_value = inp0.ptr()[0]; + float coeff = isDiv ? 1.0 / inp0_value : inp0_value; + multiply(inp1, coeff, out); } else - layerParams.type = "Identity"; - - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) { - Mat inp = getBlob(node_proto, 0); - Mat out = inp.reshape(1, outShape); - out.dims = outShape.size(); // to workaround dims == 1 - addConstant(layerParams.name, out); - return; + float inp1_value = inp1.ptr()[0]; + float coeff = isDiv ? 1.0 / inp1_value : inp1_value; + multiply(inp0, coeff, out); } + } - else if (layer_type == "Flatten") + else { - CV_CheckEQ(node_proto.input_size(), 1, ""); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - Mat input = getBlob(node_proto, 0); - int axis = normalize_axis(layerParams.get("axis", 1), input.dims); - - std::vector out_size(&input.size[0], &input.size[0] + axis); - out_size.push_back(input.total(axis)); - Mat output = input.reshape(1, out_size); - addConstant(layerParams.name, output); - return; - } + out = isDiv ? inp0 / inp1 : inp0.mul(inp1); } - else if (layer_type == "Unsqueeze") + + if (inp0.dims == 1 && inp1.dims == 1) + out.dims = 1; // to workaround dims == 1 + addConstant(layerParams.name, out); + return; + } + else if (outShapes[node_proto.input(0)] == outShapes[node_proto.input(1)]) + { + layerParams.type = "Eltwise"; + layerParams.set("operation", isDiv ? "div" : "prod"); + } + else + { + // Scale layer allocate output with the first input shape + if (total(outShapes[node_proto.input(0)]) < total(outShapes[node_proto.input(1)])) { - CV_Assert(node_proto.input_size() == 1); - DictValue axes = layerParams.get("axes"); - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - // Constant input. 
- Mat input = getBlob(node_proto, 0); + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(1)); + proto.add_input(node_proto.input(0)); + proto.add_output(layerParams.name); + node_proto = proto; + } - std::vector dims; - for (int j = 0; j < input.dims; j++) { - dims.push_back(input.size[j]); - } - CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size()); - for (int j = 0; j < axes.size(); j++) { - dims.insert(dims.begin() + axes.getIntValue(j), 1); - } + if (isDiv) + { + LayerParams powerParams; + powerParams.name = layerParams.name + "/inv"; + powerParams.type = "Power"; + powerParams.set("power", -1); - Mat out = input.reshape(0, dims); - addConstant(layerParams.name, out); - return; - } + int id; + //Create Power layer + if (DNN_DIAGNOSTICS_RUN) + id = utilNet.addLayer(powerParams.name, powerParams.type, powerParams); + else + id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); + //Connect to input + IterLayerId_t layerId = layer_id.find(node_proto.input(1)); + CV_Assert(layerId != layer_id.end()); + if (DNN_DIAGNOSTICS_RUN) + utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + else + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + //Add shape + layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); + outShapes[powerParams.name] = outShapes[node_proto.input(1)]; - // Variable input. 
- if (axes.size() != 1) - CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze"); + //Replace input to Power + node_proto.set_input(1, powerParams.name); + } + layerParams.type = "Scale"; + } + addLayer(layerParams, node_proto); +} - MatShape inpShape = outShapes[node_proto.input(0)]; - int axis = axes.getIntValue(0); - CV_Assert(0 <= axis && axis <= inpShape.size()); - std::vector outShape = inpShape; - outShape.insert(outShape.begin() + axis, 1); - layerParams.type = "Reshape"; - layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); - if (hasDynamicShapes) - { - std::vector dynamicAxes; - std::vector inputIndices; - for (int index = 0; index < outShape.size(); ++index) { - if (index != axis) - dynamicAxes.push_back(index); - } - for (int index = 0; index < inpShape.size(); ++index) - inputIndices.push_back(index); - layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); - layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); - } +void ONNXImporter::parseConv(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + CV_Assert(node_proto.input_size() >= 2); + layerParams.type = "Convolution"; + for (int j = 1; j < node_proto.input_size(); j++) { + if (constBlobs.find(node_proto.input(j)) != constBlobs.end()) + { + layerParams.blobs.push_back(getBlob(node_proto, j)); } - else if (layer_type == "Expand") + } + int outCn = layerParams.blobs.empty() ? 
outShapes[node_proto.input(1)][0] : layerParams.blobs[0].size[0]; + layerParams.set("num_output", outCn); + + // Check for asymmetric padding in Conv2D + if (layerParams.has("pad")) + { + bool asymmetricPadding = false; + DictValue pads = layerParams.get("pad"); + const int dims = pads.size() / 2; + for (int i = 0; i < dims; ++i) { - CV_CheckEQ(node_proto.input_size(), 2, ""); - const std::string& input0 = node_proto.input(0); - const std::string& input1 = node_proto.input(1); - Mat newShapeMat = getBlob(input1); - MatShape targetShape(newShapeMat.ptr(), newShapeMat.ptr() + newShapeMat.total()); - - MatShape inpShape; - bool haveVariables = constBlobs.find(input0) == constBlobs.end(); - if (haveVariables) + if (pads.get(i) != pads.get(i + dims)) { - IterShape_t shapeIt = outShapes.find(input0); - CV_Assert(shapeIt != outShapes.end()); - inpShape = shapeIt->second; + asymmetricPadding = true; + break; } - else + } + if (asymmetricPadding && pads.size() == 4) // [pad_t, pad_l, pad_b, pad_r] { - inpShape = shape(getBlob(input0)); - } - - String srcName = input0; - // Unsqueeze and repeat along new axis - if (targetShape.size() == inpShape.size() + 1) + layerParams.erase("pad"); + // No paddings required for N, C axis + std::vector paddings(4, 0); + // Add paddings for H, W axis + for (int i = 0; i < dims; ++i) { - for (int i = 0; i < targetShape.size(); i++) - { - if (targetShape[i] == -1 && i < inpShape.size()) - targetShape[i] = inpShape[i]; - else if (i < inpShape.size() && targetShape[i] != inpShape[i]) - inpShape.insert(inpShape.begin() + i, 1); - } - if (haveVariables) - { - LayerParams reshapeLp; - reshapeLp.name = layerParams.name + "/reshape"; - reshapeLp.type = "Reshape"; - CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); - reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); - - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(reshapeLp.name); - addLayer(reshapeLp, proto); - srcName = 
reshapeLp.name; - } + paddings.push_back(pads.get(i)); + paddings.push_back(pads.get(dims + i)); } - CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims"); + LayerParams padLp; + padLp.name = layerParams.name + "/pad"; + padLp.type = "Padding"; + padLp.set("paddings", DictValue::arrayInt(&paddings[0], paddings.size())); - std::vector broadcast_axes; - for (int i = 0; i < targetShape.size(); i++) - { - if (targetShape[i] != inpShape[i]) - { - if (inpShape[i] == 1) - broadcast_axes.push_back(i); - else - CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i)); - } - } + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(padLp.name); - if (!haveVariables) - { - if (broadcast_axes.size() != 1) - CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input"); - - Mat input = getBlob(node_proto, 0); - input = input.reshape(0, total(inpShape, 0, broadcast_axes[0])); - Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]); - output = output.reshape(0, targetShape); - addConstant(layerParams.name, output); - return; + addLayer(padLp, proto); + node_proto.set_input(0, padLp.name); } + } + addLayer(layerParams, node_proto); +} - if (broadcast_axes.size() == 2 && - broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1) - { - LayerParams constParams; - constParams.name = layerParams.name + "/const"; - CV_Assert(layer_id.find(constParams.name) == layer_id.end()); - constParams.type = "Const"; +void ONNXImporter::parseConvTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() >= 2); + layerParams.type = "Deconvolution"; + for (int j = 1; j < node_proto.input_size(); j++) { + layerParams.blobs.push_back(getBlob(node_proto, j)); + } + layerParams.set("num_output", layerParams.blobs[0].size[1] * layerParams.get("group", 1)); + 
layerParams.set("bias_term", node_proto.input_size() == 3); - Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr(), CV_32F); - constParams.blobs.push_back(inp); + if (!layerParams.has("kernel_size")) + CV_Error(Error::StsNotImplemented, + "Required attribute 'kernel_size' is not present."); - opencv_onnx::NodeProto proto; - proto.add_output(constParams.name); - addLayer(constParams, proto); + if (layerParams.has("output_shape")) + { + const DictValue& outShape = layerParams.get("output_shape"); + DictValue strides = layerParams.get("stride"); + DictValue kernel = layerParams.get("kernel_size"); - layerParams.type = "Scale"; - layerParams.set("bias_term", false); - node_proto.set_input(0, constParams.name); - node_proto.set_input(1, srcName); - } - else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1) + String padMode; + std::vector adjust_pads; + if (layerParams.has("pad_mode")) + { + padMode = toUpperCase(layerParams.get("pad_mode")); + if (padMode != "SAME" && padMode != "VALID") + CV_Error(Error::StsError, "Unsupported padding mode " + padMode); + + for (int i = 0; i < strides.size(); i++) { - String base_name = layerParams.name + "/copy_"; - std::vector input_names; - for (int j = 0; j < targetShape[broadcast_axes[0]]; j++) - { - std::ostringstream ss; - ss << j; - LayerParams copyLP; - copyLP.name = base_name + ss.str(); - copyLP.type = "Identity"; - CV_Assert(layer_id.find(copyLP.name) == layer_id.end()); - input_names.push_back(copyLP.name); - - node_proto.set_input(0, srcName); - node_proto.set_output(0, copyLP.name); - addLayer(copyLP, node_proto); - } - node_proto.clear_input(); - for (int i = 0; i < input_names.size(); i++) - { - node_proto.add_input(input_names[i]); - } - layerParams.set("axis", broadcast_axes[0]); - layerParams.type = "Concat"; - node_proto.set_output(0, layerParams.name); + int sz = outShape.get(2 + i); + int stride = strides.get(i); + adjust_pads.push_back(padMode == "SAME"? 
(sz - 1) % stride : + (sz - kernel.get(i)) % stride); } - else - CV_Error(Error::StsNotImplemented, "Unsupported Expand op"); + layerParams.set("adj", DictValue::arrayInt(&adjust_pads[0], adjust_pads.size())); } - else if (layer_type == "Reshape") - { - CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape")); + } + else if (layerParams.has("output_padding")) + { + replaceLayerParam(layerParams, "output_padding", "adj"); + } + addLayer(layerParams, node_proto); +} - if (node_proto.input_size() == 2) { - Mat blob = getBlob(node_proto, 1); - CV_Assert(blob.type() == CV_32SC1); +void ONNXImporter::parseTranspose(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Permute"; + replaceLayerParam(layerParams, "perm", "order"); - layerParams.set("dim", DictValue::arrayInt( - blob.ptr(), blob.total() )); + CV_Assert(node_proto.input_size() == 1); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + std::vector inputs(1, getBlob(node_proto, 0)), transposed; + runLayer(layerParams, inputs, transposed); + CV_Assert(transposed.size() == 1); + addConstant(layerParams.name, transposed[0]); + return; + } + addLayer(layerParams, node_proto); +} - if (layer_id.find(node_proto.input(0)) == layer_id.end()) { - std::vector inputs(1, getBlob(node_proto, 0)), outputs; - runLayer(layerParams, inputs, outputs); - addConstant(layerParams.name, outputs[0]); - return; - } - } - else { - DictValue shape = layerParams.get("shape"); - std::vector dim; - for (int j = 0; j < shape.size(); j++) { - dim.push_back(shape.getIntValue(j)); - } +void ONNXImporter::parseSqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert_N(node_proto.input_size() == 1, layerParams.has("axes")); + DictValue axes_dict = layerParams.get("axes"); + MatShape inpShape = outShapes[node_proto.input(0)]; - if (layer_id.find(node_proto.input(0)) == layer_id.end()) { - Mat input = getBlob(node_proto, 0); - Mat out = 
input.reshape(0, dim); - addConstant(layerParams.name, out); - return; - } - replaceLayerParam(layerParams, "shape", "dim"); - } - } - else if (layer_type == "Pad") + std::vector maskedAxes(inpShape.size(), false); + for (int i = 0; i < axes_dict.size(); ++i) + { + int axis = axes_dict.getIntValue(i); + CV_CheckLE(axis, static_cast(inpShape.size()), "Squeeze axis"); + maskedAxes[axis] = inpShape[axis] == 1; + } + MatShape outShape; + for (int i = 0; i < inpShape.size(); ++i) + { + if (!maskedAxes[i]) + outShape.push_back(inpShape[i]); + } + if (outShape.size() != inpShape.size()) + { + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + if (hasDynamicShapes) { - layerParams.type = "Padding"; - replaceLayerParam(layerParams, "mode", "type"); - if (node_proto.input_size() == 3 || node_proto.input_size() == 2) + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < inpShape.size(); ++index) { - // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN. - // We need to shuffle it to begin0, end0, begin1, end1, ... 
- Mat paddings = getBlob(node_proto, 1).reshape(1, 2); - paddings = paddings.t(); - layerParams.set("paddings", DictValue::arrayInt(paddings.ptr(), paddings.total())); - - if (node_proto.input_size() == 3) - { - Mat value = getBlob(node_proto, 2); - layerParams.set("value", value.ptr()[0]); - } + if (!maskedAxes[index]) + inputIndices.push_back(index); } + for (int index = 0; index < outShape.size(); ++index) + dynamicAxes.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); } - else if (layer_type == "Shape") - { - CV_Assert(node_proto.input_size() == 1); - IterShape_t shapeIt = outShapes.find(node_proto.input(0)); - CV_Assert(shapeIt != outShapes.end()); - const MatShape& inpShape = shapeIt->second; + } + else + layerParams.type = "Identity"; - Mat shapeMat(inpShape.size(), 1, CV_32S); - for (int j = 0; j < inpShape.size(); ++j) - shapeMat.at(j) = inpShape[j]; - shapeMat.dims = 1; + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat inp = getBlob(node_proto, 0); + Mat out = inp.reshape(1, outShape); + out.dims = outShape.size(); // to workaround dims == 1 + addConstant(layerParams.name, out); + return; + } + addLayer(layerParams, node_proto); +} - addConstant(layerParams.name, shapeMat); - return; +void ONNXImporter::parseFlatten(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_CheckEQ(node_proto.input_size(), 1, ""); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat input = getBlob(node_proto, 0); + int axis = normalize_axis(layerParams.get("axis", 1), input.dims); + + std::vector out_size(&input.size[0], &input.size[0] + axis); + out_size.push_back(input.total(axis)); + Mat output = input.reshape(1, out_size); + addConstant(layerParams.name, output); + return; + } + addLayer(layerParams, node_proto); +} + +void 
ONNXImporter::parseUnsqueeze(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 1); + DictValue axes = layerParams.get("axes"); + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + // Constant input. + Mat input = getBlob(node_proto, 0); + + std::vector dims; + for (int j = 0; j < input.dims; j++) { + dims.push_back(input.size[j]); + } + CV_Assert(axes.getIntValue(axes.size()-1) <= dims.size()); + for (int j = 0; j < axes.size(); j++) { + dims.insert(dims.begin() + axes.getIntValue(j), 1); + } + + Mat out = input.reshape(0, dims); + addConstant(layerParams.name, out); + return; + } + + // Variable input. + if (axes.size() != 1) + CV_Error(Error::StsNotImplemented, "Multidimensional unsqueeze"); + + MatShape inpShape = outShapes[node_proto.input(0)]; + int axis = axes.getIntValue(0); + CV_Assert(0 <= axis && axis <= inpShape.size()); + std::vector outShape = inpShape; + outShape.insert(outShape.begin() + axis, 1); + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&outShape[0], outShape.size())); + if (hasDynamicShapes) + { + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < outShape.size(); ++index) { + if (index != axis) + dynamicAxes.push_back(index); + } + for (int index = 0; index < inpShape.size(); ++index) + inputIndices.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); + } + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseExpand(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + CV_CheckEQ(node_proto.input_size(), 2, ""); + const std::string& input0 = node_proto.input(0); + const std::string& input1 = node_proto.input(1); + Mat newShapeMat = getBlob(input1); + MatShape 
targetShape(newShapeMat.ptr(), newShapeMat.ptr() + newShapeMat.total()); + + MatShape inpShape; + bool haveVariables = constBlobs.find(input0) == constBlobs.end(); + if (haveVariables) + { + IterShape_t shapeIt = outShapes.find(input0); + CV_Assert(shapeIt != outShapes.end()); + inpShape = shapeIt->second; + } + else + { + inpShape = shape(getBlob(input0)); + } + + String srcName = input0; + // Unsqueeze and repeat along new axis + if (targetShape.size() == inpShape.size() + 1) + { + for (int i = 0; i < targetShape.size(); i++) + { + if (targetShape[i] == -1 && i < inpShape.size()) + targetShape[i] = inpShape[i]; + else if (i < inpShape.size() && targetShape[i] != inpShape[i]) + inpShape.insert(inpShape.begin() + i, 1); } - else if (layer_type == "Cast") + if (haveVariables) { - if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) - { - Mat blob = getBlob(node_proto, 0); - int type; - switch (layerParams.get("to")) - { - case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break; - case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break; - case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break; - case opencv_onnx::TensorProto_DataType_FLOAT16: type = CV_16S; break; - case opencv_onnx::TensorProto_DataType_INT8: - case opencv_onnx::TensorProto_DataType_INT16: - case opencv_onnx::TensorProto_DataType_INT32: - case opencv_onnx::TensorProto_DataType_INT64: type = CV_32S; break; - default: type = blob.type(); - } - Mat dst; - blob.convertTo(dst, type); - dst.dims = blob.dims; - addConstant(layerParams.name, dst); - return; - } - else - layerParams.type = "Identity"; + LayerParams reshapeLp; + reshapeLp.name = layerParams.name + "/reshape"; + reshapeLp.type = "Reshape"; + CV_Assert(layer_id.find(reshapeLp.name) == layer_id.end()); + reshapeLp.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); + + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(reshapeLp.name); + 
addLayer(reshapeLp, proto); + srcName = reshapeLp.name; } - else if (layer_type == "ConstantOfShape" || layer_type == "ConstantFill") + } + CV_CheckEQ(inpShape.size(), targetShape.size(), "Unsupported Expand op with different dims"); + + std::vector broadcast_axes; + for (int i = 0; i < targetShape.size(); i++) + { + if (targetShape[i] != inpShape[i]) { - int depth = CV_32F; - float fill_value; - if (!layerParams.blobs.empty()) - { - CV_Assert(!layerParams.has("value")); - depth = layerParams.blobs[0].depth(); - Mat floats; - layerParams.blobs[0].convertTo(floats, CV_32F); - fill_value = floats.at(0, 0); - } + if (inpShape[i] == 1) + broadcast_axes.push_back(i); else - fill_value = layerParams.get("value", 0); + CV_Error(Error::StsError, format("Could not be broadcast by axis: %d", i)); + } + } - MatShape inpShape = getBlob(node_proto, 0); - for (int i = 0; i < inpShape.size(); i++) - CV_CheckGT(inpShape[i], 0, ""); - Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value)); - addConstant(layerParams.name, tensor); - return; + if (!haveVariables) + { + if (broadcast_axes.size() != 1) + CV_Error(Error::StsNotImplemented, "Expand op doesn't support multiple axes for constant input"); + + Mat input = getBlob(node_proto, 0); + input = input.reshape(0, total(inpShape, 0, broadcast_axes[0])); + Mat output = cv::repeat(input, 1, targetShape[broadcast_axes[0]]); + output = output.reshape(0, targetShape); + addConstant(layerParams.name, output); + return; + } + + if (broadcast_axes.size() == 2 && + broadcast_axes[0] == broadcast_axes[1] - 1 && broadcast_axes[1] == inpShape.size() - 1) + { + LayerParams constParams; + constParams.name = layerParams.name + "/const"; + CV_Assert(layer_id.find(constParams.name) == layer_id.end()); + constParams.type = "Const"; + + Mat inp = Mat::ones(newShapeMat.total(), newShapeMat.ptr(), CV_32F); + constParams.blobs.push_back(inp); + + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(constParams, 
proto); + + layerParams.type = "Scale"; + layerParams.set("bias_term", false); + node_proto.set_input(0, constParams.name); + node_proto.set_input(1, srcName); + } + else if (broadcast_axes.size() == 1 && broadcast_axes[0] <= 1) + { + String base_name = layerParams.name + "/copy_"; + std::vector input_names; + for (int j = 0; j < targetShape[broadcast_axes[0]]; j++) + { + std::ostringstream ss; + ss << j; + LayerParams copyLP; + copyLP.name = base_name + ss.str(); + copyLP.type = "Identity"; + CV_Assert(layer_id.find(copyLP.name) == layer_id.end()); + input_names.push_back(copyLP.name); + + node_proto.set_input(0, srcName); + node_proto.set_output(0, copyLP.name); + addLayer(copyLP, node_proto); } - else if (layer_type == "Gather") + node_proto.clear_input(); + for (int i = 0; i < input_names.size(); i++) { - CV_Assert(node_proto.input_size() == 2); - Mat indexMat = getBlob(node_proto, 1); - CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1); - int index = indexMat.at(0); - int axis = layerParams.get("axis", 0); + node_proto.add_input(input_names[i]); + } + layerParams.set("axis", broadcast_axes[0]); + layerParams.type = "Concat"; + node_proto.set_output(0, layerParams.name); + } + else + CV_Error(Error::StsNotImplemented, "Unsupported Expand op"); + addLayer(layerParams, node_proto); +} - if ((constBlobs.find(node_proto.input(0)) != constBlobs.end())) - { - Mat input = getBlob(node_proto, 0); - Mat out; - std::vector ranges(input.dims, Range::all()); - ranges[axis] = Range(index, index + 1); - - out = input(ranges); - MatShape outShape = shape(out); - if (outShape.size() > 1) - { - outShape.erase(outShape.begin() + axis); - out.reshape(0, outShape); - } else { - out.dims = 1; - } - addConstant(layerParams.name, out); - return; - } - else - { - IterShape_t shapeIt = outShapes.find(node_proto.input(0)); - CV_Assert(shapeIt != outShapes.end()); - MatShape inpShape = shapeIt->second; - - LayerParams sliceLp; - sliceLp.type = "Slice"; - sliceLp.name = 
inpShape.size() > 1 ? layerParams.name + "/slice" : layerParams.name; - std::vector begin(inpShape.size(), 0); - std::vector end(inpShape.size(), -1); - begin[axis] = index; - end[axis] = index + 1; - - cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size()); - cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size()); - sliceLp.set("begin", paramBegin); - sliceLp.set("end", paramEnd); - sliceLp.set("has_dynamic_shapes", hasDynamicShapes); - - if (inpShape.size() > 1) - { - opencv_onnx::NodeProto proto; - proto.add_input(node_proto.input(0)); - proto.add_output(sliceLp.name); - addLayer(sliceLp, proto); - - inpShape.erase(inpShape.begin() + axis); - layerParams.type = "Reshape"; - layerParams.set("axis", 0); - layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); - if (hasDynamicShapes) - { - std::vector dynamicAxes; - std::vector inputIndices; - for (int index = 0; index < inpShape.size(); ++index) - dynamicAxes.push_back(index); - for (int index = 0; index < inpShape.size(); ++index) - inputIndices.push_back(index); - layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); - layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); - } - node_proto.set_input(0, sliceLp.name); - } - else - { - layerParams = sliceLp; - } - } +void ONNXImporter::parseReshape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 2 || layerParams.has("shape")); + + if (node_proto.input_size() == 2) { + Mat blob = getBlob(node_proto, 1); + CV_Assert(blob.type() == CV_32SC1); + + layerParams.set("dim", DictValue::arrayInt( + blob.ptr(), blob.total() )); + + if (layer_id.find(node_proto.input(0)) == layer_id.end()) { + std::vector inputs(1, getBlob(node_proto, 0)), outputs; + runLayer(layerParams, inputs, outputs); + addConstant(layerParams.name, outputs[0]); + return; + } + } + 
else { + DictValue shape = layerParams.get("shape"); + std::vector dim; + for (int j = 0; j < shape.size(); j++) { + dim.push_back(shape.getIntValue(j)); } - else if (layer_type == "Concat") - { - bool hasVariableInps = false; - for (int i = 0; i < node_proto.input_size(); ++i) - { - if (layer_id.find(node_proto.input(i)) != layer_id.end()) - { - hasVariableInps = true; - break; - } - } - if (!hasVariableInps) - { - std::vector inputs(node_proto.input_size()), concatenated; - // Due constant folding we can get inputs with different number of dimensions - // Insert the missing dimension to inputs - MatShape inputShape; - for (size_t i = 0; i < inputs.size(); ++i) - { - inputs[i] = getBlob(node_proto, i); - if (inputs[i].size.dims() > inputShape.size()) - { - inputShape = shape(inputs[i]); - } - } + if (layer_id.find(node_proto.input(0)) == layer_id.end()) { + Mat input = getBlob(node_proto, 0); + Mat out = input.reshape(0, dim); + addConstant(layerParams.name, out); + return; + } + replaceLayerParam(layerParams, "shape", "dim"); + } + addLayer(layerParams, node_proto); +} - // Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1 - int axis = layerParams.get("axis", 1); - for (size_t i = 0; i < inputs.size(); ++i) - { - MatShape targetShape = inputShape; - targetShape[axis] = shape(inputs[i])[axis]; - CV_CheckEQ(total(targetShape), total(shape(inputs[i])), ""); - inputs[i] = inputs[i].reshape(0, targetShape); - } - runLayer(layerParams, inputs, concatenated); +void ONNXImporter::parsePad(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "Padding"; + replaceLayerParam(layerParams, "mode", "type"); + if (node_proto.input_size() == 3 || node_proto.input_size() == 2) + { + // Paddings are in order begin0, begin1, .. beginN, end0, end1, ..., endN. + // We need to shuffle it to begin0, end0, begin1, end1, ... 
+ Mat paddings = getBlob(node_proto, 1).reshape(1, 2); + paddings = paddings.t(); + layerParams.set("paddings", DictValue::arrayInt(paddings.ptr(), paddings.total())); - CV_Assert(concatenated.size() == 1); - addConstant(layerParams.name, concatenated[0]); - return; - } - else - { - for (int i = 0; i < node_proto.input_size(); ++i) - { - if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) - { - LayerParams constParams; - constParams.name = node_proto.input(i); - constParams.type = "Const"; - constParams.blobs.push_back(getBlob(node_proto, i)); - - opencv_onnx::NodeProto proto; - proto.add_output(constParams.name); - addLayer(constParams, proto); - } - } - } + if (node_proto.input_size() == 3) + { + Mat value = getBlob(node_proto, 2); + layerParams.set("value", value.ptr()[0]); } - else if (layer_type == "Resize") + } + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseShape(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + CV_Assert(node_proto.input_size() == 1); + IterShape_t shapeIt = outShapes.find(node_proto.input(0)); + CV_Assert(shapeIt != outShapes.end()); + const MatShape& inpShape = shapeIt->second; + + Mat shapeMat(inpShape.size(), 1, CV_32S); + for (int j = 0; j < inpShape.size(); ++j) + shapeMat.at(j) = inpShape[j]; + shapeMat.dims = 1; + + addConstant(layerParams.name, shapeMat); +} + +void ONNXImporter::parseCast(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + if (constBlobs.find(node_proto.input(0)) != constBlobs.end()) + { + Mat blob = getBlob(node_proto, 0); + int type; + switch (layerParams.get("to")) { - for (int i = 1; i < node_proto.input_size(); i++) - CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end()); + case opencv_onnx::TensorProto_DataType_FLOAT: type = CV_32F; break; + case opencv_onnx::TensorProto_DataType_UINT8: type = CV_8U; break; + case opencv_onnx::TensorProto_DataType_UINT16: type = CV_16U; break; + case opencv_onnx::TensorProto_DataType_FLOAT16: 
type = CV_16S; break; + case opencv_onnx::TensorProto_DataType_INT8: + case opencv_onnx::TensorProto_DataType_INT16: + case opencv_onnx::TensorProto_DataType_INT32: + case opencv_onnx::TensorProto_DataType_INT64: type = CV_32S; break; + default: type = blob.type(); + } + Mat dst; + blob.convertTo(dst, type); + dst.dims = blob.dims; + addConstant(layerParams.name, dst); + return; + } + else + layerParams.type = "Identity"; + addLayer(layerParams, node_proto); +} - if (layerParams.has("coordinate_transformation_mode")) - { - String interp_mode = layerParams.get("coordinate_transformation_mode"); - CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); +void ONNXImporter::parseConstantFill(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + int depth = CV_32F; + float fill_value; + if (!layerParams.blobs.empty()) + { + CV_Assert(!layerParams.has("value")); + depth = layerParams.blobs[0].depth(); + Mat floats; + layerParams.blobs[0].convertTo(floats, CV_32F); + fill_value = floats.at(0, 0); + } + else + fill_value = layerParams.get("value", 0); - layerParams.set("align_corners", interp_mode == "align_corners"); - if (layerParams.get("mode") == "linear") - { - layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? - "opencv_linear" : "bilinear"); - } - } - if (layerParams.get("mode") == "linear" && framework_name == "pytorch") - layerParams.set("mode", "opencv_linear"); + MatShape inpShape = getBlob(node_proto, 0); + for (int i = 0; i < inpShape.size(); i++) + CV_CheckGT(inpShape[i], 0, ""); + Mat tensor(inpShape.size(), &inpShape[0], depth, Scalar(fill_value)); + addConstant(layerParams.name, tensor); +} - // input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes] - int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1 - : node_proto.input_size() > 2 ? 
2 : 1; +void ONNXImporter::parseGather(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + CV_Assert(node_proto.input_size() == 2); + Mat indexMat = getBlob(node_proto, 1); + CV_Assert_N(indexMat.type() == CV_32S, indexMat.total() == 1); + int index = indexMat.at(0); + int axis = layerParams.get("axis", 0); + + if ((constBlobs.find(node_proto.input(0)) != constBlobs.end())) + { + Mat input = getBlob(node_proto, 0); + Mat out; + std::vector ranges(input.dims, Range::all()); + ranges[axis] = Range(index, index + 1); + + out = input(ranges); + MatShape outShape = shape(out); + if (outShape.size() > 1) + { + outShape.erase(outShape.begin() + axis); + out.reshape(0, outShape); + } else { + out.dims = 1; + } + addConstant(layerParams.name, out); + return; + } + else + { + IterShape_t shapeIt = outShapes.find(node_proto.input(0)); + CV_Assert(shapeIt != outShapes.end()); + MatShape inpShape = shapeIt->second; + + LayerParams sliceLp; + sliceLp.type = "Slice"; + sliceLp.name = inpShape.size() > 1 ? 
layerParams.name + "/slice" : layerParams.name; + std::vector begin(inpShape.size(), 0); + std::vector end(inpShape.size(), -1); + begin[axis] = index; + end[axis] = index + 1; + + cv::dnn::DictValue paramBegin = cv::dnn::DictValue::arrayInt(begin.data(), begin.size()); + cv::dnn::DictValue paramEnd = cv::dnn::DictValue::arrayInt(end.data(), end.size()); + sliceLp.set("begin", paramBegin); + sliceLp.set("end", paramEnd); + sliceLp.set("has_dynamic_shapes", hasDynamicShapes); + + if (inpShape.size() > 1) + { + opencv_onnx::NodeProto proto; + proto.add_input(node_proto.input(0)); + proto.add_output(sliceLp.name); + addLayer(sliceLp, proto); - Mat scales = getBlob(node_proto, foundScaleId); - if (scales.total() == 4) - { - layerParams.set("zoom_factor_y", scales.at(2)); - layerParams.set("zoom_factor_x", scales.at(3)); - } - else + inpShape.erase(inpShape.begin() + axis); + layerParams.type = "Reshape"; + layerParams.set("axis", 0); + layerParams.set("dim", DictValue::arrayInt(&inpShape[0], inpShape.size())); + if (hasDynamicShapes) { - const std::string& inputLast = node_proto.input(node_proto.input_size() - 1); - if (constBlobs.find(inputLast) != constBlobs.end()) - { - Mat shapes = getBlob(inputLast); - CV_CheckEQ(shapes.size[0], 4, ""); - CV_CheckEQ(shapes.size[1], 1, ""); - CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, ""); - if (shapes.depth() == CV_32F) - shapes.convertTo(shapes, CV_32S); - layerParams.set("width", shapes.at(3)); - layerParams.set("height", shapes.at(2)); - } + std::vector dynamicAxes; + std::vector inputIndices; + for (int index = 0; index < inpShape.size(); ++index) + dynamicAxes.push_back(index); + for (int index = 0; index < inpShape.size(); ++index) + inputIndices.push_back(index); + layerParams.set("dynamic_axes", DictValue::arrayInt(dynamicAxes.data(), dynamicAxes.size())); + layerParams.set("input_indices", DictValue::arrayInt(inputIndices.data(), inputIndices.size())); } - 
replaceLayerParam(layerParams, "mode", "interpolation"); + node_proto.set_input(0, sliceLp.name); } - else if (layer_type == "Upsample") + else { - //fused from Resize Subgraph - if (layerParams.has("coordinate_transformation_mode")) - { - String interp_mode = layerParams.get("coordinate_transformation_mode"); - CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); + layerParams = sliceLp; + } + } + addLayer(layerParams, node_proto); +} - layerParams.set("align_corners", interp_mode == "align_corners"); - if (layerParams.get("mode") == "linear") - { - layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? - "opencv_linear" : "bilinear"); - } - } - if (layerParams.get("mode") == "linear" && framework_name == "pytorch") - layerParams.set("mode", "opencv_linear"); +void ONNXImporter::parseConcat(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + bool hasVariableInps = false; + for (int i = 0; i < node_proto.input_size(); ++i) + { + if (layer_id.find(node_proto.input(i)) != layer_id.end()) + { + hasVariableInps = true; + break; + } + } - layerParams.type = "Resize"; - if (layerParams.has("scales")) - { - // Pytorch layer - DictValue scales = layerParams.get("scales"); - CV_Assert(scales.size() == 4); - layerParams.set("zoom_factor_y", scales.getIntValue(2)); - layerParams.set("zoom_factor_x", scales.getIntValue(3)); - } - else if (layerParams.has("height_scale") && layerParams.has("width_scale")) + if (!hasVariableInps) + { + std::vector inputs(node_proto.input_size()), concatenated; + // Due constant folding we can get inputs with different number of dimensions + // Insert the missing dimension to inputs + MatShape inputShape; + for (size_t i = 0; i < inputs.size(); ++i) + { + inputs[i] = getBlob(node_proto, i); + if (inputs[i].size.dims() > inputShape.size()) { - // Caffe2 layer - replaceLayerParam(layerParams, "height_scale", "zoom_factor_y"); - replaceLayerParam(layerParams, "width_scale", 
"zoom_factor_x"); + inputShape = shape(inputs[i]); } - else - { - // scales as input - const std::string& input1 = node_proto.input(1); - if (constBlobs.find(input1) != constBlobs.end()) - { - Mat scales = getBlob(input1); - CV_Assert(scales.total() == 4); - layerParams.set("zoom_factor_y", scales.at(2)); - layerParams.set("zoom_factor_x", scales.at(3)); - } - } - replaceLayerParam(layerParams, "mode", "interpolation"); } - else if (layer_type == "SoftMax" || layer_type == "LogSoftmax") + + // Concat-1 has default value for axis is 1: https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Concat-1 + int axis = layerParams.get("axis", 1); + for (size_t i = 0; i < inputs.size(); ++i) { - layerParams.type = "Softmax"; - layerParams.set("log_softmax", layer_type == "LogSoftmax"); + MatShape targetShape = inputShape; + targetShape[axis] = shape(inputs[i])[axis]; + CV_CheckEQ(total(targetShape), total(shape(inputs[i])), ""); + inputs[i] = inputs[i].reshape(0, targetShape); } - else if (layer_type == "DetectionOutput") + runLayer(layerParams, inputs, concatenated); + + CV_Assert(concatenated.size() == 1); + addConstant(layerParams.name, concatenated[0]); + return; + } + else + { + for (int i = 0; i < node_proto.input_size(); ++i) { - CV_CheckEQ(node_proto.input_size(), 3, ""); - if (constBlobs.find(node_proto.input(2)) != constBlobs.end()) + if (constBlobs.find(node_proto.input(i)) != constBlobs.end()) { - Mat priors = getBlob(node_proto, 2); - LayerParams constParams; - constParams.name = layerParams.name + "/priors"; + constParams.name = node_proto.input(i); constParams.type = "Const"; - constParams.blobs.push_back(priors); + constParams.blobs.push_back(getBlob(node_proto, i)); - opencv_onnx::NodeProto priorsProto; - priorsProto.add_output(constParams.name); - addLayer(constParams, priorsProto); - - node_proto.set_input(2, constParams.name); - } - } - else - { - for (int j = 0; j < node_proto.input_size(); j++) { - if (layer_id.find(node_proto.input(j)) == 
layer_id.end()) - layerParams.blobs.push_back(getBlob(node_proto, j)); + opencv_onnx::NodeProto proto; + proto.add_output(constParams.name); + addLayer(constParams, proto); } } - addLayer(layerParams, node_proto); } - catch (const cv::Exception& e) + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseResize(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + for (int i = 1; i < node_proto.input_size(); i++) + CV_Assert(layer_id.find(node_proto.input(i)) == layer_id.end()); + + if (layerParams.has("coordinate_transformation_mode")) { - if (DNN_DIAGNOSTICS_RUN) - { - CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " - << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) << "\n" << e.msg - ); - auto registeredLayers = getLayerFactoryImpl(); - if (registeredLayers.find(layerParams.type) != registeredLayers.end()) - { - try - { - Ptr layer = LayerFactory::createLayerInstance(layerParams.type, layerParams); - } - catch (const std::exception& e) - { - CV_LOG_ERROR(NULL, "DNN/ONNX: Layer of type " << layerParams.type << "(" << layer_type << ") cannot be created with parameters " << layerParams << ". Error: " << e.what() - ); - } - } - } - else + String interp_mode = layerParams.get("coordinate_transformation_mode"); + CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); + + layerParams.set("align_corners", interp_mode == "align_corners"); + if (layerParams.get("mode") == "linear") { - CV_LOG_ERROR(NULL, "DNN/ONNX: ERROR during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " - << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) - ); + layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? 
+ "opencv_linear" : "bilinear"); } - for (int i = 0; i < node_proto.input_size(); i++) + } + if (layerParams.get("mode") == "linear" && framework_name == "pytorch") + layerParams.set("mode", "opencv_linear"); + + // input = [X, scales], [X, roi, scales] or [x, roi, scales, sizes] + int foundScaleId = hasDynamicShapes ? node_proto.input_size() - 1 + : node_proto.input_size() > 2 ? 2 : 1; + + Mat scales = getBlob(node_proto, foundScaleId); + if (scales.total() == 4) + { + layerParams.set("zoom_factor_y", scales.at(2)); + layerParams.set("zoom_factor_x", scales.at(3)); + } + else + { + const std::string& inputLast = node_proto.input(node_proto.input_size() - 1); + if (constBlobs.find(inputLast) != constBlobs.end()) { - CV_LOG_INFO(NULL, " Input[" << i << "] = '" << node_proto.input(i) << "'"); + Mat shapes = getBlob(inputLast); + CV_CheckEQ(shapes.size[0], 4, ""); + CV_CheckEQ(shapes.size[1], 1, ""); + CV_CheckDepth(shapes.depth(), shapes.depth() == CV_32S || shapes.depth() == CV_32F, ""); + if (shapes.depth() == CV_32F) + shapes.convertTo(shapes, CV_32S); + layerParams.set("width", shapes.at(3)); + layerParams.set("height", shapes.at(2)); } - for (int i = 0; i < node_proto.output_size(); i++) + } + replaceLayerParam(layerParams, "mode", "interpolation"); + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseUpsample(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + //fused from Resize Subgraph + if (layerParams.has("coordinate_transformation_mode")) + { + String interp_mode = layerParams.get("coordinate_transformation_mode"); + CV_Assert_N(interp_mode != "tf_crop_and_resize", interp_mode != "tf_half_pixel_for_nn"); + + layerParams.set("align_corners", interp_mode == "align_corners"); + if (layerParams.get("mode") == "linear") { - CV_LOG_INFO(NULL, " Output[" << i << "] = '" << node_proto.output(i) << "'"); + layerParams.set("mode", interp_mode == "pytorch_half_pixel" ? 
+ "opencv_linear" : "bilinear"); } - if (DNN_DIAGNOSTICS_RUN) + } + if (layerParams.get("mode") == "linear" && framework_name == "pytorch") + layerParams.set("mode", "opencv_linear"); + + layerParams.type = "Resize"; + if (layerParams.has("scales")) + { + // Pytorch layer + DictValue scales = layerParams.get("scales"); + CV_Assert(scales.size() == 4); + layerParams.set("zoom_factor_y", scales.getIntValue(2)); + layerParams.set("zoom_factor_x", scales.getIntValue(3)); + } + else if (layerParams.has("height_scale") && layerParams.has("width_scale")) + { + // Caffe2 layer + replaceLayerParam(layerParams, "height_scale", "zoom_factor_y"); + replaceLayerParam(layerParams, "width_scale", "zoom_factor_x"); + } + else + { + // scales as input + const std::string& input1 = node_proto.input(1); + if (constBlobs.find(input1) != constBlobs.end()) { - for (int i = 0; i < node_proto.output_size(); ++i) - { - layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(0, i))); - outShapes[node_proto.output(i)] = outShapes[node_proto.input(0)]; - } + Mat scales = getBlob(input1); + CV_Assert(scales.total() == 4); + layerParams.set("zoom_factor_y", scales.at(2)); + layerParams.set("zoom_factor_x", scales.at(3)); } - else - CV_Error(Error::StsError, cv::format("Node [%s]:(%s) parse error: %s", layer_type.c_str(), name.c_str(), e.what())); } + replaceLayerParam(layerParams, "mode", "interpolation"); + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseSoftMax(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + const std::string& layer_type = node_proto.op_type(); + layerParams.type = "Softmax"; + layerParams.set("log_softmax", layer_type == "LogSoftmax"); + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseDetectionOutput(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + CV_CheckEQ(node_proto.input_size(), 3, ""); + if (constBlobs.find(node_proto.input(2)) 
!= constBlobs.end()) + { + Mat priors = getBlob(node_proto, 2); + + LayerParams constParams; + constParams.name = layerParams.name + "/priors"; + constParams.type = "Const"; + constParams.blobs.push_back(priors); + + opencv_onnx::NodeProto priorsProto; + priorsProto.add_output(constParams.name); + addLayer(constParams, priorsProto); + + node_proto.set_input(2, constParams.name); + } + addLayer(layerParams, node_proto); +} + +void ONNXImporter::parseCustom(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + for (int j = 0; j < node_proto.input_size(); j++) { + if (layer_id.find(node_proto.input(j)) == layer_id.end()) + layerParams.blobs.push_back(getBlob(node_proto, j)); + } + addLayer(layerParams, node_proto); +} + +const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap() +{ + DispatchMap dispatch; + + dispatch["MaxPool"] = &ONNXImporter::parseMaxPool; + dispatch["AveragePool"] = &ONNXImporter::parseAveragePool; + dispatch["GlobalAveragePool"] = dispatch["GlobalMaxPool"] = dispatch["ReduceMean"] = dispatch["ReduceSum"] = + dispatch["ReduceMax"] = &ONNXImporter::parseReduce; + dispatch["Slice"] = &ONNXImporter::parseSlice; + dispatch["Split"] = &ONNXImporter::parseSplit; + dispatch["Add"] = dispatch["Sum"] = dispatch["Sub"] = &ONNXImporter::parseBias; + dispatch["Pow"] = &ONNXImporter::parsePow; + dispatch["Max"] = &ONNXImporter::parseMax; + dispatch["Neg"] = &ONNXImporter::parseNeg; + dispatch["Constant"] = &ONNXImporter::parseConstant; + dispatch["LSTM"] = &ONNXImporter::parseLSTM; + dispatch["ImageScaler"] = &ONNXImporter::parseImageScaler; + dispatch["Clip"] = &ONNXImporter::parseClip; + dispatch["LeakyRelu"] = &ONNXImporter::parseLeakyRelu; + dispatch["Relu"] = &ONNXImporter::parseRelu; + dispatch["Elu"] = &ONNXImporter::parseElu; + dispatch["Tanh"] = &ONNXImporter::parseTanh; + dispatch["PRelu"] = &ONNXImporter::parsePRelu; + dispatch["LRN"] = &ONNXImporter::parseLRN; + dispatch["InstanceNormalization"] = 
&ONNXImporter::parseInstanceNormalization; + dispatch["BatchNormalization"] = &ONNXImporter::parseBatchNormalization; + dispatch["Gemm"] = &ONNXImporter::parseGemm; + dispatch["MatMul"] = &ONNXImporter::parseMatMul; + dispatch["Mul"] = dispatch["Div"] = &ONNXImporter::parseMul; + dispatch["Conv"] = &ONNXImporter::parseConv; + dispatch["ConvTranspose"] = &ONNXImporter::parseConvTranspose; + dispatch["Transpose"] = &ONNXImporter::parseTranspose; + dispatch["Squeeze"] = &ONNXImporter::parseSqueeze; + dispatch["Flatten"] = &ONNXImporter::parseFlatten; + dispatch["Unsqueeze"] = &ONNXImporter::parseUnsqueeze; + dispatch["Expand"] = &ONNXImporter::parseExpand; + dispatch["Reshape"] = &ONNXImporter::parseReshape; + dispatch["Pad"] = &ONNXImporter::parsePad; + dispatch["Shape"] = &ONNXImporter::parseShape; + dispatch["Cast"] = &ONNXImporter::parseCast; + dispatch["ConstantFill"] = dispatch["ConstantOfShape"] = &ONNXImporter::parseConstantFill; + dispatch["Gather"] = &ONNXImporter::parseGather; + dispatch["Concat"] = &ONNXImporter::parseConcat; + dispatch["Resize"] = &ONNXImporter::parseResize; + dispatch["Upsample"] = &ONNXImporter::parseUpsample; + dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax; + dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput; + dispatch["Custom"] = &ONNXImporter::parseCustom; + + return dispatch; } Net readNetFromONNX(const String& onnxFile) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index 4bd09adda043..38a55d12b9a9 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -2894,7 +2894,7 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer) DispatchMap::const_iterator iter = dispatch.find(type); if (iter != dispatch.end()) { - ((*this).*(iter->second))(net, layer, layerParams); + CALL_MEMBER_FN(*this, iter->second)(net, layer, layerParams); } else if (!DNN_DIAGNOSTICS_RUN || 
!layerHandler->handleMissing(layer)) { From b468468e7e24d78387fe1cf75ec7b46ae2319457 Mon Sep 17 00:00:00 2001 From: Mahendra Kumar <66687425+kumar-mahendra@users.noreply.github.com> Date: Fri, 23 Jul 2021 16:44:48 +0530 Subject: [PATCH 086/128] Closing brackets missing In line 94, closing brackets are added which were missing . --- .../py_contour_features/py_contour_features.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.markdown b/doc/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.markdown index ecd0e97af2de..d0e6c4b2ac11 100644 --- a/doc/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.markdown +++ b/doc/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.markdown @@ -91,7 +91,7 @@ convexity defects, which are the local maximum deviations of hull from contours. There is a little bit things to discuss about it its syntax: @code{.py} -hull = cv.convexHull(points[, hull[, clockwise[, returnPoints]] +hull = cv.convexHull(points[, hull[, clockwise[, returnPoints]]]) @endcode Arguments details: From 531ea5b3a21a5632789e592cfd71e9515849523b Mon Sep 17 00:00:00 2001 From: Vadim Levin Date: Sun, 1 Aug 2021 11:59:16 +0300 Subject: [PATCH 087/128] fix: convert arguments names that are keywords reserved by Python --- .../include/opencv2/core/bindings_utils.hpp | 6 ++++++ modules/python/src2/gen2.py | 12 ++++++++++++ modules/python/test/test_misc.py | 17 +++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/modules/core/include/opencv2/core/bindings_utils.hpp b/modules/core/include/opencv2/core/bindings_utils.hpp index 98a4a2b78539..a3f83d9c2cf5 100644 --- a/modules/core/include/opencv2/core/bindings_utils.hpp +++ b/modules/core/include/opencv2/core/bindings_utils.hpp @@ -116,6 +116,12 @@ String dumpRange(const Range& argument) } } +CV_WRAP static inline +String 
testReservedKeywordConversion(int positional_argument, int lambda = 2, int from = 3) +{ + return format("arg=%d, lambda=%d, from=%d", positional_argument, lambda, from); +} + CV_WRAP static inline void testRaiseGeneralException() { diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index bccf0d27677b..51566fc24844 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -212,6 +212,16 @@ class FormatStrings: "c_string": ArgTypeInfo("char*", FormatStrings.string, '(char*)""') } +# Set of reserved keywords for Python. Can be acquired via the following call +# $ python -c "help('keywords')" +# Keywords that are reserved in C/C++ are excluded because they can not be +# used as variables identifiers +python_reserved_keywords = { + "True", "None", "False", "as", "assert", "def", "del", "elif", "except", "exec", + "finally", "from", "global", "import", "in", "is", "lambda", "nonlocal", + "pass", "print", "raise", "with", "yield" +} + def normalize_class_name(name): return re.sub(r"^cv\.", "", name).replace(".", "_") @@ -369,6 +379,8 @@ class ArgInfo(object): def __init__(self, arg_tuple): self.tp = handle_ptr(arg_tuple[0]) self.name = arg_tuple[1] + if self.name in python_reserved_keywords: + self.name += "_" self.defval = arg_tuple[2] self.isarray = False self.arraylen = 0 diff --git a/modules/python/test/test_misc.py b/modules/python/test/test_misc.py index 121e86a64c62..4d435a46b668 100644 --- a/modules/python/test/test_misc.py +++ b/modules/python/test/test_misc.py @@ -463,6 +463,23 @@ def test_parse_to_range_not_convertible(self): with self.assertRaises((TypeError), msg=get_no_exception_msg(not_convertible)): _ = cv.utils.dumpRange(not_convertible) + def test_reserved_keywords_are_transformed(self): + default_lambda_value = 2 + default_from_value = 3 + format_str = "arg={}, lambda={}, from={}" + self.assertEqual( + cv.utils.testReservedKeywordConversion(20), format_str.format(20, default_lambda_value, default_from_value) + ) + 
self.assertEqual( + cv.utils.testReservedKeywordConversion(10, lambda_=10), format_str.format(10, 10, default_from_value) + ) + self.assertEqual( + cv.utils.testReservedKeywordConversion(10, from_=10), format_str.format(10, default_lambda_value, 10) + ) + self.assertEqual( + cv.utils.testReservedKeywordConversion(20, lambda_=-4, from_=12), format_str.format(20, -4, 12) + ) + class SamplesFindFile(NewOpenCVTests): From ba0cea6826a90689d4d58efcd28d1aa53f3478cc Mon Sep 17 00:00:00 2001 From: Dmitry Budnikov Date: Mon, 2 Aug 2021 13:58:18 +0300 Subject: [PATCH 088/128] Merge pull request #20474 from dbudniko:dbudniko/mtcnn_graph_without_resizes Remove explicit PNet resizes from MTCNN graph * remove PNet resizes * address comment from Ruslan --- modules/gapi/samples/face_detection_mtcnn.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/modules/gapi/samples/face_detection_mtcnn.cpp b/modules/gapi/samples/face_detection_mtcnn.cpp index ad7c20c1175c..50cb666a90f5 100644 --- a/modules/gapi/samples/face_detection_mtcnn.cpp +++ b/modules/gapi/samples/face_detection_mtcnn.cpp @@ -591,16 +591,13 @@ int main(int argc, char* argv[]) { cv::GMat in_originalRGB = cv::gapi::BGR2RGB(in_original); cv::GMat in_transposedRGB = cv::gapi::transpose(in_originalRGB); cv::GOpaque in_sz = cv::gapi::streaming::size(in_original); - cv::GMat in_resized[MAX_PYRAMID_LEVELS]; cv::GMat regressions[MAX_PYRAMID_LEVELS]; cv::GMat scores[MAX_PYRAMID_LEVELS]; cv::GArray nms_p_faces[MAX_PYRAMID_LEVELS]; cv::GArray total_faces[MAX_PYRAMID_LEVELS]; //The very first PNet pyramid layer to init total_faces[0] - cv::Size currentSize = cv::Size(level_size[0].height, level_size[0].width); - in_resized[0] = cv::gapi::resize(in_transposedRGB, currentSize); - std::tie(regressions[0], scores[0]) = run_mtcnn_p(in_resized[0], get_pnet_level_name(level_size[0])); + std::tie(regressions[0], scores[0]) = run_mtcnn_p(in_transposedRGB, get_pnet_level_name(level_size[0])); cv::GArray faces0 = 
custom::BuildFaces::on(scores[0], regressions[0], static_cast(scales[0]), conf_thresh_p); cv::GArray final_p_faces_for_bb2squares = custom::ApplyRegression::on(faces0, true); cv::GArray final_faces_pnet0 = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares); @@ -608,9 +605,7 @@ int main(int argc, char* argv[]) { //The rest PNet pyramid layers to accumlate all layers result in total_faces[PYRAMID_LEVELS - 1]] for (int i = 1; i < pyramid_levels; ++i) { - currentSize = cv::Size(level_size[i].height, level_size[i].width); - in_resized[i] = cv::gapi::resize(in_transposedRGB, currentSize); - std::tie(regressions[i], scores[i]) = run_mtcnn_p(in_resized[i], get_pnet_level_name(level_size[i])); + std::tie(regressions[i], scores[i]) = run_mtcnn_p(in_transposedRGB, get_pnet_level_name(level_size[i])); cv::GArray faces = custom::BuildFaces::on(scores[i], regressions[i], static_cast(scales[i]), conf_thresh_p); cv::GArray final_p_faces_for_bb2squares_i = custom::ApplyRegression::on(faces, true); cv::GArray final_faces_pnet_i = custom::BBoxesToSquares::on(final_p_faces_for_bb2squares_i); From 2d8ce500fa4961a9122346e51c2d6d421f0b8cef Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Mon, 2 Aug 2021 18:41:53 +0300 Subject: [PATCH 089/128] add note about using version 3.4 to traincascade documentation --- doc/tutorials/objdetect/traincascade.markdown | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/tutorials/objdetect/traincascade.markdown b/doc/tutorials/objdetect/traincascade.markdown index d7acdca5539f..d78de2ec9a6f 100644 --- a/doc/tutorials/objdetect/traincascade.markdown +++ b/doc/tutorials/objdetect/traincascade.markdown @@ -10,6 +10,8 @@ Working with a boosted cascade of weak classifiers includes two major stages: th To support this tutorial, several official OpenCV applications will be used: [opencv_createsamples](https://github.com/opencv/opencv/tree/3.4/apps/createsamples), [opencv_annotation](https://github.com/opencv/opencv/tree/3.4/apps/annotation), 
[opencv_traincascade](https://github.com/opencv/opencv/tree/3.4/apps/traincascade) and [opencv_visualisation](https://github.com/opencv/opencv/tree/3.4/apps/visualisation). +@note Createsamples and traincascade are disabled since OpenCV 4.0. Consider using these apps for training from 3.4 branch for Cascade Classifier. Model format is the same between 3.4 and 4.x. + ### Important notes - If you come across any tutorial mentioning the old opencv_haartraining tool (which is deprecated and still using the OpenCV1.x interface), then please ignore that tutorial and stick to the opencv_traincascade tool. This tool is a newer version, written in C++ in accordance to the OpenCV 2.x and OpenCV 3.x API. The opencv_traincascade supports both HAAR like wavelet features @cite Viola01 and LBP (Local Binary Patterns) @cite Liao2007 features. LBP features yield integer precision in contrast to HAAR features, yielding floating point precision, so both training and detection with LBP are several times faster then with HAAR features. Regarding the LBP and HAAR detection quality, it mainly depends on the training data used and the training parameters selected. It's possible to train a LBP-based classifier that will provide almost the same quality as HAAR-based one, within a percentage of the training time. 
From d773691848f6850eb1b21b2b6031a5f64c321efa Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Wed, 4 Aug 2021 15:37:20 +0300 Subject: [PATCH 090/128] add note about hierarchy in python version --- modules/imgproc/include/opencv2/imgproc.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index a1cfff991da8..5e66b14e3b2b 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -3951,6 +3951,7 @@ hierarchy[i][0] , hierarchy[i][1] , hierarchy[i][2] , and hierarchy[i][3] are se in contours of the next and previous contours at the same hierarchical level, the first child contour and the parent contour, respectively. If for the contour i there are no next, previous, parent, or nested contours, the corresponding elements of hierarchy[i] will be negative. +@note In Python, hierarchy is nested inside a top level array. Use hierarchy[0][i] to access hierarchical elements of i-th contour. @param mode Contour retrieval mode, see #RetrievalModes @param method Contour approximation method, see #ContourApproximationModes @param offset Optional offset by which every contour point is shifted. 
This is useful if the From cefa60260105aaa33d54daddacd3c60a1ce93e57 Mon Sep 17 00:00:00 2001 From: Duong Dac Date: Wed, 4 Aug 2021 15:17:25 +0200 Subject: [PATCH 091/128] Avoid adding false UMat/GpuMat declaration --- modules/python/src2/hdr_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index ac3f383dc85b..749a9033eed6 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -954,7 +954,8 @@ def parse(self, hname, wmode=True): has_mat = len(list(filter(lambda x: x[0] in {"Mat", "vector_Mat"}, args))) > 0 if has_mat: _, _, _, gpumat_decl = self.parse_stmt(stmt, token, mat="cuda::GpuMat", docstring=docstring) - decls.append(gpumat_decl) + if gpumat_decl != decl: + decls.append(gpumat_decl) if self._generate_umat_decls: # If function takes as one of arguments Mat or vector - we want to create the @@ -963,7 +964,8 @@ def parse(self, hname, wmode=True): has_mat = len(list(filter(lambda x: x[0] in {"Mat", "vector_Mat"}, args))) > 0 if has_mat: _, _, _, umat_decl = self.parse_stmt(stmt, token, mat="UMat", docstring=docstring) - decls.append(umat_decl) + if umat_decl != decl: + decls.append(umat_decl) docstring = "" if stmt_type == "namespace": From 6a2e559222e3950d7323e5703b3f5857f59dafce Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 4 Aug 2021 20:04:03 +0300 Subject: [PATCH 092/128] Fixed memory access issue in v_rshr_pack_store intrinsic on RISC-V RVV. 
--- modules/core/include/opencv2/core/hal/intrin_rvv.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp index 51433cdbae72..5b3319378103 100644 --- a/modules/core/include/opencv2/core/hal/intrin_rvv.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_rvv.hpp @@ -1967,7 +1967,7 @@ void v_rshr_pack_store(_Tp* ptr, const _wTpvec& a) \ _wTp arr[_Tpvec::nlanes] = {0}; \ v_store(arr, a); \ v_store(arr + _wTpvec::nlanes, _wTpvec(vmv_v_x_##suffix##m1(0, hvl))); \ - v_store(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr, vl), n, vl))); \ + vse##hwidth##_v_##hsuffix##m1(ptr, _Tpvec(rshr(vle##width##_v_##suffix##m2(arr, vl), n, vl)), hvl); \ } OPENCV_HAL_IMPL_RVV_PACK(v_uint8x16, uchar, v_uint16x8, ushort, 8, 16, u8, u16, vnclipu_wx_u8m1, vnclipu_wx_u8m1, 8, 16) From d5f34cf34c7088ed69e3e3a922435743e3d95d81 Mon Sep 17 00:00:00 2001 From: Abhinav Sharma <63901956+abhi-bhatra@users.noreply.github.com> Date: Wed, 4 Aug 2021 22:58:59 +0530 Subject: [PATCH 093/128] Merge pull request #20493 from abhi-bhatra:patch-1 Fix Broken Link * Update README.md Fix broken link * Update Readme.txt * Update readme.txt Add missing link * Update readme.txt Fix names --- 3rdparty/readme.txt | 6 ++++-- platforms/winrt/readme.txt | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/3rdparty/readme.txt b/3rdparty/readme.txt index 3b961782b097..4e4a6ba0a653 100644 --- a/3rdparty/readme.txt +++ b/3rdparty/readme.txt @@ -31,7 +31,7 @@ libpng Portable Network Graphics library. libtiff Tag Image File Format (TIFF) Software Copyright (c) 1988-1997 Sam Leffler Copyright (c) 1991-1997 Silicon Graphics, Inc. - See libtiff home page http://www.remotesensing.org/libtiff/ + See libtiff home page http://www.libtiff.org/ for details and links to the source code WITH_TIFF CMake option must be ON to add libtiff & zlib support to imgcodecs. 
@@ -51,7 +51,9 @@ jasper JasPer is a collection of software Copyright (c) 1999-2000 The University of British Columbia Copyright (c) 2001-2003 Michael David Adams - The JasPer license can be found in libjasper. + See JasPer official GitHub repository + https://github.com/jasper-software/jasper.git + for details and links to source code ------------------------------------------------------------------------------------ openexr OpenEXR is a high dynamic-range (HDR) image file format developed by Industrial Light & Magic for use in computer imaging applications. diff --git a/platforms/winrt/readme.txt b/platforms/winrt/readme.txt index 2fb4ce1f54c4..2d1b4e6c30c1 100644 --- a/platforms/winrt/readme.txt +++ b/platforms/winrt/readme.txt @@ -13,7 +13,7 @@ Install Visual Studio 2013 Community Edition http://go.microsoft.com/?linkid=9863608 Install Visual Studio Express 2012 for Windows Desktop - http://www.microsoft.com/en-us/download/details.aspx?id=34673 + https://devblogs.microsoft.com/visualstudio/visual-studio-express-2012-for-windows-desktop-is-here/ @@ -156,4 +156,4 @@ Manual build cmake -G "Visual Studio 12 2013 Win64" -DCMAKE_SYSTEM_NAME:String=WindowsStore -DCMAKE_SYSTEM_VERSION:String=8.1 -DCMAKE_VS_EFFECTIVE_PLATFORMS:String=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install\WS\8.1\x64\ .. -Return to "Running tests for Windows Store", list item 4. \ No newline at end of file +Return to "Running tests for Windows Store", list item 4. 
From 5af09e73f250946e3e5447099a23c1e07131b1fe Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Thu, 5 Aug 2021 11:59:24 +0300 Subject: [PATCH 094/128] Merge pull request #20490 from TolyaTalamanov:at/support-fp16-input-precision [G-API] Support FP16 input precision for IE backend * Support fp16 input precision IE backend * Add support to wrapIE --- modules/gapi/src/backends/ie/giebackend.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index 77a6515f8530..fc9fc502ef6d 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -108,6 +108,7 @@ inline IE::Precision toIE(int depth) { case CV_8U: return IE::Precision::U8; case CV_32S: return IE::Precision::I32; case CV_32F: return IE::Precision::FP32; + case CV_16F: return IE::Precision::FP16; default: GAPI_Assert(false && "IE. Unsupported data type"); } return IE::Precision::UNSPECIFIED; @@ -161,6 +162,7 @@ inline IE::Blob::Ptr wrapIE(const cv::Mat &mat, cv::gapi::ie::TraitAs hint) { HANDLE(8U, uint8_t); HANDLE(32F, float); HANDLE(32S, int); + HANDLE(16F, int16_t); #undef HANDLE default: GAPI_Assert(false && "IE. 
Unsupported data type"); } From 65134c793bc7c80cf63f7a4d32158fa4e17ffed3 Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Thu, 5 Aug 2021 14:21:34 +0300 Subject: [PATCH 095/128] add links in python and js contours tutorial --- .../js_contour_features/js_contour_features.markdown | 3 +++ .../js_contour_properties/js_contour_properties.markdown | 3 +++ .../js_contours/js_contours_begin/js_contours_begin.markdown | 2 ++ .../js_contours_hierarchy/js_contours_hierarchy.markdown | 2 ++ .../js_contours_more_functions.markdown | 3 +++ .../py_contour_features/py_contour_features.markdown | 3 +++ .../py_contour_properties/py_contour_properties.markdown | 3 +++ .../py_contours/py_contours_begin/py_contours_begin.markdown | 2 ++ .../py_contours_hierarchy/py_contours_hierarchy.markdown | 2 ++ .../py_contours_more_functions.markdown | 4 ++++ 10 files changed, 27 insertions(+) diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contour_features/js_contour_features.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contour_features/js_contour_features.markdown index 22544b280c60..842126958731 100644 --- a/doc/js_tutorials/js_imgproc/js_contours/js_contour_features/js_contour_features.markdown +++ b/doc/js_tutorials/js_imgproc/js_contours/js_contour_features/js_contour_features.markdown @@ -1,6 +1,9 @@ Contour Features {#tutorial_js_contour_features} ================ +@prev_tutorial{tutorial_js_contours_begin} +@next_tutorial{tutorial_js_contour_properties} + Goal ---- diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contour_properties/js_contour_properties.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contour_properties/js_contour_properties.markdown index 1dbb15c4cf3e..3392283305c0 100644 --- a/doc/js_tutorials/js_imgproc/js_contours/js_contour_properties/js_contour_properties.markdown +++ b/doc/js_tutorials/js_imgproc/js_contours/js_contour_properties/js_contour_properties.markdown @@ -1,6 +1,9 @@ Contour Properties {#tutorial_js_contour_properties} 
================== +@prev_tutorial{tutorial_js_contour_features} +@next_tutorial{tutorial_js_contours_more_functions} + Goal ---- diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contours_begin/js_contours_begin.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contours_begin/js_contours_begin.markdown index 9678a7c9f05d..3caf17f873a4 100644 --- a/doc/js_tutorials/js_imgproc/js_contours/js_contours_begin/js_contours_begin.markdown +++ b/doc/js_tutorials/js_imgproc/js_contours/js_contours_begin/js_contours_begin.markdown @@ -1,6 +1,8 @@ Contours : Getting Started {#tutorial_js_contours_begin} ========================== +@next_tutorial{tutorial_js_contour_features} + Goal ---- diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contours_hierarchy/js_contours_hierarchy.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contours_hierarchy/js_contours_hierarchy.markdown index c98628e48648..c2e408a96292 100644 --- a/doc/js_tutorials/js_imgproc/js_contours/js_contours_hierarchy/js_contours_hierarchy.markdown +++ b/doc/js_tutorials/js_imgproc/js_contours/js_contours_hierarchy/js_contours_hierarchy.markdown @@ -1,6 +1,8 @@ Contours Hierarchy {#tutorial_js_contours_hierarchy} ================== +@prev_tutorial{tutorial_js_contours_more_functions} + Goal ---- diff --git a/doc/js_tutorials/js_imgproc/js_contours/js_contours_more_functions/js_contours_more_functions.markdown b/doc/js_tutorials/js_imgproc/js_contours/js_contours_more_functions/js_contours_more_functions.markdown index b75311666271..941f0c486a1f 100644 --- a/doc/js_tutorials/js_imgproc/js_contours/js_contours_more_functions/js_contours_more_functions.markdown +++ b/doc/js_tutorials/js_imgproc/js_contours/js_contours_more_functions/js_contours_more_functions.markdown @@ -1,6 +1,9 @@ Contours : More Functions {#tutorial_js_contours_more_functions} ========================= +@prev_tutorial{tutorial_js_contour_properties} +@next_tutorial{tutorial_js_contours_hierarchy} + Goal ---- diff --git 
a/doc/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.markdown b/doc/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.markdown index d0e6c4b2ac11..e8cfbd659715 100644 --- a/doc/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.markdown +++ b/doc/py_tutorials/py_imgproc/py_contours/py_contour_features/py_contour_features.markdown @@ -1,6 +1,9 @@ Contour Features {#tutorial_py_contour_features} ================ +@prev_tutorial{tutorial_py_contours_begin} +@next_tutorial{tutorial_py_contour_properties} + Goal ---- diff --git a/doc/py_tutorials/py_imgproc/py_contours/py_contour_properties/py_contour_properties.markdown b/doc/py_tutorials/py_imgproc/py_contours/py_contour_properties/py_contour_properties.markdown index 461c87034398..282f62ddf98e 100644 --- a/doc/py_tutorials/py_imgproc/py_contours/py_contour_properties/py_contour_properties.markdown +++ b/doc/py_tutorials/py_imgproc/py_contours/py_contour_properties/py_contour_properties.markdown @@ -1,6 +1,9 @@ Contour Properties {#tutorial_py_contour_properties} ================== +@prev_tutorial{tutorial_py_contour_features} +@next_tutorial{tutorial_py_contours_more_functions} + Here we will learn to extract some frequently used properties of objects like Solidity, Equivalent Diameter, Mask image, Mean Intensity etc. More features can be found at [Matlab regionprops documentation](http://www.mathworks.in/help/images/ref/regionprops.html). 
diff --git a/doc/py_tutorials/py_imgproc/py_contours/py_contours_begin/py_contours_begin.markdown b/doc/py_tutorials/py_imgproc/py_contours/py_contours_begin/py_contours_begin.markdown index c2055f75af68..74d7b252a542 100644 --- a/doc/py_tutorials/py_imgproc/py_contours/py_contours_begin/py_contours_begin.markdown +++ b/doc/py_tutorials/py_imgproc/py_contours/py_contours_begin/py_contours_begin.markdown @@ -1,6 +1,8 @@ Contours : Getting Started {#tutorial_py_contours_begin} ========================== +@next_tutorial{tutorial_py_contour_features} + Goal ---- diff --git a/doc/py_tutorials/py_imgproc/py_contours/py_contours_hierarchy/py_contours_hierarchy.markdown b/doc/py_tutorials/py_imgproc/py_contours/py_contours_hierarchy/py_contours_hierarchy.markdown index 2619ea2a7095..075e6ec81f93 100644 --- a/doc/py_tutorials/py_imgproc/py_contours/py_contours_hierarchy/py_contours_hierarchy.markdown +++ b/doc/py_tutorials/py_imgproc/py_contours/py_contours_hierarchy/py_contours_hierarchy.markdown @@ -1,6 +1,8 @@ Contours Hierarchy {#tutorial_py_contours_hierarchy} ================== +@prev_tutorial{tutorial_py_contours_more_functions} + Goal ---- diff --git a/doc/py_tutorials/py_imgproc/py_contours/py_contours_more_functions/py_contours_more_functions.markdown b/doc/py_tutorials/py_imgproc/py_contours/py_contours_more_functions/py_contours_more_functions.markdown index fb5f59bef661..397a2a63a06f 100644 --- a/doc/py_tutorials/py_imgproc/py_contours/py_contours_more_functions/py_contours_more_functions.markdown +++ b/doc/py_tutorials/py_imgproc/py_contours/py_contours_more_functions/py_contours_more_functions.markdown @@ -1,6 +1,10 @@ Contours : More Functions {#tutorial_py_contours_more_functions} ========================= +@prev_tutorial{tutorial_py_contour_properties} +@next_tutorial{tutorial_py_contours_hierarchy} + + Goal ---- From 4ff76cad2a0b204a43ec4d57aebe65aa2a616543 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 5 Aug 2021 11:42:58 +0000 Subject: 
[PATCH 096/128] cmake: fix cross-compilation problems - unexpected pkg-config module (we should not use host binary) - bump cmake_minimum_required to 3.5 in toolchain files --- modules/highgui/cmake/init.cmake | 4 +++- modules/videoio/cmake/init.cmake | 4 +++- platforms/linux/gnu.toolchain.cmake | 2 +- platforms/linux/riscv.toolchain.cmake | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/highgui/cmake/init.cmake b/modules/highgui/cmake/init.cmake index a302c4d534a6..2002ff0e9d70 100644 --- a/modules/highgui/cmake/init.cmake +++ b/modules/highgui/cmake/init.cmake @@ -25,7 +25,9 @@ endif() # Detect available dependencies # -include(FindPkgConfig) +if(NOT PROJECT_NAME STREQUAL "OpenCV") + include(FindPkgConfig) +endif() macro(add_backend backend_id cond_var) if(${cond_var}) diff --git a/modules/videoio/cmake/init.cmake b/modules/videoio/cmake/init.cmake index 68838790b8a2..af664f94df37 100644 --- a/modules/videoio/cmake/init.cmake +++ b/modules/videoio/cmake/init.cmake @@ -1,4 +1,6 @@ -include(FindPkgConfig) +if(NOT PROJECT_NAME STREQUAL "OpenCV") + include(FindPkgConfig) +endif() macro(add_backend backend_id cond_var) if(${cond_var}) diff --git a/platforms/linux/gnu.toolchain.cmake b/platforms/linux/gnu.toolchain.cmake index cba08e7fbbf4..64258e65b3f5 100644 --- a/platforms/linux/gnu.toolchain.cmake +++ b/platforms/linux/gnu.toolchain.cmake @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.5) # load settings in case of "try compile" set(TOOLCHAIN_CONFIG_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain.config.cmake") diff --git a/platforms/linux/riscv.toolchain.cmake b/platforms/linux/riscv.toolchain.cmake index 2a69d7e0048d..cea80bd9ba90 100644 --- a/platforms/linux/riscv.toolchain.cmake +++ b/platforms/linux/riscv.toolchain.cmake @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.5) if(COMMAND toolchain_save_config) return() # prevent recursive call From 
633fedaa96d1a6e48db5f0897d82fa5d2c5f4560 Mon Sep 17 00:00:00 2001 From: Julia Bareeva <34717687+JulieBar@users.noreply.github.com> Date: Thu, 5 Aug 2021 18:13:17 +0300 Subject: [PATCH 097/128] Merge pull request #20480 from JulieBar:lstm_pytest Add Python's test for LSTM layer * Add Python's test for LSTM layer * Set different test threshold for FP16 target * rename test to test_input_3d Co-authored-by: Julie Bareeva --- modules/dnn/misc/python/test/test_dnn.py | 36 ++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/modules/dnn/misc/python/test/test_dnn.py b/modules/dnn/misc/python/test/test_dnn.py index 932984f1c201..f7bfc0111940 100644 --- a/modules/dnn/misc/python/test/test_dnn.py +++ b/modules/dnn/misc/python/test/test_dnn.py @@ -62,6 +62,12 @@ def printParams(backend, target): } print('%s/%s' % (backendNames[backend], targetNames[target])) +def getDefaultThreshold(target): + if target == cv.dnn.DNN_TARGET_OPENCL_FP16 or target == cv.dnn.DNN_TARGET_MYRIAD: + return 4e-3 + else: + return 1e-5 + testdata_required = bool(os.environ.get('OPENCV_DNN_TEST_REQUIRE_TESTDATA', False)) g_dnnBackendsAndTargets = None @@ -305,5 +311,35 @@ def forward(self, inputs): cv.dnn_unregisterLayer('CropCaffe') + # check that dnn module can work with 3D tensor as input for network + def test_input_3d(self): + model = self.find_dnn_file('dnn/onnx/models/hidden_lstm.onnx') + input_file = self.find_dnn_file('dnn/onnx/data/input_hidden_lstm.npy') + output_file = self.find_dnn_file('dnn/onnx/data/output_hidden_lstm.npy') + if model is None: + raise unittest.SkipTest("Missing DNN test files (dnn/onnx/models/hidden_lstm.onnx). " + "Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") + if input_file is None or output_file is None: + raise unittest.SkipTest("Missing DNN test files (dnn/onnx/data/{input/output}_hidden_lstm.npy). 
" + "Verify OPENCV_DNN_TEST_DATA_PATH configuration parameter.") + + net = cv.dnn.readNet(model) + input = np.load(input_file) + # we have to expand the shape of input tensor because Python bindings cut 3D tensors to 2D + # it should be fixed in future. see : https://github.com/opencv/opencv/issues/19091 + # please remove `expand_dims` after that + input = np.expand_dims(input, axis=3) + gold_output = np.load(output_file) + net.setInput(input) + + for backend, target in self.dnnBackendsAndTargets: + printParams(backend, target) + + net.setPreferableBackend(backend) + net.setPreferableTarget(target) + real_output = net.forward() + + normAssert(self, real_output, gold_output, "", getDefaultThreshold(target)) + if __name__ == '__main__': NewOpenCVTests.bootstrap() From 6306bc3ddcbe3480a6d113081bebedb5745dff24 Mon Sep 17 00:00:00 2001 From: Maxim Pashchenkov Date: Fri, 6 Aug 2021 09:52:46 +0300 Subject: [PATCH 098/128] Applying of comments --- .../misc/python/samples/gaze_estimation.py | 39 +++++++------------ 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/modules/gapi/misc/python/samples/gaze_estimation.py b/modules/gapi/misc/python/samples/gaze_estimation.py index db190f67bb99..5536787e608c 100644 --- a/modules/gapi/misc/python/samples/gaze_estimation.py +++ b/modules/gapi/misc/python/samples/gaze_estimation.py @@ -3,6 +3,7 @@ import numpy as np import cv2 as cv + # ------------------------Service operations------------------------ def weight_path(model_path): """ Get path of weights based on path to IR @@ -171,11 +172,7 @@ def run(in_ys, in_ps, in_rs): Return: Arrays with heads poses """ - out_poses = [] - size = len(in_ys) - for i in range(size): - out_poses.append(np.array([in_ys[i][0], in_ps[i][0], in_rs[i][0]]).T) - return out_poses + return [np.array([ys[0], ps[0], rs[0]]).T for ys, ps, rs in zip(in_ys, in_ps, in_rs)] @cv.gapi.kernel(GParseEyes) @@ -199,20 +196,19 @@ def run(in_landm_per_face, in_face_rcs, frame_size): right_eyes = [] midpoints 
= [] lmarks = [] - num_faces = len(in_landm_per_face) surface = (0, 0, *frame_size) - for i in range(num_faces): - rect = in_face_rcs[i] - points = process_landmarks(*rect, in_landm_per_face[i]) - for p in points: - lmarks.append(p) - size = int(len(in_landm_per_face[i][0]) / 2) - - rect, midpoint_l = eye_box(lmarks[0 + i * size], lmarks[1 + i * size]) + for landm_face, rect in zip(in_landm_per_face, in_face_rcs): + points = process_landmarks(*rect, landm_face) + lmarks.extend(points) + + rect, midpoint_l = eye_box(points[0], points[1]) left_eyes.append(intersection(surface, rect)) - rect, midpoint_r = eye_box(lmarks[2 + i * size], lmarks[3 + i * size]) + + rect, midpoint_r = eye_box(points[2], points[3]) right_eyes.append(intersection(surface, rect)) - midpoints += [midpoint_l, midpoint_r] + + midpoints.append(midpoint_l) + midpoints.append(midpoint_r) return left_eyes, right_eyes, midpoints, lmarks @@ -231,14 +227,8 @@ def run(eyesl, eyesr): Return: States of left eyes and states of right eyes """ - size = len(eyesl) - out_l_st = [] - out_r_st = [] - for i in range(size): - for st in eyesl[i]: - out_l_st += [1 if st[0] < st[1] else 0] - for st in eyesr[i]: - out_r_st += [1 if st[0] < st[1] else 0] + out_l_st = [int(st) for eye_l in eyesl for st in (eye_l[:, 0] < eye_l[:, 1]).ravel()] + out_r_st = [int(st) for eye_r in eyesr for st in (eye_r[:, 0] < eye_r[:, 1]).ravel()] return out_l_st, out_r_st @@ -459,6 +449,7 @@ def run(eyesl, eyesr): # Show result cv.imshow('Gaze Estimation', oimg) + cv.waitKey(1) fps = int(1. 
/ (time.time() - start_time_cycle)) frames += 1 From 24de676a6480aae151b1a2c80a483ec3ef98ab2a Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 6 Aug 2021 13:26:49 +0300 Subject: [PATCH 099/128] Merge pull request #20476 from TolyaTalamanov:at/support-unet-camvid-0001-segm-sample [G-API] Support postprocessing for not argmaxed outputs * Support postprocessing for not argmaxed outputs * Fix typo * Add assert * Remove static cast * CamelCast to snake_case * Fix windows warning * Add static_cast to uint8_t * Add const to variables --- .../gapi/samples/semantic_segmentation.cpp | 83 ++++++++++++++----- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/modules/gapi/samples/semantic_segmentation.cpp b/modules/gapi/samples/semantic_segmentation.cpp index 0a6e7231c4f2..4cdb14cc5c1a 100644 --- a/modules/gapi/samples/semantic_segmentation.cpp +++ b/modules/gapi/samples/semantic_segmentation.cpp @@ -47,6 +47,53 @@ std::string get_weights_path(const std::string &model_path) { CV_Assert(ext == ".xml"); return model_path.substr(0u, sz - EXT_LEN) + ".bin"; } + +void classesToColors(const cv::Mat &out_blob, + cv::Mat &mask_img) { + const int H = out_blob.size[0]; + const int W = out_blob.size[1]; + + mask_img.create(H, W, CV_8UC3); + GAPI_Assert(out_blob.type() == CV_8UC1); + const uint8_t* const classes = out_blob.ptr(); + + for (int rowId = 0; rowId < H; ++rowId) { + for (int colId = 0; colId < W; ++colId) { + uint8_t class_id = classes[rowId * W + colId]; + mask_img.at(rowId, colId) = + class_id < colors.size() + ? 
colors[class_id] + : cv::Vec3b{0, 0, 0}; // NB: sample supports 20 classes + } + } +} + +void probsToClasses(const cv::Mat& probs, cv::Mat& classes) { + const int C = probs.size[1]; + const int H = probs.size[2]; + const int W = probs.size[3]; + + classes.create(H, W, CV_8UC1); + GAPI_Assert(probs.depth() == CV_32F); + float* out_p = reinterpret_cast(probs.data); + uint8_t* classes_p = reinterpret_cast(classes.data); + + for (int h = 0; h < H; ++h) { + for (int w = 0; w < W; ++w) { + double max = 0; + int class_id = 0; + for (int c = 0; c < C; ++c) { + int idx = c * H * W + h * W + w; + if (out_p[idx] > max) { + max = out_p[idx]; + class_id = c; + } + } + classes_p[h * W + w] = static_cast(class_id); + } + } +} + } // anonymous namespace namespace custom { @@ -57,25 +104,21 @@ G_API_OP(PostProcessing, , "sample.custom.post_pro }; GAPI_OCV_KERNEL(OCVPostProcessing, PostProcessing) { - static void run(const cv::Mat &in, const cv::Mat &detected_classes, cv::Mat &out) { - // This kernel constructs output image by class table and colors vector - - // The semantic-segmentation-adas-0001 output a blob with the shape - // [B, C=1, H=1024, W=2048] - const int outHeight = 1024; - const int outWidth = 2048; - cv::Mat maskImg(outHeight, outWidth, CV_8UC3); - const int* const classes = detected_classes.ptr(); - for (int rowId = 0; rowId < outHeight; ++rowId) { - for (int colId = 0; colId < outWidth; ++colId) { - size_t classId = static_cast(classes[rowId * outWidth + colId]); - maskImg.at(rowId, colId) = - classId < colors.size() - ? colors[classId] - : cv::Vec3b{0, 0, 0}; // sample detects 20 classes - } + static void run(const cv::Mat &in, const cv::Mat &out_blob, cv::Mat &out) { + cv::Mat classes; + // NB: If output has more than single plane, it contains probabilities + // otherwise class id. 
+ if (out_blob.size[1] > 1) { + probsToClasses(out_blob, classes); + } else { + out_blob.convertTo(classes, CV_8UC1); + classes = classes.reshape(1, out_blob.size[2]); } - cv::resize(maskImg, out, in.size()); + + cv::Mat mask_img; + classesToColors(classes, mask_img); + + cv::resize(mask_img, out, in.size()); const float blending = 0.3f; out = in * blending + out * (1 - blending); } @@ -104,8 +147,8 @@ int main(int argc, char *argv[]) { // Now build the graph cv::GMat in; - cv::GMat detected_classes = cv::gapi::infer(in); - cv::GMat out = custom::PostProcessing::on(in, detected_classes); + cv::GMat out_blob = cv::gapi::infer(in); + cv::GMat out = custom::PostProcessing::on(in, out_blob); cv::GStreamingCompiled pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)) .compileStreaming(cv::compile_args(kernels, networks)); From ba539eb9aad7571361657ea7e00a7c3efcc2f9ba Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Fri, 6 Aug 2021 14:41:56 +0300 Subject: [PATCH 100/128] Merge pull request #20508 from TolyaTalamanov:at/expand-python-pyparams [G-API] Expand PyParams to support constInput * Wrap constInputs to python * Wrap cfgNumRequests * Fix alignment * Move macro to the line above --- .../gapi/include/opencv2/gapi/infer/bindings_ie.hpp | 11 +++++++++++ modules/gapi/src/backends/ie/bindings_ie.cpp | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/modules/gapi/include/opencv2/gapi/infer/bindings_ie.hpp b/modules/gapi/include/opencv2/gapi/infer/bindings_ie.hpp index fdd4128b1ae2..92ef2101a179 100644 --- a/modules/gapi/include/opencv2/gapi/infer/bindings_ie.hpp +++ b/modules/gapi/include/opencv2/gapi/infer/bindings_ie.hpp @@ -22,17 +22,28 @@ namespace ie { // This class can be marked as SIMPLE, because it's implemented as pimpl class GAPI_EXPORTS_W_SIMPLE PyParams { public: + GAPI_WRAP PyParams() = default; + GAPI_WRAP PyParams(const std::string &tag, const std::string &model, const std::string &weights, const std::string &device); + GAPI_WRAP 
PyParams(const std::string &tag, const std::string &model, const std::string &device); + GAPI_WRAP + PyParams& constInput(const std::string &layer_name, + const cv::Mat &data, + TraitAs hint = TraitAs::TENSOR); + + GAPI_WRAP + PyParams& cfgNumRequests(size_t nireq); + GBackend backend() const; std::string tag() const; cv::util::any params() const; diff --git a/modules/gapi/src/backends/ie/bindings_ie.cpp b/modules/gapi/src/backends/ie/bindings_ie.cpp index 35191d7bcb53..5874fe137864 100644 --- a/modules/gapi/src/backends/ie/bindings_ie.cpp +++ b/modules/gapi/src/backends/ie/bindings_ie.cpp @@ -37,3 +37,15 @@ cv::gapi::ie::PyParams cv::gapi::ie::params(const std::string &tag, const std::string &device) { return {tag, model, device}; } + +cv::gapi::ie::PyParams& cv::gapi::ie::PyParams::constInput(const std::string &layer_name, + const cv::Mat &data, + TraitAs hint) { + m_priv->constInput(layer_name, data, hint); + return *this; +} + +cv::gapi::ie::PyParams& cv::gapi::ie::PyParams::cfgNumRequests(size_t nireq) { + m_priv->cfgNumRequests(nireq); + return *this; +} From e1cafa383431a65e4ca0493a21668444d10a14b3 Mon Sep 17 00:00:00 2001 From: Julia Bareeva <34717687+JulieBar@users.noreply.github.com> Date: Sat, 7 Aug 2021 10:07:37 +0300 Subject: [PATCH 101/128] Merge pull request #20442 from JulieBar:gru_layer * Add initialization and inference for GRU layer * fix issues found on review --- .../dnn/include/opencv2/dnn/all_layers.hpp | 34 +++ modules/dnn/src/init.cpp | 1 + modules/dnn/src/layers/recurrent_layers.cpp | 209 ++++++++++++++++++ modules/dnn/src/onnx/onnx_importer.cpp | 43 ++++ modules/dnn/test/test_layers.cpp | 29 +++ modules/dnn/test/test_onnx_importer.cpp | 10 + 6 files changed, 326 insertions(+) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 24d35646df17..9c96c5a5f187 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -165,6 
+165,40 @@ CV__DNN_INLINE_NS_BEGIN int outputNameToIndex(const String& outputName) CV_OVERRIDE; }; + /** @brief GRU recurrent one-layer + * + * Accepts input sequence and computes the final hidden state for each element in the batch. + * + * - input[0] containing the features of the input sequence. + * input[0] should have shape [`T`, `N`, `data_dims`] where `T` is sequence length, `N` is batch size, `data_dims` is input size + * - output would have shape [`T`, `N`, `D` * `hidden_size`] where `D = 2` if layer is bidirectional otherwise `D = 1` + * + * Depends on the following attributes: + * - hidden_size - Number of neurons in the hidden layer + * - direction - RNN could be bidirectional or forward + * + * The final hidden state @f$ h_t @f$ computes by the following formulas: + * + @f{eqnarray*}{ + r_t = \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ + z_t = \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\ + n_t = \tanh(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{(t-1)}+ b_{hn})) \\ + h_t = (1 - z_t) \odot n_t + z_t \odot h_{(t-1)} \\ + @f} + * Where @f$x_t@f$ is current input, @f$h_{(t-1)}@f$ is previous or initial hidden state. + * + * @f$W_{x?}@f$, @f$W_{h?}@f$ and @f$b_{?}@f$ are learned weights represented as matrices: + * @f$W_{x?} \in R^{N_h \times N_x}@f$, @f$W_{h?} \in R^{N_h \times N_h}@f$, @f$b_? \in R^{N_h}@f$. + * + * @f$\odot@f$ is per-element multiply operation. + */ + class CV_EXPORTS GRULayer : public Layer + { + public: + /** Creates instance of GRU layer */ + static Ptr create(const LayerParams& params); + }; + /** @brief Classical recurrent layer Accepts two inputs @f$x_t@f$ and @f$h_{t-1}@f$ and compute two outputs @f$o_t@f$ and @f$h_t@f$. 
diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 698168817f5f..ebd887999b83 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -139,6 +139,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(FlowWarp, FlowWarpLayer); CV_DNN_REGISTER_LAYER_CLASS(LSTM, LSTMLayer); + CV_DNN_REGISTER_LAYER_CLASS(GRU, GRULayer); } CV__DNN_INLINE_NS_END diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index feae35dac01b..cb2ffb8cc99c 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -563,5 +563,214 @@ CV_EXPORTS_W Ptr RNNLayer::create(const LayerParams& params) return Ptr(new RNNLayerImpl(params)); } +class GRULayerImpl CV_FINAL : public GRULayer +{ + int numTimeStamps, numSamples; + bool allocated; + + MatShape outTailShape; //shape of single output sample + MatShape outTsShape; //shape of N output samples + bool bidirectional; // If true, produces both forward and reversed directions along time axis + +public: + + GRULayerImpl(const LayerParams& params) : numTimeStamps(0), numSamples(0) + { + setParamsFrom(params); + + bidirectional = params.get("bidirectional", false); + if (!blobs.empty()) + { + CV_Assert(blobs.size() >= 3); + + blobs[2] = blobs[2].reshape(1, 1); + + const Mat& Wh = blobs[0]; + const Mat& Wx = blobs[1]; + const Mat& bias = blobs[2]; + const Mat& hInternal = blobs[3]; + CV_CheckEQ(Wh.dims, 2, ""); + CV_CheckEQ(Wx.dims, 2, ""); + CV_CheckEQ(Wh.rows, Wx.rows, ""); + CV_CheckEQ(Wh.rows, (1 + static_cast(bidirectional)) * 3 * Wh.cols, ""); + CV_CheckEQ(Wh.rows * 2, (int)bias.total(), ""); + CV_CheckEQ(hInternal.cols, Wh.cols, ""); + CV_CheckTypeEQ(Wh.type(), Wx.type(), ""); + CV_CheckTypeEQ(Wx.type(), bias.type(), ""); + } + + allocated = false; + outTailShape.clear(); + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const 
CV_OVERRIDE + { + CV_Assert(inputs.size() == 1); + const MatShape& inp0 = inputs[0]; + + const Mat &Wh = blobs[0], &Wx = blobs[1]; + int _numOut = Wh.size[1]; + int _numInp = Wx.size[1]; + MatShape outTailShape_(outTailShape), outResShape; + + if (!outTailShape_.empty()) + CV_Assert(total(outTailShape_) == _numOut); + else + outTailShape_.assign(1, _numOut); + + int _numSamples; + CV_Assert(inp0.size() >= 2 && total(inp0, 2) == _numInp); + _numSamples = inp0[1]; + outResShape.push_back(inp0[0]); + + outResShape.push_back(_numSamples); + outResShape.insert(outResShape.end(), outTailShape_.begin(), outTailShape_.end()); + outResShape.back() *= (1 + static_cast(bidirectional)); + + outputs.assign(1, outResShape); + + internals.assign(1, shape(_numSamples, _numOut)); // hInternal + internals.push_back(shape(_numSamples, 1)); // dummyOnes + internals.push_back(shape(_numSamples, 2 * _numOut)); // gates + internals.push_back(shape(_numSamples, 2 * _numOut)); // gates_b + internals.push_back(shape(_numSamples, 1 * _numOut)); // h_linear + internals.push_back(shape(_numSamples, _numOut)); // ones + + return false; + } + + void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE + { + std::vector input; + inputs_arr.getMatVector(input); + + CV_Assert(input.size() == 1); + const Mat& inp0 = input[0]; + + Mat &Wh = blobs[0], &Wx = blobs[1]; + int numOut = Wh.size[1]; + int numInp = Wx.size[1]; + + if (!outTailShape.empty()) + CV_Assert(total(outTailShape) == numOut); + else + outTailShape.assign(1, numOut); + + CV_Assert(inp0.dims >= 2 && (int)inp0.total(2) == numInp); + numTimeStamps = inp0.size[0]; + numSamples = inp0.size[1]; + + outTsShape.clear(); + outTsShape.push_back(numSamples); + outTsShape.insert(outTsShape.end(), outTailShape.begin(), outTailShape.end()); + outTsShape.back() *= (1 + static_cast(bidirectional)); + + allocated = true; + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays 
internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + if (inputs_arr.depth() == CV_16S) + { + forward_fallback(inputs_arr, outputs_arr, internals_arr); + return; + } + + std::vector input, output, internals; + inputs_arr.getMatVector(input); + outputs_arr.getMatVector(output); + internals_arr.getMatVector(internals); + + const int numDirs = 1 + static_cast(bidirectional); + for (int i = 0; i < numDirs; ++i) + { + const Mat &Wh = blobs[0].rowRange(i * blobs[0].rows / numDirs, (i + 1) * blobs[0].rows / numDirs); + const Mat &Wx = blobs[1].rowRange(i * blobs[1].rows / numDirs, (i + 1) * blobs[1].rows / numDirs); + const Mat &bias = blobs[2].colRange(i * blobs[2].cols / numDirs, (i + 1) * blobs[2].cols / numDirs); + const Mat &h_0 = blobs[3].rowRange(i * blobs[3].rows / numDirs, (i + 1) * blobs[3].rows / numDirs); + + const Mat &bx = bias.colRange(0, bias.cols / 2); + const Mat &bh = bias.colRange(bias.cols / 2, bias.cols); + + Mat hInternal = internals[0], dummyOnes = internals[1], gates = internals[2], + b_rz = internals[3], n_t = internals[4], ones = internals[5]; + h_0.copyTo(hInternal); + dummyOnes.setTo(1.); + ones.setTo(1.); + + int numOut = Wh.size[1]; + const Mat& wx_rz = Wx.rowRange(0, 2 * numOut); + const Mat& wh_rz = Wh.rowRange(0, 2 * numOut); + b_rz = bx.colRange(0, 2 * numOut) + bh.colRange(0, 2 * numOut); + const Mat& wx_n = Wx.rowRange(2 * numOut, 3 * numOut); + const Mat& wh_n = Wh.rowRange(2 * numOut, 3 * numOut); + const Mat& b_in = bx.colRange(2 * numOut, 3 * numOut); + const Mat& b_hn = bh.colRange(2 * numOut, 3 * numOut); + + int numSamplesTotal = numTimeStamps * numSamples; + Mat xTs = input[0].reshape(1, numSamplesTotal); + + Mat hOutTs = output[0].reshape(1, numSamplesTotal); + hOutTs = hOutTs.colRange(i * hOutTs.cols / numDirs, (i + 1) * hOutTs.cols / numDirs); + Mat cOutTs = Mat(); + + int tsStart, tsEnd, tsInc; + if (i == 1) { + tsStart = numTimeStamps - 1; + tsEnd = -1; + tsInc = -1; + 
} + else { + tsStart = 0; + tsEnd = numTimeStamps; + tsInc = 1; + } + for (int ts = tsStart; ts != tsEnd; ts += tsInc) + { + Range curRowRange(ts * numSamples, (ts + 1) * numSamples); + Mat xCurr = xTs.rowRange(curRowRange); + + // calculate r_t = sigmoid(x * Wx_r + h_(t-1) * Wh_r + b_r) + // calculate z_t = sigmoid(x * Wx_z + h_(t-1) * Wh_z + b_z) + gemm(xCurr, wx_rz, 1, gates, 0, gates, GEMM_2_T); // x * Wx_rz + gemm(hInternal, wh_rz, 1, gates, 1, gates, GEMM_2_T); // + h_(t-1) * Wh_rz + gemm(dummyOnes, b_rz, 1, gates, 1, gates); // + b_rz + sigmoid(gates, gates); // sigmoid() + + Mat z = gates.colRange(0, gates.cols / 2); + Mat r = gates.colRange(gates.cols / 2, gates.cols); + + // calculate n_t = tanh(r (*) (h_(t-1) * Wh_n + b_hn) + x * Wx_n + b_in) + gemm(hInternal, wh_n, 1, n_t, 0, n_t, GEMM_2_T); // h_(t-1) * Wh_n + gemm(dummyOnes, b_hn, 1, n_t, 1, n_t); // + b_hn + multiply(r, n_t, n_t); // r (*) (h_(t-1) * Wh_n + b_hn) + + gemm(xCurr, wx_n, 1, n_t, 1, n_t, GEMM_2_T); // + x * Wx_n + gemm(dummyOnes, b_in, 1, n_t, 1, n_t); // + b_in + tanh(n_t, n_t); // tanh() + + //compute next h_t = z (*) h_(t-1) + (1 - z) (*) n_t + multiply(z, hInternal, hInternal); // z (*) h_{t-1} + subtract(ones, z, z); // 1 - z + multiply(z, n_t, z); // (1 - z) * n + add(z, hInternal, hInternal); // z (*) h_(t-1) + (1 - z) (*) n_t + + //save results in output blobs + hInternal.copyTo(hOutTs.rowRange(curRowRange)); + } + } + } +}; + +Ptr GRULayer::create(const LayerParams ¶ms) { + return Ptr(new GRULayerImpl(params)); +} + } } diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 2573d783d84c..dee7e128fa02 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -145,6 +145,7 @@ class ONNXImporter void parseNeg (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseConstant (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseLSTM (LayerParams& 
layerParams, const opencv_onnx::NodeProto& node_proto); + void parseGRU (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseImageScaler (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseClip (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseLeakyRelu (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); @@ -582,6 +583,7 @@ const std::set& ONNXImporter::getSupportedTypes() "Neg", "Constant", "LSTM", + "GRU", "ImageScaler", "Clip", "LeakyRelu", @@ -1239,6 +1241,46 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr addLayer(layerParams, node_proto); } +void ONNXImporter::parseGRU(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto_) +{ + opencv_onnx::NodeProto node_proto = node_proto_; + LayerParams gruParams = layerParams; + gruParams.name += "/gru"; + + // https://pytorch.org/docs/stable/generated/torch.nn.GRU.html?highlight=gru# + CV_Assert(node_proto.input_size() == 6); + Mat Wx = getBlob(node_proto, 1); + Mat Wh = getBlob(node_proto, 2); + Mat b = getBlob(node_proto, 3); + Mat h0 = getBlob(node_proto, 5); + + Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); + Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + b = b.reshape(1, b.size[0]); + + gruParams.blobs.resize(4); + gruParams.blobs[0] = Wh; + gruParams.blobs[1] = Wx; + gruParams.blobs[2] = b; + gruParams.blobs[3] = h0; + gruParams.set("bidirectional", gruParams.get("direction", "") == "bidirectional"); + + node_proto.set_output(0, gruParams.name); // set different name so output shapes will be registered on that name + addLayer(gruParams, node_proto); + + MatShape gruShape = outShapes[node_proto.output(0)]; + + // Add fake 1 as it is done in ONNX + gruShape.insert(gruShape.begin() + 1, 1); + + layerParams.type = "Reshape"; + layerParams.set("dim", DictValue::arrayInt(&gruShape[0], gruShape.size())); + 
node_proto.set_input(0, gruParams.name); // redirect input to GRU + node_proto.set_output(0, layerParams.name); // keep origin GRU's name + addLayer(layerParams, node_proto); +} + void ONNXImporter::parseImageScaler(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { const float scale = layerParams.has("scale") ? layerParams.get("scale") : 1.0f; @@ -2358,6 +2400,7 @@ const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap() dispatch["Neg"] = &ONNXImporter::parseNeg; dispatch["Constant"] = &ONNXImporter::parseConstant; dispatch["LSTM"] = &ONNXImporter::parseLSTM; + dispatch["GRU"] = &ONNXImporter::parseGRU; dispatch["ImageScaler"] = &ONNXImporter::parseImageScaler; dispatch["Clip"] = &ONNXImporter::parseClip; dispatch["LeakyRelu"] = &ONNXImporter::parseLeakyRelu; diff --git a/modules/dnn/test/test_layers.cpp b/modules/dnn/test/test_layers.cpp index 1383c59e28d7..04d5fa63559e 100644 --- a/modules/dnn/test/test_layers.cpp +++ b/modules/dnn/test/test_layers.cpp @@ -596,6 +596,35 @@ TEST(Layer_LSTM_Test_Accuracy_with_, HiddenParams) normAssert(h_t_reference, outputs[0]); } +TEST(Layer_GRU_Test_Accuracy_with_, Pytorch) +{ + Mat Wx = blobFromNPY(_tf("gru.W.npy")); + Mat Wh = blobFromNPY(_tf("gru.R.npy")); + Mat b = blobFromNPY(_tf("gru.B.npy")); + Mat h0 = blobFromNPY(_tf("gru.h0.npy")); + + Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); + Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + b = b.reshape(1, b.size[0]); + + LayerParams gruParams; + gruParams.blobs.resize(4); + gruParams.blobs[0] = Wh; + gruParams.blobs[1] = Wx; + gruParams.blobs[2] = b; + gruParams.blobs[3] = h0; + gruParams.set("bidirectional", false); + Ptr layer = GRULayer::create(gruParams); + + Mat inp = blobFromNPY(_tf("gru.input.npy")); + std::vector inputs(1, inp), outputs; + runLayer(layer, inputs, outputs); + + Mat h_t_reference = blobFromNPY(_tf("gru.output.npy")); + normAssert(h_t_reference, outputs[0]); +} + 
TEST(Layer_RNN_Test_Accuracy_with_, CaffeRecurrent) { Ptr layer = RNNLayer::create(LayerParams()); diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 8bfd86495508..0e252cca8312 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -720,6 +720,16 @@ TEST_P(Test_ONNX_layers, LSTM_hidden_bidirectional) testONNXModels("hidden_lstm_bi", npy, 0, 0, false, false); } +TEST_P(Test_ONNX_layers, GRU) +{ + testONNXModels("gru", npy, 0, 0, false, false); +} + +TEST_P(Test_ONNX_layers, GRU_bidirectional) +{ + testONNXModels("gru_bi", npy, 0, 0, false, false); +} + TEST_P(Test_ONNX_layers, Pad2d_Unfused) { testONNXModels("ReflectionPad2d"); From 2a177052de55c85554194a2464a91e6e09c7f768 Mon Sep 17 00:00:00 2001 From: SamFC10 Date: Mon, 9 Aug 2021 12:08:55 +0530 Subject: [PATCH 102/128] fix bug in prior-box variances --- .../dnn/src/layers/detection_output_layer.cpp | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/modules/dnn/src/layers/detection_output_layer.cpp b/modules/dnn/src/layers/detection_output_layer.cpp index 8374d74293be..614b3a646266 100644 --- a/modules/dnn/src/layers/detection_output_layer.cpp +++ b/modules/dnn/src/layers/detection_output_layer.cpp @@ -456,7 +456,7 @@ class DetectionOutputLayerImpl CV_FINAL : public DetectionOutputLayer // Retrieve all prior bboxes std::vector priorBBoxes; std::vector > priorVariances; - GetPriorBBoxes(priorData, numPriors, _bboxesNormalized, priorBBoxes, priorVariances); + GetPriorBBoxes(priorData, numPriors, _bboxesNormalized, _varianceEncodedInTarget, priorBBoxes, priorVariances); // Decode all loc predictions to bboxes util::NormalizedBBox clipBounds; @@ -750,7 +750,7 @@ class DetectionOutputLayerImpl CV_FINAL : public DetectionOutputLayer CV_Assert(prior_bboxes.size() == prior_variances.size()); CV_Assert(prior_bboxes.size() == bboxes.size()); size_t num_bboxes = prior_bboxes.size(); - 
CV_Assert(num_bboxes == 0 || prior_variances[0].size() == 4); + CV_Assert(num_bboxes == 0 || prior_variances[0].size() == 4 || variance_encoded_in_target); decode_bboxes.clear(); decode_bboxes.resize(num_bboxes); if(variance_encoded_in_target) { @@ -802,12 +802,13 @@ class DetectionOutputLayerImpl CV_FINAL : public DetectionOutputLayer } // Get prior bounding boxes from prior_data - // prior_data: 1 x 2 x num_priors * 4 x 1 blob. + // prior_data: 1 x 1 x num_priors * 4 x 1 blob or 1 x 2 x num_priors * 4 x 1 blob. // num_priors: number of priors. // prior_bboxes: stores all the prior bboxes in the format of util::NormalizedBBox. // prior_variances: stores all the variances needed by prior bboxes. static void GetPriorBBoxes(const float* priorData, const int& numPriors, - bool normalized_bbox, std::vector& priorBBoxes, + bool normalized_bbox, bool variance_encoded_in_target, + std::vector& priorBBoxes, std::vector >& priorVariances) { priorBBoxes.clear(); priorBBoxes.resize(numPriors); @@ -823,13 +824,16 @@ class DetectionOutputLayerImpl CV_FINAL : public DetectionOutputLayer bbox.set_size(BBoxSize(bbox, normalized_bbox)); } - for (int i = 0; i < numPriors; ++i) + if (!variance_encoded_in_target) { - int startIdx = (numPriors + i) * 4; - // not needed here: priorVariances[i].clear(); - for (int j = 0; j < 4; ++j) + for (int i = 0; i < numPriors; ++i) { - priorVariances[i].push_back(priorData[startIdx + j]); + int startIdx = (numPriors + i) * 4; + // not needed here: priorVariances[i].clear(); + for (int j = 0; j < 4; ++j) + { + priorVariances[i].push_back(priorData[startIdx + j]); + } } } } From 739ff84732f6385a43b32b37d875921b9073d009 Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Mon, 9 Aug 2021 13:28:33 +0300 Subject: [PATCH 103/128] add Max layer to TFImporter --- modules/dnn/src/tensorflow/tf_importer.cpp | 26 +++++++++++++++++----- modules/dnn/test/test_tf_importer.cpp | 22 ++++++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git 
a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index fa33211a50e1..ca9d7c5e2174 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -647,7 +647,7 @@ const TFImporter::DispatchMap TFImporter::buildDispatchMap() dispatch["PriorBox"] = &TFImporter::parsePriorBox; dispatch["Softmax"] = &TFImporter::parseSoftmax; dispatch["CropAndResize"] = &TFImporter::parseCropAndResize; - dispatch["Mean"] = dispatch["Sum"] = &TFImporter::parseMean; + dispatch["Mean"] = dispatch["Sum"] = dispatch["Max"] = &TFImporter::parseMean; dispatch["Pack"] = &TFImporter::parsePack; dispatch["ClipByValue"] = &TFImporter::parseClipByValue; dispatch["LeakyRelu"] = &TFImporter::parseLeakyRelu; @@ -657,6 +657,7 @@ const TFImporter::DispatchMap TFImporter::buildDispatchMap() return dispatch; } +// "Conv2D" "SpaceToBatchND" "DepthwiseConv2dNative" "Pad" "MirrorPad" "Conv3D" void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer_, LayerParams& layerParams) { tensorflow::NodeDef layer = layer_; @@ -876,6 +877,7 @@ void TFImporter::parseConvolution(tensorflow::GraphDef& net, const tensorflow::N data_layouts[name] = DATA_LAYOUT_NHWC; } +// "BiasAdd" "Add" "AddV2" "Sub" "AddN" void TFImporter::parseBias(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1087,6 +1089,7 @@ void TFImporter::parseReshape(tensorflow::GraphDef& net, const tensorflow::NodeD } } +// "Flatten" "Squeeze" void TFImporter::parseFlatten(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1245,6 +1248,7 @@ void TFImporter::parseLrn(tensorflow::GraphDef& net, const tensorflow::NodeDef& connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } +// "Concat" "ConcatV2" void TFImporter::parseConcat(tensorflow::GraphDef& net, 
const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1295,6 +1299,7 @@ void TFImporter::parseConcat(tensorflow::GraphDef& net, const tensorflow::NodeDe } } +// "MaxPool" "MaxPool3D" void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1316,6 +1321,7 @@ void TFImporter::parseMaxPool(tensorflow::GraphDef& net, const tensorflow::NodeD connectToAllBlobs(layer_id, dstNet, parsePin(inputName), id, num_inputs); } +// "AvgPool" "AvgPool3D" void TFImporter::parseAvgPool(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1502,6 +1508,7 @@ void TFImporter::parseStridedSlice(tensorflow::GraphDef& net, const tensorflow:: connect(layer_id, dstNet, parsePin(layer.input(0)), id, 0); } +// "Mul" "RealDiv" void TFImporter::parseMul(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); @@ -1659,6 +1666,7 @@ void TFImporter::parseMul(tensorflow::GraphDef& net, const tensorflow::NodeDef& } } +// "FusedBatchNorm" "FusedBatchNormV3" void TFImporter::parseFusedBatchNorm(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { // op: "FusedBatchNorm" @@ -1918,6 +1926,7 @@ void TFImporter::parseBlockLSTM(tensorflow::GraphDef& net, const tensorflow::Nod data_layouts[name] = DATA_LAYOUT_UNKNOWN; } +// "ResizeNearestNeighbor" "ResizeBilinear" "FusedResizeAndPadConv2D" void TFImporter::parseResize(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer_, LayerParams& layerParams) { tensorflow::NodeDef layer = layer_; @@ -2106,6 +2115,7 @@ void TFImporter::parseCropAndResize(tensorflow::GraphDef& net, const tensorflow: connect(layer_id, dstNet, parsePin(layer.input(1)), id, 1); } +// "Mean" "Sum" "Max" void 
TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { // Computes the mean of elements across dimensions of a tensor. @@ -2124,7 +2134,12 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& const std::string& name = layer.name(); const std::string& type = layer.op(); const int num_inputs = layer.input_size(); + std::string pool_type = cv::toLowerCase(type); + if (pool_type == "mean") + { + pool_type = "ave"; + } CV_CheckGT(num_inputs, 0, ""); Mat indices = getTensorContent(getConstBlob(layer, value_id, 1)); @@ -2161,7 +2176,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& LayerParams avgLp; std::string avgName = name + "/avg"; CV_Assert(layer_id.find(avgName) == layer_id.end()); - avgLp.set("pool", type == "Mean" ? "ave" : "sum"); + avgLp.set("pool", pool_type); // pooling kernel H x 1 avgLp.set("global_pooling_h", true); avgLp.set("kernel_w", 1); @@ -2202,7 +2217,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& int axis = toNCHW(indices.at(0)); if (axis == 2 || axis == 3) { - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set("pool", pool_type); layerParams.set(axis == 2 ? "kernel_w" : "kernel_h", 1); layerParams.set(axis == 2 ? "global_pooling_h" : "global_pooling_w", true); int id = dstNet.addLayer(name, "Pooling", layerParams); @@ -2234,7 +2249,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& Pin inpId = parsePin(layer.input(0)); addPermuteLayer(order, name + "/nhwc", inpId); - layerParams.set("pool", type == "Mean" ? 
"ave" : "sum"); + layerParams.set("pool", pool_type); layerParams.set("kernel_h", 1); layerParams.set("global_pooling_w", true); int id = dstNet.addLayer(name, "Pooling", layerParams); @@ -2264,7 +2279,7 @@ void TFImporter::parseMean(tensorflow::GraphDef& net, const tensorflow::NodeDef& if (indices.total() != 2 || indices.at(0) != 1 || indices.at(1) != 2) CV_Error(Error::StsNotImplemented, "Unsupported mode of reduce_mean or reduce_sum operation."); - layerParams.set("pool", type == "Mean" ? "ave" : "sum"); + layerParams.set("pool", pool_type); layerParams.set("global_pooling", true); int id = dstNet.addLayer(name, "Pooling", layerParams); layer_id[name] = id; @@ -2368,6 +2383,7 @@ void TFImporter::parseLeakyRelu(tensorflow::GraphDef& net, const tensorflow::Nod connectToAllBlobs(layer_id, dstNet, parsePin(layer.input(0)), id, num_inputs); } +// "Abs" "Tanh" "Sigmoid" "Relu" "Elu" "Exp" "Identity" "Relu6" void TFImporter::parseActivation(tensorflow::GraphDef& net, const tensorflow::NodeDef& layer, LayerParams& layerParams) { const std::string& name = layer.name(); diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 4f7840f9e4eb..68d6e88a6642 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ b/modules/dnn/test/test_tf_importer.cpp @@ -128,6 +128,13 @@ TEST_P(Test_TensorFlow_layers, reduce_mean) runTensorFlowNet("global_pool_by_axis"); } +TEST_P(Test_TensorFlow_layers, reduce_max) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + runTensorFlowNet("max_pool_by_axis"); +} + TEST_P(Test_TensorFlow_layers, reduce_sum) { if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) @@ -135,11 +142,21 @@ TEST_P(Test_TensorFlow_layers, reduce_sum) runTensorFlowNet("sum_pool_by_axis"); } +TEST_P(Test_TensorFlow_layers, reduce_max_channel) +{ + runTensorFlowNet("reduce_max_channel"); +} + TEST_P(Test_TensorFlow_layers, reduce_sum_channel) { 
runTensorFlowNet("reduce_sum_channel"); } +TEST_P(Test_TensorFlow_layers, reduce_max_channel_keep_dims) +{ + runTensorFlowNet("reduce_max_channel", false, 0.0, 0.0, false, "_keep_dims"); +} + TEST_P(Test_TensorFlow_layers, reduce_sum_channel_keep_dims) { runTensorFlowNet("reduce_sum_channel", false, 0.0, 0.0, false, "_keep_dims"); @@ -386,6 +403,11 @@ TEST_P(Test_TensorFlow_layers, pooling_reduce_mean) runTensorFlowNet("reduce_mean"); // an average pooling over all spatial dimensions. } +TEST_P(Test_TensorFlow_layers, pooling_reduce_max) +{ + runTensorFlowNet("reduce_max"); // a MAX pooling over all spatial dimensions. +} + TEST_P(Test_TensorFlow_layers, pooling_reduce_sum) { runTensorFlowNet("reduce_sum"); // a SUM pooling over all spatial dimensions. From 21d0f4075141568f7bf6616c5f8e79c06987283e Mon Sep 17 00:00:00 2001 From: Saikat Nanda Date: Mon, 9 Aug 2021 21:41:00 -0400 Subject: [PATCH 104/128] Fix YUV indexes + YUV Planner detection condition --- modules/videoio/src/cap_android_camera.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/videoio/src/cap_android_camera.cpp b/modules/videoio/src/cap_android_camera.cpp index b369a12a6861..5952b6f08c48 100644 --- a/modules/videoio/src/cap_android_camera.cpp +++ b/modules/videoio/src/cap_android_camera.cpp @@ -304,8 +304,8 @@ class AndroidCameraCapture : public IVideoCapture AImage_getPlaneRowStride(image.get(), 0, &yStride); AImage_getPlaneRowStride(image.get(), 1, &uvStride); AImage_getPlaneData(image.get(), 0, &yPixel, &yLen); - AImage_getPlaneData(image.get(), 1, &vPixel, &vLen); - AImage_getPlaneData(image.get(), 2, &uPixel, &uLen); + AImage_getPlaneData(image.get(), 1, &uPixel, &uLen); + AImage_getPlaneData(image.get(), 2, &vPixel, &vLen); AImage_getPlanePixelStride(image.get(), 1, &uvPixelStride); if ( (uvPixelStride == 2) && (vPixel == uPixel + 1) && (yLen == frameWidth * frameHeight) && (uLen == ((yLen / 2) - 1)) && (vLen == uLen) ) { @@ -313,7 +313,7 @@ class 
AndroidCameraCapture : public IVideoCapture if (fourCC == FOURCC_UNKNOWN) { fourCC = FOURCC_NV21; } - } else if ( (uvPixelStride == 1) && (vPixel = uPixel + uLen) && (yLen == frameWidth * frameHeight) && (uLen == yLen / 4) && (vLen == uLen) ) { + } else if ( (uvPixelStride == 1) && (vPixel == uPixel + uLen) && (yLen == frameWidth * frameHeight) && (uLen == yLen / 4) && (vLen == uLen) ) { colorFormat = COLOR_FormatYUV420Planar; if (fourCC == FOURCC_UNKNOWN) { fourCC = FOURCC_YV12; From 992b47b9916f9dbdfee16ed1a59ba64cda0779bb Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Tue, 10 Aug 2021 18:53:28 +0300 Subject: [PATCH 105/128] add 19769 and 19769_lightweight tests --- modules/imgproc/test/test_convhull.cpp | 73 ++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/modules/imgproc/test/test_convhull.cpp b/modules/imgproc/test/test_convhull.cpp index f1d739b0e059..dee3769762c9 100644 --- a/modules/imgproc/test/test_convhull.cpp +++ b/modules/imgproc/test/test_convhull.cpp @@ -2384,5 +2384,78 @@ TEST(Imgproc_minAreaRect, reproducer_18157) EXPECT_TRUE(checkMinAreaRect(rr, contour)) << rr.center << " " << rr.size << " " << rr.angle; } +TEST(Imgproc_minAreaRect, reproducer_19769_lightweight) +{ + const int N = 23; + float pts_[N][2] = { + {1325, 732}, {1248, 808}, {582, 1510}, {586, 1524}, + {595, 1541}, {599, 1547}, {789, 1745}, {829, 1786}, + {997, 1958}, {1116, 2074}, {1207, 2066}, {1216, 2058}, + {1231, 2044}, {1265, 2011}, {2036, 1254}, {2100, 1191}, + {2169, 1123}, {2315, 979}, {2395, 900}, {2438, 787}, + {2434, 782}, {2416, 762}, {2266, 610} + }; + Mat contour(N, 1, CV_32FC2, (void*)pts_); + + RotatedRect rr = cv::minAreaRect(contour); + + EXPECT_TRUE(checkMinAreaRect(rr, contour)) << rr.center << " " << rr.size << " " << rr.angle; +} + +TEST(Imgproc_minAreaRect, reproducer_19769) +{ + const int N = 169; + float pts_[N][2] = { + {1854, 227}, {1850, 228}, {1847, 229}, {1835, 235}, + {1832, 237}, {1829, 239}, {1825, 242}, {1818, 248}, + 
{1807, 258}, {1759, 306}, {1712, 351}, {1708, 356}, + {1658, 404}, {1655, 408}, {1602, 459}, {1599, 463}, + {1542, 518}, {1477, 582}, {1402, 656}, {1325, 732}, + {1248, 808}, {1161, 894}, {1157, 898}, {1155, 900}, + {1068, 986}, {1060, 995}, {1058, 997}, {957, 1097}, + {956, 1097}, {814, 1238}, {810, 1242}, {805, 1248}, + {610, 1442}, {603, 1450}, {599, 1455}, {596, 1459}, + {594, 1462}, {592, 1465}, {590, 1470}, {588, 1472}, + {586, 1476}, {586, 1478}, {584, 1481}, {583, 1485}, + {582, 1490}, {582, 1510}, {583, 1515}, {584, 1518}, + {585, 1521}, {586, 1524}, {593, 1538}, {595, 1541}, + {597, 1544}, {599, 1547}, {603, 1552}, {609, 1559}, + {623, 1574}, {645, 1597}, {677, 1630}, {713, 1667}, + {753, 1707}, {789, 1744}, {789, 1745}, {829, 1786}, + {871, 1828}, {909, 1867}, {909, 1868}, {950, 1910}, + {953, 1912}, {997, 1958}, {1047, 2009}, {1094, 2056}, + {1105, 2066}, {1110, 2070}, {1113, 2072}, {1116, 2074}, + {1119, 2076}, {1122, 2077}, {1124, 2079}, {1130, 2082}, + {1133, 2083}, {1136, 2084}, {1139, 2085}, {1142, 2086}, + {1148, 2087}, {1166, 2087}, {1170, 2086}, {1174, 2085}, + {1177, 2084}, {1180, 2083}, {1188, 2079}, {1190, 2077}, + {1193, 2076}, {1196, 2074}, {1199, 2072}, {1202, 2070}, + {1207, 2066}, {1216, 2058}, {1231, 2044}, {1265, 2011}, + {1314, 1962}, {1360, 1917}, {1361, 1917}, {1408, 1871}, + {1457, 1822}, {1508, 1773}, {1512, 1768}, {1560, 1722}, + {1617, 1665}, {1671, 1613}, {1730, 1554}, {1784, 1502}, + {1786, 1500}, {1787, 1498}, {1846, 1440}, {1850, 1437}, + {1908, 1380}, {1974, 1314}, {2034, 1256}, {2036, 1254}, + {2100, 1191}, {2169, 1123}, {2242, 1051}, {2315, 979}, + {2395, 900}, {2426, 869}, {2435, 859}, {2438, 855}, + {2440, 852}, {2442, 849}, {2443, 846}, {2445, 844}, + {2446, 842}, {2446, 840}, {2448, 837}, {2449, 834}, + {2450, 829}, {2450, 814}, {2449, 809}, {2448, 806}, + {2447, 803}, {2442, 793}, {2440, 790}, {2438, 787}, + {2434, 782}, {2428, 775}, {2416, 762}, {2411, 758}, + {2342, 688}, {2340, 686}, {2338, 684}, {2266, 610}, + 
{2260, 605}, {2170, 513}, {2075, 417}, {2073, 415}, + {2069, 412}, {1955, 297}, {1955, 296}, {1913, 254}, + {1904, 246}, {1897, 240}, {1894, 238}, {1891, 236}, + {1888, 234}, {1880, 230}, {1877, 229}, {1874, 228}, + {1870, 227} + }; + Mat contour(N, 1, CV_32FC2, (void*)pts_); + + RotatedRect rr = cv::minAreaRect(contour); + + EXPECT_TRUE(checkMinAreaRect(rr, contour)) << rr.center << " " << rr.size << " " << rr.angle; +} + }} // namespace /* End of file. */ From aaca4987c9ffe12e9e486d40ae264859caed89df Mon Sep 17 00:00:00 2001 From: HAN Liutong Date: Wed, 11 Aug 2021 06:16:03 +0800 Subject: [PATCH 106/128] Merge pull request #20287 from hanliutong:dev-rvv-0.10 Optimization of DNN using native RISC-V vector intrinsics. * Use RVV to optimize fastGEMM (FP32) in DNN. * Use RVV to optimize fastGEMM1T in DNN. * Use RVV to optimize fastConv in DNN. * Use RVV to optimize fastDepthwiseConv in DNN. * Vectorize tails using vl. * Use "vl" instead of scalar to handle small block in fastConv. * Fix memory access out of bound in "fastGEMM1T". * Remove setvl. * Remove useless initialization. * Use loop unrolling to handle tail part instead of switch. --- modules/dnn/CMakeLists.txt | 2 +- modules/dnn/src/layers/convolution_layer.cpp | 25 +- .../dnn/src/layers/fully_connected_layer.cpp | 9 +- modules/dnn/src/layers/layers_common.simd.hpp | 549 ++++++++++++++++++ 4 files changed, 582 insertions(+), 3 deletions(-) diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 4c8129cbda1c..70f9a5a73e5a 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -8,7 +8,7 @@ endif() set(the_description "Deep neural network module. 
It allows to load models from different frameworks and to make forward pass") -ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX) +ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX RVV) ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java objc js) diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index fb57f265111d..68c543be2477 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -914,11 +914,12 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl bool useAVX; bool useAVX2; bool useAVX512; + bool useRVV; int blk_size_cn; ParallelConv() : input_(0), weights_(0), output_(0), ngroups_(0), nstripes_(0), - biasvec_(0), reluslope_(0), activ_(0), is1x1_(false), useAVX(false), useAVX2(false), useAVX512(false) + biasvec_(0), reluslope_(0), activ_(0), is1x1_(false), useAVX(false), useAVX2(false), useAVX512(false), useRVV(false) , blk_size_cn(0) {} @@ -976,6 +977,7 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl p.useAVX = checkHardwareSupport(CPU_AVX) && isConv2D; p.useAVX2 = checkHardwareSupport(CPU_AVX2) && isConv2D; p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX && isConv2D; + p.useRVV = checkHardwareSupport(CPU_RVV) && isConv2D; int kernel_d = isConv3D? kernel_size[0] : 1; int kernel_h = isConv1D? 
1 : kernel_size[kernel_size.size() - 2]; @@ -1176,6 +1178,13 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l, biasptr, relu, inptr_, height, width, outptr_, out_d, outH, outW); else + #endif + #if CV_TRY_RVV + if(useRVV) + opt_RVV::fastDepthwiseConv(wptr, kernel_h, kernel_w, + stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l, + biasptr, relu, inptr_, height, width, outptr_, out_d, outH, outW); + else #endif { const float w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], @@ -1546,6 +1555,12 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl opt_AVX::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, outShape, bsz, vsz, vsz_a, relu, cn0 == 0); else + #endif + #if CV_TRY_RVV + if(useRVV) + opt_RVV::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, + outShape, bsz, vsz, vsz_a, relu, cn0 == 0); + else #endif for( int i = 0; i < outCn; i += 2 ) { @@ -2297,6 +2312,7 @@ class DeConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl useAVX = checkHardwareSupport(CPU_AVX); useAVX2 = checkHardwareSupport(CPU_AVX2); useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX; + useRVV = checkHardwareSupport(CPU_RVV); } void operator()(const Range& range_) const CV_OVERRIDE @@ -2328,6 +2344,12 @@ class DeConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl if( useAVX ) opt_AVX::fastGEMM( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax ); else + #endif + #if CV_TRY_RVV + if( useRVV ) { + opt_RVV::fastGEMM( aptr, astep, bptr, bstep, cptr, cstep, mmax, kmax, nmax ); + } + else #endif for( m = 0; m < mmax; m += 2 ) { @@ -2427,6 +2449,7 @@ class DeConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl bool useAVX; bool useAVX2; bool useAVX512; + bool useRVV; }; class Col2ImInvoker : public cv::ParallelLoopBody diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 
d9c1fa65c143..529f3c04fdef 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -168,7 +168,7 @@ class FullyConnectedLayerImpl CV_FINAL : public InnerProductLayer class FullyConnected : public ParallelLoopBody { public: - FullyConnected() : srcMat(0), weights(0), biasMat(0), activ(0), dstMat(0), nstripes(0), useAVX(false), useAVX2(false), useAVX512(false) {} + FullyConnected() : srcMat(0), weights(0), biasMat(0), activ(0), dstMat(0), nstripes(0), useAVX(false), useAVX2(false), useAVX512(false), useRVV(false) {} static void run(const Mat& srcMat, const Mat& weights, const Mat& biasMat, Mat& dstMat, const ActivationLayer* activ, int nstripes) @@ -191,6 +191,7 @@ class FullyConnectedLayerImpl CV_FINAL : public InnerProductLayer p.useAVX = checkHardwareSupport(CPU_AVX); p.useAVX2 = checkHardwareSupport(CPU_AVX2); p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX; + p.useRVV = checkHardwareSupport(CPU_RVV); parallel_for_(Range(0, nstripes), p, nstripes); } @@ -239,6 +240,11 @@ class FullyConnectedLayerImpl CV_FINAL : public InnerProductLayer if( useAVX ) opt_AVX::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize); else + #endif + #if CV_TRY_RVV + if( useRVV ) + opt_RVV::fastGEMM1T( sptr, wptr, wstep, biasptr, dptr, nw, vecsize); + else #endif { int i = 0; @@ -293,6 +299,7 @@ class FullyConnectedLayerImpl CV_FINAL : public InnerProductLayer bool useAVX; bool useAVX2; bool useAVX512; + bool useRVV; }; #ifdef HAVE_OPENCL diff --git a/modules/dnn/src/layers/layers_common.simd.hpp b/modules/dnn/src/layers/layers_common.simd.hpp index 706695a7b20f..762e22e54d2f 100644 --- a/modules/dnn/src/layers/layers_common.simd.hpp +++ b/modules/dnn/src/layers/layers_common.simd.hpp @@ -737,5 +737,554 @@ void fastGEMM( const float* aptr, size_t astep, const float* bptr, #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_RVV + +void fastGEMM( const float* aptr, 
size_t astep, const float* bptr,
+               size_t bstep, float* cptr, size_t cstep,
+               int ma, int na, int nb )
+{
+    int n = 0;
+    size_t vl = 8;   // lanes used for splats/zeroing; assumes an f32m2 group holds >= 8 floats - TODO confirm for the target VLEN
+    size_t mvl0 = 8; // active lanes for columns [n, n+8)
+    size_t mvl1 = 8; // active lanes for columns [n+8, n+16)
+    for( ; n < nb; n += 16 )
+    {
+        if ( n + 16 > nb) { // last, partial 16-column tile
+            mvl0 = std::min(nb - n, 8); // clamp: the first half never covers more than 8 columns; a larger AVL may give an implementation-defined vl
+            mvl1 = (nb - n -8) > 0 ? (nb - n -8) : 0;
+        }
+        // Four rows of A and C per step; row indices past ma-1 are clamped to ma-1 (that row is recomputed).
+        for( int m = 0; m < ma; m += 4 )
+        {
+            const float* aptr0 = aptr + astep*m;
+            const float* aptr1 = aptr + astep*std::min(m+1, ma-1);
+            const float* aptr2 = aptr + astep*std::min(m+2, ma-1);
+            const float* aptr3 = aptr + astep*std::min(m+3, ma-1);
+
+            float* cptr0 = cptr + cstep*m;
+            float* cptr1 = cptr + cstep*std::min(m+1, ma-1);
+            float* cptr2 = cptr + cstep*std::min(m+2, ma-1);
+            float* cptr3 = cptr + cstep*std::min(m+3, ma-1);
+            // dRH accumulates row R (0..3) and column half H (0: n.., 1: n+8..) of the current tile of C.
+            vfloat32m2_t d00 = vfmv_v_f_f32m2(0, vl), d01 = vfmv_v_f_f32m2(0, vl);
+            vfloat32m2_t d10 = vfmv_v_f_f32m2(0, vl), d11 = vfmv_v_f_f32m2(0, vl);
+            vfloat32m2_t d20 = vfmv_v_f_f32m2(0, vl), d21 = vfmv_v_f_f32m2(0, vl);
+            vfloat32m2_t d30 = vfmv_v_f_f32m2(0, vl), d31 = vfmv_v_f_f32m2(0, vl);
+            // Rank-1 updates: broadcast A[row][k] and multiply-accumulate with row k of B.
+            for( int k = 0; k < na; k++ )
+            {
+                vfloat32m2_t a0 = vfmv_v_f_f32m2(aptr0[k], vl);
+                vfloat32m2_t a1 = vfmv_v_f_f32m2(aptr1[k], vl);
+                vfloat32m2_t a2 = vfmv_v_f_f32m2(aptr2[k], vl);
+                vfloat32m2_t a3 = vfmv_v_f_f32m2(aptr3[k], vl);
+                vfloat32m2_t b0 = vle32_v_f32m2(bptr + k*bstep + n, mvl0);
+                vfloat32m2_t b1 = vle32_v_f32m2(bptr + k*bstep + n + 8, mvl1);
+                d00 = vfmacc_vv_f32m2(d00, a0, b0, mvl0);
+                d01 = vfmacc_vv_f32m2(d01, a0, b1, mvl1);
+                d10 = vfmacc_vv_f32m2(d10, a1, b0, mvl0);
+                d11 = vfmacc_vv_f32m2(d11, a1, b1, mvl1);
+                d20 = vfmacc_vv_f32m2(d20, a2, b0, mvl0);
+                d21 = vfmacc_vv_f32m2(d21, a2, b1, mvl1);
+                d30 = vfmacc_vv_f32m2(d30, a3, b0, mvl0);
+                d31 = vfmacc_vv_f32m2(d31, a3, b1, mvl1);
+            }
+            vse32_v_f32m2(cptr0 + n, d00, mvl0);
+            vse32_v_f32m2(cptr1 + n, d10, mvl0);
+            vse32_v_f32m2(cptr2 + n, d20, mvl0);
+            vse32_v_f32m2(cptr3 + n, d30, mvl0);
+            vse32_v_f32m2(cptr0 + n + 8, d01, mvl1);
+            vse32_v_f32m2(cptr1 + n + 8, d11, mvl1);
+            
vse32_v_f32m2(cptr2 + n + 8, d21, mvl1);
+            vse32_v_f32m2(cptr3 + n + 8, d31, mvl1);
+        }
+    }
+}
+// fastGEMM1T: dst[i] = dot(vec, row i of weights) + bias[i] for i in [0, nvecs); consecutive rows are wstep floats apart.
+void fastGEMM1T( const float* vec, const float* weights,
+                 size_t wstep, const float* bias,
+                 float* dst, int nvecs, int vecsize )
+{
+    int i = 0;
+    size_t vl = 8;
+    for( ; i <= nvecs - 8; i += 8 ) // main part: 8 weight rows per iteration
+    {
+        const float* wptr = weights + i*wstep;
+        vfloat32m2_t vs0 = vfmv_v_f_f32m2(0, vl), vs1 = vfmv_v_f_f32m2(0, vl),
+                     vs2 = vfmv_v_f_f32m2(0, vl), vs3 = vfmv_v_f_f32m2(0, vl),
+                     vs4 = vfmv_v_f_f32m2(0, vl), vs5 = vfmv_v_f_f32m2(0, vl),
+                     vs6 = vfmv_v_f_f32m2(0, vl), vs7 = vfmv_v_f_f32m2(0, vl);
+        // NOTE(review): always loads full 8-float chunks, even if vecsize % 8 != 0 - presumably callers pad vec and the weight rows; confirm.
+        for( int k = 0; k < vecsize; k += 8, wptr += 8 )
+        {
+            vfloat32m2_t v = vle32_v_f32m2(vec + k, vl);
+
+            vs0 = vfmacc_vv_f32m2(vs0, vle32_v_f32m2(wptr, vl), v, vl);
+            vs1 = vfmacc_vv_f32m2(vs1, vle32_v_f32m2(wptr + wstep, vl), v, vl);
+            vs2 = vfmacc_vv_f32m2(vs2, vle32_v_f32m2(wptr + wstep*2, vl), v, vl);
+            vs3 = vfmacc_vv_f32m2(vs3, vle32_v_f32m2(wptr + wstep*3, vl), v, vl);
+            vs4 = vfmacc_vv_f32m2(vs4, vle32_v_f32m2(wptr + wstep*4, vl), v, vl);
+            vs5 = vfmacc_vv_f32m2(vs5, vle32_v_f32m2(wptr + wstep*5, vl), v, vl);
+            vs6 = vfmacc_vv_f32m2(vs6, vle32_v_f32m2(wptr + wstep*6, vl), v, vl);
+            vs7 = vfmacc_vv_f32m2(vs7, vle32_v_f32m2(wptr + wstep*7, vl), v, vl);
+        }
+
+        // Horizontal sum of each accumulator; `zero` is the start value and also the (fully defined) destination operand.
+        vfloat32m1_t zero = vfmv_v_f_f32m1(0, vl);
+        vfloat32m1_t temp0 = vfredsum_vs_f32m2_f32m1(zero, vs0, zero, vl);
+        vfloat32m1_t temp1 = vfredsum_vs_f32m2_f32m1(zero, vs1, zero, vl);
+        vfloat32m1_t temp2 = vfredsum_vs_f32m2_f32m1(zero, vs2, zero, vl);
+        vfloat32m1_t temp3 = vfredsum_vs_f32m2_f32m1(zero, vs3, zero, vl);
+        vfloat32m1_t temp4 = vfredsum_vs_f32m2_f32m1(zero, vs4, zero, vl);
+        vfloat32m1_t temp5 = vfredsum_vs_f32m2_f32m1(zero, vs5, zero, vl);
+        vfloat32m1_t temp6 = vfredsum_vs_f32m2_f32m1(zero, vs6, zero, vl);
+        vfloat32m1_t temp7 = vfredsum_vs_f32m2_f32m1(zero, vs7, zero, vl);
+        float32_t sum[8];
+        sum[0] = vfmv_f_s_f32m1_f32(temp0);
+        sum[1] = vfmv_f_s_f32m1_f32(temp1);
+        sum[2] = vfmv_f_s_f32m1_f32(temp2);
+        sum[3] = vfmv_f_s_f32m1_f32(temp3);
+        sum[4] = vfmv_f_s_f32m1_f32(temp4);
+        sum[5] = vfmv_f_s_f32m1_f32(temp5);
+        sum[6] = vfmv_f_s_f32m1_f32(temp6);
+        sum[7] = vfmv_f_s_f32m1_f32(temp7);
+        vfloat32m2_t s0 = vfadd_vv_f32m2(vle32_v_f32m2(sum, vl), vle32_v_f32m2(bias + i, vl), vl);
+        vse32_v_f32m2(dst + i, s0, vl);
+    }
+    int mvl = nvecs - i; // rows left over after the 8-row main loop (0..7)
+    if (mvl > 0)
+    {
+        const float* wptr = weights + i*wstep;
+        vfloat32m2_t vs0 = vfmv_v_f_f32m2(0, vl), vs1 = vfmv_v_f_f32m2(0, vl),
+                     vs2 = vfmv_v_f_f32m2(0, vl), vs3 = vfmv_v_f_f32m2(0, vl),
+                     vs4 = vfmv_v_f_f32m2(0, vl), vs5 = vfmv_v_f_f32m2(0, vl),
+                     vs6 = vfmv_v_f_f32m2(0, vl), vs7 = vfmv_v_f_f32m2(0, vl);
+        int k = 0;
+        for( ; k <= vecsize - 8; k += 8, wptr += 8 ) // row offsets are clamped to mvl-1 so no row past the last one is read
+        {
+            vfloat32m2_t v = vle32_v_f32m2(vec + k, vl);
+            vs0 = vfmacc_vv_f32m2(vs0, vle32_v_f32m2(wptr, vl), v, vl);
+            vs1 = vfmacc_vv_f32m2(vs1, vle32_v_f32m2(wptr + wstep*std::min(1, mvl-1), vl), v, vl);
+            vs2 = vfmacc_vv_f32m2(vs2, vle32_v_f32m2(wptr + wstep*std::min(2, mvl-1), vl), v, vl);
+            vs3 = vfmacc_vv_f32m2(vs3, vle32_v_f32m2(wptr + wstep*std::min(3, mvl-1), vl), v, vl);
+            vs4 = vfmacc_vv_f32m2(vs4, vle32_v_f32m2(wptr + wstep*std::min(4, mvl-1), vl), v, vl);
+            vs5 = vfmacc_vv_f32m2(vs5, vle32_v_f32m2(wptr + wstep*std::min(5, mvl-1), vl), v, vl);
+            vs6 = vfmacc_vv_f32m2(vs6, vle32_v_f32m2(wptr + wstep*std::min(6, mvl-1), vl), v, vl);
+        }
+        int kvl = vecsize - k; // leftover elements of each row (< 8), handled with a shortened vector length
+        if (kvl > 0) {
+            vfloat32m2_t v = vle32_v_f32m2(vec + k, kvl);
+            vs0 = vfmacc_vv_f32m2(vs0, vle32_v_f32m2(wptr, kvl), v, kvl);
+            vs1 = vfmacc_vv_f32m2(vs1, vle32_v_f32m2(wptr + wstep*std::min(1, mvl-1), kvl), v, kvl);
+            vs2 = vfmacc_vv_f32m2(vs2, vle32_v_f32m2(wptr + wstep*std::min(2, mvl-1), kvl), v, kvl);
+            vs3 = vfmacc_vv_f32m2(vs3, vle32_v_f32m2(wptr + wstep*std::min(3, mvl-1), kvl), v, kvl);
+            vs4 = vfmacc_vv_f32m2(vs4, vle32_v_f32m2(wptr + wstep*std::min(4, mvl-1), kvl), v, kvl);
+            vs5 = vfmacc_vv_f32m2(vs5, vle32_v_f32m2(wptr + wstep*std::min(5, mvl-1), kvl), v, kvl);
+            vs6 = vfmacc_vv_f32m2(vs6, vle32_v_f32m2(wptr + wstep*std::min(6, mvl-1), kvl), v, kvl);
+        }
+        // Calculate the sum of each vector
+        vfloat32m1_t zero = vfmv_v_f_f32m1(0, vl);
+        vfloat32m1_t temp0 = vfmv_v_f_f32m1(0, 4), temp1 = vfmv_v_f_f32m1(0, 4),
+                     temp2 = vfmv_v_f_f32m1(0, 4), temp3 = vfmv_v_f_f32m1(0, 4),
+                     temp4 = vfmv_v_f_f32m1(0, 4), temp5 = vfmv_v_f_f32m1(0, 4),
+                     temp6 = vfmv_v_f_f32m1(0, 4), temp7 = vfmv_v_f_f32m1(0, 4);
+        temp0 = vfredsum_vs_f32m2_f32m1(temp0, vs0, zero, vl);
+        temp1 = vfredsum_vs_f32m2_f32m1(temp1, vs1, zero, vl);
+        temp2 = vfredsum_vs_f32m2_f32m1(temp2, vs2, zero, vl);
+        temp3 = vfredsum_vs_f32m2_f32m1(temp3, vs3, zero, vl);
+        temp4 = vfredsum_vs_f32m2_f32m1(temp4, vs4, zero, vl);
+        temp5 = vfredsum_vs_f32m2_f32m1(temp5, vs5, zero, vl);
+        temp6 = vfredsum_vs_f32m2_f32m1(temp6, vs6, zero, vl);
+        temp7 = vfredsum_vs_f32m2_f32m1(temp7, vs7, zero, vl);
+
+        float32_t sum[8];
+        sum[0] = vfmv_f_s_f32m1_f32(temp0);
+        sum[1] = vfmv_f_s_f32m1_f32(temp1);
+        sum[2] = vfmv_f_s_f32m1_f32(temp2);
+        sum[3] = vfmv_f_s_f32m1_f32(temp3);
+        sum[4] = vfmv_f_s_f32m1_f32(temp4);
+        sum[5] = vfmv_f_s_f32m1_f32(temp5);
+        sum[6] = vfmv_f_s_f32m1_f32(temp6);
+        sum[7] = vfmv_f_s_f32m1_f32(temp7);
+        // Only the first mvl sums belong to real rows; only those are biased and stored.
+        vfloat32m2_t s0 = vfadd_vv_f32m2(vle32_v_f32m2(sum, mvl), vle32_v_f32m2(bias + i, mvl), mvl);
+        vse32_v_f32m2(dst + i, s0, mvl);
+    }
+}
+
+enum { FASCONV_BASE_VECSZ = 4 }; // TODO: Large base size.
+void fastConv( const float* weights, size_t wstep, const float* bias,
+               const float* rowbuf, float* output, const int* outShape,
+               int blockSize, int vecsize, int vecsize_aligned,
+               const float* relu, bool initOutput )
+{
+    int vl = 4; // output positions produced per step (FASCONV_BASE_VECSZ); shrunk below when blockSize < 4
+    int outCn = outShape[1];
+    size_t outPlaneSize = outShape[2]*outShape[3];
+    float r0 = 1.f, r1 = 1.f, r2 = 1.f;
+    vfloat32m1_t vr0 = vfmv_v_f_f32m1(1, vl), vr1 = vfmv_v_f_f32m1(1, vl), vr2 = vfmv_v_f_f32m1(1, vl);
+    int maskbuf[FASCONV_BASE_VECSZ] = {0};
+    int rsz = blockSize % FASCONV_BASE_VECSZ;
+    for( int i = 0; i < rsz; i++ )
+        maskbuf[FASCONV_BASE_VECSZ - i - 1] = -1; // flag the last rsz lanes
+    vint32m1_t vmaskbuf = vle32_v_i32m1(maskbuf ,vl);
+    vbool32_t mask = vmslt_vx_i32m1_b32(vmaskbuf, 0, vl); // mask for tail
+    // now compute dot product of the weights
+    // and im2row-transformed part of the tensor
+    for( int i = 0; i < outCn; i += 3 ) // three output channels per iteration
+    {
+        const float* wptr0 = weights + i*wstep;
+        const float* wptr1 = wptr0 + wstep;
+        const float* wptr2 = wptr1 + wstep;
+        float* outptr0 = output + i*outPlaneSize;
+        float* outptr1 = outptr0 + outPlaneSize;
+        float* outptr2 = outptr1 + outPlaneSize;
+        float bias0 = bias[i], bias1 = bias[i+1], bias2 = bias[i+2]; // NOTE(review): reads bias[i+1], bias[i+2] before the range check below - presumably bias is padded; confirm
+        // Fewer than three channels left: alias the missing ones to the last valid channel.
+        if( i+2 >= outCn )
+        {
+            wptr2 = wptr1;
+            outptr2 = outptr1;
+            bias2 = bias1;
+            if( i+1 >= outCn )
+            {
+                wptr2 = wptr1 = wptr0;
+                outptr2 = outptr1 = outptr0;
+                bias2 = bias1 = bias0;
+            }
+        }
+
+        if( relu )
+        {
+            r0 = relu[i]; r1 = relu[i+1]; r2 = relu[i+2];
+            if( i+2 >= outCn )
+            {
+                r2 = r1;
+                if( i+1 >= outCn )
+                    r2 = r1 = r0;
+            }
+            vr0 = vfmv_v_f_f32m1(r0, vl);
+            vr1 = vfmv_v_f_f32m1(r1, vl);
+            vr2 = vfmv_v_f_f32m1(r2, vl);
+        }
+
+        int j = 0;
+        for( ; j < blockSize; j += FASCONV_BASE_VECSZ )
+        {
+            bool tail = false;
+            if (j + FASCONV_BASE_VECSZ > blockSize)
+            {
+                if (j == 0) {
+                    vl = blockSize; // whole block is shorter than one step: just use a shorter vector length
+                }
+                else {
+                    j = blockSize - FASCONV_BASE_VECSZ; // step back so the last full step ends exactly at blockSize
+                    tail = true;
+                }
+            }
+            int k = 0;
+            const float* rptr = rowbuf + j*vecsize_aligned;
+            int vlm2 = 8;
+            vfloat32m2_t vs00 = vfmv_v_f_f32m2(0, vlm2), vs01 = vfmv_v_f_f32m2(0, vlm2),
+                         vs02 = vfmv_v_f_f32m2(0, vlm2), vs03 = vfmv_v_f_f32m2(0, vlm2),
+                         vs10 = vfmv_v_f_f32m2(0, vlm2), vs11 = vfmv_v_f_f32m2(0, vlm2),
+                         vs12 = vfmv_v_f_f32m2(0, vlm2), vs13 = vfmv_v_f_f32m2(0, vlm2),
+                         vs20 = vfmv_v_f_f32m2(0, vlm2), vs21 = vfmv_v_f_f32m2(0, vlm2),
+                         vs22 = vfmv_v_f_f32m2(0, vlm2), vs23 = vfmv_v_f_f32m2(0, vlm2);
+            // vsCP accumulates channel C (0..2) against rowbuf row j+P (0..3).
+            for (; k < vecsize; k += 8, rptr += 8 )
+            {
+                if (k+8 >= vecsize) {
+                    vlm2 = vecsize - k; // last chunk: only the remaining elements are active
+                }
+                vfloat32m2_t w0 = vle32_v_f32m2(wptr0 + k, vlm2);
+                vfloat32m2_t w1 = vle32_v_f32m2(wptr1 + k, vlm2);
+                vfloat32m2_t w2 = vle32_v_f32m2(wptr2 + k, vlm2);
+                vfloat32m2_t r0 = vle32_v_f32m2(rptr, vlm2);
+
+                vs00 = vfmacc_vv_f32m2(vs00, w0, r0, vlm2);
+                vs10 = vfmacc_vv_f32m2(vs10, w1, r0, vlm2);
+                vs20 = vfmacc_vv_f32m2(vs20, w2, r0, vlm2);
+
+                r0 = vle32_v_f32m2(rptr + vecsize_aligned, vlm2);
+                vs01 = vfmacc_vv_f32m2(vs01, w0, r0, vlm2);
+                vs11 = vfmacc_vv_f32m2(vs11, w1, r0, vlm2);
+                vs21 = vfmacc_vv_f32m2(vs21, w2, r0, vlm2);
+
+                r0 = vle32_v_f32m2(rptr + vecsize_aligned*2, vlm2);
+                vs02 = vfmacc_vv_f32m2(vs02, w0, r0, vlm2);
+                vs12 = vfmacc_vv_f32m2(vs12, w1, r0, vlm2);
+                vs22 = vfmacc_vv_f32m2(vs22, w2, r0, vlm2);
+
+                r0 = vle32_v_f32m2(rptr + vecsize_aligned*3, vlm2);
+                vs03 = vfmacc_vv_f32m2(vs03, w0, r0, vlm2);
+                vs13 = vfmacc_vv_f32m2(vs13, w1, r0, vlm2);
+                vs23 = vfmacc_vv_f32m2(vs23, w2, r0, vlm2);
+            }
+            vfloat32m1_t s0, s1, s2;
+            // Start from the bias on the first pass, otherwise continue from the partial sums already in output.
+            if( initOutput )
+            {
+                s0 = vfmv_v_f_f32m1(bias0, vl);
+                s1 = vfmv_v_f_f32m1(bias1, vl);
+                s2 = vfmv_v_f_f32m1(bias2, vl);
+            }
+            else
+            {
+                s0 = vle32_v_f32m1(outptr0 + j, vl);
+                s1 = vle32_v_f32m1(outptr1 + j, vl);
+                s2 = vle32_v_f32m1(outptr2 + j, vl);
+            }
+            // compute sum of each vs; `zero` is the start value and also the (fully defined) destination operand
+            vfloat32m1_t zero = vfmv_v_f_f32m1(0, vl);
+            vfloat32m1_t temp00 = vfredsum_vs_f32m2_f32m1(zero, vs00, zero, 8);
+            vfloat32m1_t temp01 = vfredsum_vs_f32m2_f32m1(zero, vs01, zero, 8);
+            vfloat32m1_t temp02 = vfredsum_vs_f32m2_f32m1(zero, vs02, zero, 8);
+            vfloat32m1_t temp03 = vfredsum_vs_f32m2_f32m1(zero, vs03, zero, 8);
+            vfloat32m1_t temp10 = vfredsum_vs_f32m2_f32m1(zero, vs10, zero, 8);
+            vfloat32m1_t temp11 = vfredsum_vs_f32m2_f32m1(zero, vs11, zero, 8);
+            vfloat32m1_t temp12 = vfredsum_vs_f32m2_f32m1(zero, vs12, zero, 8);
+            vfloat32m1_t temp13 = vfredsum_vs_f32m2_f32m1(zero, vs13, zero, 8);
+            vfloat32m1_t temp20 = vfredsum_vs_f32m2_f32m1(zero, vs20, zero, 8);
+            vfloat32m1_t temp21 = vfredsum_vs_f32m2_f32m1(zero, vs21, zero, 8);
+            vfloat32m1_t temp22 = vfredsum_vs_f32m2_f32m1(zero, vs22, zero, 8);
+            vfloat32m1_t temp23 = vfredsum_vs_f32m2_f32m1(zero, vs23, zero, 8);
+            float32_t sum0[4], sum1[4], sum2[4];
+            sum0[0] = vfmv_f_s_f32m1_f32(temp00);
+            sum0[1] = vfmv_f_s_f32m1_f32(temp01);
+            sum0[2] = vfmv_f_s_f32m1_f32(temp02);
+            sum0[3] = vfmv_f_s_f32m1_f32(temp03);
+            sum1[0] = vfmv_f_s_f32m1_f32(temp10);
+            sum1[1] = vfmv_f_s_f32m1_f32(temp11);
+            sum1[2] = vfmv_f_s_f32m1_f32(temp12);
+            sum1[3] = vfmv_f_s_f32m1_f32(temp13);
+            sum2[0] = vfmv_f_s_f32m1_f32(temp20);
+            sum2[1] = vfmv_f_s_f32m1_f32(temp21);
+            sum2[2] = vfmv_f_s_f32m1_f32(temp22);
+            sum2[3] = vfmv_f_s_f32m1_f32(temp23);
+
+            s0 = vfadd_vv_f32m1(vle32_v_f32m1(sum0, vl), s0, vl);
+            s1 = vfadd_vv_f32m1(vle32_v_f32m1(sum1, vl), s1, vl);
+            s2 = vfadd_vv_f32m1(vle32_v_f32m1(sum2, vl), s2, vl);
+
+            // Optional activation - presumably keeps s where s > 0 and uses s * slope elsewhere; confirm vmerge operand order.
+            if( relu )
+            {
+                vbool32_t m0 = vmfgt_vf_f32m1_b32(s0, 0, vl);
+                vbool32_t m1 = vmfgt_vf_f32m1_b32(s1, 0, vl);
+                vbool32_t m2 = vmfgt_vf_f32m1_b32(s2, 0, vl);
+                s0 = vmerge_vvm_f32m1(m0, vfmul_vv_f32m1(s0, vr0, vl), s0, vl);
+                s1 = vmerge_vvm_f32m1(m1, vfmul_vv_f32m1(s1, vr1, vl), s1, vl);
+                s2 = vmerge_vvm_f32m1(m2, vfmul_vv_f32m1(s2, vr2, vl), s2, vl);
+            }
+            // Stepped-back last step: merge with what is already stored so earlier results are not overwritten.
+            if( tail )
+            {
+                s0 = vmerge_vvm_f32m1(mask, vle32_v_f32m1(outptr0 + j, vl), s0, vl);
+                s1 = vmerge_vvm_f32m1(mask, vle32_v_f32m1(outptr1 + j, vl), s1, vl);
+                s2 = vmerge_vvm_f32m1(mask, vle32_v_f32m1(outptr2 + j, vl), s2, vl);
+            }
+
+            vse32_v_f32m1(outptr0 + j, s0, vl);
+            vse32_v_f32m1(outptr1 + j, s1, vl);
+            
vse32_v_f32m1(outptr2 + j, s2, vl); + } + } +} + +/* +Example for load_deinterleave: + input: ptr[16] = {1,2,3, ... ,14,15,16} + output: a = {1, 3, 5, 7, 9, 11, 13, 15} + output: b = {2, 4, 6, 8,10, 12, 14, 16} +*/ +static inline void vfloat32m2_load_deinterleave(const float* ptr, vfloat32m2_t& a, vfloat32m2_t& b) +{ + int vl = 8; + uint32_t masks[] = {1,1,1,1,0,0,0,0}; + vuint32m2_t vm = vle32_v_u32m2(masks,vl); + vbool16_t mask01 = vmseq_vx_u32m2_b16 (vm, 0, vl); + vbool16_t mask10 = vmseq_vx_u32m2_b16 (vm, 1, vl); + vfloat32m2_t ta = vle32_v_f32m2(ptr, vl), tb = vle32_v_f32m2(ptr+8, vl); + uint idx[] = {0,2,4,6,1,3,5,7}; + uint idxa[] = {0,0,0,0,0,1,2,3}, idxb[] = {4,5,6,7,0,0,0,0}; + vuint32m2_t vidxa = vle32_v_u32m2(idxa, 8), vidxb = vle32_v_u32m2(idxb, 8); + vuint32m2_t vidx = vle32_v_u32m2(idx, 8); + vfloat32m2_t high = vfmv_v_f_f32m2(0, 8), low = vfmv_v_f_f32m2(0, 8); + high = vrgather_vv_f32m2(ta, vidx, 8); + low = vrgather_vv_f32m2(tb, vidx, 8); + a = vrgather_vv_f32m2_m(mask01, high, low, vidxa, 8); + b = vrgather_vv_f32m2_m(mask10, low, high, vidxb, 8); +} + +void fastDepthwiseConv( const float* wptr, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int dilation_h, int dilation_w, + int pad_t, int pad_l, + const float* biasptr, const float* relu, + const float* inptr_, + int height, int width, + float* outptr_, + int out_d, int outH, int outW ) +{ + int vl = 8; + const float w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], + w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], + w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; + int outW1 = std::min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); + float relu_coeff = relu ? 
relu[out_d] : 1.f, bias = biasptr[out_d]; + + for (int out_i = 0; out_i < outH; out_i++) + { + int in_i = out_i * stride_h - pad_t, out_j = 0; + const float* imgptr0 = inptr_ + in_i*width; + const float* imgptr1 = imgptr0 + dilation_h*width; + const float* imgptr2 = imgptr0 + (dilation_h*2)*width; + float out, w00 = w00_, w01 = w01_, w02 = w02_; + float w20 = w20_, w21 = w21_, w22 = w22_; + if (in_i < 0) + { + w00 = w01 = w02 = 0.f; + imgptr0 = imgptr1; + } + else if (in_i + dilation_h*(kernel_h-1) >= height) + { + w20 = w21 = w22 = 0.f; + imgptr2 = imgptr1; + } + float* outptr = outptr_ + out_i*outW; + if (pad_l > 0) + { + out = imgptr0[0]*w01 + imgptr0[dilation_w]*w02 + + imgptr1[0]*w11 + imgptr1[dilation_w]*w12 + + imgptr2[0]*w21 + imgptr2[dilation_w]*w22 + bias; + if (relu) + out = out > 0.f ? out : out*relu_coeff; + outptr[0] = out; + out_j = 1; + } + + if (stride_w == 1 || (stride_w == 2 && dilation_w == 1)) + { + const int VECSZ = 8; + vfloat32m2_t vw00 = vfmv_v_f_f32m2(w00, vl), vw01 = vfmv_v_f_f32m2(w01, vl), vw02 = vfmv_v_f_f32m2(w02, vl), + vw10 = vfmv_v_f_f32m2(w10, vl), vw11 = vfmv_v_f_f32m2(w11, vl), vw12 = vfmv_v_f_f32m2(w12, vl), + vw20 = vfmv_v_f_f32m2(w20, vl), vw21 = vfmv_v_f_f32m2(w21, vl), vw22 = vfmv_v_f_f32m2(w22, vl); + vfloat32m2_t vbias = vfmv_v_f_f32m2(bias, vl), vrc = vfmv_v_f_f32m2(relu_coeff, vl); + + if( stride_w == 1 ) + for( ; out_j < outW1; out_j += VECSZ ) + { + if (out_j + VECSZ > outW1 && out_j > pad_l) + out_j = outW1 - VECSZ; + int in_j = out_j * stride_w - pad_l; + vfloat32m2_t v00 = vle32_v_f32m2(imgptr0 + in_j, vl), + v01 = vle32_v_f32m2(imgptr0 + in_j + dilation_w, vl), + v02 = vle32_v_f32m2(imgptr0 + in_j + dilation_w*2, vl), + v10 = vle32_v_f32m2(imgptr1 + in_j, vl), + v11 = vle32_v_f32m2(imgptr1 + in_j + dilation_w, vl), + v12 = vle32_v_f32m2(imgptr1 + in_j + dilation_w*2, vl), + v20 = vle32_v_f32m2(imgptr2 + in_j, vl), + v21 = vle32_v_f32m2(imgptr2 + in_j + dilation_w, vl), + v22 = vle32_v_f32m2(imgptr2 + in_j + 
dilation_w*2, vl); + + vfloat32m2_t vout0 = vfmacc_vv_f32m2(vbias, v00, vw00, vl); + vfloat32m2_t vout1 = vfmul_vv_f32m2(v01, vw01, vl); + vfloat32m2_t vout2 = vfmul_vv_f32m2(v02, vw02, vl); + + vout0 = vfmacc_vv_f32m2(vout0, v10, vw10, vl); + vout1 = vfmacc_vv_f32m2(vout1, v11, vw11, vl); + vout2 = vfmacc_vv_f32m2(vout2, v12, vw12, vl); + + vout0 = vfmacc_vv_f32m2(vout0, v20, vw20, vl); + vout1 = vfmacc_vv_f32m2(vout1, v21, vw21, vl); + vout2 = vfmacc_vv_f32m2(vout2, v22, vw22, vl); + + vout0 = vfadd_vv_f32m2(vfadd_vv_f32m2(vout0, vout1, vl), vout2, vl); + if (relu) + { + vbool16_t m = vmfgt_vf_f32m2_b16(vout0, 0, vl); + vout0 = vmerge_vvm_f32m2(m, vfmul_vv_f32m2(vout0, vrc, vl), vout0, vl); + } + vse32_v_f32m2(outptr + out_j, vout0, vl); + } + else + for( ; out_j < outW1; out_j += VECSZ ) + { + if (out_j + VECSZ > outW1 && out_j > pad_l) + out_j = outW1 - VECSZ; + int in_j = out_j * stride_w - pad_l; + vfloat32m2_t v00, v01, v02, v10, v11, v12, v20, v21, v22, unused; + vfloat32m2_load_deinterleave(imgptr0 + in_j, v00, v01); + vfloat32m2_load_deinterleave(imgptr0 + in_j + 2, v02, unused); + vfloat32m2_load_deinterleave(imgptr1 + in_j, v10, v11); + vfloat32m2_load_deinterleave(imgptr1 + in_j + 2, v12, unused); + vfloat32m2_load_deinterleave(imgptr2 + in_j, v20, v21); + vfloat32m2_load_deinterleave(imgptr2 + in_j + 2, v22, unused); + + vfloat32m2_t vout0 = vfmacc_vv_f32m2(vbias, v00, vw00, vl); + vfloat32m2_t vout1 = vfmul_vv_f32m2(v01, vw01, vl); + vfloat32m2_t vout2 = vfmul_vv_f32m2(v02, vw02, vl); + + vout0 = vfmacc_vv_f32m2(vout0, v10, vw10, vl); + vout1 = vfmacc_vv_f32m2(vout1, v11, vw11, vl); + vout2 = vfmacc_vv_f32m2(vout2, v12, vw12, vl); + + vout0 = vfmacc_vv_f32m2(vout0, v20, vw20, vl); + vout1 = vfmacc_vv_f32m2(vout1, v21, vw21, vl); + vout2 = vfmacc_vv_f32m2(vout2, v22, vw22, vl); + + vout0 = vfadd_vv_f32m2(vfadd_vv_f32m2(vout0, vout1, vl), vout2, vl); + if (relu) + { + vbool16_t m = vmfgt_vf_f32m2_b16(vout0, 0, vl); + vout0 = vmerge_vvm_f32m2(m, 
vfmul_vv_f32m2(vout0, vrc, vl), vout0, vl); + } + vse32_v_f32m2(outptr + out_j, vout0, vl); + } + } + + for (; out_j < outW1; out_j++) + { + int in_j = out_j * stride_w - pad_l; + out = imgptr0[in_j]*w00 + imgptr0[in_j + dilation_w]*w01 + imgptr0[in_j + dilation_w*2]*w02 + + imgptr1[in_j]*w10 + imgptr1[in_j + dilation_w]*w11 + imgptr1[in_j + dilation_w*2]*w12 + + imgptr2[in_j]*w20 + imgptr2[in_j + dilation_w]*w21 + imgptr2[in_j + dilation_w*2]*w22 + bias; + if (relu) + out = out > 0.f ? out : out*relu_coeff; + outptr[out_j] = out; + } + + for (; out_j < outW; out_j++ ) + { + int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; + float s0 = 1.f, s1 = 1.f, s2 = 1.f; + if (in_j0 >= width) + { + in_j0 = 0; + s0 = 0.f; + } + if (in_j1 >= width) + { + in_j1 = 0; + s1 = 0.f; + } + if (in_j2 >= width) + { + in_j2 = 0; + s2 = 0.f; + } + out = imgptr0[in_j0]*w00*s0 + imgptr0[in_j1]*w01*s1 + imgptr0[in_j2]*w02*s2 + + imgptr1[in_j0]*w10*s0 + imgptr1[in_j1]*w11*s1 + imgptr1[in_j2]*w12*s2 + + imgptr2[in_j0]*w20*s0 + imgptr2[in_j1]*w21*s1 + imgptr2[in_j2]*w22*s2 + bias; + if (relu) + out = out > 0.f ? 
out : out*relu_coeff; + outptr[out_j] = out; + } + } +} + +#endif // CV_RVV + CV_CPU_OPTIMIZATION_NAMESPACE_END }} // namespace From 7c73e28a6d5ceae9ffac5a3f6c771a8efb7bb35e Mon Sep 17 00:00:00 2001 From: Hamdi Sahloul Date: Mon, 26 Jul 2021 19:03:13 +0900 Subject: [PATCH 107/128] Improves FLANN's heap allocations by a memory pool --- modules/flann/include/opencv2/flann/heap.h | 167 +++++++++++++----- .../flann/hierarchical_clustering_index.h | 6 +- .../include/opencv2/flann/kdtree_index.h | 9 +- .../include/opencv2/flann/kmeans_index.h | 8 +- 4 files changed, 130 insertions(+), 60 deletions(-) diff --git a/modules/flann/include/opencv2/flann/heap.h b/modules/flann/include/opencv2/flann/heap.h index ee1c682cfe98..8cace2044973 100644 --- a/modules/flann/include/opencv2/flann/heap.h +++ b/modules/flann/include/opencv2/flann/heap.h @@ -36,9 +36,21 @@ #include #include +#include + namespace cvflann { +// TODO: Define x > y operator and use std::greater instead +template +struct greater +{ + bool operator()(const T& x, const T& y) const + { + return y < x; + } +}; + /** * Priority Queue Implementation * @@ -49,117 +61,180 @@ namespace cvflann template class Heap { - /** * Storage array for the heap. * Type T must be comparable. */ std::vector heap; - int length; - +public: /** - * Number of element in the heap + * \brief Constructs a heap with a pre-allocated capacity + * + * \param capacity heap maximum capacity */ - int count; - - + Heap(const int capacity) + { + reserve(capacity); + } -public: /** - * Constructor. 
+ * \brief Move-constructs a heap from an external vector * - * Params: - * sz = heap size + * \param vec external vector */ + Heap(std::vector&& vec) + : heap(std::move(vec)) + { + std::make_heap(heap.begin(), heap.end(), greater()); + } - Heap(int sz) + /** + * + * \returns heap size + */ + int size() const { - length = sz; - heap.reserve(length); - count = 0; + return (int)heap.size(); } /** * - * Returns: heap size + * \returns heap capacity */ - int size() + int capacity() const { - return count; + return (int)heap.capacity(); } /** - * Tests if the heap is empty + * \brief Tests if the heap is empty * - * Returns: true is heap empty, false otherwise + * \returns true is heap empty, false otherwise */ bool empty() { - return size()==0; + return heap.empty(); } /** - * Clears the heap. + * \brief Clears the heap. */ void clear() { heap.clear(); - count = 0; } - struct CompareT + /** + * \brief Sets the heap maximum capacity. + * + * \param capacity heap maximum capacity + */ + void reserve(const int capacity) { - bool operator()(const T& t_1, const T& t_2) const - { - return t_2 < t_1; - } - }; + heap.reserve(capacity); + } /** - * Insert a new element in the heap. + * \brief Inserts a new element in the heap. * * We select the next empty leaf node, and then keep moving any larger * parents down until the right location is found to store this element. * - * Params: - * value = the new element to be inserted in the heap + * \param value the new element to be inserted in the heap */ void insert(T value) { /* If heap is full, then return without adding this element. */ - if (count == length) { + if (size() == capacity()) { return; } heap.push_back(value); - static CompareT compareT; - std::push_heap(heap.begin(), heap.end(), compareT); - ++count; + std::push_heap(heap.begin(), heap.end(), greater()); } - - /** - * Returns the node of minimum value from the heap (top of the heap). + * \brief Returns the node of minimum value from the heap (top of the heap). 
* - * Params: - * value = out parameter used to return the min element - * Returns: false if heap empty + * \param[out] value parameter used to return the min element + * \returns false if heap empty */ bool popMin(T& value) { - if (count == 0) { + if (empty()) { return false; } value = heap[0]; - static CompareT compareT; - std::pop_heap(heap.begin(), heap.end(), compareT); + std::pop_heap(heap.begin(), heap.end(), greater()); heap.pop_back(); - --count; return true; /* Return old last node. */ } + + /** + * \brief Returns a shared heap for the given memory pool ID. + * + * It constructs the heap if it does not already exist. + * + * \param poolId a user-chosen hashable ID for identifying the heap. + * For thread-safe operations, using current thread ID is a good choice. + * \param capacity heap maximum capacity + * \param iterThreshold remove heaps that were not reused for more than specified iterations count + * if iterThreshold value is less than 2, it will be internally adjusted to twice the number of CPU threads + * \returns pointer to the heap + */ + template + static cv::Ptr> getPooledInstance( + const HashableT& poolId, const int capacity, int iterThreshold = 0) + { + static cv::Mutex mutex; + const cv::AutoLock lock(mutex); + + struct HeapMapValueType { + cv::Ptr> heapPtr; + int iterCounter; + }; + typedef std::unordered_map HeapMapType; + + static HeapMapType heapsPool; + typename HeapMapType::iterator heapIt = heapsPool.find(poolId); + + if (heapIt == heapsPool.end()) + { + // Construct the heap as it does not already exist + HeapMapValueType heapAndTimePair = {cv::makePtr>(capacity), 0}; + const std::pair& emplaceResult = heapsPool.emplace(poolId, std::move(heapAndTimePair)); + CV_CheckEQ(static_cast(emplaceResult.second), 1, "Failed to insert the heap into its memory pool"); + heapIt = emplaceResult.first; + } + else + { + CV_CheckEQ(heapIt->second.heapPtr.use_count(), 1, "Cannot modify a heap that is currently accessed by another caller");
heapIt->second.heapPtr->clear(); + heapIt->second.heapPtr->reserve(capacity); + heapIt->second.iterCounter = 0; + } + + if (iterThreshold <= 1) { + iterThreshold = 2 * cv::getNumThreads(); + } + + // Remove heaps that were not reused for more than given iterThreshold + typename HeapMapType::iterator cleanupIt = heapsPool.begin(); + while (cleanupIt != heapsPool.end()) + { + if (cleanupIt->second.iterCounter++ > iterThreshold) + { + CV_Assert(cleanupIt != heapIt); + cleanupIt = heapsPool.erase(cleanupIt); + continue; + } + ++cleanupIt; + } + + return heapIt->second.heapPtr; + } }; } diff --git a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h index 2d39d4f0f654..60662e7714b3 100644 --- a/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h +++ b/modules/flann/include/opencv2/flann/hierarchical_clustering_index.h @@ -532,7 +532,7 @@ class HierarchicalClusteringIndex : public NNIndex const bool explore_all_trees = get_param(searchParams,"explore_all_trees",false); // Priority queue storing intermediate branches in the best-bin-first search - Heap* heap = new Heap((int)size_); + const cv::Ptr>& heap = Heap::getPooledInstance(cv::utils::getThreadID(), (int)size_); std::vector checked(size_,false); int checks = 0; @@ -548,8 +548,6 @@ class HierarchicalClusteringIndex : public NNIndex findNN(node, result, vec, checks, maxChecks, heap, checked, false); } - delete heap; - CV_Assert(result.full()); } @@ -742,7 +740,7 @@ class HierarchicalClusteringIndex : public NNIndex void findNN(NodePtr node, ResultSet& result, const ElementType* vec, int& checks, int maxChecks, - Heap* heap, std::vector& checked, bool explore_all_trees = false) + const cv::Ptr>& heap, std::vector& checked, bool explore_all_trees = false) { if (node->childs==NULL) { if (!explore_all_trees && (checks>=maxChecks) && result.full()) { diff --git a/modules/flann/include/opencv2/flann/kdtree_index.h 
b/modules/flann/include/opencv2/flann/kdtree_index.h index 603fdbd421a5..8245f7db796e 100644 --- a/modules/flann/include/opencv2/flann/kdtree_index.h +++ b/modules/flann/include/opencv2/flann/kdtree_index.h @@ -445,11 +445,12 @@ class KDTreeIndex : public NNIndex { int i; BranchSt branch; - int checkCount = 0; - Heap* heap = new Heap((int)size_); DynamicBitset checked(size_); + // Priority queue storing intermediate branches in the best-bin-first search + const cv::Ptr>& heap = Heap::getPooledInstance(cv::utils::getThreadID(), (int)size_); + /* Search once through each tree down to root. */ for (i = 0; i < trees_; ++i) { searchLevel(result, vec, tree_roots_[i], 0, checkCount, maxCheck, @@ -464,8 +465,6 @@ class KDTreeIndex : public NNIndex epsError, heap, checked, false); } - delete heap; - CV_Assert(result.full()); } @@ -476,7 +475,7 @@ class KDTreeIndex : public NNIndex * at least "mindistsq". */ void searchLevel(ResultSet& result_set, const ElementType* vec, NodePtr node, DistanceType mindist, int& checkCount, int maxCheck, - float epsError, Heap* heap, DynamicBitset& checked, bool explore_all_trees = false) + float epsError, const cv::Ptr>& heap, DynamicBitset& checked, bool explore_all_trees = false) { if (result_set.worstDist() } else { // Priority queue storing intermediate branches in the best-bin-first search - Heap* heap = new Heap((int)size_); + const cv::Ptr>& heap = Heap::getPooledInstance(cv::utils::getThreadID(), (int)size_); int checks = 0; for (int i=0; i KMeansNodePtr node = branch.node; findNN(node, result, vec, checks, maxChecks, heap); } - delete heap; - CV_Assert(result.full()); } } @@ -1529,7 +1527,7 @@ class KMeansIndex : public NNIndex void findNN(KMeansNodePtr node, ResultSet& result, const ElementType* vec, int& checks, int maxChecks, - Heap* heap) + const cv::Ptr>& heap) { // Ignore those clusters that are too far away { @@ -1577,7 +1575,7 @@ class KMeansIndex : public NNIndex * distances = array with the distances to each child node. 
* Returns: */ - int exploreNodeBranches(KMeansNodePtr node, const ElementType* q, DistanceType* domain_distances, Heap* heap) + int exploreNodeBranches(KMeansNodePtr node, const ElementType* q, DistanceType* domain_distances, const cv::Ptr>& heap) { int best_index = 0; From 9d61c181434a6903fa15e4915b9fffed65ebcae8 Mon Sep 17 00:00:00 2001 From: utibenkei Date: Sun, 8 Aug 2021 01:08:31 +0900 Subject: [PATCH 108/128] fix testSaveLoad --- modules/ml/misc/java/test/MLTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ml/misc/java/test/MLTest.java b/modules/ml/misc/java/test/MLTest.java index 2b08543a843b..504805dffa97 100644 --- a/modules/ml/misc/java/test/MLTest.java +++ b/modules/ml/misc/java/test/MLTest.java @@ -36,7 +36,7 @@ public void testSaveLoad() { String filename = OpenCVTestRunner.getTempFileName("yml"); saved.save(filename); SVM loaded = SVM.load(filename); - assertTrue(saved.isTrained()); + assertTrue(loaded.isTrained()); } } From 8199967b3189fb9aa711afc4e815cd13f312b7ae Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Wed, 11 Aug 2021 19:08:52 +0300 Subject: [PATCH 109/128] fix choose minimum angle in rotatingCalipers --- modules/imgproc/src/rotcalipers.cpp | 55 ++++++++++++++++++----------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/modules/imgproc/src/rotcalipers.cpp b/modules/imgproc/src/rotcalipers.cpp index 527f71a2477f..e3d81c7e0c0f 100644 --- a/modules/imgproc/src/rotcalipers.cpp +++ b/modules/imgproc/src/rotcalipers.cpp @@ -88,6 +88,32 @@ enum { CALIPERS_MAXHEIGHT=0, CALIPERS_MINAREARECT=1, CALIPERS_MAXDIST=2 }; // Notes: //F*/ +static void rotate90CCW(const cv::Point2f& in, cv::Point2f &out) +{ + out.x = -in.y; + out.y = in.x; +} + +static void rotate90CW(const cv::Point2f& in, cv::Point2f &out) +{ + out.x = in.y; + out.y = -in.x; +} + +static void rotate180(const cv::Point2f& in, cv::Point2f &out) +{ + out.x = -in.x; + out.y = -in.y; +} + +/* return true if first vector is to the right 
(clockwise) of the second */ +static bool firstVecIsRight(const cv::Point2f& vec1, const cv::Point2f &vec2) +{ + cv::Point2f tmp; + rotate90CW(vec1, tmp); + return tmp.x * vec2.x + tmp.y * vec2.y < 0; +} + /* we will use usual cartesian coordinates */ static void rotatingCalipers( const Point2f* points, int n, int mode, float* out ) { @@ -100,6 +126,7 @@ static void rotatingCalipers( const Point2f* points, int n, int mode, float* out Point2f* vect = (Point2f*)(inv_vect_length + n); int left = 0, bottom = 0, right = 0, top = 0; int seq[4] = { -1, -1, -1, -1 }; + Point2f rot_vect[4]; /* rotating calipers sides will always have coordinates (a,b) (-b,a) (-a,-b) (b, -a) @@ -179,32 +206,18 @@ static void rotatingCalipers( const Point2f* points, int n, int mode, float* out /* all of edges will be checked while rotating calipers by 90 degrees */ for( k = 0; k < n; k++ ) { - /* sinus of minimal angle */ - /*float sinus;*/ - - /* compute cosine of angle between calipers side and polygon edge */ - /* dp - dot product */ - float dp[4] = { - +base_a * vect[seq[0]].x + base_b * vect[seq[0]].y, - -base_b * vect[seq[1]].x + base_a * vect[seq[1]].y, - -base_a * vect[seq[2]].x - base_b * vect[seq[2]].y, - +base_b * vect[seq[3]].x - base_a * vect[seq[3]].y, - }; - - float maxcos = dp[0] * inv_vect_length[seq[0]]; - /* number of calipers edges, that has minimal angle with edge */ int main_element = 0; - /* choose minimal angle */ - for ( i = 1; i < 4; ++i ) + /* choose minimum angle between calipers side and polygon edge by dot product sign */ + rot_vect[0] = vect[seq[0]]; + rotate90CW(vect[seq[1]], rot_vect[1]); + rotate180(vect[seq[2]], rot_vect[2]); + rotate90CCW(vect[seq[3]], rot_vect[3]); + for (i = 1; i < 4; i++) { - float cosalpha = dp[i] * inv_vect_length[seq[i]]; - if (cosalpha > maxcos) - { + if (firstVecIsRight(rot_vect[i], rot_vect[main_element])) main_element = i; - maxcos = cosalpha; - } } /*rotate calipers*/ From 4d63a89fa6611a69c1999328375f39afde1b1696 Mon Sep 17 
00:00:00 2001 From: Daniel Playfair Cal Date: Thu, 12 Aug 2021 03:58:08 +1000 Subject: [PATCH 110/128] Merge pull request #20536 from hedgepigdaniel:fix/ocl-context-create-ownership docs(core/ocl): clarify ownership of arguments passed into OpenCL related functions * docs(core/ocl): clarify ownership in OpenCLExecutionContext::create Although it is technically true that OpenCLExecutionContext::create calls `clRetainContext` on its context argument, it is misleading because it does not increase the reference count overall. Clarify that the ownership of one reference of the passed context and device is taken. * docs(core/ocl): document ownership transfer in ocl::Device::fromHandle --- modules/core/include/opencv2/core/ocl.hpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index f9cc9e019a03..03666df5176b 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -235,7 +235,11 @@ class CV_EXPORTS_W_SIMPLE Device /** * @param d OpenCL handle (cl_device_id). clRetainDevice() is called on success. - */ + * + * @note Ownership of the passed device is passed to OpenCV on success. + * The caller should additionally call `clRetainDevice` on it if it intends + * to continue using the device. + */ static Device fromHandle(void* d); struct Impl; @@ -826,11 +830,13 @@ class CV_EXPORTS_W OpenCLExecutionContext OpenCLExecutionContext cloneWithNewQueue() const; /** @brief Creates OpenCL execution context - * OpenCV will check if available OpenCL platform has platformName name, then assign context to - * OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and - * new command queue will be created. + * OpenCV will check if available OpenCL platform has platformName name, + * then assign context to OpenCV. 
+ * The deviceID device will be used as target device and a new command queue will be created. * - * @note Lifetime of passed handles is transferred to OpenCV wrappers on success + * @note On success, ownership of one reference of the context and device is taken. + * The caller should additionally call `clRetainContext` and/or `clRetainDevice` + * to increase the reference count if it wishes to continue using them. * * @param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime * @param platformID ID of platform attached context was created for (cl_platform_id) From 25cd7c7c509826e42214d00ca4e73ac096abd382 Mon Sep 17 00:00:00 2001 From: AleksandrPanov Date: Thu, 12 Aug 2021 14:40:40 +0300 Subject: [PATCH 111/128] add note about Python's dsize to doc --- modules/imgproc/include/opencv2/imgproc.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index 5e66b14e3b2b..f7583c19267a 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -2223,7 +2223,7 @@ enlarge an image, it will generally look best with c#INTER_CUBIC (slow) or #INTE @param src input image. @param dst output image; it has the size dsize (when it is non-zero) or the size computed from src.size(), fx, and fy; the type of dst is the same as of src. -@param dsize output image size; if it equals zero, it is computed as: +@param dsize output image size; if it equals zero (`None` in Python), it is computed as: \f[\texttt{dsize = Size(round(fx*src.cols), round(fy*src.rows))}\f] Either dsize or both fx and fy must be non-zero. 
@param fx scale factor along the horizontal axis; when it equals 0, it is computed as From 955cf35d5f890b85baa12b254a325b98880813d0 Mon Sep 17 00:00:00 2001 From: JIANG Yichen Date: Mon, 9 Aug 2021 13:46:11 +0800 Subject: [PATCH 112/128] Implement ctc prefix beam search decode for TextRecognitionModel. The algorithm is based on Hannun's paper: First-Pass Large Vocabulary Continuous Speech Recognition using Bi-Directional Recurrent DNNs --- .../dnn_text_spotting.markdown | 5 + modules/dnn/include/opencv2/dnn/dnn.hpp | 13 +- modules/dnn/src/math_utils.hpp | 83 ++++++ modules/dnn/src/model.cpp | 248 +++++++++++++++--- modules/dnn/test/test_model.cpp | 19 ++ 5 files changed, 332 insertions(+), 36 deletions(-) create mode 100644 modules/dnn/src/math_utils.hpp diff --git a/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown b/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown index 5f28b6ce7a16..b0be2627b291 100644 --- a/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown +++ b/doc/tutorials/dnn/dnn_text_spotting/dnn_text_spotting.markdown @@ -26,6 +26,11 @@ Before recognition, you should `setVocabulary` and `setDecodeType`. - `T` is the sequence length - `B` is the batch size (only support `B=1` in inference) - and `Dim` is the length of vocabulary +1('Blank' of CTC is at the index=0 of Dim). +- "CTC-prefix-beam-search", the output of the text recognition model should be a probability matrix same with "CTC-greedy". + - The algorithm is proposed at Hannun's [paper](https://arxiv.org/abs/1408.2873). + - `setDecodeOptsCTCPrefixBeamSearch` could be used to control the beam size in search step. + - To further optimize for big vocabulary, a new option `vocPruneSize` is introduced to avoid iterating over the whole vocabulary + but only the number of `vocPruneSize` tokens with top probability. @ref cv::dnn::TextRecognitionModel::recognize() is the main function for text recognition.
- The input image should be a cropped text image or an image with `roiRects` diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index 255b41de88a5..a498039f6571 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -1373,7 +1373,9 @@ class CV_EXPORTS_W_SIMPLE TextRecognitionModel : public Model /** * @brief Set the decoding method of translating the network output into string - * @param[in] decodeType The decoding method of translating the network output into string: {'CTC-greedy': greedy decoding for the output of CTC-based methods} + * @param[in] decodeType The decoding method of translating the network output into string, currently supported type: + * - `"CTC-greedy"` greedy decoding for the output of CTC-based methods + * - `"CTC-prefix-beam-search"` Prefix beam search decoding for the output of CTC-based methods */ CV_WRAP TextRecognitionModel& setDecodeType(const std::string& decodeType); @@ -1385,6 +1387,15 @@ class CV_EXPORTS_W_SIMPLE TextRecognitionModel : public Model CV_WRAP const std::string& getDecodeType() const; + /** + * @brief Set the decoding method options for `"CTC-prefix-beam-search"` decode usage + * @param[in] beamSize Beam size for search + * @param[in] vocPruneSize Parameter to optimize big vocabulary search, + * only take top @p vocPruneSize tokens in each search step, @p vocPruneSize <= 0 stands for disable this prune. + */ + CV_WRAP + TextRecognitionModel& setDecodeOptsCTCPrefixBeamSearch(int beamSize, int vocPruneSize = 0); + /** * @brief Set the vocabulary for recognition. * @param[in] vocabulary the associated vocabulary of the network. diff --git a/modules/dnn/src/math_utils.hpp b/modules/dnn/src/math_utils.hpp new file mode 100644 index 000000000000..19ee474c7365 --- /dev/null +++ b/modules/dnn/src/math_utils.hpp @@ -0,0 +1,83 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +// Code is borrowed from https://github.com/kaldi-asr/kaldi/blob/master/src/base/kaldi-math.h + +// base/kaldi-math.h + +// Copyright 2009-2011 Ondrej Glembek; Microsoft Corporation; Yanmin Qian; +// Jan Silovsky; Saarland University +// +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#ifndef __OPENCV_DNN_MATH_UTILS_HPP__ +#define __OPENCV_DNN_MATH_UTILS_HPP__ + +#ifdef OS_QNX +#include +#else +#include +#endif + +#include + +#ifndef FLT_EPSILON +#define FLT_EPSILON 1.19209290e-7f +#endif + +namespace cv { namespace dnn { + +const float kNegativeInfinity = -std::numeric_limits::infinity(); + +const float kMinLogDiffFloat = std::log(FLT_EPSILON); + +#if !defined(_MSC_VER) || (_MSC_VER >= 1700) +inline float Log1p(float x) { return log1pf(x); } +#else +inline float Log1p(float x) { + const float cutoff = 1.0e-07; + if (x < cutoff) + return x - 2 * x * x; + else + return Log(1.0 + x); +} +#endif + +inline float Exp(float x) { return expf(x); } + +inline float LogAdd(float x, float y) { + float diff; + if (x < y) { + diff = x - y; + x = y; + } else { + diff = y - x; + } + // diff is negative. x is now the larger one. 
+ + if (diff >= kMinLogDiffFloat) { + float res; + res = x + Log1p(Exp(diff)); + return res; + } else { + return x; // return the larger one. + } +} + +}} // namespace + +#endif // __OPENCV_DNN_MATH_UTILS_HPP__ diff --git a/modules/dnn/src/model.cpp b/modules/dnn/src/model.cpp index 0af8223a7feb..bc8709d22edc 100644 --- a/modules/dnn/src/model.cpp +++ b/modules/dnn/src/model.cpp @@ -3,8 +3,10 @@ // of this distribution and at http://opencv.org/license.html. #include "precomp.hpp" +#include "math_utils.hpp" #include #include +#include #include #include @@ -552,6 +554,9 @@ struct TextRecognitionModel_Impl : public Model::Impl std::string decodeType; std::vector vocabulary; + int beamSize = 10; + int vocPruneSize = 0; + TextRecognitionModel_Impl() { CV_TRACE_FUNCTION(); @@ -575,6 +580,13 @@ struct TextRecognitionModel_Impl : public Model::Impl decodeType = type; } + inline + void setDecodeOptsCTCPrefixBeamSearch(int beam, int vocPrune) + { + beamSize = beam; + vocPruneSize = vocPrune; + } + virtual std::string decode(const Mat& prediction) { @@ -586,53 +598,213 @@ struct TextRecognitionModel_Impl : public Model::Impl CV_Error(Error::StsBadArg, "TextRecognitionModel: vocabulary is not specified"); std::string decodeSeq; - if (decodeType == "CTC-greedy") + if (decodeType == "CTC-greedy") { + decodeSeq = ctcGreedyDecode(prediction); + } else if (decodeType == "CTC-prefix-beam-search") { + decodeSeq = ctcPrefixBeamSearchDecode(prediction); + } else if (decodeType.length() == 0) { + CV_Error(Error::StsBadArg, "Please set decodeType"); + } else { + CV_Error_(Error::StsBadArg, ("Unsupported decodeType: %s", decodeType.c_str())); + } + + return decodeSeq; + } + + virtual + std::string ctcGreedyDecode(const Mat& prediction) + { + std::string decodeSeq; + CV_CheckEQ(prediction.dims, 3, ""); + CV_CheckType(prediction.type(), CV_32FC1, ""); + const int vocLength = (int)(vocabulary.size()); + CV_CheckLE(prediction.size[1], vocLength, ""); + bool ctcFlag = true; + int lastLoc = 0; 
+ for (int i = 0; i < prediction.size[0]; i++) { - CV_CheckEQ(prediction.dims, 3, ""); - CV_CheckType(prediction.type(), CV_32FC1, ""); - const int vocLength = (int)(vocabulary.size()); - CV_CheckLE(prediction.size[1], vocLength, ""); - bool ctcFlag = true; - int lastLoc = 0; - for (int i = 0; i < prediction.size[0]; i++) + const float* pred = prediction.ptr(i); + int maxLoc = 0; + float maxScore = pred[0]; + for (int j = 1; j < vocLength + 1; j++) { - const float* pred = prediction.ptr(i); - int maxLoc = 0; - float maxScore = pred[0]; - for (int j = 1; j < vocLength + 1; j++) + float score = pred[j]; + if (maxScore < score) { - float score = pred[j]; - if (maxScore < score) - { - maxScore = score; - maxLoc = j; - } + maxScore = score; + maxLoc = j; } + } - if (maxLoc > 0) - { - std::string currentChar = vocabulary.at(maxLoc - 1); - if (maxLoc != lastLoc || ctcFlag) - { - lastLoc = maxLoc; - decodeSeq += currentChar; - ctcFlag = false; - } - } - else + if (maxLoc > 0) + { + std::string currentChar = vocabulary.at(maxLoc - 1); + if (maxLoc != lastLoc || ctcFlag) { - ctcFlag = true; + lastLoc = maxLoc; + decodeSeq += currentChar; + ctcFlag = false; } } - } else if (decodeType.length() == 0) { - CV_Error(Error::StsBadArg, "Please set decodeType"); - } else { - CV_Error_(Error::StsBadArg, ("Unsupported decodeType: %s", decodeType.c_str())); + else + { + ctcFlag = true; + } } - return decodeSeq; } + struct PrefixScore + { + // blank ending score + float pB; + // none blank ending score + float pNB; + + PrefixScore() : pB(kNegativeInfinity), pNB(kNegativeInfinity) + { + + } + PrefixScore(float pB, float pNB) : pB(pB), pNB(pNB) + { + + } + }; + + struct PrefixHash + { + size_t operator()(const std::vector& prefix) const + { + // BKDR hash + unsigned int seed = 131; + size_t hash = 0; + for (size_t i = 0; i < prefix.size(); i++) + { + hash = hash * seed + prefix[i]; + } + return hash; + } + }; + + static + std::vector> TopK( + const float* predictions, int length, int k) + 
{ + std::vector> results; + // No prune. + if (k <= 0) + { + for (int i = 0; i < length; ++i) + { + results.emplace_back(predictions[i], i); + } + return results; + } + + for (int i = 0; i < k; ++i) + { + results.emplace_back(predictions[i], i); + } + std::make_heap(results.begin(), results.end(), std::greater>{}); + + for (int i = k; i < length; ++i) + { + if (predictions[i] > results.front().first) + { + std::pop_heap(results.begin(), results.end(), std::greater>{}); + results.pop_back(); + results.emplace_back(predictions[i], i); + std::push_heap(results.begin(), results.end(), std::greater>{}); + } + } + return results; + } + + static inline + bool PrefixScoreCompare( + const std::pair, PrefixScore>& a, + const std::pair, PrefixScore>& b) + { + float probA = LogAdd(a.second.pB, a.second.pNB); + float probB = LogAdd(b.second.pB, b.second.pNB); + return probA > probB; + } + + virtual + std::string ctcPrefixBeamSearchDecode(const Mat& prediction) { + // CTC prefix beam seach decode. + // For more detail, refer to: + // https://distill.pub/2017/ctc/#inference + // https://gist.github.com/awni/56369a90d03953e370f3964c826ed4b0i + using Beam = std::vector, PrefixScore>>; + using BeamInDict = std::unordered_map, PrefixScore, PrefixHash>; + + CV_CheckType(prediction.type(), CV_32FC1, ""); + CV_CheckEQ(prediction.dims, 3, ""); + CV_CheckEQ(prediction.size[1], 1, ""); + CV_CheckEQ(prediction.size[2], (int)vocabulary.size() + 1, ""); // Length add 1 for ctc blank + + std::string decodeSeq; + Beam beam = {std::make_pair(std::vector(), PrefixScore(0.0, kNegativeInfinity))}; + for (int i = 0; i < prediction.size[0]; i++) + { + // Loop over time + BeamInDict nextBeam; + const float* pred = prediction.ptr(i); + std::vector> topkPreds = + TopK(pred, vocabulary.size() + 1, vocPruneSize); + for (const auto& each : topkPreds) + { + // Loop over vocabulary + float prob = each.first; + int token = each.second; + for (const auto& it : beam) + { + const std::vector& prefix = it.first; 
+ const PrefixScore& prefixScore = it.second; + if (token == 0) // 0 stands for ctc blank + { + PrefixScore& nextScore = nextBeam[prefix]; + nextScore.pB = LogAdd(nextScore.pB, + LogAdd(prefixScore.pB + prob, prefixScore.pNB + prob)); + continue; + } + + std::vector nPrefix(prefix); + nPrefix.push_back(token); + PrefixScore& nextScore = nextBeam[nPrefix]; + if (prefix.size() > 0 && token == prefix.back()) + { + nextScore.pNB = LogAdd(nextScore.pNB, prefixScore.pB + prob); + PrefixScore& mScore = nextBeam[prefix]; + mScore.pNB = LogAdd(mScore.pNB, prefixScore.pNB + prob); + } + else + { + nextScore.pNB = LogAdd(nextScore.pNB, + LogAdd(prefixScore.pB + prob, prefixScore.pNB + prob)); + } + } + } + // Beam prune + Beam newBeam(nextBeam.begin(), nextBeam.end()); + int newBeamSize = std::min(static_cast(newBeam.size()), beamSize); + std::nth_element(newBeam.begin(), newBeam.begin() + newBeamSize, + newBeam.end(), PrefixScoreCompare); + newBeam.resize(newBeamSize); + std::sort(newBeam.begin(), newBeam.end(), PrefixScoreCompare); + beam = std::move(newBeam); + } + + CV_Assert(!beam.empty()); + for (int token : beam[0].first) + { + CV_Check(token, token > 0 && token <= vocabulary.size(), ""); + decodeSeq += vocabulary.at(token - 1); + } + return decodeSeq; + } + virtual std::string recognize(InputArray frame) { @@ -698,6 +870,12 @@ const std::string& TextRecognitionModel::getDecodeType() const return TextRecognitionModel_Impl::from(impl).decodeType; } +TextRecognitionModel& TextRecognitionModel::setDecodeOptsCTCPrefixBeamSearch(int beamSize, int vocPruneSize) +{ + TextRecognitionModel_Impl::from(impl).setDecodeOptsCTCPrefixBeamSearch(beamSize, vocPruneSize); + return *this; +} + TextRecognitionModel& TextRecognitionModel::setVocabulary(const std::vector& inputVoc) { TextRecognitionModel_Impl::from(impl).setVocabulary(inputVoc); diff --git a/modules/dnn/test/test_model.cpp b/modules/dnn/test/test_model.cpp index f7befa9937ae..6ac9702c6993 100644 --- 
a/modules/dnn/test/test_model.cpp +++ b/modules/dnn/test/test_model.cpp @@ -615,6 +615,25 @@ TEST_P(Test_Model, TextRecognition) testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale); } +TEST_P(Test_Model, TextRecognitionWithCTCPrefixBeamSearch) +{ + if (target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + + std::string imgPath = _tf("text_rec_test.png"); + std::string weightPath = _tf("onnx/models/crnn.onnx", false); + std::string seq = "welcome"; + + Size size{100, 32}; + double scale = 1.0 / 127.5; + Scalar mean = Scalar(127.5); + std::string decodeType = "CTC-prefix-beam-search"; + std::vector vocabulary = {"0","1","2","3","4","5","6","7","8","9", + "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"}; + + testTextRecognitionModel(weightPath, "", imgPath, seq, decodeType, vocabulary, size, mean, scale); +} + TEST_P(Test_Model, TextDetectionByDB) { if (target == DNN_TARGET_OPENCL_FP16) From 4300bb2e1f5eadd9b6eb1244ab2ed0250c2418b2 Mon Sep 17 00:00:00 2001 From: Iyad Ahmed Date: Thu, 12 Aug 2021 16:51:02 +0000 Subject: [PATCH 113/128] Merge pull request #20541 from iyadahmed:video_capture_timeout_prop * VideoCapture timeout set/get * Common formatting for enum values * Fix enum values wrongly in videoio.hpp * Define timeout enum values in public api and align with master --- modules/videoio/include/opencv2/videoio.hpp | 2 ++ modules/videoio/src/cap_ffmpeg_api.hpp | 4 ++- modules/videoio/src/cap_ffmpeg_impl.hpp | 33 +++++++++++++++++---- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index ba9c18bd97e1..aa247dd84eac 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -179,6 +179,8 @@ enum VideoCaptureProperties { CAP_PROP_BITRATE =47, //!< (read-only) Video bitrate in kbits/s 
CAP_PROP_ORIENTATION_META=48, //!< (read-only) Frame rotation defined by stream meta (applicable for FFmpeg back-end only) CAP_PROP_ORIENTATION_AUTO=49, //!< if true - rotates output frames of CvCapture considering video file's metadata (applicable for FFmpeg back-end only) (https://github.com/opencv/opencv/issues/15499) + CAP_PROP_OPEN_TIMEOUT_MSEC=53, + CAP_PROP_READ_TIMEOUT_MSEC=54, #ifndef CV_DOXYGEN CV__CAP_PROP_LATEST #endif diff --git a/modules/videoio/src/cap_ffmpeg_api.hpp b/modules/videoio/src/cap_ffmpeg_api.hpp index 984d36f23cf1..e6187655394d 100644 --- a/modules/videoio/src/cap_ffmpeg_api.hpp +++ b/modules/videoio/src/cap_ffmpeg_api.hpp @@ -30,7 +30,9 @@ enum CV_FFMPEG_CAP_PROP_CODEC_PIXEL_FORMAT=46, CV_FFMPEG_CAP_PROP_BITRATE=47, CV_FFMPEG_CAP_PROP_ORIENTATION_META=48, - CV_FFMPEG_CAP_PROP_ORIENTATION_AUTO=49 + CV_FFMPEG_CAP_PROP_ORIENTATION_AUTO=49, + CV_FFMPEG_CAP_PROP_OPEN_TIMEOUT_MSEC=53, + CV_FFMPEG_CAP_PROP_READ_TIMEOUT_MSEC=54 }; typedef struct CvCapture_FFMPEG CvCapture_FFMPEG; diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 4164ab941c1c..937d34821573 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -186,8 +186,8 @@ extern "C" { #endif #if USE_AV_INTERRUPT_CALLBACK -#define LIBAVFORMAT_INTERRUPT_OPEN_TIMEOUT_MS 30000 -#define LIBAVFORMAT_INTERRUPT_READ_TIMEOUT_MS 30000 +#define LIBAVFORMAT_INTERRUPT_OPEN_DEFAULT_TIMEOUT_MS 30000 +#define LIBAVFORMAT_INTERRUPT_READ_DEFAULT_TIMEOUT_MS 30000 #ifdef _WIN32 // http://stackoverflow.com/questions/5404277/porting-clock-gettime-to-windows @@ -534,6 +534,8 @@ struct CvCapture_FFMPEG AVDictionary *dict; #endif #if USE_AV_INTERRUPT_CALLBACK + int open_timeout_ms; + int read_timeout_ms; AVInterruptCallbackMetadata interrupt_metadata; #endif @@ -568,6 +570,11 @@ void CvCapture_FFMPEG::init() frame_number = 0; eps_zero = 0.000025; +#if USE_AV_INTERRUPT_CALLBACK + open_timeout_ms = 
LIBAVFORMAT_INTERRUPT_OPEN_DEFAULT_TIMEOUT_MS; + read_timeout_ms = LIBAVFORMAT_INTERRUPT_READ_DEFAULT_TIMEOUT_MS; +#endif + rotation_angle = 0; #if (LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(52, 111, 0)) @@ -923,7 +930,7 @@ bool CvCapture_FFMPEG::open( const char* _filename ) #if USE_AV_INTERRUPT_CALLBACK /* interrupt callback */ - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_OPEN_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = open_timeout_ms; get_monotonic_time(&interrupt_metadata.value); ic = avformat_alloc_context(); @@ -1227,7 +1234,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_INTERRUPT_CALLBACK // activate interrupt callback get_monotonic_time(&interrupt_metadata.value); - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_READ_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = read_timeout_ms; #endif // get the next frame @@ -1483,6 +1490,12 @@ double CvCapture_FFMPEG::getProperty( int property_id ) const #else return 0; #endif +#if USE_AV_INTERRUPT_CALLBACK + case CV_FFMPEG_CAP_PROP_OPEN_TIMEOUT_MSEC: + return static_cast(open_timeout_ms); + case CV_FFMPEG_CAP_PROP_READ_TIMEOUT_MSEC: + return static_cast(read_timeout_ms); +#endif // USE_AV_INTERRUPT_CALLBACK default: break; } @@ -1677,6 +1690,14 @@ bool CvCapture_FFMPEG::setProperty( int property_id, double value ) return false; #endif break; +#if USE_AV_INTERRUPT_CALLBACK + case CV_FFMPEG_CAP_PROP_OPEN_TIMEOUT_MSEC: + open_timeout_ms = (int)value; + break; + case CV_FFMPEG_CAP_PROP_READ_TIMEOUT_MSEC: + read_timeout_ms = (int)value; + break; +#endif // USE_AV_INTERRUPT_CALLBACK default: return false; } @@ -3114,7 +3135,7 @@ bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma #if USE_AV_INTERRUPT_CALLBACK /* interrupt callback */ - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_OPEN_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_OPEN_DEFAULT_TIMEOUT_MS; get_monotonic_time(&interrupt_metadata.value); ctx_ = 
avformat_alloc_context(); @@ -3241,7 +3262,7 @@ bool InputMediaStream_FFMPEG::read(unsigned char** data, int* size, int* endOfFi #if USE_AV_INTERRUPT_CALLBACK // activate interrupt callback get_monotonic_time(&interrupt_metadata.value); - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_READ_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_READ_DEFAULT_TIMEOUT_MS; #endif // free last packet if exist From cfb36443fb02586aac34cb14f6f67b551e29cb68 Mon Sep 17 00:00:00 2001 From: Julia Bareeva <34717687+JulieBar@users.noreply.github.com> Date: Fri, 13 Aug 2021 15:41:00 +0300 Subject: [PATCH 114/128] Merge pull request #20506 from JulieBar:lstm_activations * Support activations(Sigmoid, Tanh) for LSTM * fix warning --- modules/dnn/src/layers/recurrent_layers.cpp | 49 ++++++++++++++++++--- modules/dnn/src/onnx/onnx_importer.cpp | 38 +++++++++++++--- modules/dnn/test/test_onnx_importer.cpp | 5 +++ 3 files changed, 79 insertions(+), 13 deletions(-) diff --git a/modules/dnn/src/layers/recurrent_layers.cpp b/modules/dnn/src/layers/recurrent_layers.cpp index a6715aefca92..9088c13390cf 100644 --- a/modules/dnn/src/layers/recurrent_layers.cpp +++ b/modules/dnn/src/layers/recurrent_layers.cpp @@ -80,12 +80,31 @@ static void sigmoid(const Mat &src, Mat &dst) cv::pow(1 + dst, -1, dst); } +typedef void (*ActivationFunction)(const Mat &src, Mat &dst); +static ActivationFunction get_activation_function(const String& activation) { + // most used activations for PyTorch and TF : Tanh, Sigmoid + // if you need to support more optional activations use std::map instead + if (activation == "Tanh") + { + return tanh; + } + else if (activation == "Sigmoid") + { + return sigmoid; + } + else + { + CV_Error(Error::StsNotImplemented, + cv::format("Activation function [%s] for layer LSTM is not supported", activation.c_str())); + } +} + class LSTMLayerImpl CV_FINAL : public LSTMLayer { int numTimeStamps, numSamples; bool allocated; - MatShape outTailShape; 
//shape of single output sample + MatShape outTailShape; //shape of single output sample MatShape outTsShape; //shape of N output samples bool useTimestampDim; @@ -95,6 +114,10 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer bool reverse; // If true, go in negative direction along the time axis bool bidirectional; // If true, produces both forward and reversed directions along time axis + ActivationFunction f_activation; + ActivationFunction g_activation; + ActivationFunction h_activation; + public: LSTMLayerImpl(const LayerParams& params) @@ -145,6 +168,20 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer reverse = params.get("reverse", false); CV_Assert(!reverse || !bidirectional); + // read activations + DictValue activations = params.get("activations", ""); + if (activations.size() == 1) // if activations wasn't specified use default + { + f_activation = sigmoid; + g_activation = tanh; + h_activation = tanh; + } else { + CV_Assert(activations.size() == 3); + f_activation = get_activation_function(activations.getStringValue(0)); + g_activation = get_activation_function(activations.getStringValue(1)); + h_activation = get_activation_function(activations.getStringValue(2)); + } + allocated = false; outTailShape.clear(); } @@ -339,15 +376,15 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer Mat gatesIF = gates.colRange(0, 2*numOut); gemm(cInternal, blobs[5], 1, gateI, 1, gateI); gemm(cInternal, blobs[6], 1, gateF, 1, gateF); - sigmoid(gatesIF, gatesIF); + f_activation(gatesIF, gatesIF); } else { Mat gatesIFO = gates.colRange(0, 3*numOut); - sigmoid(gatesIFO, gatesIFO); + f_activation(gatesIFO, gatesIFO); } - tanh(gateG, gateG); + g_activation(gateG, gateG); //compute c_t multiply(gateF, cInternal, gateF); // f_t (*) c_{t-1} @@ -362,11 +399,11 @@ class LSTMLayerImpl CV_FINAL : public LSTMLayer if (usePeephole) { gemm(cInternal, blobs[7], 1, gateO, 1, gateO); - sigmoid(gateO, gateO); + f_activation(gateO, gateO); } //compute h_t - tanh(cInternal, hInternal); + 
h_activation(cInternal, hInternal); multiply(gateO, hInternal, hInternal); //save results in output blobs diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index b833b2ea443f..32b56278bda7 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -244,6 +244,10 @@ static DictValue parse(const ::google::protobuf::RepeatedField< ::google::protob return DictValue::arrayInt(&dst[0], src.size()); } +static DictValue parseStr(const ::google::protobuf::RepeatedPtrField< ::std::string>& src) { + return DictValue::arrayString(src.begin(), static_cast(src.size())); +} + LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_proto) { LayerParams lp; @@ -301,6 +305,10 @@ LayerParams ONNXImporter::getLayerParams(const opencv_onnx::NodeProto& node_prot CV_Assert(attribute_proto.ints_size() == 1 || attribute_proto.ints_size() == 2 || attribute_proto.ints_size() == 3); lp.set("dilation", parse(attribute_proto.ints())); } + else if(attribute_name == "activations" && node_proto.op_type() == "LSTM") + { + lp.set(attribute_name, parseStr(attribute_proto.strings())); + } else if (attribute_proto.has_i()) { ::google::protobuf::int64 src = attribute_proto.i(); @@ -997,18 +1005,32 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr lstmParams.name += "/lstm"; // https://pytorch.org/docs/stable/nn.html#lstm - CV_Assert(node_proto.input_size() == 7); + CV_Assert(node_proto.input_size() >= 7); Mat Wx = getBlob(node_proto, 1); Mat Wh = getBlob(node_proto, 2); Mat b = getBlob(node_proto, 3); - Mat h0 = getBlob(node_proto, 5); - Mat c0 = getBlob(node_proto, 6); - - b = b.reshape(1, b.size[0]); const int numHidden = lstmParams.get("hidden_size"); const int numDirs = Wx.size[0]; // Is 1 for forward only and 2 for bidirectional LSTM. 
const int numFeatures = Wx.size[2]; + + Mat h0, c0; + if (!node_proto.input(5).empty()) { + h0 = getBlob(node_proto, 5); + h0 = h0.reshape(1, h0.size[0] * h0.size[1]); + } else { + // initial_h attribute can be empty in case of keras2onnx producer. fill it with zeros + h0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1); + } + if (!node_proto.input(6).empty()) { + c0 = getBlob(node_proto, 6); + c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + } else { + // initial_c attribute can be empty in case of keras2onnx producer. fill it with zeros + c0 = Mat::zeros(numDirs * numFeatures, numHidden, CV_32FC1); + } + + b = b.reshape(1, b.size[0]); Mat bx = b.colRange(0, b.cols / 2); Mat bh = b.colRange(b.cols / 2, b.cols); b = bx + bh; @@ -1036,8 +1058,7 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr } Wx = Wx.reshape(1, Wx.size[0] * Wx.size[1]); Wh = Wh.reshape(1, Wh.size[0] * Wh.size[1]); - h0 = h0.reshape(1, h0.size[0] * h0.size[1]); - c0 = c0.reshape(1, c0.size[0] * c0.size[1]); + lstmParams.blobs.resize(5); lstmParams.blobs[0] = Wh; @@ -1045,6 +1066,9 @@ void ONNXImporter::parseLSTM(LayerParams& layerParams, const opencv_onnx::NodePr lstmParams.blobs[2] = b; lstmParams.blobs[3] = h0; lstmParams.blobs[4] = c0; + + // read direction attribute + lstmParams.set("reverse", lstmParams.get("direction", "") == "reverse"); lstmParams.set("bidirectional", lstmParams.get("direction", "") == "bidirectional"); node_proto.set_output(0, lstmParams.name); // set different name so output shapes will be registered on that name diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 05f77730af07..a446a37c7944 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -665,6 +665,11 @@ TEST_P(Test_ONNX_layers, Split_EltwiseMax) testONNXModels("split_max"); } +TEST_P(Test_ONNX_layers, LSTM_Activations) +{ + testONNXModels("lstm_cntk_tanh", pb, 0, 0, false, 
false); +} + TEST_P(Test_ONNX_layers, LSTM) { testONNXModels("lstm", npy, 0, 0, false, false); From 917cd13ce24cd89770b78769d7bc9729277ff4c8 Mon Sep 17 00:00:00 2001 From: Iyad Ahmed Date: Fri, 13 Aug 2021 20:12:05 +0000 Subject: [PATCH 115/128] Merge pull request #20549 from iyadahmed:video_capture_timeout_set_get * VideoCapture add open/read timeout params to FFMPEG backend * Fix wrong enum name * Fix wrong enum name --- modules/videoio/include/opencv2/videoio.hpp | 2 ++ modules/videoio/src/cap_ffmpeg_impl.hpp | 25 +++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/modules/videoio/include/opencv2/videoio.hpp b/modules/videoio/include/opencv2/videoio.hpp index 348448bda7a8..16016e4b8e9a 100644 --- a/modules/videoio/include/opencv2/videoio.hpp +++ b/modules/videoio/include/opencv2/videoio.hpp @@ -186,6 +186,8 @@ enum VideoCaptureProperties { CAP_PROP_HW_ACCELERATION=50, //!< (**open-only**) Hardware acceleration type (see #VideoAccelerationType). Setting supported only via `params` parameter in cv::VideoCapture constructor / .open() method. Default value is backend-specific. CAP_PROP_HW_DEVICE =51, //!< (**open-only**) Hardware device index (select GPU if multiple available). Device enumeration is acceleration type specific. CAP_PROP_HW_ACCELERATION_USE_OPENCL=52, //!< (**open-only**) If non-zero, create new OpenCL context and bind it to current thread. The OpenCL context created with Video Acceleration context attached it (if not attached yet) for optimized GPU data copy between HW accelerated decoder and cv::UMat. 
+ CAP_PROP_OPEN_TIMEOUT_MSEC=53, //!< (**open-only**) timeout in milliseconds for opening a video capture (applicable for FFmpeg back-end only) + CAP_PROP_READ_TIMEOUT_MSEC=54, //!< (**open-only**) timeout in milliseconds for reading from a video capture (applicable for FFmpeg back-end only) #ifndef CV_DOXYGEN CV__CAP_PROP_LATEST #endif diff --git a/modules/videoio/src/cap_ffmpeg_impl.hpp b/modules/videoio/src/cap_ffmpeg_impl.hpp index 1e73cb8fc881..9ec75501d040 100644 --- a/modules/videoio/src/cap_ffmpeg_impl.hpp +++ b/modules/videoio/src/cap_ffmpeg_impl.hpp @@ -183,8 +183,8 @@ extern "C" { #endif #if USE_AV_INTERRUPT_CALLBACK -#define LIBAVFORMAT_INTERRUPT_OPEN_TIMEOUT_MS 30000 -#define LIBAVFORMAT_INTERRUPT_READ_TIMEOUT_MS 30000 +#define LIBAVFORMAT_INTERRUPT_OPEN_DEFAULT_TIMEOUT_MS 30000 +#define LIBAVFORMAT_INTERRUPT_READ_DEFAULT_TIMEOUT_MS 30000 #ifdef _WIN32 // http://stackoverflow.com/questions/5404277/porting-clock-gettime-to-windows @@ -523,6 +523,8 @@ struct CvCapture_FFMPEG AVDictionary *dict; #if USE_AV_INTERRUPT_CALLBACK + int open_timeout; + int read_timeout; AVInterruptCallbackMetadata interrupt_metadata; #endif @@ -569,6 +571,11 @@ void CvCapture_FFMPEG::init() #endif dict = NULL; +#if USE_AV_INTERRUPT_CALLBACK + open_timeout = LIBAVFORMAT_INTERRUPT_OPEN_DEFAULT_TIMEOUT_MS; + read_timeout = LIBAVFORMAT_INTERRUPT_READ_DEFAULT_TIMEOUT_MS; +#endif + rawMode = false; rawModeInitialized = false; memset(&packet_filtered, 0, sizeof(packet_filtered)); @@ -928,6 +935,16 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& if (params.has(CAP_PROP_HW_ACCELERATION_USE_OPENCL)) { use_opencl = params.get(CAP_PROP_HW_ACCELERATION_USE_OPENCL); } +#if USE_AV_INTERRUPT_CALLBACK + if (params.has(CAP_PROP_OPEN_TIMEOUT_MSEC)) + { + open_timeout = params.get(CAP_PROP_OPEN_TIMEOUT_MSEC); + } + if (params.has(CAP_PROP_READ_TIMEOUT_MSEC)) + { + read_timeout = params.get(CAP_PROP_READ_TIMEOUT_MSEC); + } +#endif if 
(params.warnUnusedParameters()) { CV_LOG_ERROR(NULL, "VIDEOIO/FFMPEG: unsupported parameters in .open(), see logger INFO channel for details. Bailout"); @@ -937,7 +954,7 @@ bool CvCapture_FFMPEG::open(const char* _filename, const VideoCaptureParameters& #if USE_AV_INTERRUPT_CALLBACK /* interrupt callback */ - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_OPEN_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = open_timeout; get_monotonic_time(&interrupt_metadata.value); ic = avformat_alloc_context(); @@ -1282,7 +1299,7 @@ bool CvCapture_FFMPEG::grabFrame() #if USE_AV_INTERRUPT_CALLBACK // activate interrupt callback get_monotonic_time(&interrupt_metadata.value); - interrupt_metadata.timeout_after_ms = LIBAVFORMAT_INTERRUPT_READ_TIMEOUT_MS; + interrupt_metadata.timeout_after_ms = read_timeout; #endif #if USE_AV_SEND_FRAME_API From 9ef41f68fbec10b41948f10d86a2f8f8b8cff56a Mon Sep 17 00:00:00 2001 From: Smirnov Egor Date: Mon, 16 Aug 2021 15:44:54 +0300 Subject: [PATCH 116/128] fix Split partial sum --- modules/dnn/src/onnx/onnx_importer.cpp | 2 +- modules/dnn/test/test_onnx_importer.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 32b56278bda7..6da2c5edf6b1 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -851,7 +851,7 @@ void ONNXImporter::parseSplit(LayerParams& layerParams, const opencv_onnx::NodeP std::vector slicePoints(numSplits - 1, splits.get(0)); for (int i = 1; i < splits.size() - 1; ++i) { - slicePoints[i] = slicePoints[i - 1] + splits.get(i - 1); + slicePoints[i] = slicePoints[i - 1] + splits.get(i); } layerParams.set("slice_point", DictValue::arrayInt(&slicePoints[0], slicePoints.size())); } diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index a446a37c7944..983f72d6d688 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ 
b/modules/dnn/test/test_onnx_importer.cpp @@ -617,6 +617,7 @@ TEST_P(Test_ONNX_layers, Split) testONNXModels("split_2"); testONNXModels("split_3"); testONNXModels("split_4"); + testONNXModels("split_sizes"); } TEST_P(Test_ONNX_layers, Slice) From 8dcec034ed6b52b2074573a56573883698e446b0 Mon Sep 17 00:00:00 2001 From: zyp Date: Mon, 16 Aug 2021 18:20:10 +0200 Subject: [PATCH 117/128] Merge pull request #18694 from zyp:gstreamer_gray16 * videoio/gstreamer: Add support for GRAY16_LE. * videoio/gstreamer: added BGRA/BGRx support Co-authored-by: Maksim Shabunin --- modules/videoio/src/cap_gstreamer.cpp | 23 +++++++++++++++++++++-- modules/videoio/test/test_gstreamer.cpp | 12 ++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/modules/videoio/src/cap_gstreamer.cpp b/modules/videoio/src/cap_gstreamer.cpp index 60ecf6611a87..e040a22cb0f1 100644 --- a/modules/videoio/src/cap_gstreamer.cpp +++ b/modules/videoio/src/cap_gstreamer.cpp @@ -475,8 +475,9 @@ bool GStreamerCapture::retrieveFrame(int, OutputArray dst) // video/x-raw, format=I420 -> 8bit, 1 channel (height is 1.5x larger than true height) // video/x-bayer -> 8bit, 1 channel // image/jpeg -> 8bit, mjpeg: buffer_size x 1 x 1 + // video/x-raw, format=GRAY16_LE (BE) -> 16 bit, 1 channel + // video/x-raw, format={BGRA, RGBA, BGRx, RGBx} -> 8bit, 4 channels // bayer data is never decoded, the user is responsible for that - // everything is 8 bit, so we just test the caps for bit depth Size sz = Size(frame_width, frame_height); guint n_planes = GST_VIDEO_INFO_N_PLANES(&info); if (name == "video/x-raw") @@ -507,6 +508,24 @@ bool GStreamerCapture::retrieveFrame(int, OutputArray dst) src.copyTo(dst); return true; } + else if (format == "GRAY16_LE" || format == "GRAY16_BE") + { + CV_CheckEQ((int)n_planes, 1, ""); + size_t step = GST_VIDEO_INFO_PLANE_STRIDE(&info, 0); + CV_CheckGE(step, (size_t)frame_width, ""); + Mat src(sz, CV_16UC1, map_info.data + GST_VIDEO_INFO_PLANE_OFFSET(&info, 0), step); + 
src.copyTo(dst); + return true; + } + else if (format == "BGRA" || format == "RGBA" || format == "BGRX" || format == "RGBX") + { + CV_CheckEQ((int)n_planes, 1, ""); + size_t step = GST_VIDEO_INFO_PLANE_STRIDE(&info, 0); + CV_CheckGE(step, (size_t)frame_width, ""); + Mat src(sz, CV_8UC4, map_info.data + GST_VIDEO_INFO_PLANE_OFFSET(&info, 0), step); + src.copyTo(dst); + return true; + } else if (format == "UYVY" || format == "YUY2" || format == "YVYU") { CV_CheckEQ((int)n_planes, 1, ""); @@ -1008,7 +1027,7 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam sink_pad.attach(gst_element_get_static_pad(sink, "sink")); peer_caps.attach(gst_pad_peer_query_caps(sink_pad, NULL)); if (!gst_caps_can_intersect(caps, peer_caps)) { - caps.attach(gst_caps_from_string("video/x-raw, format=(string){UYVY,YUY2,YVYU,NV12,NV21,YV12,I420}")); + caps.attach(gst_caps_from_string("video/x-raw, format=(string){UYVY,YUY2,YVYU,NV12,NV21,YV12,I420,BGRA,RGBA,BGRx,RGBx,GRAY16_LE,GRAY16_BE}")); CV_Assert(caps); } } diff --git a/modules/videoio/test/test_gstreamer.cpp b/modules/videoio/test/test_gstreamer.cpp index ca100367b126..207f6de50baa 100644 --- a/modules/videoio/test/test_gstreamer.cpp +++ b/modules/videoio/test/test_gstreamer.cpp @@ -35,6 +35,10 @@ TEST_P(videoio_gstreamer, read_check) cvtColor(decode_frame, rgb_frame, convertToRGB); cvtColor(rgb_frame, gray_frame, COLOR_RGB2GRAY); + if (gray_frame.depth() == CV_16U) + { + gray_frame.convertTo(gray_frame, CV_8U, 255.0/65535); + } vector circles; HoughCircles(gray_frame, circles, HOUGH_GRADIENT, 1, gray_frame.rows/16, 100, 30, 1, 30 ); @@ -58,6 +62,10 @@ TEST_P(videoio_gstreamer, read_check) static const Param test_data[] = { make_tuple("video/x-raw, format=BGR" , Size(640, 480), Size(640, 480), COLOR_BGR2RGB), + make_tuple("video/x-raw, format=BGRA" , Size(640, 480), Size(640, 480), COLOR_BGRA2RGB), + make_tuple("video/x-raw, format=RGBA" , Size(640, 480), Size(640, 480), COLOR_RGBA2RGB), + 
make_tuple("video/x-raw, format=BGRx" , Size(640, 480), Size(640, 480), COLOR_BGRA2RGB), + make_tuple("video/x-raw, format=RGBx" , Size(640, 480), Size(640, 480), COLOR_RGBA2RGB), make_tuple("video/x-raw, format=GRAY8", Size(640, 480), Size(640, 480), COLOR_GRAY2RGB), make_tuple("video/x-raw, format=UYVY" , Size(640, 480), Size(640, 480), COLOR_YUV2RGB_UYVY), make_tuple("video/x-raw, format=YUY2" , Size(640, 480), Size(640, 480), COLOR_YUV2RGB_YUY2), @@ -76,6 +84,10 @@ static const Param test_data[] = { make_tuple("video/x-raw, format=NV21" , Size(322, 242), Size(322, 363), COLOR_YUV2RGB_NV21), make_tuple("video/x-raw, format=YV12" , Size(322, 242), Size(322, 363), COLOR_YUV2RGB_YV12), make_tuple("video/x-raw, format=I420" , Size(322, 242), Size(322, 363), COLOR_YUV2RGB_I420), + + // 16 bit + make_tuple("video/x-raw, format=GRAY16_LE", Size(640, 480), Size(640, 480), COLOR_GRAY2RGB), + make_tuple("video/x-raw, format=GRAY16_BE", Size(640, 480), Size(640, 480), COLOR_GRAY2RGB), }; INSTANTIATE_TEST_CASE_P(videoio, videoio_gstreamer, testing::ValuesIn(test_data)); From a50dec88d5e39a24438f9215a2fa4ee4cc6ae3ab Mon Sep 17 00:00:00 2001 From: rogday Date: Tue, 17 Aug 2021 18:23:27 +0300 Subject: [PATCH 118/128] Merge pull request #20547 from rogday:gdb_pretty_printer * add gdb rpetty printer for cv::Mat * address review comments --- .../linux_gcc_cmake/linux_gcc_cmake.markdown | 2 +- .../images/example.png | Bin 0 -> 35727 bytes .../linux_gdb_pretty_printer.markdown | 38 ++++ .../linux_install/linux_install.markdown | 2 +- .../table_of_content_introduction.markdown | 1 + samples/gdb/gdbinit | 23 ++ samples/gdb/mat_pretty_printer.py | 212 ++++++++++++++++++ 7 files changed, 276 insertions(+), 2 deletions(-) create mode 100644 doc/tutorials/introduction/linux_gdb_pretty_printer/images/example.png create mode 100644 doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown create mode 100644 samples/gdb/gdbinit create mode 100644 
samples/gdb/mat_pretty_printer.py diff --git a/doc/tutorials/introduction/linux_gcc_cmake/linux_gcc_cmake.markdown b/doc/tutorials/introduction/linux_gcc_cmake/linux_gcc_cmake.markdown index eb59fea209d6..ee3f1eb7f9b9 100644 --- a/doc/tutorials/introduction/linux_gcc_cmake/linux_gcc_cmake.markdown +++ b/doc/tutorials/introduction/linux_gcc_cmake/linux_gcc_cmake.markdown @@ -1,7 +1,7 @@ Using OpenCV with gcc and CMake {#tutorial_linux_gcc_cmake} =============================== -@prev_tutorial{tutorial_linux_install} +@prev_tutorial{tutorial_linux_gdb_pretty_printer} @next_tutorial{tutorial_linux_eclipse} | | | diff --git a/doc/tutorials/introduction/linux_gdb_pretty_printer/images/example.png b/doc/tutorials/introduction/linux_gdb_pretty_printer/images/example.png new file mode 100644 index 0000000000000000000000000000000000000000..0ec673dcc21d74eb626b3f9b7a627f029c781873 GIT binary patch literal 35727 zcmbrlW0+-Iuq~LCwr$&XW~FUb+N`u~+qP}nwr$(4&N}Da+kN|~-`DYD`kp5u7SFF0RZp;#D#t;xnx{qIy-1Cdmi&y zA4v-j=OA?g0fX0s3L^`L){(2%8IdJnH=7a-N1+TmW$a(4g=x0RE1jVmkLa#J21nl^}d$n#8@U_4Y~&IuYU z&FwrjQd8%U)UZJx%_>wfDK{JV=m3}E(zWPg(3PSt-5M7(*wS!Lix?}Xf`Du|6>_sO zM1Rht1kHd0+a4+y3on~6;&EQA@@ug*fnn>OmQRDU;-fmA^QqGy4JRzLy4y-$0BwKw z)f;G(6efmaEx)dwQcAfvu&R|63;8LbT}`8{zZwrHtQLJ9spzUa(#~^S#m6K9kZ)r`I`(cn zT<}P^MNX8ej`;3ki#85SCv0BrID&+oN{H+=MEntxO~GV{=;FwtWP=PQcp{aOu|T1b-QMq-|)kGaV3}-4{ z!`Om~kQl4^1W1A~AtxwUWCo;|n7F*tQo!qpQ)sta|G+`UE-liZYJ;W2f)nF6a?;$K zT%>XAzbo&C)xb9-E$Za2ZzToJL`~*gwudl`YF4-$1#Qz_lADiIW>eWYSSF9zf?Da; zN2amEk;*sbk%w4+ACz0Gp@&Q!aO^+dw-&)aJ7c{%XZKc#n{LWw^zh! zA?I!p&t9dTb0xN?_ZH~wtnB?5!OE(GOF|hS;whQvbo?U%xCSs~GbL!jR=WXg%I%hgKMR zik_mTg*P(zleb%%fLZ@=Fm(K>vbk5N+&0$-Fl4krnqToJvPL*A8m1$NJacRyl zT{9k{`#S2cb@LhiSa3qy-1!h^>wxxpsSeW!8`Zif|6n8i0dNXkhM{cl1#$YLStKHn zGK`P`X;y#A`ww*>?*^KlVxl+?&Ut$Z zVu7V-nyiZu*F+5$Dj_4qNuNZKG3|4nD+qQ!tEE8kIrH6>l4FUo=-nv^*&`rD+Ea

Q(%%|WTqhQs~!=|nUv!kcA~{jVq#P>iWFDX^L^Gw>VMb4%l#d#%X-bh74JL( z4j+4>3;Zk0U_oq^|EX980&kuHrWgYqq8ezT62`}_ky5YSVHE+X1k^c)>yVW4DRZMM zX7xKUxP*%@^8BGo!7FeOjDsa?U9Q}ri&`#ZYBn?pu8m=wZwm7{_nd8o=uxjU!4^qx zAW@JcRyZXCJ*Kfxu6g85th>NLn{6OVsAQCFo@E+9^@ zh{;`r<`v-*A`(cBI509f{X`h2aM3mtT#^v^DLq|^9WXzGjXv>!by9#UJ~j!f@uB9+ z-ETz&==3rJX7a&T_a!8hlvO8y9zNXz0Y#sqlvpAndwnTFL?A;LBzURNS{XXqvc-wp z)#Y;bRD@g~p$7>q*WeB7REKbW zWJ+hUIa16TEa(3QzaJ1`KlYDc4z>U8M6#T(UheOt{J4BIHjqdd^w?&sLu(6p1C*D* z!5$hKf4P=!+K6j9G-|j|kqFrfC-`qN@&>0?<0ZH_3kJAjqBtP%Y-XxtS){IS;n&(# zqwO+4>2Q(XPPqlrI6{&VZ^VnV8R0|LLJ*OCAY9F3ursn+Q_3#xK1c%RaeDm9=>Cxp z9lO$Z$`?J~vzOW=hAg&34-^pLaa}Xijzze!jNMxi4<1NzA@e$C&SjDuh*t=@s*1X| znHI2aQE-py?f$)nzJq#}m^2<2?NnrE)xgSgOm^k|=*R0SmyO1C1^l1$FLLY7go)~^ zXYr|jce6l9YRY+3*3V`kH`|a-VR=jEh-VQZDve#7M+5~yH_)Uo2&8`&C#-Nt2^Rmm zD^)c+i_>qII+{ac4f{&D)h?D1nfi%v(FuppPME(VxM3BPI)?aXEbtXSc0&>WUe1rN z24p93d~DqW^H0Cvz|A;)NF@I)c>)ax`Okj|2>s`^1bM)0IGNgL$K*%p(F8QvclzrS zRae%|#5iphw=q@99N$gZK-p+me!iS`hW-|Kh_0?wM*8J49kuBpwLCfu_smc?qyWq$ zW{j1%c!+Fxg3jR?aYf?HxE$N;MRqRziOtO;bpX1S0v`0>F&6&e0;;wANuHyDF{(0o z6@&~Lm7T}uazEZ`TOilinF_erZzGGwQ+0!QTGx7g8eO~oiS7O6kAD50{~U9_)P29W zJZxRk*vyxQ2hxn!92nB}bwWStM$P#teE|;?X|&iO$u^|()MA~6q8gpGF~%to$L-Jr zs|QD&tRg^nZe4>mCgmkff}zOM5UKSkW;H>c+f)aTu1tz*!?ZCkoXsV$mtI|{VE-%< z76DZ)^N4%=!f%6OR+ZoBhSA@#P3C5ST6idYP)KGVL~!EFu(CN^!q?_;7(S++T5i@1 zYoS756xh0=FnO{F?n?pYx%s&Q1m>vq|yg7%06qh|oA*%KaN!6C_Kan|@ub)@KgQS;m3y zVO$@bSiEyEdA*VeKDP4>gR!FTEzC~adq3-BbR{^nKH=nA*m-4>$$y@Rx_B$&ZGYWS zL06UGf%s{JpBG^$C@Q+@UVnCcj8OptHor|znRR)eka$H5O}^7v7%GZ$z{$$qCG)UN zhFJIn9vJfce8=~n(m18(fe@dYVTLiNi#*VMo=_GSm|tqzu1HXt7=kHG@icgN1ZFr! 
zmfu_dtgsD@N$ggwd0bxMP@AnL-iBnYL_pJ9l zsb#$w8xtm;T5McTOB9uu=oj+_5YCp`Y9nN|oUy%gVY4@QyYoaOVm{Qj!H?X#pjM#P zeI72S5F!W8l>-l%eCH01s*t|k9}T9eeF-w-@dj?!>Cea#b(IDhEQYl&>5Nu$@yycB`jWJQKxZX`!X} z3Bpyij3C=)^OzShxD`)TdPtZEih{zt%f~I*%PyR^jk19G<-9{Ve|sw6nT$ZN3>d>B zDi_$0!oX5wsuaw_J`@FT9%)#a1-ba6-H{<*Ip3SCsFN83MJ>cs*7*PikjUqWlVwf= zb9gT*m~AGiYL4iVU=LGofF}h(yjU8RVfB$w&Lg&9m-rDjXTKNDGxJeQa&Es>chdBy z`C8@P1$&gecGIlS#MPUnokCmcgFv5WURg=~wWEihODCk_76pEgv&0~?<7;G`-Stsu zFy$;<$5gk+A9AGofFq0Qli6~3`{sG9^1QqzL_tX^s0LFv7{}Hf&W$M{f#0ep9$)DW zQ}4DmO9n^w(TnxGf0ChLeyq_4d)?_^<6I8&yzj_HjeqnaotsfF&u?*F{|@0iWp_;b zA~BaN@B}Cd^Yqxeog{v5H^ri#jcIHtIDD7KZ$W9bBBX=|zw5{Q@)^eS-YW}SSR51* zxBTrD<4x{x6Nn~be=_$h0h+zq93G54?E26U9_-9LaeSzP%e4~j&Z?tr#gbh?Wvmev zt2Q#8o2AkKRIK1YRUs=|q;4=m zJm}2-7XtH6%%AbpgLqCbgdyDp*eiLrbIcd zq+ps+AJp4@^s`(87xSm#-m#}Ca9AAiRwJe4gBYt>B$(G!Xbhk6KVX7eqaZB9en0!f zMnFG19H^4(KkTI&HYGbX41iIM!Uy$D#l(`2qLD7+6ES#iv8#i~ek3+6d&yi6Xo{q+ zWv-7HrzB@-kkcEVg-?Pv+Mrc-%9l}^mn3MIWj}}0RyGBBY;>%qr33)##yTrQh{~Zg z;1mVDp%vM--xBGuru!Hj?;gkMzWdbFXV1U8n{q5luWd~aAjpqD=JVTF*l7v+R29bj z;=(i;CQG3m)iVIaPJ?2TsGGHnvbEe0rgq&aU7T*FeW@`_zTF+VETFW$ zdCi4K%b-n|gFaK-?IHHb3Yt)w!77mr6x0SteZ&~u!Pt?~w}QBNxEtQA7oqf;!iam7 zs*Rk)u(6rS&dL(cr|lyMMvI}CoHEHD?XBPRj3{S3w>)=ozSOdm;F(k;6R{zi$)XzB z7*dqomUPl2(!zE*!g%7^+l-$%<|?*mgf80~^R9w8wdeHP@r3a( ziHQd-Yc<1jB^6Vmb`pb@4qS)y6A0n_J3Znd1!_>sLTlSweHC#0gMmuVu| ztx!6dHEOw8CP9PYa*(r;!;Bs0g&R8MQ4P@)W2Fu zm#z}GX&^c+dlD@3D+sYVKB1>e&w<~8l}Vb!e5gt3onp6Er?0x6T%DKX<*fpCS$}oj z+D#!8brzQ$e?t(t`x-Cnhwn^R5Vda zhz|{4${hrh7>dgfy6d2-weiWxtIYHCY4-OQHQ_Cyi8Va^dyY$2xnn!x!yg2qY45P{ z-61?}btfNZd4IX7x`Kpx`t8>eHXRn-DL*3vG-Y5@DuHb`4!6`{#kp?3>DPU3dOX;0 zX<^P-lQq11Wzo_RdzYj$6vwmPt?JhI7{V7wrTxw^S7BY5*n#mi%;!+d>mC;F%iZf0 ze~OOM`dX*&rD*iIY(3pcy9GdAVH{P+%*fC&?U{ORdow;eJ>6A(Y=1RY=D{1oJ}kc~ zNL;U9i7G|)y4^&G2(O-gAkP|QPbT46%J8UzhLqXPpl4E4QWsNj2|y}tyQQ6>ep6q^ zd3czvn!;+zPidAP)1;G6&Bz9j;Ac$MuhZ4#F8TkwbP@WqbPlxS(W=6rP|! 
z&SiFLr<*2=c za-yG+SB##(y%IV3Q1=GT?!f`iUQYpt7sq{BW6#OoP)pR)j578X6_oBYA9!5-w6$vG z>Ts)4om$v(Ra*qoXD>J3DBzT1FhMRGfB=toQ0%DyrYA-s4Mf1|IZSaV zc1nels8MM-G5`7`<93I5SxyyvA8gHrRQntnWBuiX?3xG-e`PqFkF&ABMA~G%&!8O& zLi2?{6fK-Zl1TQ$Yu`^2={t~5!P+VVIB zEo-b!?+Sx7!Q3E!YpOgy^{9*Xu*?PHO5?QnP{4ts@uFgy@XWM$okt^w zA-+N>^z^It#eV}OAoW2crzawiiVTGq!h;tNlOjc`CXJtUkZVXc+F!mp4Dlq7DsQh- z#@8mZvXOq@xVR5!%&_T3}$A2@=LQYzaXpjqIwv}BwXmunU2Zr1`s2TZ(4 zARuPWkzUG_+c>-#2Hw^ll`uG4OZd$_d4%WKyUiUdGrSqPP`$pixKdMvnKv2}oxhIW zclo};k*WfhBugDTp8cjtl3X-D%Z+oQ{clkjrndF}4IIu0+Iyy`NK;dG$Yo*)mBz`Gl4lq0C z?(zQNs%1hD#>gn?HqSt@sKLf|hIBo%9}!vlMM!3TmXHSK_7hw15X5hq9p8rVk8ry5 z%T>fo9GN7ocT`x$T!Grz6kT^zEp0)mgfomB#}%Q=HbTfsr#B!U%O3E6{?KaTaFP6+ z<>}W_`XV%W5>B{T{A-uK`_iJ7_BSD`k!xv z4ZW%y-AUImCDFCKcm|$44NV*`k+M8PTUv=3Z?4OSBg_^U(H z$V5Dyw?N~ni@d%&Q~Pb9B^D$(RVa!EMA#NiYPwptV2`nFrO? zA!rKdnV)u-m{=wiZ6TMeNUj`a4y&?EyK)?K%XK{SemrC#+d^C>NN-DvA(I2*`37Du zG{D1et{e^VVBw4}aH6=*r=A|xmLSKiy#dl-PrCHGPt>|J4Dtwk2AiLTM{N4z%yW1l z{KQh|^q4T3r3*>=+QF%JW|ov>X67MPtZY9(9#Tn|6Nj=*JM;p8q=xayI50wINWvB4 z;_#33-{#7V@SEE*B4Uu*jq@9-(BDga3wfA-`S8QXz)-ImgwF@`Gi~|Ds;mv6L4}d` zqQgt~X#4vC*82{Q^H4FJpPf@MOU-$e0S%)llgdr~$~xb3s(#v3=#Y{TY+4LMK-(3{ z&ZY3LFiZ@!-;AhqTaJFa{#KxJ)8Mf@$ zZiUows-Ob}aiV}tG6-FWd=@2e={f%-9qoyFYU7rZA0iD|#udN08m!WBhj!!%q=Dbs zI|Jms4XiO`Hx3B>FqKarFBoBj_dP+5pT|6GeJy}_B4#gSi!MM`aDX)M?2MRNES(nC z=&QpTCYDi+_?W=1gPBVGj5Wk^Wlc?4k2GnDhFMKqFc!t&-js>{)z>shvVg|L!9b6* zolj64n+05UJht;fGC$z);ujOE|MS#NyOxqh&z%T2gEw-l!*K1ZNqI=K?BO}xG;~B)f+XuFLIjiln|D?mm z3w4$eT55#dQHKBJGxP0E6|voS)@a%dx}Cj581_5I!IjpvX43Kbp1-v()x>*<;`vu| zk=pOSCs+76XmV#rwIcX26?EKh6IWp{%Q7eR^&Rf@Kl8MWL>4;QOy3W)E-;dFN&fCk2@-J z@JKgsVU_}=aT=SYtT0bqcpViC$4F&0Qoj`Yu+jM9)648l9kFB)VI$bH6A-sfN=hJ2 z>`^^?agJ95H1tSx_2#auhnVzIIpj=&(1{Li%RA;ypegm(M6on8Q1=UD=7d@!MZ|m= zyxfui%euU_cJzz|#QJ1>f31FGOX@tc@Yvm347gYVA`Ds@rRDhem zs2wL+Bzn;% z$d{cd=eyAD(!#?%_$r?;0*-jc!@(DA@1Wy0x<7ag*&LHQRi2g`0AKTT^R;L9F=4Ck zv6m^qpf&IM=y8?_ELJ&lJ9{&uTawqxT8cErI$%x)6pxPgb-`K>51fcu^g916fY4@T 
z=A6BgV-Rl$yg9hr`_MygT33+c^E5Ca(&1On5X^hl=D8Zo%vfXjKRpyTSdA8_EaAfw}B= zbo7HY4C&8H#IZ#nW_aei;WR)KlPx4`S?50|76(J`^~xmfN9bw^Ead&ZY&YH#?-4PI zAN@kl>bND!?P5t1OAz(e&Pik%l&??$PrdhiG362P^S!E}&&{1z#FDbmk^ zgyTDXe@5Y+_h*Z7j7dzh)H>0aZL?2c?A{Ic3+1+j4mP> z?>zoF$QEs`b+_PIOn25}F%9^{B@N=lxS@IDJ8AC;oq8v)dbB@M`g8Wjc?`0Pf=4-$ z%S?iSK=^|_c>2C?8rJS^&Ta|Xk`+S1eD(Rf5$e7j_CC@Y8~+vuMP>E-P=ys(bG-J{ zXRBWaM7mz9O_r^liJ1BiEr8I}%}Z&mYx$BPZl`|!OnPWy*l&$d=geKXXXOfs4v?B8 zOMR2rY{g*C*0RPQe(UKCkXNe-y*IV-4vaD+nV+!H9NJ6Toy3zjauir3a}y2|IdW zO&9A>gN=uj3eqHpJ#%@1-jJJja9}oYf;=nWAc3#FT~by$M;(lVdb+Ir0~EHnmU45R z?!>a)8<`tA(O<8(G~M}`VfD?CxUF${YvvCoQS+WunS#{t#1V;(jVuSl+z!o8wMQ%G z7NM=N&wqNkC^XKzUa8Hl=^JPebaRV*x)`C=H%PmrjiWWG z*&{HQi>f>p%%lsLd1Z8Htb`=U7|MJd{0u5 zr$_6`D4GuYpzmsJU5z>0^wJC3f}(bNu(hi6JsDlrPNNYIB4add9RW>!#CAEIfqPpZ zF-u=fb2eS)qOhbN*duXa^4;l>ol~I(y>LG?XZuS$5)d?DY^K)eyyM`77*l@q3kS=> zAfFbtkr>EKZLT>Jo<_2ScwDLyPe8wy44l>h)g_YDM0K|=5*Xe(^Zn}bXt2(8uI*1Q zL<*`_jh7;kFy{dhW^osSD9^{uOcG4U215{&x6mVkB@VTd3( z6?@aTQ4wi-+*1lv=wIm71}Rlp@Y`&L`i zi8wvpS3>bpuNrx@*m9MQVR?9w4}cUy6*Bw3DY4T-Qps#n-Dc=UX%Zg|J(4>{D=S3S=HqiCURmE|#^|CmXgD4A{P7uH062Oc&Mwj7uKCZAGs zmy+TI1J8EapU_pE1R1Y2bflWP)1z^%iY0;GE(4slr{@m?58xb`jvVGhc(E)|Q=t74 z#Oq;{CdWY2F**ohESXIt9-dol=vd83;^s;jO0X}jc83khd5G4=s@W105ij;nfoK}2 z(Z}OobOkX$e6@LPYZ6sZa-wxWo$fLp9X(o1BE0V=NMSf%s#3<*C6RMY#}EhZ)YPr@wk9fC2i&h5Qd5iU@F>gw;z?gZGQ>xucVs5^-i%qOgn%Vm1 zDWYe;f4u-Ozd?a_2$R}sVse{M?oZklf8EBo=CdJ9|GFj1XF`?v>agV9E-$zJFRWf9 z(cyDD^Hufi@Qah8_0A44sL1tOU1FL^=ppDjMUoFBgZe$wB2V91Tke$6DG3Q`X>R7f zG)_dT<3+`ID?>SOyd=dmsEIq5OlAc2_()y0udn#XB$gUl$>`hb^8-AXNVNcB7=KY$j9Pl5xnDxB7RH09 z;i-N9d7Ltp!T^KBkktUR&Y5Qr@A8+&eE1;nz;{XYd!0D#H6h3cIbfYW^9G@Y6Q0tg zSIUkKJk5ZqPVlj?_SF_eUQpEB2@q#bvcJB^k{pQ9hv4K~6MOSCyrgUEIV`y^van96 zIU=n8CcU$pJE_Yy?#s2)t}=2j0&$mt>kknRi!A&@_~(xZQlg1y z%eYANM|g2&X>KVDLy~f-jC11&G&7rQfOoR`)F{TJOZG+g7Ft_uxIX`L`HpdPE z8AOsWmA|!ucqN89W;~uv0HVK&eumyAwTF_8o!Jl^V6$WRvbYL2M>!&J6j*#Q+Nz zLEl;*F)Z)$y7Gb;uL7X>QR5jP+O%{V>8{|aXNl;pM7WwEwX)R*r)lqnM%Rt~bnkm6 
zz*yY=+J=Td;LE_4ZLTJ&Z5ZNhUtzoHs(_GP*}!;m%T#(cI!RlI5V%}~mu-Gus?OtXlSRtY2Le|z#o=MY4CGbo5wRscI zKOCZ6Lh&o%!BBF$y`e9^+FdMlB2>r<4u#A-Py6D&HON_m>keqwUz6yszw=)RIq&X{ z$ECqUb*u|tm#_Cr|0MONMllo-A{HTI&2Fi3aN5i};iG)Zr!0nO=<=4FP_;dfMfvDn zBwu2@Lb7G4|Bzf^@bq6}Ki-F)32Z4V`Y0>ffO>Qs%`ti3m08$?RGo2B=jS_2YS%|c z%(d~xXYBrzsdT)??aaz}Al#BS9HJDym2!Vwd^Pa!B&-DlLOOPRGmVj5K)$tcq*amD zo8THS;|b9E?Qv%suw72Id)o&1FveW$E_44F4q=5vKW=la-j%R1AsTM+iVT^*MIcjg z8yVKR{<2{E;^9fqE#D2>i8+Y$VCymAZ^0@}`1e*TKb^11r9QEzGLtCE{fhcYhXC_x z`_MSUv(t7^szm0_;mRxFsLnfR#342dkhSZ?=}u>@6(ffE%um+81ZfAwh=F-a>Y$SS zK5)-%?Vi9R{d}8)sypZ9oApB5g25G}eaz=oaC`NRwQ9TV=3t)Uv;0$b7~GyIXdW$l zf4l4Y$X5%-a!o5(9%eV&VUE$%FjSoXug0q~#%a?%+ok~uco~@rWxXlxnzKLmilt(r zW2r#dX1WMEC_1x%?)G=)i>O*oQnh|i|s-D#xYowO^3+@1W(^_ewJQ4ndqb-a7 zU(jfh(9-DuRGkEIrd1%3N|iDXXa#D2WT~V~Us9}t;Mupv%x^tZ!^<(tYCFDMqPxjJ zpSr_j*owF4{9r%$9bH`zLUp=Qw;$W>$;t~%r@`UwJlM^4TS-G@@8~hzut{EBOXg=# zRO~$24a#LsFLzJxd_M$EW1SN;GV5i5$hrO>bv&c>bP72h=x2x7eMq|aX)&w&Pws}! zunq?(U^4cHPHTG?{nj>W%hw(gm*O`@@B7X95XaoV_zq*&6o5Zie90j8kxgEC3zBt$ zK#u`4#yaf0m)~I3?W=~6srP}ceEZ=*Wl25`Ij6cYbJr)pGRE4knY2ev1IyEwcx*{E zCSQlMP}2<$&#fU;Y1viV`;^4X^@3V3o+`0>Zg=pzqnoG0qv+*k;CYWfGm;NeO#snPdvNOCRIrCuD`Px-f3<8R@pkQ<4Q&WV4fKJ{Y3Brz! 
z1uxULczm2aJ}dgF-#)8KTNe>B`xPPgM_B)hB4QN;{a)fby(2X67b@l^X4wcN)(SL` zZU-3{|GvqWBhsgAIGi{G3mOO1D9XZ_jTF3$W~C}A4*Q6Je~E`Gi0|+Q*^otiW~)n0 zuGe|=P7>e=oxVw)i<+wS5r z-0z`nby(|f&%lr9+PdgbSC#UcLPVc{F4*nYt3-9MnXcqKZAg{&=lk{f;@0DVL-4#vEZhS;sG*mGaWr;5=h=+m>uc&F0-jJM8nJa#IvwltV!?(qJNCT@Mgx#@V`_6>%Yj~S`gVA5)j zi_4zF@dD;cUsLZ#T*RurroOhuKUkTFQ;O9*?Osf_R(Gb)1*hNPF@n)i{7^iFF}#Fw zoQ!HUqyX4P7J6>+7(b_qbkaN(MJ*8SX1n{PXQEIIYWi!D?xcEUJpO$tK32b13|{qG zUEA{zo=5SPsvg8Ma&!QW*__BQ+)+gL5Vl1mJ<&18vmXdP6ti!@@Ykg z7?(t5^PTARceDDGySv&7*KR%qZPHS>UaqYh893RxZN$Nm7&fM~5}czV^&CUBwq&*J z@_41KSTAo+`i81}+ig4jaau#cSk3We6VOxt8@bCw9UvvF9d7pKR*#Af-yg4~Kw0ae z5yYYR5g#uW*JA_eLQ}gC9S3LvlEWKQGa?=7xtAFYAodygj6~7;*(vV^=lvcrPdyM> z%dK8_e_Ah&jq!9noX0^F?j~C#qANC6C%2*%U1I)sQ@mPvE4~ES-B2zmi_;YvjI~yk6o?2pQU_S&Ww;l0=Od@aE>$B z0ur=)moD31d`L$4zPgd5Xr`t#yEMzJ3Qtx?$pW_X>3t`5VooTX}?(n8V7-PM}2q97vhWnXlv!(FS>YZ`Bai z?G#_j+>O<04iDsVIbW)Hh7vbpCf=9O;xDg{@`Kk;=X|`layOsv9b5@s1veu&G8w1krt-*=0CQ}hVd8!txZ6qa%hjQ#1+5_>TD-}L9rMF&NxL&Wx zj=1eRI}Wyc>mwIEMp#EwHS5j%V7=D3e6U^yZBjad(?&4N53AQYnVyFYkon|>uehP6 zpMZbNaDRHNGO3Q@o3un*D<9WR&J5ezN3*bxe$Cdp4o&dp`($1_K2M)d4^GSMPD0UO zue7IbTaS>zR~_;M*?v9rtt9#Z=#~ZQcsK(aY{1Ih3_x`xJ*e5=jWUw6dxkHSyMnRL zGd6Qtr_AyrIjbfrr18oO7o_HeIjRW|Ww38Xs)C^F5v~kGjoC>AXE;cc#F0ix|36hB zd6L}EKABTb;nXoqM8&SRaf)};pKHUf167oilVt3jE*#n-ydX#Do1)r>F2j4Abt_HH zp~%k&U$0!&^i_bxZgN>}X<$vPab92GR715;-+}q}!0%d4SrdA~;sONR)1Q&R5YaHY zBNO5s0E@kP!4l$DwC4c-3PCt}XI*-ki!7T8dU{UCf#P8EWrF3=_A;md%ubag`B$bG z=oXAu>46H?R!W-h&q@S5lVFheMNqL0;fF75gRc*7+u8A!Y`5PgJ?QBI6Bems+tNBD z^B8!UCpadU4S(&Eg}R)A^JgvEy{5mpCe_evN~FmcFy`eE1zcgm*3rHLUz+1;I1+Gnjz6qm z>gOp<161~tnlGcsn>NnlsSU-L282M4k|Lo)qx;yf+X<)HKr6rm4a^N(y8qwOh(-Ix zBukU+&Pk=CVuM8GB7X*lmwj0n%Fd3LY(AowDEHORf#DYCy6AZ8jV|@2z z#gEylL5xl{DSXzn@0`vV@jFpqELa!oer9M;Ll*a5we*xAZ3^aX(f=x~MTlbgZ7A!T zh(`EoIW)Q*P0i2(>d#cm;Ys#-o;$ex2AN;KRbEwVJ5(0FV#O4j950gm$&3V>cyC9u z&P20fuW%#XVDA*kWKOURuEWJR)~tB%;w=b-w_1mzb;Hut=J#yy+5tH)-=m9RouG6O5?xd4_1H8>-H4@3Cr`#%K_w%g5MLR8xo1|2q8)% 
z!6|~Ok5VEXw_)7V{GLK7usXPf+mL-4k@tF>*Vg=rV(`k-@h8j0sssbs1gn~NeJY)U z<-%nLXX8E#4qy5csai^EQYtm;UxMLS{67c=^#2vX@cv&2hB@l^{0lgxy6T23UJV~2 zn)#dom+{R3BqPTbMr2q=_A0yA&LC^^2yY%*#b+_5NLHwpWP8tw4Cnvk2~ur(+N0LO z5HTTcIz!7>&?q<}%{>EDIoDjaINYW2@P{%&!geA7RH$5gPB6>Fs7!b2K_IvJT*AEN z%U_qSD8Eb&(X&tPv?#{@c`8a+J}-uFyyoERfKwqHQjBaMhYA1zrC2tfku7?`7xxIY zTm9~Sgm@yy0+X;daXu#-p*+rE)t_{H-KT3e#l&+B2keJ000fM#@>wphnDc&a-^HsFT-|-2X`ixWrko;4p*Yy` za+U57Ib}Kjf{^N%CnSU*M~SA!ExF;E+U!-ByZ6T><7|u@z24wiKw6z(cDWkAcR4vW zHnugtbw8t|F@Cpm7UJXmj9n;fSw;pyU4eBZ+!KkrsXX!cYIMJ5Gn$DeCV!k@_hJ`h z`l6h@2k$#ku4_e9G>pLm1=#B_c)qg~1t}b5@VfZ5pW0_W=4?w1p6xB_J7aVSJEjgtTg_jukVra~)%YB4 zaoH}y3J>>x)Tmy5+H*y($lL@6&c-PD*q)c|*)G@BOiP;NwMh_XDd$bo<<997b1eF^ zq=4wJfRdG*R2Z(avKUm+t7oQ;-FeJSX^Q|V7|jV8P>|E)*AhBfRGC_5gVT|1x zI`WT3IKk{iLz7oZIdZffx_#5HX^aPRUg2Q0-0Ru$*g(=I#6SUYbl{8(o($ujt5-RWIEktV`pevBPe71wq~~GPNc@B#v4?!L(E;9mLOTGVVBX` z51>u9)j^#s@cY9f3EmBbD1!7p92nbyd_ym?d7s}AzDFt#@$t|Tmi97IcBPvEMjJz` zjZ^3OO8OKfd%2W2pahY0h&!@qvUh^P0f4IPtoxUTtBN<3O%lAbD!vL*`{9A?M$Zg9 zCo+Z$Ji^Vq(V`k_BZ`nh20x;7cn723aK_oQ5ye(s*5f&|HkAg8Sykhx3?az&Biwja zjNQtHzKimi88TgJouMY;3>fJ94Fgn-MY)-O3LI}Ud5@*uB1ZzLJUcWR{plKixtw>{ z`_I`45%RKm{f0ekd@M->Jas`&ZVc{~pVnknc!0A%(oZ{TV=OzT20tb5y5)Tfu9nfl z7arnTn{LVVjk7~CK=N&@UPNo{k)%7i8|l1r2!jqaMW0TAd5-Ak@c2-kLdGfZ z6a^w1>N`mMoogUT`zuc9L-FBbciV+Vw7u^?v;YRCZ(d=VUa5G0+LOVaoohb%sW2I2 zR<4VNi+<=^EGmeIh#WNut$1I5+RbQ*7%!rmu5^Yo%AyT}F4Mo+b&gu^WL*YEWOMr~ zn_E{$eA@(!?Yd~*mqAWXi9F5x3Sq3_Fa4_pweDJzZz`uPsGjhqhrD?nAdNmu4IBjK_Un^lj=)&I5IHf0yB3YsRWWA|d+M>Ol?3 zB))IOUo(aokxBSF({G!=jL9TVO^*^NRa-|S!HVGQj(9V_-=OruGX(|-d!(OpOSBgy z%#kBE{$i~{0RCy)m<4C4F+Q?^c@;A2s>4VA`!4;@^~f(z0Uo?ncb{&#hx@|k3aN_P zgvkA;CE1x&Gs~*t4!Pce*7)&>Zumak0BHqgAdDBh&lY&~A z;3p=rPezO$7Su(obPJOnDUXlhCFiqb#)R?q1%aSlQE6M)B^jQ*59!!Q10D&`TqZ*-Bnnogq#KMEyW$9`c_K*P>j zbljQ@lX7DM@sEbG>9vvz>+ABTO;BR(ZrSr%M9m%ypYc zLJ*QD1ps*b57q=dxe*jL%Z zs8vdyUn(Q>sz*09KO|C#iSHY7#=1|6xKjQFU21KemnRkytJQZqgEH4qly!y^mpeWh z$R#l2(?pwMlL2#^3%>sA=i;F7!C5b+PGvm%E&IT}9Rbtz6ZZfGUVGMPE3El~xvewq 
z(aFf-Py|P^?viGj>UcIh&k*qB3kA`Vn}-5Oma{(}Kh8{zz~5lOHtEkSb>tMb*7+F% z=aq@B>$w|@Q`Y1%U*&&sd6GM^-@GX6Cjf|f%c^N3q&rw`Mdn{X9JoiBZ$_&;NxN_W zdcw)ncUMz6C?}_-DQ!K}`vU#Z!qj7gF}Ov$u!N?6ZbxW!%1uf|D&z%+GgX9xNy*42 zzHR9{D~XN){Sr2n>dIZYlRb$h0gcL@b#CL5ROS~ARVxchNv~Zyy)keVZQm#kFVh{4 z)K3Ho;ab>$vwj1i`?E11^9G=^K!Y>95T^<4ZzQOU2Xn~J{)+s*VL!o;pWgk^3sHwckQZm3 zsXnESx0n6L?5s~RR0r5G7srCtirn5){RJNQhnnS*+;)x_*AsR{q?4XaP3fDhSd6y- z!@+d7G5tS@dZwykZaU4`e7v4%t|v-WR~>zIn*(dgO1$U2xVKSuyF1;}iK#o9?xdYO ztE!@TK;0t^;%|E(wmE+C$*K(Q-~XZhbvWpOO{uNs+Z`i#4Z}#v|8<@r9wbwvZcyka zMukS+6#mpo^*i&ZSR&lPiGoR-=u^JUBXgb$*4Xi?dZ<*9pDNsCYz(y`4G*&~Kw&LW;KF3pvvqQt03T@2`T1p?CDBeVSE{z^g9ElAb;rYvY<}QaL7Mdd+!U53u0#G zCfh1ZTUYa1_9gs9BBVy4MJe)p;V?j)4zQC>MX6Ed^`k$k>jlgm|&j}EyVUhG-)pu=lvZlFPqh~wicQkhE7Y|T8p)G7 zpF_k?P@z}c$8(JGKzGVdJi6)6b^vSjQ;yI{R@t zifBFq>vNsX&zsm3RkZI9rVPL`q3zv9^hJ z@U|Lqk7A@DQL*x#!90V6ifV=;pQfX{o`Iei*xR0ZeU<1L;uJ({$$O~JA~N$AEJhLn zWJt|zx48A@cx11ArYEvoy;^(tc-Dl%+7~)Q=KDo7OZ+XIf*hN843h~ z`Wdbyf&x~y1_wz>s4tR!V8p~?DD#ZwT$yQhTZMdy*X|* zGL;gxv>ynAyWMNJq($G)9Eu=TbzMk!XsB!!VzpkFDyF?oh8&odAzYzHOCgeK-^7v% z(BC(9QsqkYb<@&@-QD7h8+^`4S;T0O`)YFLi@Tjy|E?3c`rV2 zuTTl*K&2n82`7u2Y` z$IJQ8?Z^Tm{;wcoFpuYtY4RDGji1_tU4x7+prl}un8If$S2LA{#RcVcNQMUf^QPdC zTKf$X2Q5O{Fi{hcQ`j!?bC<QIBDino|g)H`(2FB*~$r zKE$uup-U$fos?J(kepiNwoUnSTJ8^UP&b$<6cB=fum7P5)+^1eVvRXaf^X*d5;NeS zURBlLm0E#>~q~e>%vp)XWjIlsbFYX^L{|ZJ9Srqq6k^qk=aZXN4 zU!QxAsLj$yj>tT&g`Ho zoUn(>l3%OVpTr320KCWM=DD=Voi0znVNY3l?xEELLto!DIX-o5iEFG z){noN>SZZy3)kyjb#{W-Wo~Mwvj9*|CRr>ypAhjn-YBylc!#Ik&?a@hclxbSt8(OH zdNU3uGsA$^`Z6ed=}(u|16Ia3KVWXgl_;N3A+=8IYEKL$3&_w*t$y~z9R+b29+dei zB_c=v&@HDo$@nocIkkto5C&@49tdE98GOE>{`m0r|m&uR?^i`ylx!PD!7&QruoLHW( zNCcP!X-+i0Xx>bY%5-1p7gYJLa_RyK)ewz1(zL0*_^MjwRVClqv}$~Xt%~kt0V-)M zxpJk6rvs(4)1gfK1u@r65Md@VVSl+_Wvb4}N(&)XPf)M#GUXBxa_i*ism73)Nl+|$ zm8(<0EYRc9xhfJ1sujZEB=JZB4rc-yoSJnz2YaB@elOn91NO;H6+;5jWk9Ai2@exZ zxtVDk3UXBPp_oGAcv8uhXmj3N1&?VrWz&*ENyFJ|j5Oxt$njQYx%F!>d60Bbt~MKf 
zZD4V!!((CFctc8mVA?q~emG`zUJ(m?Zz~`Ft_=U!=PpI$dif`4{yQ~%E5ld`yI$aXuE$%ST>eQvHc$3QY{QY>I4^JL4qAjYr( zN4VJmQuh)x&I&UFOafYdU8iuA+LTe{Iwt6DcTE z$BTCB9-;HiH_9`fG0>32m@<$s_EKi^qZs_qAp>8$D6CM4#-irfd>q8#ApQ$%77x*- zsB!3%KM^7Z#@`ze8`g<1g|dY}f4_Cp?9#+6)ys^RYvn(ltonqPCwo;z{-mb>s(rOp zJ-*%RQv(Y0d|xCsqS$VQ&9D_5V{bf&V#e%On`6W08n-CL?9PKtV`Y%_f&R{)hl$}! z!#;TVOzrJG-^S-5`J#v z(L@!Yrdp}q(Aedb)BIshW0calFeYjiGgcT=@2$>X9!qoBUtZE&;4^hXI}g$?7Equs zWbfkhcaOkK=nE1BL7*5J2W8+Rp{uh3P@+}I^_HJ z5_G>@UN03@{z)UJ>h~x>etufC3q`5P%G4l>WBOmHAu!MXLJi}eZ`f3BCq;l+buqe2 z3kzG4Ckxq;A{%VO1ch~ob<5^G4$)t5g-I?ME(Nk4n{XG+>1@r$cnXIo3tid>CUgzJ>Z6k0HBbv1sn}H&Ud6={ zDJv}+x)8+igDoNvNzx3lJTRj+A7T7m&X_EG8Jm!{C^P9IJ$z5s_N6S!j6jkGfq;lZ znkvG^GLlc7_);?-MkZMPAX!1Rn#>a0@YOkSjuoJfQ`d##!u3wPTu@Nz6^SBJc)?~% zqF`x|Yxu2o(nRAg4eGi49XD5Syst!fzRxSxHAa)2H@}?w$ z{3TD&C=Xd#k5!Qzps4D7^1UnOfI|1HU${*r?@J{17RL6~z`uA+Va$A@wBF2Mm$D;5 zmd}fJl#8mjkD?tkB>N!umI%D_) zBA2$f)sWxRW??8SL8VV&Ong(QeTb~ZmS0>_kYlnc=^IDloeRH;2s+I2QCc;8QiKK? zq-MC(mGQ+eq7HJog|X?lg&-@k#Y@qDpNUq1@VM>K%?+F}0jrm*|3o*&t&M}7_yg*e zbx5uu%S%~*3hz9nUB!r?+s~sJozeRaW{HJ_I_=TR&-7`{?X%rS&uBBLJ-a!^J45hw z*%XZ+`Fr}t^vora{|yH!5@nt?gce3J|NFF_jvA31K}=p%K~UQ^k}}4Jvrb*)C|Woy zuSdu%3e;*&a7Tn2DF{(g8I?%<-5I>sse6pPA%VAK2h+ zjzt$emRNe;FG6Rwrg2tb6J71<1S>@;S(%HhI)(SHo8cN@rYRfd8Yn^=W8w#-RJz&} zK54E`6y#We;o!XN#&9%W5y~*R@ce=CFfC#HO-D^bOx!RvvgyVK%r9E7Dt4vt)&L2{ zqi?h=_zzD!n&OZCMxOO{V4L>I{{Mm%eMpuJuK>Pz=--kCkMn;=B1^dd`Tbs(YCP%* zDH6li^~6)o<&RV3b*)5PDo|T-5l|?W`o+$br*Ol31@cl~kds%*8sZ@Q_slbD%14bg zH=(XTJT?&v?z_C@)`Vy+bPZ*vMIZ5Y=utjAPkE@3Eoh91hy8INPD7|V@uB+y4d<$RYbAcs2?;!H0S*P3T<2Ww9RRR;<>+=5oXu)0j?&2z9RknFi#KCms z{69jNI%ji}%Y|7r{RoIHaUeRSOti54u?WtrrTIKoUuJWTJVF ziA~H37&!UM-upK8()zF&SmO-XN=ErQL*I#&?g?T!uCy`9?Z4%VTAeLypHWIg3@c(+ z`Mi>-7Vf*G6N8D&&nqZsNB9N9)m8$mX2y(MgUVXa6C0>NfLYd?=M(XBpEj+@&@&Ky z#4%A{l%*}p84Z<8#e+M)JAq-Zd&x?-%_=|g&trV4bzy6#-;75mJ9<_euQVs@s$;qo ztDr`pqd9*DN%hUxbhIJ4@DGS|;bvM9naxe^#~rw{h(;0~m8thrmMvU3$(E@|X5LR2 zSvGaIPmdZyO^4R;q=LQ;cX<3Z7wV=U4Fy6juj}q!Y||34i3>KC>!Tj+OJ*gMT=Ae= 
z8Lgv}oani8aoXzU{7|oH&*+fyA-7Ymuc4RouG-`|)1tQj#6T;&p2ccWEi*d{$7`jq ztBvT(7!}Uu6zrBmTs>`&g@b7jhY3nz-ou_mjzxi~+#YIv_G@!2qnY{Kb9}x$53O6! zHfy*}1Ur4>)5F_Ya$f&lTk=X_gx&3!55JIYT%!k-3VXp{D>{nP1;v_+NAC9ai36Fe zmRAI~>n1PgfcyX>`J)U81=(G~Ao`bdnR=Bb>&Y5WOn8-o#OzEj-xGtB)s*>|Qoa(O z>mAf)%Inc*u-2&Z*anGLkS7+`dKqi6ZHy)+-c$T2Wl2>7W`w))StSr@P}C*!5&duX zKXZ&jXOkDdXh<&W=hoRG1!)TH&P~pavYzg&5(x1g^st=}RgYrZ52{ef@j_g=yLSE+ znToe(*qWqS-6MoQ-IbMG7jd%m%o2;XW|kgwmE@TCjNUmb9xd8G4f77qXB7+M-zki1 zm*EJG8+v{kt}{et?Idk?k1^p)7Mv2)3wVlaBOwnvgP zi-?Ur=bL9vMbLNgF}FR^fBl1UEg#{u+pnAj{vo*{OQS0Q56Ty7CPeI<0ut(f4)^bh+}015gi9>3)@H3O@LgP6bM!z3W(bAUq}%53oe z?@4Y^q+WhASj|a})*9l-70xGBJV}A`FNhj9+dj zt82l>wYkQ1Yeu~heE0hx{)sG*He90a2g%#hZ~MLFP8ziFSyt}86!=R5a&Ers(&Xs; zrlfXz4$d{_WNcjS-NisdV}oanDRA?!J#UIZ^c~%xN5}r0a6worNP}rIPZ~pC70s?YalY zHZZY3ZQ%U}KRrUdJg_-F=XxfXME?yqv)y;rp=r{3PyBz=WfvdRyC~7ed%|T(Jl^Bo z+0K|d+C|cAcFstbqn5dNldd(5$tx|`G*@3k!IbA=eSAc{<0a8&ou-WLpPtDsCJPmo zoIUiKeaj1q-KRgZZNhK1b$BM@P`8yN%&FOKn~tEGv66)H`KLq|f6%x5qA=UX=b|cI zu$W2cJHj~x-~7mop7iKVwmCjQUN7+pd40KOtJ#US-_S}Zn>l5X^t=o$G_X^saLuW( zA2+iuFJxB@VN4*|!!K$d=5P@B|A98|rXMAV{Tpo766y_^&nWI#k;>cYvoY7dPBswm zDq-hk!DUfMPhb7L82S5+Ic394u1p>h)8Nz?C6*|_cS=jpL7UQ~Al;HN(!w&7dcQXf zKHz_V&2RL0H@b=rWN5^Gz7kcL7) z+pR9lIYtTTVvJ5GsQU*1=m7o&dUvnlZ&}nW?hRZ1f|_yXRInf)f1w1A!-8Ii+Fh|M zkDZk&q3a)|WJ?ofbhwjfzUk*#EafJCZ{!N>YKi9E?g;5v(DbyRk@>wYSl%4l%Rf^X^{59 zvLP0hvsS&|Xn_MI<*1jjjxy9_2{NDN#n%))I;Dxzr`=1xCtow{n8J{ex~lurLH2-qIgxII4~`^ zW9}!W@r6X_W)%5Mc;HXrXlSk7ld#jAmxzmGtt*Mc9n+PwZLFc+1Ef+K z>OaHdnqyEK667AAj11gFk$(h8fc&osa{S^fiU2@hqSbU-OBK*mEc{{APDnR`?{GvK zwsggFV#eCP+3oi{N;74D?%~(*K1_s(O|3hLVs z+Zou#dIb@0FJ=yu?Tacp*4HT}M2@4l#7~{)l{EbIEfXpDY~!gNZVkD* zF6f+>(ezcMe=xl1M0#N2W@tGF<`kEvO_32tJCxX>T^k%HqwSTv+0w?Xx6%9yif;yX z<&0Yi)qowCX9owfyRt4Qs!NrsPnJl^$Oc?qq-QctR$>mS%TJ5O0<0JJY#w)nf2+ojuS5LPs%e5J%@5P!ugt!~JLTW7bMl5jnG#fGJ1U+Hos ziv8bUyc+YNIcZuVx+-5`YrbE455c1?RuF55Kmia+pJSo2!aBe$LSAC<9u)LGBKoW` zalm;%n1Xcb)a238v36+k4%E8Q$waidk(GGJUn6%b^Rmk9)6N zY2T*jZN-x!5JJPutCQP+YC 
z(xiVPkYvrVs4ZVm3vc{pWD0zh?Gp^W1gZ2cyo^@CPoY=qF<*n^Donm(Up~7-GmIb@ z)9uUH`%khHtQlH`-uI$*FBR4$9|nvkhUXCjuO@}q22WiPlPwdOw9R#z)ZRxN64i^J zOs?VgLf}c)NZ24!UgO}!M9}P~xCkgH79hbCs4NN=)GxH2Y+HLq09^{k0&FbuY*Ukr zxl!x{GB~ufSvc8P3L1k9s;P`A<6!%Ve5yiX_~6%}W1P}ML^%lcU{CKk6uqV1eB3Jw z4mwo54J@^b9a8!KAPP)C{*ppDgcK6FfW|2;65D|%$@6?Q&}MIuMIb`xrHjuBLbYXw>sK0;emO%v@x6i^w?+;70~ zZ%&P_f;||v%DjBiVi}|w_Zz$@@6pl}^qvukaVe`&pk%ZgdZRP^Wm8;sPnQl*Z#I_? zr9de@Z$7cgcy6fIGx^!9@<<69?Z~CF4c(!kauT57@NuqIt;ejWlRM<`0)sAKvL*>~ zciud&!#Z+1>IR=I2T%X85jLtMJgnXR+%U}z`}GPf<5BM$AMEj{$YGbM3tw|sVFEb5 z)bvvc?-?g2>&cGm=I(2pzL<4HCq8y5SmC0nJ~FWlDD(3}62L?+x1D2`_v~o$0=yJ%Bk8Wlx4*{%#d~AuZD!f`C#X( znTiY=d*s!QZKsI|4g>tos66H+4215ELt{=Bm9CZB`~o1Fj(!yxR|2FUFT>vFS=NABhYD=Et(}<43a&u9P<&2IoqH1(Y zE{rmuni*=TJX}RcF7Xg$Z>BX~98N_ADky52nK*LFAyi#>F zYoI2D{4NbzPJ+0^8ze>axw+MFP;zv7HOPr4eUjvcN^KT2a|u*-kE}w|YS6=pAS#L^ ziUF8>4Sy-Lfz*g`Q*Ib+n3F{Hcm~wPcFwZ!z>;gP(Pm~jb*tbRkQ9*)>}x*20fPa6 zDv^FqRke~u1}>nRrR0q9YIxqM;u+2N9k!Ai)7_Ig&d-Ym+4~t}7pCWpbyY>sIl_x2 z1m|;v6nAaUy;XAdmhQL)dRPK1u9e@y!q?aC;=h=G0W>K9fFzp3mOI8jn7ePKR9K)6 z(u6de|Dcc7@$w2LQ;gjt@DS%$z?P^>0l^G=9DAm_AM1v>YinA?6j^t#8dkoN*wZ+#{D$t{U0G^tT%_e4#D8a z$5;F^u4QO$8KVb_ZM@GhKdmkGEs9e!-D=@hN^03u>xL_f7UHLU?{VN#MuZp19#bFK zXBx%)1;kxTA)lw z?rYz+fc{U)iHJdJq(v8Cqp{_dMhpY{5MNEoFQ~*~tmY-Uty$N3F`+P{p7?|SK2Mo_ ztpY?wJR*mbr1bbBcc^{qOm(KFszvtcXgb`yQIW7AfTVO%P0PFqsVhL_l#?tC*UiE( zW8X#)uf_SWLe4DbvUJlK`Pn6rqBYs=RcbA^c>xUp4(b(U`CmhYa{R+T(T?JOZ*}J6 zZxuK;)K7p8U*HnNM+S>CW^L$}kMH6Bks;5JZ2Et-6G<6@_9>9fmV_A%4S)Zhi4Ph% z1qGNgK&=XBv>Kn*HFpj3M5(UX5TD}RdUHIniiL4hXdNmJY zjM>Vo)Y2=AzHM)AgW*JhcN1*hfNTO6CH+05zST zue2_?0Aj0di(P47I?B_g$Y^uXbXyu@@&8%S;rZVM9nz|0%H6#d-tm-*^Hmnx?D+`X z#01#}#?fF25C%Err+%1@q9=>FepIDNb!IBHXR578qF8Z#a1W)ii?5YiqAwW!O%>!|W0Ef~MNeb)0C@W)_ zmQ3fgi3qLV5#+2T9G*1**m%FSJ5gGA9fghEY@L97dO6_`4O)ZVc9zel72m$nEzh3F& zUYF-Lvya4YFLZ}P)VaVry8VEC-}KVZspqIGBIssMX-!p_kW23Ep`u+A3lAmE6y+#*A!b!{56~$k2IZ3et56nR| z2E$B!mL-itf0_P_?UqwjUa3R(*-YeZ?F>boiRINBu@ zCiAx%5qA^C<@$}C<`@!M0;;3|obLjV!AmWz`)y$#qTE&)T*|^4Dk&xwI_twp%^=_l 
zD{6#L?vqxN4>k5KWkeo4kBUuJn38ao7QUr#d0!Y}jwMBdKo|uOivv`aK_mrre{d5{ z)Vz};-*|A+Jmz>%SOep00bSM(?=rk*TC}H+;rW07-Soa&f11m4xO=P{pVz%VllDBI z=asN{<^DRy*BPW51&o)gMw$D_Y(?0cci|Z&AACB^mdK^IFx|FB?v3y@|JN z#l{}Q9Lr@*FI(>j5@`xCGz~7P*)7Fh_m2(ejRz3wKsIb4r6$R%hW!pEQb6&)v4mtq zvL>lIC#Bhhsp!U}xlTO;UUPE?ZNu>M*zmA+f0Voo4wm6mGWQ4Cw{w&_N(22G=UVr7 z_41(l-xxb`);u&46mJz_$En_(A7jw5r{Rs9`)~dAOg=wxFlwV1W;VjT?c@E@tfs=m z>6$WXBiGs6!>I{RFZV012VcgeS-2NAMBp$5MnO8^$O9+|%=T?$&s?1qQFGR`#H(XT zGb5SK{^(_#a$w^BoOZG2d@=Y#mQ6Znq!&4#)`c3gPQjQBD5EPiqogUJ9)+pQDow*3 zQht?sr%5M-3fn#>O&q;c7Sj7-U>r@nkVH8dC#taJqA?aKhy&r_=P^r7 z6B7G2ryfOvW_8MAlgns8N>Y+L-CvZ`R~NM^NOL0%aBKuv_4W55E-0)Wsl++q9Up&@ z-K+?%w_fYxLe?a?=(t9?l$hYrq#@w>_^ZuPbHwQ?w8o1uJg?C|JM9s?v273gtkELC zyJB#CT6{WM{^IPHP9j24nrn2!*K}xt82xc|8-eq=DL*Au*-&R=qAgh~|EPgavLYED z$qdDcyA!7k=3g!Ty7h`mKK;1gIz*^~06F-B?b<-m56}49zdIg{NIO#|Bhc@)xj?nTc{HBlP}OHt`jC2 zeem%$HvLbz0J7w)-=6q73I>!^imJL?X?KW?5eVvkSL}xK*OwTiMUFVXeck`4c1<+& zRO@GC-y)BsbG|j$nZ)POY}JLDxzE@Tn{H=MT1MG?Fu@+)m9f~O*kZ9sqIL;JhdObZ zkr68hFlvSd#rQKuhtZ%1aTwN`3Eqb#)6UKaypPm1(R%c#{xs~sgtjbC|JEvHcU1+q zR`a6t(Nt~9MwSZ`f+KT!)B#-+Z-(9sau;NQ=)YtW7%f3_AUQFO3}HIj97tJwrw;rr zY#D!2x%mE3_`ba^z#$hx-)7K%N+sZG*uQ2R#Hj=SM``cpjZC6ISCH(!l>i zS#q;rI9%AAG=#s(?GlrmGd!Hg(b?bJz1iq1I9E)8QoOf@+gOwl6b4LJ!nhtVj{ZEQ z=*+HwMwFN`VrKcwEIFryT{{1e71jt~jjl z3mb2DJj6K`886pGNm6tc%cy=I=WUXnoq@yOg>28sL}Kvk@z$AjnfkQd(F#LK^NT?C zYQjFx!`~|7_*#D6eR?yI)Z_V({*%uK>BK(w{fvCIyE_TF`--&5+NtQo=P>7FTF2`0 z{wMC&VHoS$_~*#Viuu&P8|c~UvqS%C-V;?j(RTUyDO0hJtuu+s2&WvvDigU2$MKGl zad(`frz=epvrvgZdgjM2vW>DfNW(8n<0;h%%!!S+F}SEoXDlb;`VP@|0axBK!_{5P z(ns!Hjb*_fK_d2nFuiJl{MQh??52hoBW#PeP0gH-C?^cz;zPj1p2yDsJkXsu{C^Gb z`Oq>Xs1XVA2bh#)D{VO{hX~i%*yLe+{7%@gq#PJdSPK$>448vl#)sjHf}Y-KgH_zF zKIaeDBNW#;So{(=B)fRK8|#7(nYbihJ8Wo9`Gq4O!y?r$yoY~~MEFYPfo3bX7Ff6+ zOf$kd-W0bYd0Kkv%Kn|{Z+es=(!rCjEUk%E&5ZLocb);&FJqA=i4hSIKvZzk|Etd& zm*TG_fGduxFKo<2I6groa{XFZQAvcAI35F^?7AA9q1k%)Ys+E|3M%II$t(I4q_TE= 
z@LY(jHErc|5QVX9IWo1Z;%F2V=~fhk?0&*LFPBxj*_&r&b4wzF!!;8{cTb+y6&ag&{D>afoGLAn|1;Ix)5%Xuu9T!jdK#eEx}}iasuDuzkL>tj>~;(K zQy`C)6!Wdz8(eU`s_QBuQDY)}Jk$?sSeiNqJCtPU3kzzZ%QM57cC2GEKi*j1JF^ht zw#;8k!#%zkA68WticmPG&Xu}di%1Y|RopLSA*DCn)Au!Swu?MUbGFmoROHe3=ewjh z$HsytS|x9=i!73*At~Nh14iZf`_K|Ej330qqw_5fEa5}2(7k}EmjCoqyBmN`uSt8a&ikj4H@Pu z+Ij@Kaf3N1@(7}Y02;8MzQoNFQc{@c+aA|AdC_P;yWf8P#CN)F-@^MeCYo?+p!+Xu zEcs7xHEEVifI^tPusCqiq6vhwt=T~PE*wfQC*Iowa-%W*Z6E!%foIp3!G+#78fs_v zIceT(F5veNf*@DUQaUF-SBJsCS25eupDr0{P{HNZLtO2DJVKgNxcfq7Utf<`EbP?{UHi{Z7pvNr!cU8nZ5pbQ z^+uS~G6w?#kSiEi#Y22~`vPY3d8EKu@iL~I6Az&C=#BjmjJQB#w7^EedSj#@wiKW^ z7X!aB$iwO5iO!qns;#LntDRNX-m>S>lMU5%)Keft>%Vd^r*X2z?=u^0-ZPHhr|%Cq zM>bs`c#?$VZMK@zlVbJ*M3+jCEl|JKW+zm>0Jhss^hxpzsV#S?*N?WrNfQG7v>Gr& z=~tzgna7xphg|8}17({VZ%)BEe+bFk{23$`wI~YSpenyM82F zX@jH4q2Uq|$M{P{jY@t*hN#TGrytCG#`QO@XX1Qa*}0>`_LMF5Bi&D=;uP!fZJ{O^ysvomtuyV&;rD-S^+)Mc|6Ql5AWcCWS5PaLWULqf zCK`?HInA*Mu}JmP?XZcNJN}ymk|Ul++_Kki^sy2!+1t%yx{Z2z!*%ay$CHlNcSt%} zGC=g@*Zs!D=9yoeA}%1c3v~r_0ndEA{jctiNx@bA65u4S?;9hTsvf zjLM}616Q6|wL>*@Aw`b&NbbjlpoNKg}52+TJ~A-ZMCKzLp<+J7Fqja(cj~J@7#gvUb7Z=O^nx z+jwbRv`<8+Pj=W?AoAk5N7Qtf>|SG2v(IVBB$1q;pGcxKiU0?mjY;6>kYNzvWMPQbqI9;~fM!8>O)Z&sSmB@dmD z-qGyRGsivLwWWw%9O;c=*>WH82rdwn-4*xQwQ`(R&;H8mLk4%gT>+;V@D$8^ovm>* z-W$5^lJUYs4)mw`Qw2YtoaBd;W4z{-mZIaC6j8t?Qm!(W;@2t%BHC|_V?TY5Jrq|z*{WOo+4Ic=jA{)CK=g@+x(Z%YN1cV}x zAc)OvYY9Kgjexv{iF3m2kgGd9QdCeD64{W{OEW9|ZX@(9EwEI1jL~#XL%ughO-d~Z zWh*uZIc4f@yCThE2X{Ud@4}=*-x_n5@iESLUz7l^ozs)irah@0Sw~e@t zDP#c%!a%ivhkI@)!oTfu#Uqe@*AR7JNcn$j!X-X|#J}|bmRKm#KNkZ-%7+8aZ>sML zeSa08AZFLa# z7^*I8bz_`Z&C5uf^HHW@X*1H%B3o~2;8mK?b;!NEv(qogepNv4PL(tc3;5skf|hngqpU587Y6=$xt`^*Ub2c zEnHF%sF8XvQ^t`(Q6_NCD+Sk{msZQ-EcUr8v-CN3S;fR1KROCbm?1_2+R5c9#%#tlK(Ere9()dWHg01O69 z%nW+TcvclTH#yJj*S%L^Q9H*+A3Qtn(EuB-f|Fg}2(PyRR}~8tkM?a#LG)QXk(b_AyTx^uv% z%|v|ScVdF>Bzk(q=}42o@?HzDOqY9eOhtrZw84M@3;ivW3^8I@yqY|t0;R>eWCCXo zwM|P{L9FJgl$9Ns^LIu-dG28UGg4d!<4*=aw)VT=CcAZ^U$(p);9!cUoer 
zYp%H#cfw3w?}_s8=VNkkE69V4jz0($5Gx{=|_lth$T52R~K2`@TpAfdOMXvqCWaddDaarNS25k=vf z7t717+^ZPuSi`!zojZ&N_EChn6^uzn?z%Ia^0@Mp{5wi? z8Yl)*1dK*9P=~CX0Dx6QtvvrIe$P*0)0-$D#RhZVumLY+2?IZY?T5=iO@)_t0IVj> z+cW?q`f+|}UzrWOzK%KL@p5PsI-}r42GkM)B#9$6Ew?bzKM6`w;(qc}b);`F0b(F$ z-aE!fzv0O1rq*6yNjq&3HMENeaGDrju(THI_oO~i5H63V<}($l9Ik{VzRmE(_;1c zm@(|A?GBhuF^-R`<`@heH0e)%%ptub$xZi3rE`HO%JsKjr@H=`2BIvi{V0Wfm;?tA zHFx+xt6ghdaTM0gF4V9!C>#BO_hb$R{&&=swpNT30!+2XJ-Ffu%$CS6WwX(o92>sv zC3fjz$0LV8QK#+Uf>vmQ%dP#fO#_EHJ3}#@iM`)?f%J2AuCE0Dqz8eccNskfr>cmI z>gTv?cfvbu75m*Jdw+!+-oLHacm+l9K+~`iQeH82%Z(y&)_Cnxoih^lw|Qi2+R70A zj`SWY|GLJaM$6c{h@3j6vRK)CBJ*jXzQ;HT^xyY<^N%Ygcb+?|^|zUoY;0N(0z45% z5+7m2Qum;c`e!9|q${*##B6ZjjxSlq+!^!10?8TEIda}r1hw(^PCb^3q2(=@hGS}A z?shwy-ro!{-#rKV#enRg8hoAL0RFqxjS_U#-_=dUi{}+98kjwvGyng`T$ z6N7(*$eOijaak3jWGQ-#8}+0#G~8eZ42%pR^w9TY)Us^#jL?+cbaO>=b-sIVn*XkC zhzgFCK1w1?fmh%$C-~*QbXQ6x+=_}*TC1lR|9lMeesyQzX=uM6wG^n``Jnb&zM&`` zjECRI8fg0tRFMx4E(AzD3g=yIzbd4|l->OS%p& z{sn(0Q)VMh5IG z13S58IYF~*zgadq#Hv-{D*aUGtb*TkWF1uM3oaLhO85JCCx$)L)H{D0l3cwZlqf7v z&{`Hs+MI5pL(i@ih%3*>T2O|e#Tuop$Y1y&{rh}~XhdIYb8~>|{M$h;c_PJ@y`Zq1 za-T>4NeXYW)#mbkSn$5A{-uHU_eZWMkqB8ys4y}q6J;Zc6bk}GI%W?AO^un#u*9iq z$e57`eK**JGMk$cw~Ips{r`;n#HOb7vX`%Y)jpK$JF@!RY;6m7wddEqxPU?=ZF8p6 zly%EHtnMqBN*{@d=-Ro2OIJ?O&d$l`#qoQe6uOG`PpObJkWg5v8!_EA-nSvDZP~)J z-#2dIF*;*lThhAv?k2k)U?}}Q56l`5*LAD>4W8k>ddBW5##rx#QJXvbcX|A)3n#)2`M1`TI=Uyqno? 
z@qOBQD)ZmdrPnP@=Zl_tE&ue!w#a>7E=kXs=dWkl+y61EtMWT$1kdSg*=l)d96bsol zHEiQ`v!fg9)Za~MtU0VB+;H>a8RP1#6I?rfdipm^S=-`V`c`J^!HM0;F*CnSx-rYW z*lf&#ZD&D#91gyF!6Z+m<`@Lygz<2pOCfikksM%dOT0< zk4*M|yV&5ZZMEgsH=F0o@hbN@n84s0t#v?R!*kII>7O2!?CN;swr1zikdWNvs)dc+ z>t37foM#^VP?a0pO0GWlj3e-* z%B*FH1~VBAQX(!)>P%hdrTpe$`(MAB{P!D*mn@xi>4U=j>aE>+ya^rm+vK19zCHWL zl>-*{=geC+TlY;tM4}p#mz}ljf|q7x7CHhQO^*`N_cawC=sq8J%53tnpZ`|>DXE#6 zxa2`--N!!~&$h4&|46^|CM+b!6?l`f>I^>D%X2n3VAiQE`E!K1AI^-La%GXC6Q^-& zs_TyD%Q)08+q8Qszv>OR@lTP@e$k?g%@*hXPMZIxE+bVr5|qHzwre^~Ihb;6##HB= z5{BlhZaFm)!F3m{^DP!na4!;Vt8#zFEYfgp#bwv$-^{*EOZj|Oj(OQKaY8~ybz1m=>Yl*3Z!C#s&8Ih3@#W;FJG_57eX^pHcj`(PhVKCh zoy{*LyQC_2>}&w;>;Jp&YKY^}8JtHJTxk-`Yg)cGct#{i{}33 t+w^~8e{sb#jr;FoHc7<-^ZJhTKlXCQ)k|&%Ov+^d0#8>zmvv4FO#q(g3$OqH literal 0 HcmV?d00001 diff --git a/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown b/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown new file mode 100644 index 000000000000..9d6446992000 --- /dev/null +++ b/doc/tutorials/introduction/linux_gdb_pretty_printer/linux_gdb_pretty_printer.markdown @@ -0,0 +1,38 @@ +Using OpenCV with gdb-powered IDEs {#tutorial_linux_gdb_pretty_printer} +===================== + +@prev_tutorial{tutorial_linux_install} +@next_tutorial{tutorial_linux_gcc_cmake} + +| | | +| -: | :- | +| Original author | Egor Smirnov | +| Compatibility | OpenCV >= 4.0 | + +@tableofcontents + +# Capabilities {#tutorial_linux_gdb_pretty_printer_capabilities} + +This pretty-printer can show element type, `is_continuous`, `is_submatrix` flags and (possibly truncated) matrix. It is known to work in Clion, VS Code and gdb. + +![Clion example](images/example.png) + + +# Installation {#tutorial_linux_gdb_pretty_printer_installation} + +Move into `opencv/samples/gdb/`. Place `mat_pretty_printer.py` in a convinient place, rename `gdbinit` to `.gdbinit` and move it into your home folder. 
Change 'source' line of `.gdbinit` to point to your `mat_pretty_printer.py` path. + +In order to check version of python bundled with your gdb, use the following commands from the gdb shell: + + python + import sys + print(sys.version_info) + end + +If the version of python 3 installed in your system doesn't match the version in gdb, create a new virtual environment with the exact same version, install `numpy` and change the path to python3 in `.gdbinit` accordingly. + + +# Usage {#tutorial_linux_gdb_pretty_printer_usage} + +The fields in a debugger prefixed with `view_` are pseudo-fields added for convinience, the rest are left as is. +If you feel that the number of elements in truncated view is too low, you can edit `mat_pretty_printer.py` - `np.set_printoptions` controlls everything matrix display-related. diff --git a/doc/tutorials/introduction/linux_install/linux_install.markdown b/doc/tutorials/introduction/linux_install/linux_install.markdown index 5083fac282f8..e69f6ea70749 100644 --- a/doc/tutorials/introduction/linux_install/linux_install.markdown +++ b/doc/tutorials/introduction/linux_install/linux_install.markdown @@ -1,7 +1,7 @@ Installation in Linux {#tutorial_linux_install} ===================== -@next_tutorial{tutorial_linux_gcc_cmake} +@next_tutorial{tutorial_linux_gdb_pretty_printer} | | | | -: | :- | diff --git a/doc/tutorials/introduction/table_of_content_introduction.markdown b/doc/tutorials/introduction/table_of_content_introduction.markdown index d1f2aa3ca319..8fa89d7d7f9b 100644 --- a/doc/tutorials/introduction/table_of_content_introduction.markdown +++ b/doc/tutorials/introduction/table_of_content_introduction.markdown @@ -6,6 +6,7 @@ Introduction to OpenCV {#tutorial_table_of_content_introduction} ##### Linux - @subpage tutorial_linux_install +- @subpage tutorial_linux_gdb_pretty_printer - @subpage tutorial_linux_gcc_cmake - @subpage tutorial_linux_eclipse diff --git a/samples/gdb/gdbinit b/samples/gdb/gdbinit new file mode 100644 index 
000000000000..228e8f702367 --- /dev/null +++ b/samples/gdb/gdbinit @@ -0,0 +1,23 @@ +set auto-load local-gdbinit on +set print elements 0 +add-auto-load-safe-path / + +python +# Update GDB's Python paths with the `sys.path` values of the local +# Python installation, whether that is brew'ed Python, a virtualenv, +# or another system python. + +# Convert GDB to interpret in Python + +import os, subprocess, sys + +# Execute a Python using the user's shell and pull out the sys.path (for site-packages) +paths = subprocess.check_output('/usr/bin/python3 -c "import os,sys;print(os.linesep.join(sys.path).strip())"',shell=True).decode("utf-8").split() + +# Extend GDB's Python's search path +sys.path.extend(paths) + +end + + +source /your/path/to/mat_pretty_printer.py diff --git a/samples/gdb/mat_pretty_printer.py b/samples/gdb/mat_pretty_printer.py new file mode 100644 index 000000000000..e6ad2cbde212 --- /dev/null +++ b/samples/gdb/mat_pretty_printer.py @@ -0,0 +1,212 @@ +import gdb +import numpy as np +from enum import Enum + +np.set_printoptions(suppress=True) # prevent numpy exponential notation on print, default False +# np.set_printoptions(threshold=sys.maxsize) + + +def conv(obj, t): + return gdb.parse_and_eval(f'({t})({obj})') + + +def booli(obj): + return conv(str(obj).lower(), 'bool') + + +def stri(obj): + s = f'"{obj}"' + return conv(s.translate(s.maketrans('\n', ' ')), 'char*') + + +class MagicValues(Enum): + MAGIC_VAL = 0x42FF0000 + AUTO_STEP = 0 + CONTINUOUS_FLAG = 1 << 14 + SUBMATRIX_FLAG = 1 << 15 + + +class MagicMasks(Enum): + MAGIC_MASK = 0xFFFF0000 + TYPE_MASK = 0x00000FFF + DEPTH_MASK = 7 + + +class Depth(Enum): + CV_8U = 0 + CV_8S = 1 + CV_16U = 2 + CV_16S = 3 + CV_32S = 4 + CV_32F = 5 + CV_64F = 6 + CV_16F = 7 + + +def create_enum(n): + def make_type(depth, cn): + return depth.value + ((cn - 1) << 3) + defs = [(f'{depth.name}C{i}', make_type(depth, i)) for depth in Depth for i in range(1, n + 1)] + return Enum('Type', defs) + + +Type = 
create_enum(512) + + +class Flags: + def depth(self): + return Depth(self.flags & MagicMasks.DEPTH_MASK.value) + + def dtype(self): + depth = self.depth() + ret = None + + if depth == Depth.CV_8U: + ret = (np.uint8, 'uint8_t') + elif depth == Depth.CV_8S: + ret = (np.int8, 'int8_t') + elif depth == Depth.CV_16U: + ret = (np.uint16, 'uint16_t') + elif depth == Depth.CV_16S: + ret = (np.int16, 'int16_t') + elif depth == Depth.CV_32S: + ret = (np.int32, 'int32_t') + elif depth == Depth.CV_32F: + ret = (np.float32, 'float') + elif depth == Depth.CV_64F: + ret = (np.float64, 'double') + elif depth == Depth.CV_16F: + ret = (np.float16, 'float16') + + return ret + + def type(self): + return Type(self.flags & MagicMasks.TYPE_MASK.value) + + def channels(self): + return ((self.flags & (511 << 3)) >> 3) + 1 + + def is_continuous(self): + return (self.flags & MagicValues.CONTINUOUS_FLAG.value) != 0 + + def is_submatrix(self): + return (self.flags & MagicValues.SUBMATRIX_FLAG.value) != 0 + + def __init__(self, flags): + self.flags = flags + + def __iter__(self): + return iter({ + 'type': stri(self.type().name), + 'is_continuous': booli(self.is_continuous()), + 'is_submatrix': booli(self.is_submatrix()) + }.items()) + + +class Size: + def __init__(self, ptr): + self.ptr = ptr + + def dims(self): + return int((self.ptr - 1).dereference()) + + def to_numpy(self): + return np.array([int(self.ptr[i]) for i in range(self.dims())], dtype=np.int64) + + def __iter__(self): + return iter({'size': stri(self.to_numpy())}.items()) + + +class Mat: + def __init__(self, m, size, flags): + (dtype, ctype) = flags.dtype() + elsize = np.dtype(dtype).itemsize + + ptr = m['data'] + dataptr = int(ptr) + length = (int(m['dataend']) - dataptr) // elsize + start = (int(m['datastart']) - dataptr) // elsize + + if length == 0: + self.mat = np.array([]) + self.view = self.mat + return + + if dtype != np.float16: + ctype = gdb.lookup_type(ctype) + ptr = ptr.cast(ctype.array(length - 
1).pointer()).dereference() + self.mat = np.array([ptr[i] for i in range(length)], dtype=dtype) + else: + u16 = gdb.lookup_type('uint16_t') + ptr = ptr.cast(u16.array(length - 1).pointer()).dereference() + self.mat = np.array([ptr[i] for i in range(length)], dtype=np.uint16) + self.mat = self.mat.view(np.float16) + + steps = np.asarray([int(m['step']['p'][i]) for i in range(size.dims())], dtype=np.int64) + self.view = np.lib.stride_tricks.as_strided(self.mat[start:], shape=size.to_numpy(), strides=steps) + + def __iter__(self): + return iter({'data': stri(self.view)}.items()) + + +class MatPrinter: + """Print a cv::Mat""" + + def __init__(self, mat): + self.mat = mat + + def views(self): + m = self.mat + + flags = Flags(int(m['flags'])) + size = Size(m['size']['p']) + data = Mat(m, size, flags) + + for x in [flags, size, data]: + for k, v in x: + yield 'view_' + k, v + + def real(self): + m = self.mat + + for field in m.type.fields(): + k = field.name + v = m[k] + yield k, v + + # TODO: add an enum in interface.h with all cv::Mat element types and use that instead + # yield 'test', gdb.parse_and_eval(f'(cv::MatTypes)0') + + def children(self): # TODO: hide real members under new child somehow + yield from self.views() + yield from self.real() + + +def get_type(val): + # Get the type. + vtype = val.type + + # If it points to a reference, get the reference. + if vtype.code == gdb.TYPE_CODE_REF: + vtype = vtype.target() + + # Get the unqualified type, stripped of typedefs. + vtype = vtype.unqualified().strip_typedefs() + + # Get the type name. 
+ typename = vtype.tag + + return typename + + +def mat_printer(val): + typename = get_type(val) + + if typename is None: + return None + + if str(typename) == 'cv::Mat': + return MatPrinter(val) + + +gdb.pretty_printers.append(mat_printer) From 210bfaf8d68b556697a90aba54729418a1f29290 Mon Sep 17 00:00:00 2001 From: thezane <10068531+thezane@users.noreply.github.com> Date: Tue, 17 Aug 2021 13:09:25 -0400 Subject: [PATCH 119/128] Merge pull request #20483 from thezane:support-cumsum-layer-for-onnx * Support cumsum layer for onnx * Add unit tests * Address review comments --- .../dnn/include/opencv2/dnn/all_layers.hpp | 9 ++ modules/dnn/src/init.cpp | 1 + modules/dnn/src/layers/cumsum_layer.cpp | 131 ++++++++++++++++++ modules/dnn/src/onnx/onnx_importer.cpp | 22 ++- modules/dnn/test/test_onnx_importer.cpp | 9 ++ 5 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 modules/dnn/src/layers/cumsum_layer.cpp diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 9c96c5a5f187..794bfeedda33 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -723,6 +723,15 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + class CV_EXPORTS CumSumLayer : public Layer + { + public: + int exclusive; + int reverse; + + static Ptr create(const LayerParams& params); + }; + //! @} //! 
@} CV__DNN_INLINE_NS_END diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index ebd887999b83..1916aa0ec94f 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -140,6 +140,7 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(LSTM, LSTMLayer); CV_DNN_REGISTER_LAYER_CLASS(GRU, GRULayer); + CV_DNN_REGISTER_LAYER_CLASS(CumSum, CumSumLayer); } CV__DNN_INLINE_NS_END diff --git a/modules/dnn/src/layers/cumsum_layer.cpp b/modules/dnn/src/layers/cumsum_layer.cpp new file mode 100644 index 000000000000..9c70f306d486 --- /dev/null +++ b/modules/dnn/src/layers/cumsum_layer.cpp @@ -0,0 +1,131 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" +#include "layers_common.hpp" + +#include + +namespace cv +{ +namespace dnn +{ + +class CumSumLayerImpl CV_FINAL : public CumSumLayer +{ +public: + CumSumLayerImpl(const LayerParams ¶ms) + { + axis_raw = params.get("axis", 0); + exclusive_raw = params.get("exclusive", 0); + reverse_raw = params.get("reverse", 0); + setParamsFrom(params); + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + return true; + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + if (inputs_arr.depth() == CV_16S) + { + forward_fallback(inputs_arr, outputs_arr, internals_arr); + return; + } + + std::vector inputs, outputs, internals; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + // Get x tensor. + const auto &src_mat = inputs[0]; + const auto *src_ptr = src_mat.ptr(); + + // Get axis. 
+ const int axis = normalize_axis(axis_raw, src_mat.dims); + + // Get y tensor. + auto &dst_mat = outputs[0]; + src_mat.copyTo(dst_mat); + auto *dst_ptr = dst_mat.ptr(); + + // Get flags. + const auto exclusive = exclusive_raw == 1; + const auto reverse = reverse_raw == 1; + + // Get parameters to iterate outer dimension. + const size_t outer_size = src_mat.total(0, axis); + const size_t outer_step_length = src_mat.total(axis); + + // Get parameters to iterate inner dimension. + const size_t inner_size = src_mat.size[axis]; + + if (!inner_size) + return; + + const size_t inner_step_length = src_mat.total(axis + 1); + const int inner_step = (reverse ? -1 : 1) * inner_step_length; + const int inner_start = reverse ? inner_size - 1 : 0; + const int inner_stop = reverse ? -1 : inner_size; + const int inner_delta = reverse ? -1 : 1; + + // Get parameters to populate channels. + const size_t num_channels = src_mat.total(axis + 1); + + for (size_t outer_dim = 0; outer_dim < outer_size; outer_dim++) + { + const size_t outer_offset = outer_dim * outer_step_length; + size_t src_offset = outer_offset + inner_start * inner_step_length; + + // Populate first element of inner dimension. + for (size_t channel = 0; channel < num_channels; channel++) + { + if (exclusive) + { + dst_ptr[src_offset + channel] = 0.0f; + } + else + { + dst_ptr[src_offset + channel] = src_ptr[src_offset + channel]; + src_offset += inner_step; + } + } + + // Populate remaining elements of inner dimension. 
+ for (int inner_dim = inner_start + inner_delta; inner_dim != inner_stop; inner_dim += inner_delta) + { + const size_t dst_offset = outer_offset + inner_dim * inner_step_length; + + for (size_t channel = 0; channel < num_channels; channel++) + { + const size_t previous_dst_offset = dst_offset - inner_step; + dst_ptr[dst_offset + channel] = dst_ptr[previous_dst_offset + channel] + + src_ptr[src_offset + channel]; + src_offset += inner_step; + } + } + } + } + + int axis_raw; + int exclusive_raw; + int reverse_raw; +}; + +Ptr CumSumLayer::create(const LayerParams& params) +{ + return Ptr(new CumSumLayerImpl(params)); +} + +} +} diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 946623fd4092..3379ea3a0bb6 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -177,6 +177,7 @@ class ONNXImporter void parseUpsample (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); + void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseCustom (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); const DispatchMap dispatch; @@ -641,7 +642,8 @@ const std::set& ONNXImporter::getSupportedTypes() "Dropout", "Identity", "Crop", - "Normalize" + "Normalize", + "CumSum" }; return layerTypes; } @@ -2399,6 +2401,23 @@ void ONNXImporter::parseDetectionOutput(LayerParams& layerParams, const opencv_o addLayer(layerParams, node_proto); } +void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +{ + layerParams.type = "CumSum"; + + // Get axis. 
+ const std::string& input1 = node_proto.input(1); + + if (constBlobs.find(input1) != constBlobs.end()) + { + Mat axis_blob = getBlob(input1); + CV_Assert(axis_blob.total() == 1u); + layerParams.set("axis", axis_blob.at(0)); + } + + addLayer(layerParams, node_proto); +} + void ONNXImporter::parseCustom(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { for (int j = 0; j < node_proto.input_size(); j++) { @@ -2456,6 +2475,7 @@ const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap() dispatch["Upsample"] = &ONNXImporter::parseUpsample; dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax; dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput; + dispatch["CumSum"] = &ONNXImporter::parseCumSum; dispatch["Custom"] = &ONNXImporter::parseCustom; return dispatch; diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp index 26e5fb632c93..737a184cbdd5 100644 --- a/modules/dnn/test/test_onnx_importer.cpp +++ b/modules/dnn/test/test_onnx_importer.cpp @@ -1362,6 +1362,15 @@ TEST_P(Test_ONNX_nets, Resnet34_kinetics) expectNoFallbacksFromIE(net); } +TEST_P(Test_ONNX_layers, CumSum) +{ + testONNXModels("cumsum_1d_exclusive_1"); + testONNXModels("cumsum_1d_reverse"); + testONNXModels("cumsum_1d_exclusive_1_reverse"); + testONNXModels("cumsum_2d_dim_1"); + testONNXModels("cumsum_3d_dim_2"); +} + INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets()); }} // namespace From 46fb88c76fbe8d4b7ff3cc8eb53e5842753f6c3e Mon Sep 17 00:00:00 2001 From: Sergey Ivanov Date: Tue, 17 Aug 2021 20:11:22 +0300 Subject: [PATCH 120/128] Merge pull request #20546 from sivanov-work:initial_vpl_source G-API: oneVPL (simplification) source base commit * oneVPL source initial * Fix compilation * Fix compilation path * Fix NO VPL compile * Fix unused vars * Fix unused vars in example * Simplify oneVPL search: no custom path & download * Fix standalone GAPI * Apply comments --- 
modules/gapi/CMakeLists.txt | 15 ++ modules/gapi/cmake/init.cmake | 7 + modules/gapi/cmake/standalone.cmake | 7 + .../gapi/streaming/onevpl/onevpl_source.hpp | 44 +++ .../gapi/samples/onevpl_infer_single_roi.cpp | 254 ++++++++++++++++++ .../src/streaming/onevpl/onevpl_source.cpp | 48 ++++ .../streaming/onevpl/onevpl_source_priv.cpp | 63 +++++ .../streaming/onevpl/onevpl_source_priv.hpp | 62 +++++ 8 files changed, 500 insertions(+) create mode 100644 modules/gapi/include/opencv2/gapi/streaming/onevpl/onevpl_source.hpp create mode 100644 modules/gapi/samples/onevpl_infer_single_roi.cpp create mode 100644 modules/gapi/src/streaming/onevpl/onevpl_source.cpp create mode 100644 modules/gapi/src/streaming/onevpl/onevpl_source_priv.cpp create mode 100644 modules/gapi/src/streaming/onevpl/onevpl_source_priv.hpp diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt index c5046e8be6d8..69c0aaaae817 100644 --- a/modules/gapi/CMakeLists.txt +++ b/modules/gapi/CMakeLists.txt @@ -163,6 +163,10 @@ set(gapi_srcs src/backends/ie/bindings_ie.cpp src/backends/python/gpythonbackend.cpp + # Streaming source + src/streaming/onevpl/onevpl_source.cpp + src/streaming/onevpl/onevpl_source_priv.cpp + # Utils (ITT tracing) src/utils/itt.cpp ) @@ -234,6 +238,17 @@ if(HAVE_PLAIDML) ocv_target_include_directories(${the_module} SYSTEM PRIVATE ${PLAIDML_INCLUDE_DIRS}) endif() +if(HAVE_GAPI_ONEVPL) + if(TARGET opencv_test_gapi) + ocv_target_compile_definitions(opencv_test_gapi PRIVATE -DHAVE_ONEVPL) + ocv_target_link_libraries(opencv_test_gapi PRIVATE ${VPL_IMPORTED_TARGETS}) + endif() + ocv_target_compile_definitions(${the_module} PRIVATE -DHAVE_ONEVPL) + ocv_target_link_libraries(${the_module} PRIVATE ${VPL_IMPORTED_TARGETS}) + if(HAVE_D3D11 AND HAVE_OPENCL) + ocv_target_include_directories(${the_module} SYSTEM PRIVATE ${OPENCL_INCLUDE_DIRS}) + endif() +endif() if(WIN32) # Required for htonl/ntohl on Windows diff --git a/modules/gapi/cmake/init.cmake 
b/modules/gapi/cmake/init.cmake index 4c25c75f555c..1c464328ca1d 100644 --- a/modules/gapi/cmake/init.cmake +++ b/modules/gapi/cmake/init.cmake @@ -32,3 +32,10 @@ if(WITH_PLAIDML) set(HAVE_PLAIDML TRUE) endif() endif() + +if(WITH_GAPI_ONEVPL) + find_package(VPL) + if(VPL_FOUND) + set(HAVE_GAPI_ONEVPL TRUE) + endif() +endif() diff --git a/modules/gapi/cmake/standalone.cmake b/modules/gapi/cmake/standalone.cmake index d08eda1be5eb..f81c1c8a85de 100644 --- a/modules/gapi/cmake/standalone.cmake +++ b/modules/gapi/cmake/standalone.cmake @@ -6,6 +6,13 @@ if (NOT TARGET ade ) find_package(ade 0.1.0 REQUIRED) endif() +if (WITH_GAPI_ONEVPL) + find_package(VPL) + if(VPL_FOUND) + set(HAVE_GAPI_ONEVPL TRUE) + endif() +endif() + set(FLUID_TARGET fluid) set(FLUID_ROOT "${CMAKE_CURRENT_LIST_DIR}/../") diff --git a/modules/gapi/include/opencv2/gapi/streaming/onevpl/onevpl_source.hpp b/modules/gapi/include/opencv2/gapi/streaming/onevpl/onevpl_source.hpp new file mode 100644 index 000000000000..fec8c73dffeb --- /dev/null +++ b/modules/gapi/include/opencv2/gapi/streaming/onevpl/onevpl_source.hpp @@ -0,0 +1,44 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_ONEVPL_ONEVPL_SOURCE_HPP +#define OPENCV_GAPI_STREAMING_ONEVPL_ONEVPL_SOURCE_HPP + +#include +#include +#include + +namespace cv { +namespace gapi { +namespace wip { + +class GAPI_EXPORTS OneVPLSource : public IStreamSource +{ +public: + struct Priv; + + explicit OneVPLSource(const std::string& filePath); + ~OneVPLSource() override; + + bool pull(cv::gapi::wip::Data& data) override; + GMetaArg descr_of() const override; + +private: + explicit OneVPLSource(std::unique_ptr&& impl); + std::unique_ptr m_priv; +}; + +template +GAPI_EXPORTS_W cv::Ptr inline make_vpl_src(const std::string& filePath, Args&&... 
args) +{ + return make_src(filePath, std::forward(args)...); +} + +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // OPENCV_GAPI_STREAMING_ONEVPL_ONEVPL_SOURCE_HPP diff --git a/modules/gapi/samples/onevpl_infer_single_roi.cpp b/modules/gapi/samples/onevpl_infer_single_roi.cpp new file mode 100644 index 000000000000..8a7efafabfd8 --- /dev/null +++ b/modules/gapi/samples/onevpl_infer_single_roi.cpp @@ -0,0 +1,254 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include // CommandLineParser + +const std::string about = + "This is an OpenCV-based version of oneVPLSource decoder example"; +const std::string keys = + "{ h help | | Print this help message }" + "{ input | | Path to the input demultiplexed video file }" + "{ output | | Path to the output RAW video file. Use .avi extension }" + "{ facem | face-detection-adas-0001.xml | Path to OpenVINO IE face detection model (.xml) }" + "{ faced | CPU | Target device for face detection model (e.g. CPU, GPU, VPU, ...) 
}"; + +namespace { +std::string get_weights_path(const std::string &model_path) { + const auto EXT_LEN = 4u; + const auto sz = model_path.size(); + CV_Assert(sz > EXT_LEN); + + auto ext = model_path.substr(sz - EXT_LEN); + std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c){ + return static_cast(std::tolower(c)); + }); + CV_Assert(ext == ".xml"); + return model_path.substr(0u, sz - EXT_LEN) + ".bin"; +} +} // anonymous namespace + +namespace custom { +G_API_NET(FaceDetector, , "face-detector"); + +using GDetections = cv::GArray; +using GRect = cv::GOpaque; +using GSize = cv::GOpaque; +using GPrims = cv::GArray; + +G_API_OP(LocateROI, , "sample.custom.locate-roi") { + static cv::GOpaqueDesc outMeta(const cv::GOpaqueDesc &) { + return cv::empty_gopaque_desc(); + } +}; + +G_API_OP(ParseSSD, , "sample.custom.parse-ssd") { + static cv::GArrayDesc outMeta(const cv::GMatDesc &, const cv::GOpaqueDesc &, const cv::GOpaqueDesc &) { + return cv::empty_array_desc(); + } +}; + +G_API_OP(BBoxes, , "sample.custom.b-boxes") { + static cv::GArrayDesc outMeta(const cv::GArrayDesc &, const cv::GOpaqueDesc &) { + return cv::empty_array_desc(); + } +}; + +GAPI_OCV_KERNEL(OCVLocateROI, LocateROI) { + // This is the place where we can run extra analytics + // on the input image frame and select the ROI (region + // of interest) where we want to detect our objects (or + // run any other inference). 
+ // + // Currently it doesn't do anything intelligent, + // but only crops the input image to square (this is + // the most convenient aspect ratio for detectors to use) + + static void run(const cv::Size& in_size, cv::Rect &out_rect) { + + // Identify the central point & square size (- some padding) + const auto center = cv::Point{in_size.width/2, in_size.height/2}; + auto sqside = std::min(in_size.width, in_size.height); + + // Now build the central square ROI + out_rect = cv::Rect{ center.x - sqside/2 + , center.y - sqside/2 + , sqside + , sqside + }; + } +}; + +GAPI_OCV_KERNEL(OCVParseSSD, ParseSSD) { + static void run(const cv::Mat &in_ssd_result, + const cv::Rect &in_roi, + const cv::Size &in_parent_size, + std::vector &out_objects) { + const auto &in_ssd_dims = in_ssd_result.size; + CV_Assert(in_ssd_dims.dims() == 4u); + + const int MAX_PROPOSALS = in_ssd_dims[2]; + const int OBJECT_SIZE = in_ssd_dims[3]; + CV_Assert(OBJECT_SIZE == 7); // fixed SSD object size + + const cv::Size up_roi = in_roi.size(); + const cv::Rect surface({0,0}, in_parent_size); + + out_objects.clear(); + + const float *data = in_ssd_result.ptr(); + for (int i = 0; i < MAX_PROPOSALS; i++) { + const float image_id = data[i * OBJECT_SIZE + 0]; + const float label = data[i * OBJECT_SIZE + 1]; + const float confidence = data[i * OBJECT_SIZE + 2]; + const float rc_left = data[i * OBJECT_SIZE + 3]; + const float rc_top = data[i * OBJECT_SIZE + 4]; + const float rc_right = data[i * OBJECT_SIZE + 5]; + const float rc_bottom = data[i * OBJECT_SIZE + 6]; + (void) label; // unused + + if (image_id < 0.f) { + break; // marks end-of-detections + } + if (confidence < 0.5f) { + continue; // skip objects with low confidence + } + + // map relative coordinates to the original image scale + // taking the ROI into account + cv::Rect rc; + rc.x = static_cast(rc_left * up_roi.width); + rc.y = static_cast(rc_top * up_roi.height); + rc.width = static_cast(rc_right * up_roi.width) - rc.x; + rc.height = 
static_cast(rc_bottom * up_roi.height) - rc.y; + rc.x += in_roi.x; + rc.y += in_roi.y; + out_objects.emplace_back(rc & surface); + } + } +}; + +GAPI_OCV_KERNEL(OCVBBoxes, BBoxes) { + // This kernel converts the rectangles into G-API's + // rendering primitives + static void run(const std::vector &in_face_rcs, + const cv::Rect &in_roi, + std::vector &out_prims) { + out_prims.clear(); + const auto cvt = [](const cv::Rect &rc, const cv::Scalar &clr) { + return cv::gapi::wip::draw::Rect(rc, clr, 2); + }; + out_prims.emplace_back(cvt(in_roi, CV_RGB(0,255,255))); // cyan + for (auto &&rc : in_face_rcs) { + out_prims.emplace_back(cvt(rc, CV_RGB(0,255,0))); // green + } + } +}; + +} // namespace custom + +int main(int argc, char *argv[]) { + + cv::CommandLineParser cmd(argc, argv, keys); + cmd.about(about); + if (cmd.has("help")) { + cmd.printMessage(); + return 0; + } + + // get file name + std::string file_path = cmd.get("input"); + const std::string output = cmd.get("output"); + const auto face_model_path = cmd.get("facem"); + + // check ouput file extension + if (!output.empty()) { + auto ext = output.find_last_of("."); + if (ext == std::string::npos || (output.substr(ext + 1) != "avi")) { + std::cerr << "Output file should have *.avi extension for output video" << std::endl; + return -1; + } + } + + auto face_net = cv::gapi::ie::Params { + face_model_path, // path to topology IR + get_weights_path(face_model_path), // path to weights + cmd.get("faced"), // device specifier + }; + auto kernels = cv::gapi::kernels + < custom::OCVLocateROI + , custom::OCVParseSSD + , custom::OCVBBoxes>(); + auto networks = cv::gapi::networks(face_net); + + // Create source + cv::Ptr cap; + try { + cap = cv::gapi::wip::make_vpl_src(file_path); + std::cout << "oneVPL source desription: " << cap->descr_of() << std::endl; + } catch (const std::exception& ex) { + std::cerr << "Cannot create source: " << ex.what() << std::endl; + return -1; + } + + cv::GMetaArg descr = cap->descr_of(); + auto 
frame_descr = cv::util::get(descr); + + // Now build the graph + cv::GFrame in; + auto size = cv::gapi::streaming::size(in); + auto roi = custom::LocateROI::on(size); + auto blob = cv::gapi::infer(roi, in); + auto rcs = custom::ParseSSD::on(blob, roi, size); + auto out_frame = cv::gapi::wip::draw::renderFrame(in, custom::BBoxes::on(rcs, roi)); + auto out = cv::gapi::streaming::BGR(out_frame); + + cv::GStreamingCompiled pipeline; + try { + pipeline = cv::GComputation(cv::GIn(in), cv::GOut(out)) + .compileStreaming(cv::compile_args(kernels, networks)); + } catch (const std::exception& ex) { + std::cerr << "Exception occured during pipeline construction: " << ex.what() << std::endl; + return -1; + } + // The execution part + + // TODO USE may set pool size from outside and set queue_capacity size, + // compile arg: cv::gapi::streaming::queue_capacity + pipeline.setSource(std::move(cap)); + pipeline.start(); + + int framesCount = 0; + cv::TickMeter t; + cv::VideoWriter writer; + if (!output.empty() && !writer.isOpened()) { + const auto sz = cv::Size{frame_descr.size.width, frame_descr.size.height}; + writer.open(output, cv::VideoWriter::fourcc('M','J','P','G'), 25.0, sz); + CV_Assert(writer.isOpened()); + } + + cv::Mat outMat; + t.start(); + while (pipeline.pull(cv::gout(outMat))) { + cv::imshow("Out", outMat); + cv::waitKey(1); + if (!output.empty()) { + writer << outMat; + } + framesCount++; + } + t.stop(); + std::cout << "Elapsed time: " << t.getTimeSec() << std::endl; + std::cout << "FPS: " << framesCount / t.getTimeSec() << std::endl; + std::cout << "framesCount: " << framesCount << std::endl; + + return 0; +} diff --git a/modules/gapi/src/streaming/onevpl/onevpl_source.cpp b/modules/gapi/src/streaming/onevpl/onevpl_source.cpp new file mode 100644 index 000000000000..988986f6d9d9 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/onevpl_source.cpp @@ -0,0 +1,48 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2021 Intel Corporation + +#include + +#include "streaming/onevpl/onevpl_source_priv.hpp" + +namespace cv { +namespace gapi { +namespace wip { + +#ifdef HAVE_ONEVPL +OneVPLSource::OneVPLSource(const std::string& filePath) : + OneVPLSource(std::unique_ptr(new OneVPLSource::Priv(filePath))) { + + if (filePath.empty()) { + util::throw_error(std::logic_error("Cannot create 'OneVPLSource' on empty source file name")); + } +} +#else +OneVPLSource::OneVPLSource(const std::string&) { + GAPI_Assert(false && "Unsupported: G-API compiled without `WITH_GAPI_ONEVPL=ON`"); +} +#endif + +OneVPLSource::OneVPLSource(std::unique_ptr&& impl) : + IStreamSource(), + m_priv(std::move(impl)) { +} + +OneVPLSource::~OneVPLSource() { +} + +bool OneVPLSource::pull(cv::gapi::wip::Data& data) +{ + return m_priv->pull(data); +} + +GMetaArg OneVPLSource::descr_of() const +{ + return m_priv->descr_of(); +} +} // namespace wip +} // namespace gapi +} // namespace cv diff --git a/modules/gapi/src/streaming/onevpl/onevpl_source_priv.cpp b/modules/gapi/src/streaming/onevpl/onevpl_source_priv.cpp new file mode 100644 index 000000000000..5c4e8e694175 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/onevpl_source_priv.cpp @@ -0,0 +1,63 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2021 Intel Corporation + +#include +#include + +#include "streaming/onevpl/onevpl_source_priv.hpp" +#include "logger.hpp" + +#ifndef HAVE_ONEVPL +namespace cv { +namespace gapi { +namespace wip { +bool OneVPLSource::Priv::pull(cv::gapi::wip::Data&) { + return true; +} +GMetaArg OneVPLSource::Priv::descr_of() const { + return {}; +} +} // namespace wip +} // namespace gapi +} // namespace cv + +#else // HAVE_ONEVPL + +namespace cv { +namespace gapi { +namespace wip { +OneVPLSource::Priv::Priv() : + mfx_handle(MFXLoad()) +{ + GAPI_LOG_INFO(nullptr, "Initialized MFX handle: " << mfx_handle); + description_is_valid = false; +} + +OneVPLSource::Priv::Priv(const std::string&) : + OneVPLSource::Priv() +{ +} + +OneVPLSource::Priv::~Priv() +{ + GAPI_LOG_INFO(nullptr, "Unload MFX handle: " << mfx_handle); + MFXUnload(mfx_handle); +} + +bool OneVPLSource::Priv::pull(cv::gapi::wip::Data&) +{ + return false; +} + +GMetaArg OneVPLSource::Priv::descr_of() const +{ + return {}; +} +} // namespace wip +} // namespace gapi +} // namespace cv + +#endif // HAVE_ONEVPL diff --git a/modules/gapi/src/streaming/onevpl/onevpl_source_priv.hpp b/modules/gapi/src/streaming/onevpl/onevpl_source_priv.hpp new file mode 100644 index 000000000000..b139add99372 --- /dev/null +++ b/modules/gapi/src/streaming/onevpl/onevpl_source_priv.hpp @@ -0,0 +1,62 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+// +// Copyright (C) 2021 Intel Corporation + +#ifndef OPENCV_GAPI_STREAMING_ONEVPL_ONEVPL_SOURCE_PRIV_HPP +#define OPENCV_GAPI_STREAMING_ONEVPL_ONEVPL_SOURCE_PRIV_HPP + +#include + +#include +#include + +#include +#include +#include + +#ifdef HAVE_ONEVPL +#if (MFX_VERSION >= 2000) +#include +#endif // MFX_VERSION + +#include + +#include + +namespace cv { +namespace gapi { +namespace wip { + +struct OneVPLSource::Priv +{ + explicit Priv(const std::string& file_path); + ~Priv(); + + bool pull(cv::gapi::wip::Data& data); + GMetaArg descr_of() const; +private: + Priv(); + mfxLoader mfx_handle; + bool description_is_valid; +}; +} // namespace wip +} // namespace gapi +} // namespace cv + +#else // HAVE_ONEVPL + +namespace cv { +namespace gapi { +namespace wip { +struct OneVPLSource::Priv final +{ + bool pull(cv::gapi::wip::Data&); + GMetaArg descr_of() const; +}; +} // namespace wip +} // namespace gapi +} // namespace cv +#endif // HAVE_ONEVPL +#endif // OPENCV_GAPI_STREAMING_ONEVPL_ONEVPL_SOURCE_PRIV_HPP From 95919051e0470237dd508a2086d8e519f5e091bf Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Wed, 18 Aug 2021 10:42:32 +0300 Subject: [PATCH 121/128] Merge pull request #20528 from TolyaTalamanov:at/fix-overwrite-blob-precision-bug [G-API] Prohibit setPrecision & preprocessing for importedNetworks * Prohibit preprocessing for imported networks * Fix typo input_names -> output_names * Move setBlob logic to separate function * Change comment --- modules/gapi/src/backends/ie/giebackend.cpp | 194 +++++++++++------- .../src/backends/ie/giebackend/giewrapper.cpp | 18 -- .../src/backends/ie/giebackend/giewrapper.hpp | 3 - 3 files changed, 121 insertions(+), 94 deletions(-) diff --git a/modules/gapi/src/backends/ie/giebackend.cpp b/modules/gapi/src/backends/ie/giebackend.cpp index fc9fc502ef6d..007f0db7afcc 100644 --- a/modules/gapi/src/backends/ie/giebackend.cpp +++ b/modules/gapi/src/backends/ie/giebackend.cpp @@ -243,10 +243,6 @@ struct IEUnit { this_plugin = 
cv::gimpl::ie::wrap::getPlugin(params); this_plugin.SetConfig(params.config); this_network = cv::gimpl::ie::wrap::importNetwork(this_plugin, params, rctx); - // FIXME: ICNNetwork returns InputsDataMap/OutputsDataMap, - // but ExecutableNetwork returns ConstInputsDataMap/ConstOutputsDataMap - inputs = cv::gimpl::ie::wrap::toInputsDataMap(this_network.GetInputsInfo()); - outputs = cv::gimpl::ie::wrap::toOutputsDataMap(this_network.GetOutputsInfo()); if (!params.reshape_table.empty() || !params.layer_names_to_reshape.empty()) { GAPI_LOG_WARNING(NULL, "Reshape isn't supported for imported network"); } @@ -270,10 +266,18 @@ struct IEUnit { + params.model_path)); } if (params.num_in == 1u && params.input_names.empty()) { - params.input_names = { inputs.begin()->first }; + if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + params.input_names = { inputs.begin()->first }; + } else { + params.input_names = { this_network.GetInputsInfo().begin()->first }; + } } if (params.num_out == 1u && params.output_names.empty()) { - params.output_names = { outputs.begin()->first }; + if (params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + params.output_names = { outputs.begin()->first }; + } else { + params.output_names = { this_network.GetOutputsInfo().begin()->first }; + } } if (!params.reshape_table.empty()) { GAPI_Assert((params.reshape_table.size() + params.layer_names_to_reshape.size()) <= @@ -533,6 +537,24 @@ inline IE::Blob::Ptr extractBlob(IECallContext& ctx, std::size_t i) { } GAPI_Assert(false); } + + +static void setBlob(InferenceEngine::InferRequest& req, + cv::gapi::ie::detail::ParamDesc::Kind kind, + const std::string& layer_name, + IE::Blob::Ptr blob) { + // NB: In case importNetwork preprocessing must be + // passed as SetBlob argument. 
+ if (kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + req.SetBlob(layer_name, blob); + } else { + GAPI_Assert(kind == cv::gapi::ie::detail::ParamDesc::Kind::Import); + IE::PreProcessInfo info; + info.setResizeAlgorithm(IE::RESIZE_BILINEAR); + req.SetBlob(layer_name, blob, info); + } +} + } // anonymous namespace std::vector cv::gimpl::ie::IECompiled::createInferRequests() { @@ -891,25 +913,30 @@ struct Infer: public cv::detail::KernelTag { // meta order. GAPI_Assert(uu.params.input_names.size() == in_metas.size() && "Known input layers count doesn't match input meta count"); - for (auto &&it : ade::util::zip(ade::util::toRange(uu.params.input_names), - ade::util::toRange(in_metas))) { - const auto &input_name = std::get<0>(it); - auto &&ii = uu.inputs.at(input_name); - const auto & mm = std::get<1>(it); - configureInputInfo(ii, mm); - if (uu.params.layer_names_to_reshape.find(input_name) != - uu.params.layer_names_to_reshape.end()) { - configureInputReshapeByImage(ii, mm, input_reshape_table); + // NB: Configuring input precision and network reshape must be done + // only in the loadNetwork case. + if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + for (auto &&it : ade::util::zip(ade::util::toRange(uu.params.input_names), + ade::util::toRange(in_metas))) { + const auto &input_name = std::get<0>(it); + auto &&ii = uu.inputs.at(input_name); + const auto & mm = std::get<1>(it); + + configureInputInfo(ii, mm); + if (uu.params.layer_names_to_reshape.find(input_name) != + uu.params.layer_names_to_reshape.end()) { + configureInputReshapeByImage(ii, mm, input_reshape_table); + } + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); } - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - } - // FIXME: This isn't the best place to call reshape function. - // Сorrect solution would be to do this in compile() method of network, - // but now input meta isn't passed to compile() method. 
- if (!input_reshape_table.empty()) { - const_cast(&uu.net)->reshape(input_reshape_table); + // FIXME: This isn't the best place to call reshape function. + // Сorrect solution would be to do this in compile() method of network, + // but now input meta isn't passed to compile() method. + if (!input_reshape_table.empty()) { + const_cast(&uu.net)->reshape(input_reshape_table); + } } // FIXME: It would be nice here to have an exact number of network's @@ -941,7 +968,10 @@ struct Infer: public cv::detail::KernelTag { // and redirect our data producers to this memory // (A memory dialog comes to the picture again) IE::Blob::Ptr this_blob = extractBlob(*ctx, i); - req.SetBlob(ctx->uu.params.input_names[i], this_blob); + setBlob(req, + ctx->uu.params.kind, + ctx->uu.params.input_names[i], + this_blob); } // FIXME: Should it be done by kernel ? // What about to do that in RequestPool ? @@ -973,22 +1003,26 @@ struct InferROI: public cv::detail::KernelTag { GAPI_Assert(1u == uu.params.input_names.size()); GAPI_Assert(2u == in_metas.size()); - // 0th is ROI, 1st is input image - const auto &input_name = uu.params.input_names.at(0); - auto &&ii = uu.inputs.at(input_name); - auto &&mm = in_metas.at(1u); - configureInputInfo(ii, mm); - if (uu.params.layer_names_to_reshape.find(input_name) != - uu.params.layer_names_to_reshape.end()) { - configureInputReshapeByImage(ii, mm, input_reshape_table); - } - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + // NB: Configuring input precision and network reshape must be done + // only in the loadNetwork case. 
+ if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + // 0th is ROI, 1st is input image + const auto &input_name = uu.params.input_names.at(0); + auto &&ii = uu.inputs.at(input_name); + auto &&mm = in_metas.at(1u); + configureInputInfo(ii, mm); + if (uu.params.layer_names_to_reshape.find(input_name) != + uu.params.layer_names_to_reshape.end()) { + configureInputReshapeByImage(ii, mm, input_reshape_table); + } + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - // FIXME: This isn't the best place to call reshape function. - // Сorrect solution would be to do this in compile() method of network, - // but now input meta isn't passed to compile() method. - if (!input_reshape_table.empty()) { - const_cast(&uu.net)->reshape(input_reshape_table); + // FIXME: This isn't the best place to call reshape function. + // Сorrect solution would be to do this in compile() method of network, + // but now input meta isn't passed to compile() method. + if (!input_reshape_table.empty()) { + const_cast(&uu.net)->reshape(input_reshape_table); + } } // FIXME: It would be nice here to have an exact number of network's @@ -1017,10 +1051,11 @@ struct InferROI: public cv::detail::KernelTag { auto&& this_roi = ctx->inArg(0).rref(); IE::Blob::Ptr this_blob = extractBlob(*ctx, 1); - - req.SetBlob(*(ctx->uu.params.input_names.begin()), - IE::make_shared_blob(this_blob, toIE(this_roi))); - + setBlob(req, + ctx->uu.params.kind, + *(ctx->uu.params.input_names.begin()), + IE::make_shared_blob(this_blob, + toIE(this_roi))); // FIXME: Should it be done by kernel ? // What about to do that in RequestPool ? 
req.StartAsync(); @@ -1055,23 +1090,27 @@ struct InferList: public cv::detail::KernelTag { GAPI_Assert(uu.params.input_names.size() == (in_metas.size() - 1u) && "Known input layers count doesn't match input meta count"); - std::size_t idx = 1u; - for (auto &&input_name : uu.params.input_names) { - auto &&ii = uu.inputs.at(input_name); - const auto & mm = in_metas[idx++]; - configureInputInfo(ii, mm); - if (uu.params.layer_names_to_reshape.find(input_name) != - uu.params.layer_names_to_reshape.end()) { - configureInputReshapeByImage(ii, mm, input_reshape_table); + // NB: Configuring input precision and network reshape must be done + // only in the loadNetwork case. + if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + std::size_t idx = 1u; + for (auto &&input_name : uu.params.input_names) { + auto &&ii = uu.inputs.at(input_name); + const auto & mm = in_metas[idx++]; + configureInputInfo(ii, mm); + if (uu.params.layer_names_to_reshape.find(input_name) != + uu.params.layer_names_to_reshape.end()) { + configureInputReshapeByImage(ii, mm, input_reshape_table); + } + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); } - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - } - // FIXME: This isn't the best place to call reshape function. - // Сorrect solution would be to do this in compile() method of network, - // but now input meta isn't passed to compile() method. - if (!input_reshape_table.empty()) { - const_cast(&uu.net)->reshape(input_reshape_table); + // FIXME: This isn't the best place to call reshape function. + // Сorrect solution would be to do this in compile() method of network, + // but now input meta isn't passed to compile() method. + if (!input_reshape_table.empty()) { + const_cast(&uu.net)->reshape(input_reshape_table); + } } // roi-list version is much easier at the moment. 
@@ -1117,7 +1156,10 @@ struct InferList: public cv::detail::KernelTag { cv::gimpl::ie::RequestPool::Task { [ctx, rc, this_blob](InferenceEngine::InferRequest &req) { IE::Blob::Ptr roi_blob = IE::make_shared_blob(this_blob, toIE(rc)); - req.SetBlob(ctx->uu.params.input_names[0u], roi_blob); + setBlob(req, + ctx->uu.params.kind, + ctx->uu.params.input_names[0u], + roi_blob); req.StartAsync(); }, std::bind(callback, std::placeholders::_1, pos) @@ -1191,19 +1233,23 @@ struct InferList2: public cv::detail::KernelTag { && "Non-array inputs are not supported"); if (op.k.inKinds[idx] == cv::detail::OpaqueKind::CV_RECT) { - // This is a cv::Rect -- configure the IE preprocessing - configureInputInfo(ii, mm_0); - if (uu.params.layer_names_to_reshape.find(input_name) != - uu.params.layer_names_to_reshape.end()) { - configureInputReshapeByImage(ii, mm_0, input_reshape_table); - } - ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); - - // FIXME: This isn't the best place to call reshape function. - // Сorrect solution would be to do this in compile() method of network, - // but now input meta isn't passed to compile() method. - if (!input_reshape_table.empty()) { - const_cast(&uu.net)->reshape(input_reshape_table); + // NB: Configuring input precision and network reshape must be done + // only in the loadNetwork case. + if (uu.params.kind == cv::gapi::ie::detail::ParamDesc::Kind::Load) { + // This is a cv::Rect -- configure the IE preprocessing + configureInputInfo(ii, mm_0); + if (uu.params.layer_names_to_reshape.find(input_name) != + uu.params.layer_names_to_reshape.end()) { + configureInputReshapeByImage(ii, mm_0, input_reshape_table); + } + ii->getPreProcess().setResizeAlgorithm(IE::RESIZE_BILINEAR); + + // FIXME: This isn't the best place to call reshape function. + // Сorrect solution would be to do this in compile() method of network, + // but now input meta isn't passed to compile() method. 
+ if (!input_reshape_table.empty()) { + const_cast(&uu.net)->reshape(input_reshape_table); + } } } else { // This is a cv::GMat (equals to: cv::Mat) @@ -1268,8 +1314,10 @@ struct InferList2: public cv::detail::KernelTag { GAPI_Assert(false && "Only Rect and Mat types are supported for infer list 2!"); } - - req.SetBlob(ctx->uu.params.input_names[in_idx], this_blob); + setBlob(req, + ctx->uu.params.kind, + ctx->uu.params.input_names[in_idx], + this_blob); } req.StartAsync(); }, diff --git a/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp b/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp index d4ec806e4846..1f9721dbf4ef 100644 --- a/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp +++ b/modules/gapi/src/backends/ie/giebackend/giewrapper.cpp @@ -22,24 +22,6 @@ namespace IE = InferenceEngine; namespace giewrap = cv::gimpl::ie::wrap; using GIEParam = cv::gapi::ie::detail::ParamDesc; -IE::InputsDataMap giewrap::toInputsDataMap (const IE::ConstInputsDataMap& inputs) { - IE::InputsDataMap transformed; - auto convert = [](const std::pair& p) { - return std::make_pair(p.first, std::const_pointer_cast(p.second)); - }; - std::transform(inputs.begin(), inputs.end(), std::inserter(transformed, transformed.end()), convert); - return transformed; -} - -IE::OutputsDataMap giewrap::toOutputsDataMap (const IE::ConstOutputsDataMap& outputs) { - IE::OutputsDataMap transformed; - auto convert = [](const std::pair& p) { - return std::make_pair(p.first, std::const_pointer_cast(p.second)); - }; - std::transform(outputs.begin(), outputs.end(), std::inserter(transformed, transformed.end()), convert); - return transformed; -} - #if INF_ENGINE_RELEASE < 2020000000 // < 2020.1 // Load extensions (taken from DNN module) std::vector giewrap::getExtensions(const GIEParam& params) { diff --git a/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp b/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp index 7e67cb8989d6..2e4bac12704a 100644 --- 
a/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp +++ b/modules/gapi/src/backends/ie/giebackend/giewrapper.hpp @@ -29,9 +29,6 @@ namespace wrap { GAPI_EXPORTS std::vector getExtensions(const GIEParam& params); GAPI_EXPORTS IE::CNNNetwork readNetwork(const GIEParam& params); -IE::InputsDataMap toInputsDataMap (const IE::ConstInputsDataMap& inputs); -IE::OutputsDataMap toOutputsDataMap(const IE::ConstOutputsDataMap& outputs); - #if INF_ENGINE_RELEASE < 2019020000 // < 2019.R2 using Plugin = IE::InferencePlugin; GAPI_EXPORTS IE::InferencePlugin getPlugin(const GIEParam& params); From 03b989251d30efff3e90e0b5e02bae1e2c4d825f Mon Sep 17 00:00:00 2001 From: Anatoliy Talamanov Date: Wed, 18 Aug 2021 18:12:27 +0300 Subject: [PATCH 122/128] Check adapter in executor --- modules/gapi/src/executor/gexecutor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/gapi/src/executor/gexecutor.cpp b/modules/gapi/src/executor/gexecutor.cpp index 6f313197ba36..9b51e70d5dae 100644 --- a/modules/gapi/src/executor/gexecutor.cpp +++ b/modules/gapi/src/executor/gexecutor.cpp @@ -159,8 +159,8 @@ void writeBackExec(const Mag& mag, const RcDesc &rc, GRunArgP &g_arg) // a real copy (add a pass to StreamingBackend?) 
auto& out_mat = *util::get(g_arg); const auto& rmat = mag.template slot().at(rc.id); - auto mag_data = rmat.get()->data(); - if (out_mat.data != mag_data) { + auto* adapter = rmat.get(); + if (adapter != nullptr && out_mat.data != adapter->data()) { auto view = rmat.access(RMat::Access::R); asMat(view).copyTo(out_mat); } From fa90e14b0606b76a4efc9739c4ea87e659c2aab8 Mon Sep 17 00:00:00 2001 From: SamFC10 Date: Thu, 19 Aug 2021 09:56:47 +0530 Subject: [PATCH 123/128] int8 layers and 8-bit quantization support --- modules/dnn/CMakeLists.txt | 1 + .../dnn/include/opencv2/dnn/all_layers.hpp | 87 +- modules/dnn/include/opencv2/dnn/dnn.hpp | 46 + modules/dnn/src/dnn.cpp | 383 +++++- modules/dnn/src/init.cpp | 38 + .../dnn/src/int8layers/batch_norm_layer.cpp | 178 +++ .../dnn/src/int8layers/convolution_layer.cpp | 1136 +++++++++++++++ .../dnn/src/int8layers/elementwise_layers.cpp | 190 +++ modules/dnn/src/int8layers/eltwise_layer.cpp | 577 ++++++++ .../src/int8layers/fully_connected_layer.cpp | 266 ++++ modules/dnn/src/int8layers/layers_common.hpp | 41 + .../dnn/src/int8layers/layers_common.simd.hpp | 637 +++++++++ modules/dnn/src/int8layers/pooling_layer.cpp | 595 ++++++++ .../int8layers/quantize_dequantize_layer.cpp | 157 +++ modules/dnn/src/int8layers/scale_layer.cpp | 211 +++ modules/dnn/src/int8layers/softmax_layer.cpp | 176 +++ modules/dnn/src/layers/batch_norm_layer.cpp | 12 + modules/dnn/src/layers/blank_layer.cpp | 5 + modules/dnn/src/layers/concat_layer.cpp | 32 +- modules/dnn/src/layers/const_layer.cpp | 9 + modules/dnn/src/layers/convolution_layer.cpp | 42 + modules/dnn/src/layers/elementwise_layers.cpp | 179 +++ modules/dnn/src/layers/eltwise_layer.cpp | 31 + modules/dnn/src/layers/flatten_layer.cpp | 5 + .../dnn/src/layers/fully_connected_layer.cpp | 39 + modules/dnn/src/layers/padding_layer.cpp | 12 + modules/dnn/src/layers/permute_layer.cpp | 63 +- modules/dnn/src/layers/pooling_layer.cpp | 17 + modules/dnn/src/layers/reorg_layer.cpp | 5 + 
modules/dnn/src/layers/reshape_layer.cpp | 5 + modules/dnn/src/layers/scale_layer.cpp | 8 + .../dnn/src/layers/shuffle_channel_layer.cpp | 6 + modules/dnn/src/layers/slice_layer.cpp | 30 +- modules/dnn/src/layers/softmax_layer.cpp | 16 + modules/dnn/src/layers/split_layer.cpp | 11 + modules/dnn/test/test_int8_layers.cpp | 1220 +++++++++++++++++ 36 files changed, 6400 insertions(+), 66 deletions(-) create mode 100644 modules/dnn/src/int8layers/batch_norm_layer.cpp create mode 100644 modules/dnn/src/int8layers/convolution_layer.cpp create mode 100644 modules/dnn/src/int8layers/elementwise_layers.cpp create mode 100644 modules/dnn/src/int8layers/eltwise_layer.cpp create mode 100644 modules/dnn/src/int8layers/fully_connected_layer.cpp create mode 100644 modules/dnn/src/int8layers/layers_common.hpp create mode 100644 modules/dnn/src/int8layers/layers_common.simd.hpp create mode 100644 modules/dnn/src/int8layers/pooling_layer.cpp create mode 100644 modules/dnn/src/int8layers/quantize_dequantize_layer.cpp create mode 100644 modules/dnn/src/int8layers/scale_layer.cpp create mode 100644 modules/dnn/src/int8layers/softmax_layer.cpp create mode 100644 modules/dnn/test/test_int8_layers.cpp diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt index 70f9a5a73e5a..c1236c4653f2 100644 --- a/modules/dnn/CMakeLists.txt +++ b/modules/dnn/CMakeLists.txt @@ -9,6 +9,7 @@ endif() set(the_description "Deep neural network module. 
It allows to load models from different frameworks and to make forward pass") ocv_add_dispatched_file_force_all("layers/layers_common" AVX AVX2 AVX512_SKX RVV) +ocv_add_dispatched_file_force_all("int8layers/layers_common" AVX2 AVX512_SKX) ocv_add_module(dnn opencv_core opencv_imgproc WRAP python java objc js) diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp index 794bfeedda33..fbe16850d4d5 100644 --- a/modules/dnn/include/opencv2/dnn/all_layers.hpp +++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp @@ -258,6 +258,14 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + class CV_EXPORTS ConvolutionLayerInt8 : public BaseConvolutionLayer + { + public: + int input_zp, output_zp; + float output_sc; + static Ptr create(const LayerParams& params); + }; + class CV_EXPORTS DeconvolutionLayer : public BaseConvolutionLayer { public: @@ -300,6 +308,13 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + class CV_EXPORTS PoolingLayerInt8 : public PoolingLayer + { + public: + int input_zp, output_zp; + static Ptr create(const LayerParams& params); + }; + class CV_EXPORTS SoftmaxLayer : public Layer { public: @@ -308,6 +323,14 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + class CV_EXPORTS SoftmaxLayerInt8 : public SoftmaxLayer + { + public: + float output_sc; + int output_zp; + static Ptr create(const LayerParams& params); + }; + class CV_EXPORTS InnerProductLayer : public Layer { public: @@ -315,6 +338,13 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams& params); }; + class CV_EXPORTS InnerProductLayerInt8 : public InnerProductLayer + { + public: + int output_zp; + static Ptr create(const LayerParams& params); + }; + class CV_EXPORTS MVNLayer : public Layer { public: @@ -341,6 +371,22 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams ¶ms); }; + class CV_EXPORTS QuantizeLayer : public Layer + { + public: + float 
scale; + int zeropoint; + static Ptr create(const LayerParams ¶ms); + }; + + class CV_EXPORTS DequantizeLayer : public Layer + { + public: + float scale; + int zeropoint; + static Ptr create(const LayerParams ¶ms); + }; + class CV_EXPORTS ConcatLayer : public Layer { public: @@ -352,6 +398,7 @@ CV__DNN_INLINE_NS_BEGIN * Details: https://github.com/torch/nn/blob/master/doc/containers.md#depthconcat */ bool padding; + int paddingValue; static Ptr create(const LayerParams ¶ms); }; @@ -459,7 +506,11 @@ CV__DNN_INLINE_NS_BEGIN { public: virtual void forwardSlice(const float* src, float* dst, int len, - size_t outPlaneSize, int cn0, int cn1) const = 0; + size_t outPlaneSize, int cn0, int cn1) const {}; + virtual void forwardSlice(const int* src, const int* lut, int* dst, int len, + size_t outPlaneSize, int cn0, int cn1) const {}; + virtual void forwardSlice(const int8_t* src, const int8_t* lut, int8_t* dst, int len, + size_t outPlaneSize, int cn0, int cn1) const {}; }; class CV_EXPORTS ReLULayer : public ActivationLayer @@ -542,6 +593,12 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams ¶ms); }; + class CV_EXPORTS ActivationLayerInt8 : public ActivationLayer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + /* Layers used in semantic segmentation */ class CV_EXPORTS CropLayer : public Layer @@ -563,6 +620,12 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams ¶ms); }; + class CV_EXPORTS EltwiseLayerInt8 : public Layer + { + public: + static Ptr create(const LayerParams ¶ms); + }; + class CV_EXPORTS BatchNormLayer : public ActivationLayer { public: @@ -572,6 +635,14 @@ CV__DNN_INLINE_NS_BEGIN static Ptr create(const LayerParams ¶ms); }; + class CV_EXPORTS BatchNormLayerInt8 : public BatchNormLayer + { + public: + float input_sc, output_sc; + int input_zp, output_zp; + static Ptr create(const LayerParams ¶ms); + }; + class CV_EXPORTS MaxUnpoolLayer : public Layer { public: @@ -591,12 +662,26 @@ CV__DNN_INLINE_NS_BEGIN static Ptr 
create(const LayerParams& params); }; + class CV_EXPORTS ScaleLayerInt8 : public ScaleLayer + { + public: + float output_sc; + int output_zp; + static Ptr create(const LayerParams ¶ms); + }; + class CV_EXPORTS ShiftLayer : public Layer { public: static Ptr create(const LayerParams& params); }; + class CV_EXPORTS ShiftLayerInt8 : public Layer + { + public: + static Ptr create(const LayerParams& params); + }; + class CV_EXPORTS DataAugmentationLayer : public Layer { public: diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp index a498039f6571..bf1670051ac0 100644 --- a/modules/dnn/include/opencv2/dnn/dnn.hpp +++ b/modules/dnn/include/opencv2/dnn/dnn.hpp @@ -235,6 +235,15 @@ CV__DNN_INLINE_NS_BEGIN */ virtual void forward(InputArrayOfArrays inputs, OutputArrayOfArrays outputs, OutputArrayOfArrays internals); + /** @brief Tries to quantize the given layer and compute the quantization parameters required for fixed point implementation. + * @param[in] scales input and output scales. + * @param[in] zeropoints input and output zeropoints. + * @param[out] params Quantized parameters required for fixed point implementation of that layer. + * @returns True if layer can be quantized. + */ + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params); + /** @brief Given the @p input blobs, computes the output @p blobs. * @param[in] inputs the input blobs. * @param[out] outputs allocated output blobs, which will store results of the computation. @@ -368,6 +377,16 @@ CV__DNN_INLINE_NS_BEGIN */ virtual void getScaleShift(Mat& scale, Mat& shift) const; + /** + * @brief Returns scale and zeropoint of layers + * @param[out] scale Output scale + * @param[out] zeropoint Output zeropoint + * + * By default, @p scale is 1 and @p zeropoint is 0. + */ + virtual void getScaleZeropoint(float& scale, int& zeropoint) const; + + /** * @brief "Deattaches" all the layers, attached to particular layer. 
*/ @@ -453,13 +472,21 @@ CV__DNN_INLINE_NS_BEGIN /** @brief Adds new layer to the net. * @param name unique name of the adding layer. * @param type typename of the adding layer (type must be registered in LayerRegister). + * @param dtype datatype of output blobs. * @param params parameters which will be used to initialize the creating layer. * @returns unique identifier of created layer, or -1 if a failure will happen. */ + int addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms); + + /** @overload Datatype of output blobs set to default CV_32F */ int addLayer(const String &name, const String &type, LayerParams ¶ms); + /** @brief Adds new layer and connects its first input to the first output of previously added layer. * @see addLayer() */ + int addLayerToPrev(const String &name, const String &type, const int &dtype, LayerParams ¶ms); + + /** @overload */ int addLayerToPrev(const String &name, const String &type, LayerParams ¶ms); /** @brief Converts string name of the layer to the integer identifier. @@ -551,6 +578,25 @@ CV__DNN_INLINE_NS_BEGIN CV_WRAP_AS(forwardAndRetrieve) void forward(CV_OUT std::vector >& outputBlobs, const std::vector& outBlobNames); + /** @brief Returns a quantized Net from a floating-point Net. + * @param calibData Calibration data to compute the quantization parameters. + * @param inputsDtype Datatype of quantized net's inputs. Can be CV_32F or CV_8S. + * @param outputsDtype Datatype of quantized net's outputs. Can be CV_32F or CV_8S. + */ + CV_WRAP Net quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype); + + /** @brief Returns input scale and zeropoint for a quantized Net. + * @param scales output parameter for returning input scales. + * @param zeropoints output parameter for returning input zeropoints. + */ + CV_WRAP void getInputDetails(CV_OUT std::vector& scales, CV_OUT std::vector& zeropoints) const; + + /** @brief Returns output scale and zeropoint for a quantized Net. 
+ * @param scales output parameter for returning output scales. + * @param zeropoints output parameter for returning output zeropoints. + */ + CV_WRAP void getOutputDetails(CV_OUT std::vector& scales, CV_OUT std::vector& zeropoints) const; + /** * @brief Compile Halide layers. * @param[in] scheduler Path to YAML file with scheduling directives. diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 52a5fcba28ce..492ad166d038 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -574,9 +574,9 @@ struct LayerPin struct LayerData { - LayerData() : id(-1), skip(false), flag(0) {} - LayerData(int _id, const String &_name, const String &_type, LayerParams &_params) - : id(_id), name(_name), type(_type), params(_params), skip(false), flag(0) + LayerData() : id(-1), dtype(CV_32F), skip(false), flag(0) {} + LayerData(int _id, const String &_name, const String &_type, const int &_dtype, LayerParams &_params) + : id(_id), name(_name), type(_type), dtype(_dtype), params(_params), skip(false), flag(0) { CV_TRACE_FUNCTION(); @@ -588,6 +588,7 @@ struct LayerData int id; String name; String type; + int dtype; // Datatype of output blobs. LayerParams params; std::vector inputBlobsId; @@ -944,7 +945,7 @@ struct BlobManager } } - void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, bool use_half) + void reuseOrCreate(const MatShape& shape, const LayerPin& lp, Mat& dst, const int& dtype) { if (!DNN_DISABLE_MEMORY_OPTIMIZATIONS) { @@ -966,7 +967,8 @@ struct BlobManager { Mat& unusedBlob = hostIt->second; if (unusedBlob.total() >= targetTotal && - unusedBlob.total() < bestBlobTotal) + unusedBlob.total() < bestBlobTotal && + unusedBlob.type() == dtype) { bestBlobPin = hostIt->first; bestBlob = unusedBlob; @@ -985,14 +987,13 @@ struct BlobManager { // if dst already has been allocated with total(shape) elements, // it won't be recreated and pointer of dst.data remains the same. - dst.create(shape, use_half ? 
CV_16S : CV_32F); + dst.create(shape, dtype); addHost(lp, dst); } } void allocateBlobsForLayer(LayerData &ld, const LayerShapes& layerShapes, - std::vector& pinsForInternalBlobs, - bool use_half = false) + std::vector& pinsForInternalBlobs) { CV_TRACE_FUNCTION(); @@ -1063,7 +1064,7 @@ struct BlobManager reuse(ld.inputBlobsId[0], blobPin); } else - reuseOrCreate(shapes[index], blobPin, *blobs[index], use_half); + reuseOrCreate(shapes[index], blobPin, *blobs[index], ld.dtype); } } } @@ -1193,6 +1194,7 @@ struct Net::Impl : public detail::NetImplBase lastLayerId = 0; netWasAllocated = false; + netWasQuantized = false; fusion = true; isAsync = false; preferableBackend = DNN_BACKEND_DEFAULT; @@ -1217,6 +1219,7 @@ struct Net::Impl : public detail::NetImplBase int lastLayerId; bool netWasAllocated; + bool netWasQuantized; bool fusion; bool isAsync; std::vector layersTimings; @@ -1372,7 +1375,7 @@ struct Net::Impl : public detail::NetImplBase currLayer->unsetAttached(); } - + netWasAllocated = false; layersTimings.clear(); } @@ -2541,10 +2544,11 @@ struct Net::Impl : public detail::NetImplBase CV_Assert(layerShapesIt != layersShapes.end()); + if (preferableBackend == DNN_BACKEND_OPENCV && preferableTarget == DNN_TARGET_OPENCL_FP16 && ld.dtype == CV_32F) + ld.dtype = CV_16S; + std::vector pinsForInternalBlobs; - blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs, - preferableBackend == DNN_BACKEND_OPENCV && - preferableTarget == DNN_TARGET_OPENCL_FP16); + blobManager.allocateBlobsForLayer(ld, layerShapesIt->second, pinsForInternalBlobs); ld.outputBlobsWrappers.resize(ld.outputBlobs.size()); for (int i = 0; i < ld.outputBlobs.size(); ++i) ld.outputBlobsWrappers[i] = wrap(ld.outputBlobs[i]); @@ -3148,7 +3152,8 @@ struct Net::Impl : public detail::NetImplBase Mat& inp = layers[0].outputBlobs[i]; CV_Assert(inp.total()); if (preferableBackend == DNN_BACKEND_OPENCV && - preferableTarget == DNN_TARGET_OPENCL_FP16) + preferableTarget == 
DNN_TARGET_OPENCL_FP16 && + layers[0].dtype == CV_32F) { layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); } @@ -3458,6 +3463,25 @@ struct Net::Impl : public detail::NetImplBase #endif } + void getQuantizationParams(const Mat& src, std::vector& scales, std::vector& zeropoints) + { + const int qmin = -128; // INT8_MIN + const int qmax = 127; // INT8_MAX + + double rmin, rmax, sc, zp; + cv::minMaxIdx(src, &rmin, &rmax); + + // 0 must be present in the range [rmin, rmax] + rmin = std::min(rmin, 0.0); + rmax = std::max(rmax, 0.0); + + sc = (rmax == rmin) ? 1.0 : (rmax - rmin)/(qmax - qmin); + zp = qmin - (rmin/sc); + + scales.push_back((float)sc); + zeropoints.push_back((int)std::round(zp)); + } + void getLayerShapesRecursively(int id, LayersShapesMap& inOutShapes) { std::vector& inputLayerIds = layers[id].inputBlobsId; @@ -3588,7 +3612,8 @@ struct Net::Impl : public detail::NetImplBase Mat& inp = layers[0].outputBlobs[i]; CV_Assert(inp.total()); if (preferableBackend == DNN_BACKEND_OPENCV && - preferableTarget == DNN_TARGET_OPENCL_FP16) + preferableTarget == DNN_TARGET_OPENCL_FP16 && + layers[0].dtype == CV_32F) { layers[0].outputBlobs[i].create(inp.dims, inp.size, CV_16S); } @@ -3614,7 +3639,7 @@ struct Net::Impl : public detail::NetImplBase const MatShape& shape = layersShapes[inputLayerId].out[inputLayerIds[i].oid]; layersShapes[layerId].in.push_back(shape); } - it->second.layerInstance->updateMemoryShapes(layersShapes[layerId].in); + it->second.getLayerInstance()->updateMemoryShapes(layersShapes[layerId].in); } } } @@ -4019,7 +4044,7 @@ Net::~Net() { } -int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) +int Net::addLayer(const String &name, const String &type, const int &dtype, LayerParams ¶ms) { CV_TRACE_FUNCTION(); @@ -4042,23 +4067,35 @@ int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) id = ++impl->lastLayerId; impl->layerNameToId.insert(std::make_pair(name, id)); - 
impl->layers.insert(std::make_pair(id, LayerData(id, name, type, params))); + impl->layers.insert(std::make_pair(id, LayerData(id, name, type, dtype, params))); if (params.get("has_dynamic_shapes", false)) impl->hasDynamicShapes = true; return id; } -int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms) +int Net::addLayer(const String &name, const String &type, LayerParams ¶ms) +{ + CV_TRACE_FUNCTION(); + return addLayer(name, type, CV_32F, params); +} + +int Net::addLayerToPrev(const String &name, const String &type, const int &dtype, LayerParams ¶ms) { CV_TRACE_FUNCTION(); int prvLid = impl->lastLayerId; - int newLid = this->addLayer(name, type, params); + int newLid = this->addLayer(name, type, dtype, params); this->connect(prvLid, 0, newLid, 0); return newLid; } +int Net::addLayerToPrev(const String &name, const String &type, LayerParams ¶ms) +{ + CV_TRACE_FUNCTION(); + return addLayerToPrev(name, type, CV_32F, params); +} + void Net::connect(int outLayerId, int outNum, int inpLayerId, int inpNum) { CV_TRACE_FUNCTION(); @@ -4169,16 +4206,19 @@ void Net::forward(OutputArrayOfArrays outputBlobs, const String& outputName) ld.outputBlobsWrappers[i]->copyToHost(); } } - if (ld.outputBlobs[0].depth() == CV_32F) + if (ld.outputBlobs[0].depth() == CV_16S) { - std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); - outputvec = ld.outputBlobs; - } else { std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); outputvec.resize(ld.outputBlobs.size()); for (int i = 0; i < outputvec.size(); i++) convertFp16(ld.outputBlobs[i], outputvec[i]); } + else + { + // Output depth can be CV_32F or CV_8S + std::vector & outputvec = *(std::vector *)outputBlobs.getObj(); + outputvec = ld.outputBlobs; + } } else if (outputBlobs.isUMatVector()) { @@ -4264,11 +4304,277 @@ void Net::forward(std::vector >& outputBlobs, } } +Net Net::quantize(InputArrayOfArrays calibData, int inputsDtype, int outputsDtype) +{ + CV_TRACE_FUNCTION(); + + // Net can be 
quantized only once. + if (impl->netWasQuantized) + CV_Error(Error::StsBadArg, "Cannot quantize a quantized net"); + + CV_CheckType(inputsDtype, inputsDtype == CV_32F || inputsDtype == CV_8S, "Input depth should be CV_32F or CV_8S"); + CV_CheckType(outputsDtype, outputsDtype == CV_32F || outputsDtype == CV_8S, "Output depth should be CV_32F or CV_8S"); + + bool originalFusion = impl->fusion; + int prefBackend = impl->preferableBackend; + int prefTarget = impl->preferableTarget; + + // Disable fusions and use CPU backend to quantize net + setPreferableBackend(DNN_BACKEND_OPENCV); + setPreferableTarget(DNN_TARGET_CPU); + enableFusion(false); + + if (calibData.isMat()) + { + setInput(calibData.getMat()); + } + else if (calibData.isMatVector()) + { + std::vector calibDataVec; + calibData.getMatVector(calibDataVec); + + std::vector inpNames = impl->netInputLayer->outNames; + CV_CheckEQ(calibDataVec.size(), inpNames.size(), "Calibration data size should be equal to number of inputs"); + for (int i = 0; i < calibDataVec.size(); i++) + setInput(calibDataVec[i], inpNames[i]); + } + + std::vector outNames = getUnconnectedOutLayersNames(); + std::vector pins; + for (int i = 0; i < outNames.size(); i++) + pins.push_back(impl->getPinByAlias(outNames[i])); + impl->setUpNet(pins); + + // Compute scales and zeropoints for all the layers + std::vector > scales; + std::vector > zeropoints; + for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) + { + LayerData& ld = it->second; + if (!ld.skip) + { + Ptr layer = ld.layerInstance; + std::vector inps(ld.inputBlobs.size()); + for (int i = 0; i < ld.inputBlobs.size(); ++i) + inps[i] = *ld.inputBlobs[i]; + layer->forward(inps, ld.outputBlobs, ld.internals); + } + + std::vector sc; + std::vector zp; + if (ld.type == "TanH") + { + sc.push_back(1.f/128); + zp.push_back(0); + } + else if (ld.type == "Sigmoid" || ld.type == "Softmax" || ld.type == "SoftMax") + { + if (ld.params.get("log_softmax", 
false)) + { + sc.push_back(16.f/256); + zp.push_back(127); + } + else + { + sc.push_back(1.f/256); + zp.push_back(-128); + } + } + else if (ld.type == "Split" || ld.type == "Slice" || ld.type == "Crop") + { + std::vector inp_sc; std::vector inp_zp; + impl->getQuantizationParams(*ld.inputBlobs[0], inp_sc, inp_zp); + sc.assign(ld.outputBlobs.size(), inp_sc[0]); + zp.assign(ld.outputBlobs.size(), inp_zp[0]); + } + else + { + for (int i = 0; i < ld.outputBlobs.size(); i++) + impl->getQuantizationParams(ld.outputBlobs[i], sc, zp); + } + scales.push_back(sc); + zeropoints.push_back(zp); + } + + // For some layers, the input and output scales/zeropoints must be equal so that rescaling of inputs + // is not needed during quantized inference. We start from the last layer and modify the layer's input scales/zeropoints + // TODO : Need a different approach. Current solution fails when 2 such layers have the same input layer + for (Impl::MapIdToLayerData::reverse_iterator it = impl->layers.rbegin(); it != impl->layers.rend(); ++it) + { + LayerData& ld = it->second; + // Layers with multiple outputs. Number of outputs is equal to number of inputs + if (ld.type == "Blank" || ld.type == "Dropout" || ld.type == "Identity" || ld.type == "Silence" || + ld.type == "Flatten" || ld.type == "Padding" || ld.type == "Permute" || ld.type == "Reshape" || + ld.type == "ReLU6" || ld.type == "Reorg" || ld.type == "ShuffleChannel" || + (ld.type == "ReLU" && !ld.params.get("negative_slope", 0.f)) /* ReLU with negative slope 0 */) + { + for (int i = 0; i < ld.outputBlobs.size(); i++) + { + LayerPin &pin = ld.inputBlobsId[i]; + scales[pin.lid][pin.oid] = scales[ld.id][i]; + zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][i]; + } + } + // Layers with multiple inputs and single output. 
+ else if ((ld.type == "Pooling" && toLowerCase(ld.params.get("pool", "max")) == "max") /* Max Pooling */ || + (ld.type == "Eltwise" && toLowerCase(ld.params.get("operation", "sum")) == "max") /* Elementwise max */ || + ld.type == "Concat") + { + for (int i = 0; i < ld.inputBlobsId.size(); i++) + { + LayerPin &pin = ld.inputBlobsId[i]; + scales[pin.lid][pin.oid] = scales[ld.id][0]; + zeropoints[pin.lid][pin.oid] = zeropoints[ld.id][0]; + } + } + } + + // Create a new Net and add quantized layers to it. + Net dstNet; + dstNet.impl->netWasQuantized = true; + dstNet.setInputsNames(impl->netInputLayer->outNames); + dstNet.setPreferableBackend(prefBackend); + dstNet.setPreferableTarget(prefTarget); + dstNet.enableFusion(originalFusion); + + for (Impl::MapIdToLayerData::iterator it = impl->layers.begin(); it != impl->layers.end(); it++) + { + LayerData ld = it->second; + if (ld.id == 0) + { + LayerData &quantInpLd = dstNet.impl->layers[0]; + quantInpLd.dtype = inputsDtype; + quantInpLd.params.set("scales", DictValue::arrayReal(scales[0].data(), scales[0].size())); + quantInpLd.params.set("zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); + continue; + } + + std::vector inpPins = ld.inputBlobsId; + // Fill input and output scales/zeropoints for the layer + std::vector > inp_out_sc(2); + std::vector > inp_out_zp(2); + for (int i = 0; i < inpPins.size(); i++) + { + LayerPin &pin = inpPins[i]; + inp_out_sc[0].push_back(scales[pin.lid][pin.oid]); + inp_out_zp[0].push_back(zeropoints[pin.lid][pin.oid]); + } + inp_out_sc[1] = scales[ld.id]; + inp_out_zp[1] = zeropoints[ld.id]; + + // Quantize layer + Ptr layer = ld.layerInstance; + if (layer->tryQuantize(inp_out_sc, inp_out_zp, ld.params)) + { + ld.type += "Int8"; + ld.dtype = CV_8S; + } + ld.params.set("scales", DictValue::arrayReal(inp_out_sc[1].data(), inp_out_sc[1].size())); + ld.params.set("zeropoints", DictValue::arrayInt(inp_out_zp[1].data(), inp_out_zp[1].size())); + + // Check and add 
quantize/dequantize node before layer + for (int i = 0; i < inpPins.size(); i++) + { + LayerPin &pin = inpPins[i]; + LayerData &inpLd = dstNet.impl->getLayerData(impl->getLayerName(pin.lid)); + pin.lid = inpLd.id; + if (inpLd.dtype != ld.dtype) + { + String layerName = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? cv::format("quantize/%s/%d", inpLd.name.c_str(), pin.oid) + : cv::format("dequantize/%s/%d", inpLd.name.c_str(), pin.oid); + // Check if quantize/dequantize node for the input layer already exists + if (dstNet.impl->getLayerId(layerName) >= 0) + { + pin.lid = dstNet.impl->getLayerId(layerName); + pin.oid = 0; + } + else + { + LayerParams lp; + lp.set("scales", inp_out_sc[0][i]); + lp.set("zeropoints", inp_out_zp[0][i]); + lp.name = layerName; + lp.type = (inpLd.dtype == CV_32F && ld.dtype == CV_8S) ? "Quantize" : "Dequantize"; + int newLid = dstNet.addLayer(lp.name, lp.type, ld.dtype, lp); + dstNet.connect(pin.lid, pin.oid, newLid, 0); + pin.lid = newLid; pin.oid = 0; + } + } + } + + // Add quantized layer to Net and connect to its inputs. + int newLid = dstNet.addLayer(ld.name, ld.type, ld.dtype, ld.params); + for( int i = 0; i < inpPins.size(); i++ ) + dstNet.connect(inpPins[i].lid, inpPins[i].oid, newLid, i); + + // If the layer is a output layer, add quantize/dequantize node after it based on output's data type. + if (ld.requiredOutputs.size() == 0 && ld.dtype != outputsDtype) + { + LayerParams lp; + lp.set("scales", inp_out_sc[1][0]); + lp.set("zeropoints", inp_out_zp[1][0]); + lp.name = ((ld.dtype == CV_32F && outputsDtype == CV_8S) ? "quantize/" : "dequantize/") + ld.name; + lp.type = (ld.dtype == CV_32F && outputsDtype == CV_8S) ? 
"Quantize" : "Dequantize"; + dstNet.addLayerToPrev(lp.name, lp.type, outputsDtype, lp); + } + } + // Restore FP32 Net's backend, target and fusion + setPreferableBackend(prefBackend); + setPreferableTarget(prefTarget); + enableFusion(originalFusion); + return dstNet; +} + +void Net::getInputDetails(std::vector& scales, std::vector& zeropoints) const +{ + if (!impl->netWasQuantized) + CV_Error(Error::StsBadFunc, "Net isn't quantized"); + + LayerParams &lp = impl->layers[0].params; + DictValue sc = lp.get("scales"); + DictValue zp = lp.get("zeropoints"); + + for (int i = 0; i < sc.size(); i++) + { + scales.push_back(sc.get(i)); + zeropoints.push_back(zp.get(i)); + } +} + +void Net::getOutputDetails(std::vector& scales, std::vector& zeropoints) const +{ + if (!impl->netWasQuantized) + CV_Error(Error::StsBadFunc, "Net isn't quantized"); + + std::vector outLayerIds = getUnconnectedOutLayers(); + for (auto &lid : outLayerIds) + { + LayerParams &lp = impl->layers[lid].params; + DictValue sc = lp.get("scales"); + DictValue zp = lp.get("zeropoints"); + + for (int i = 0; i < sc.size(); i++) + { + scales.push_back(sc.get(i)); + zeropoints.push_back(zp.get(i)); + } + } +} + void Net::setPreferableBackend(int backendId) { CV_TRACE_FUNCTION(); CV_TRACE_ARG(backendId); + if (backendId == DNN_BACKEND_DEFAULT) + backendId = (Backend)PARAM_DNN_BACKEND_DEFAULT; + + if (impl->netWasQuantized && backendId != DNN_BACKEND_OPENCV) + { + CV_LOG_WARNING(NULL, "DNN: Only default backend supports quantized networks"); + backendId = DNN_BACKEND_OPENCV; + } + #ifdef HAVE_INF_ENGINE if (backendId == DNN_BACKEND_INFERENCE_ENGINE) backendId = getInferenceEngineBackendTypeParam(); @@ -4277,7 +4583,6 @@ void Net::setPreferableBackend(int backendId) if( impl->preferableBackend != backendId ) { impl->preferableBackend = backendId; - impl->netWasAllocated = false; impl->clear(); } } @@ -4287,6 +4592,13 @@ void Net::setPreferableTarget(int targetId) CV_TRACE_FUNCTION(); CV_TRACE_ARG(targetId); + if 
(impl->netWasQuantized && targetId != DNN_TARGET_CPU && + targetId != DNN_TARGET_OPENCL && targetId != DNN_TARGET_OPENCL_FP16) + { + CV_LOG_WARNING(NULL, "DNN: Only CPU and OpenCL/OpenCL FP16 target is supported by quantized networks"); + targetId = DNN_TARGET_CPU; + } + if( impl->preferableTarget != targetId ) { impl->preferableTarget = targetId; @@ -4306,7 +4618,6 @@ void Net::setPreferableTarget(int targetId) impl->preferableTarget = DNN_TARGET_OPENCL; #endif } - impl->netWasAllocated = false; impl->clear(); } } @@ -4935,9 +5246,10 @@ void Net::getMemoryConsumption(const int layerId, ShapesVec inLayerShapes, outLayerShapes; getLayerShapes(netInputShapes, layerId, inLayerShapes, outLayerShapes); + size_t elemSize = (impl->netWasQuantized) ? sizeof(char) : sizeof(float); for(int i = 0; i < outLayerShapes.size(); i++) { - blobs += total(outLayerShapes[i]) * sizeof(float); + blobs += total(outLayerShapes[i]) * elemSize; } } @@ -4986,7 +5298,7 @@ void Net::getMemoryConsumption(const std::vector& netInputShapes, std::vector > inLayerShapes, outLayerShapes; getLayersShapes(netInputShapes, layerIds, inLayerShapes, outLayerShapes); - + size_t elemSize = (impl->netWasQuantized) ? 
sizeof(char) : sizeof(float); for(int i = 0; i < layerIds.size(); i++) { int w = 0, b = 0; @@ -5001,7 +5313,7 @@ void Net::getMemoryConsumption(const std::vector& netInputShapes, for(int j = 0; j < outLayerShapes[i].size(); j++) { - b += total(outLayerShapes[i][j]) * sizeof(float); + b += total(outLayerShapes[i][j]) * elemSize; } weights.push_back(w); @@ -5021,7 +5333,6 @@ void Net::enableFusion(bool fusion) if( impl->fusion != fusion ) { impl->fusion = fusion; - impl->netWasAllocated = false; impl->clear(); } } @@ -5195,6 +5506,12 @@ void Layer::getScaleShift(Mat& scale, Mat& shift) const shift = Mat(); } +void Layer::getScaleZeropoint(float& scale, int& zeropoint) const +{ + scale = 1.f; + zeropoint = 0; +} + void Layer::unsetAttached() { setActivation(Ptr()); @@ -5321,6 +5638,12 @@ void Layer::run(const std::vector &inputs, std::vector &outputs, std:: this->forward(inputs, outputs, internals); } +bool Layer::tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) +{ + return false; +} + Layer::~Layer() {} bool Layer::getMemoryShapes(const std::vector &inputs, diff --git a/modules/dnn/src/init.cpp b/modules/dnn/src/init.cpp index 1916aa0ec94f..9d8a3783a2e9 100644 --- a/modules/dnn/src/init.cpp +++ b/modules/dnn/src/init.cpp @@ -141,6 +141,44 @@ void initializeLayerFactory() CV_DNN_REGISTER_LAYER_CLASS(LSTM, LSTMLayer); CV_DNN_REGISTER_LAYER_CLASS(GRU, GRULayer); CV_DNN_REGISTER_LAYER_CLASS(CumSum, CumSumLayer); + + CV_DNN_REGISTER_LAYER_CLASS(Quantize, QuantizeLayer); + CV_DNN_REGISTER_LAYER_CLASS(Dequantize, DequantizeLayer); + CV_DNN_REGISTER_LAYER_CLASS(ConvolutionInt8, ConvolutionLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(InnerProductInt8, InnerProductLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(PoolingInt8, PoolingLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(EltwiseInt8, EltwiseLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(BatchNormInt8, BatchNormLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(ScaleInt8, ScaleLayerInt8); + 
CV_DNN_REGISTER_LAYER_CLASS(ShiftInt8, ShiftLayerInt8); + + CV_DNN_REGISTER_LAYER_CLASS(ReLUInt8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(ReLU6Int8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(SigmoidInt8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(TanHInt8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(SwishInt8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(MishInt8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(ELUInt8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(BNLLInt8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(AbsValInt8, ActivationLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(SoftmaxInt8, SoftmaxLayerInt8); + CV_DNN_REGISTER_LAYER_CLASS(SoftMaxInt8, SoftmaxLayerInt8); + + CV_DNN_REGISTER_LAYER_CLASS(ConcatInt8, ConcatLayer); + CV_DNN_REGISTER_LAYER_CLASS(FlattenInt8, FlattenLayer); + CV_DNN_REGISTER_LAYER_CLASS(PaddingInt8, PaddingLayer); + CV_DNN_REGISTER_LAYER_CLASS(BlankInt8, BlankLayer); + CV_DNN_REGISTER_LAYER_CLASS(DropoutInt8, BlankLayer); + CV_DNN_REGISTER_LAYER_CLASS(IdentityInt8, BlankLayer); + CV_DNN_REGISTER_LAYER_CLASS(SilenceInt8, BlankLayer); + CV_DNN_REGISTER_LAYER_CLASS(ConstInt8, ConstLayer); + CV_DNN_REGISTER_LAYER_CLASS(ReshapeInt8, ReshapeLayer); + CV_DNN_REGISTER_LAYER_CLASS(SplitInt8, SplitLayer); + CV_DNN_REGISTER_LAYER_CLASS(SliceInt8, SliceLayer); + CV_DNN_REGISTER_LAYER_CLASS(CropInt8, CropLayer); + CV_DNN_REGISTER_LAYER_CLASS(PermuteInt8, PermuteLayer); + CV_DNN_REGISTER_LAYER_CLASS(ReorgInt8, ReorgLayer); + CV_DNN_REGISTER_LAYER_CLASS(ShuffleChannelInt8, ShuffleChannelLayer); } CV__DNN_INLINE_NS_END diff --git a/modules/dnn/src/int8layers/batch_norm_layer.cpp b/modules/dnn/src/int8layers/batch_norm_layer.cpp new file mode 100644 index 000000000000..c5b8c3d9e9b0 --- /dev/null +++ b/modules/dnn/src/int8layers/batch_norm_layer.cpp @@ -0,0 +1,178 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" +#include "layers_common.hpp" +#include + +namespace cv +{ +namespace dnn +{ + +class BatchNormLayerInt8Impl CV_FINAL : public BatchNormLayerInt8 +{ +public: + Mat origin_weights, origin_bias; + Mat weights_, bias_; + mutable int dims; + + BatchNormLayerInt8Impl(const LayerParams& params) + : dims(-1) + { + setParamsFrom(params); + useGlobalStats = params.get("use_global_stats", true); + input_sc = params.get("input_scale"); + input_zp = params.get("input_zeropoint"); + output_sc = params.get("scales"); + output_zp = params.get("zeropoints"); + + CV_Assert(blobs.size() == 2); + size_t n = blobs[0].total(); + CV_Assert(blobs[1].total() == n && + blobs[0].isContinuous() && blobs[1].isContinuous() && + blobs[0].type() == CV_32F && blobs[1].type() == CV_32F); + + origin_weights = blobs[0]; + origin_bias = blobs[1]; + } + + virtual void finalize(InputArrayOfArrays, OutputArrayOfArrays) CV_OVERRIDE + { + origin_weights.convertTo(weights_, CV_32F, input_sc/output_sc); + addWeighted(origin_bias, 1.0/output_sc, weights_, -input_zp, output_zp, bias_, CV_32F); + } + + void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE + { + scale = origin_weights; + shift = origin_bias; + } + + void getScaleZeropoint(float& scale, int& zeropoint) const CV_OVERRIDE + { + scale = output_sc; + zeropoint = output_zp; + } + + virtual bool tryFuse(Ptr& top) CV_OVERRIDE + { + Mat w_, b_; + top->getScaleShift(w_, b_); + if (w_.empty() && b_.empty()) + return false; + + const int numChannels = weights_.total(); + const int numFusedWeights = w_.total(); + const int numFusedBias = b_.total(); + + if ((numFusedWeights != numChannels && numFusedWeights != 1 && !w_.empty()) || + (numFusedBias != numChannels && numFusedBias != 1 && !b_.empty())) + return false; + + float new_sc; + int new_zp; + 
top->getScaleZeropoint(new_sc, new_zp); + + Mat w = numFusedWeights == 1 ? Mat(1, numChannels, CV_32F, Scalar(w_.at(0))) : + (w_.empty() ? Mat::ones(1, numChannels, CV_32F) : w_.reshape(1, 1)); + + Mat b = numFusedBias == 1 ? Mat(1, numChannels, CV_32F, Scalar(b_.at(0))) : + (b_.empty() ? Mat::zeros(1, numChannels, CV_32F) : b_.reshape(1, 1)); + + weights_ = Mat(); bias_ = Mat(); + multiply(origin_weights, w, weights_, input_sc/new_sc, CV_32F); + multiply(origin_bias, w, bias_); + add(bias_, b, bias_); + addWeighted(bias_, 1.0/new_sc, weights_, -input_zp, new_zp, bias_, CV_32F); + return true; + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + dims = inputs[0].size(); + if (!useGlobalStats && inputs[0][0] != 1) + CV_Error(Error::StsNotImplemented, "Batch normalization in training mode with batch size > 1"); + Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + return true; + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + bool setActivation(const Ptr& layer) CV_OVERRIDE + { + Ptr activ_int8 = layer.dynamicCast(); + if (!activ_int8.empty()) + { + return activ_int8->blobs.empty(); + } + return false; + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + CV_Assert(blobs.size() == 2); + CV_Assert(inputs.size() == 1); + + Mat &inpBlob = inputs[0]; + int planeSize = 1; + for (size_t i = 2; i < inpBlob.dims; i++) { + planeSize *= inpBlob.size[i]; + } + + for (size_t ii = 0; ii < outputs.size(); ii++) + { + Mat &outBlob = outputs[ii]; + + for(int num = 0; num < outBlob.size[0]; num++) + { + for (int n = 0; n < 
outBlob.size[1]; n++) + { + float w = weights_.at(n); + float b = bias_.at(n); + Mat inpBlobPlane(1, planeSize, CV_8S, inpBlob.ptr(num, n)); + Mat outBlobPlane(1, planeSize, CV_8S, outBlob.ptr(num, n)); + inpBlobPlane.convertTo(outBlobPlane, CV_8S, w, b); + } + } + } + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(outputs); // suppress unused variable warning + + int64 flops = 0; + for(int i = 0; i < inputs.size(); i++) + { + flops += 3*total(inputs[i]); + } + return flops; + } + +private: + bool useGlobalStats; +}; + +Ptr BatchNormLayerInt8::create(const LayerParams& params) +{ + return Ptr(new BatchNormLayerInt8Impl(params)); +} + +} // namespace dnn +} // namespace cv diff --git a/modules/dnn/src/int8layers/convolution_layer.cpp b/modules/dnn/src/int8layers/convolution_layer.cpp new file mode 100644 index 000000000000..05749885c05b --- /dev/null +++ b/modules/dnn/src/int8layers/convolution_layer.cpp @@ -0,0 +1,1136 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include "layers_common.hpp" + +#include + +#include "opencv2/core/hal/hal.hpp" +#include "opencv2/core/hal/intrin.hpp" +#include +#include + +namespace cv +{ +namespace dnn +{ + +#if CV_SIMD +static inline void v_expand_mul_add(const v_int8x16& a, const v_int8x16& b, + v_int32x4& out0, v_int32x4& out1, v_int32x4& out2, v_int32x4& out3) +{ + v_int16x8 a0, a1, b0, b1; + v_expand(a, a0, a1); + v_expand(b, b0, b1); + + v_int32x4 t0, t1; + v_mul_expand(a0, b0, t0, t1); + out0 += t0; out1 += t1; + + v_mul_expand(a1, b1, t0, t1); + out2 += t0; out3 += t1; +} +#endif + +class BaseConvolutionLayerInt8Impl : public ConvolutionLayerInt8 +{ +public: + BaseConvolutionLayerInt8Impl(const LayerParams ¶ms) + { + setParamsFrom(params); + getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads); + + numOutput = params.get("num_output"); + int ngroups = params.get("group", 1); + CV_Assert(numOutput % ngroups == 0); + + input_zp = params.get("input_zeropoint"); + output_zp = params.get("zeropoints"); + output_sc = params.get("scales"); + + if (kernel_size.size() == 2) { + kernel = Size(kernel_size[1], kernel_size[0]); + stride = Size(strides[1], strides[0]); + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != pads_end[i]) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + } + pad = Size(pads_begin[1], pads_begin[0]); + dilation = Size(dilations[1], dilations[0]); + + adjustPad.height = adjust_pads[0]; + adjustPad.width = adjust_pads[1]; + } + + for (int i = 0; i < adjust_pads.size(); i++) { + CV_Assert(adjust_pads[i] < strides[i]); + } + } + + virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + { + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + // blobs[0] - Weights (INT8) + // blobs[1] - Biases (INT32) + // blobs[2] - Multipliers for 
convolution output stage (FP32) + CV_Assert(!inputs.empty() && blobs.size() == 3); + MatSize weightShape = blobs[0].size; + + CV_Assert(inputs[0].dims == outputs[0].dims); + if (weightShape.dims() == 3) + { + kernel_size.assign(1, kernel_size[0]); + strides.assign(1, strides[0]); + dilations.assign(1, dilations[0]); + pads_begin.assign(1, pads_begin[0]); + pads_end.assign(1, pads_end[0]); + } + CV_Assert(weightShape.dims() == kernel_size.size() + 2); + for (int i = 0; i < kernel_size.size(); i++) { + CV_Assert(weightShape[i + 2] == kernel_size[i]); + } + + const Mat &input = inputs[0]; + CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || input.dims == 4 || input.dims == 5) && input.type() == CV_8S); + for (size_t i = 0; i < outputs.size(); i++) + { + CV_Assert(inputs[i].type() == input.type()); + CV_Assert(((input.dims == 3 && kernel_size.size() == 1) || inputs[i].dims == 4 || inputs[i].dims == 5) && inputs[i].size[1] == input.size[1]); + for (int j = 0; j < inputs[i].dims; j++) { + CV_Assert(inputs[i].size[j] == input.size[j]); + } + } + + std::vector inpShape; + std::vector outShape; + for (int i = 2; i < inputs[0].dims; i++) { + inpShape.push_back(inputs[0].size[i]); + outShape.push_back(outputs[0].size[i]); + } + getConvPoolPaddings(inpShape, kernel_size, strides, padMode, pads_begin, pads_end); + if (pads_begin.size() == 2) { + for (int i = 0; i < pads_begin.size(); i++) { + if (pads_begin[i] != pads_end[i]) + CV_Error(Error::StsNotImplemented, "Unsupported asymmetric padding in convolution layer"); + } + pad = Size(pads_begin[1], pads_begin[0]); + } + } + + virtual MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const = 0; + bool is1x1() const + { + return (kernel.height == 1 && kernel.width == 1) && + (stride.height == 1 && stride.width == 1) && + (dilation.height == 1 && dilation.width == 1); + } + + virtual bool tryFuse(Ptr& top) CV_OVERRIDE + { + Mat w, b; + top->getScaleShift(w, b); + if (w.empty() && b.empty()) + 
return false; + + CV_Assert((w.empty() || w.type() == CV_32F) && + (b.empty() || b.type() == CV_32F)); + + float new_sc; + int new_zp; + top->getScaleZeropoint(new_sc, new_zp); + fuseWeights(w, b, new_sc); + output_sc = new_sc; + output_zp = new_zp; + return true; + } + + virtual void fuseWeights(const Mat& w_, const Mat& b_, const float& new_sc) = 0; +}; + +//TODO: simultaneously convolution and bias addition for cache optimization +class ConvolutionLayerInt8Impl CV_FINAL : public BaseConvolutionLayerInt8Impl +{ +public: + enum { VEC_ALIGN = 32, DFT_TYPE = CV_8S }; + Mat weightsMat; + std::vector biasvec; + Mat outputMultiplier; + Mat activationLUT; + Ptr activ; + + ConvolutionLayerInt8Impl(const LayerParams ¶ms) : BaseConvolutionLayerInt8Impl(params){} + + MatShape computeColRowShape(const MatShape &inpShape, const MatShape &outShape) const CV_OVERRIDE + { + CV_Assert(!blobs.empty()); + int dims = inpShape.size(); + int inpD = dims == 5 ? inpShape[2] : 1; + int inpH = inpShape[dims - 2]; + int inpW = inpShape.back(); + int inpGroupCn = blobs[0].size[1]; + int ksize = inpGroupCn * std::accumulate(kernel_size.begin(), kernel_size.end(), + 1, std::multiplies()); + return shape(inpD * inpH * inpW, ksize); + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + size_t ksize = kernel_size.size(); + // Only default backend and Conv1D/Conv2D/Conv3D are supported + return backendId == DNN_BACKEND_OPENCV && ksize >= 1 && ksize <= 3; + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(!blobs.empty()); + const int* weightShape = blobs[0].size.p; + CV_Assert(blobs[1].total() == (size_t)weightShape[0]); + + internals.clear(); + + CV_Assert(inputs.size() != 0); + std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); + + int outCn = weightShape[0]; + std::vector outShape; + outShape.push_back(inputs[0][0]); + outShape.push_back(outCn); + + int 
inpCn = inputs[0][1]; + if (padMode.empty()) + { + for (int i = 0; i < inpShape.size(); i++) + outShape.push_back((inpShape[i] + pads_begin[i] + pads_end[i] - dilations[i] * (kernel_size[i] - 1) - 1) / strides[i] + 1); + } + else + { + getConvPoolOutParams(inpShape, kernel_size, strides, padMode, dilations, outShape); + } + + int ngroups = inpCn / weightShape[1]; + if (ngroups == 0 || ngroups * weightShape[1] != inpCn) + CV_Error(Error::StsError, format("Number of input channels should " + "be multiple of %d but got %d", weightShape[1], inpCn)); + CV_Assert(ngroups > 0 && inpCn % ngroups == 0 && outCn % ngroups == 0); + + outputs.resize(1, outShape); + + return false; + } + + virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + { + BaseConvolutionLayerInt8Impl::finalize(inputs_arr, outputs_arr); + + std::vector inputs; + inputs_arr.getMatVector(inputs); + // prepare weightsMat where each row is aligned and has enough zero padding on the right to + // use vectorized (i.e. 
with intrinsics) loops without tail processing + Mat wm = blobs[0].reshape(1, numOutput); + if( wm.step1() % VEC_ALIGN != 0 ) + { + int newcols = (int)alignSize(wm.step1(), VEC_ALIGN); + Mat wm_buffer = Mat(numOutput, newcols, wm.type()); + Mat wm_padding = wm_buffer.colRange(wm.cols, newcols); + wm_padding.setTo(Scalar::all(0)); + Mat wm_aligned = wm_buffer.colRange(0, wm.cols); + wm.copyTo(wm_aligned); + wm = wm_aligned; + } + weightsMat = wm; + + Mat biasMat = blobs[1]; + biasvec.resize(numOutput+2); + for(int i = 0; i < numOutput; i++ ) + biasvec[i] = biasMat.at(i); + + outputMultiplier = blobs[2]; + } + + bool setActivation(const Ptr& layer) CV_OVERRIDE + { + Ptr activ_int8 = layer.dynamicCast(); + if (!activ_int8.empty()) + { + activ = activ_int8; + if (!activ_int8->blobs.empty()) + activ_int8->blobs[0].convertTo(activationLUT, CV_32S); + return true; + } + return false; + } + + virtual bool tryFuse(Ptr& top) CV_OVERRIDE + { + return BaseConvolutionLayerInt8Impl::tryFuse(top); + } + + void fuseWeights(const Mat& w_, const Mat& b_, const float& new_sc) CV_OVERRIDE + { + const int outCn = weightsMat.size[0]; + Mat w = w_.total() == 1 ? Mat(1, outCn, CV_32F, Scalar(w_.at(0))) : w_; + Mat b = b_.total() == 1 ? 
Mat(1, outCn, CV_32F, Scalar(b_.at(0))) : b_; + CV_Assert_N(!weightsMat.empty(), biasvec.size() == outCn + 2, + w.empty() || outCn == w.total(), b.empty() || outCn == b.total()); + + for (int i = 0; i < outCn; ++i) + { + float off = outputMultiplier.at(i) * output_sc; + if (!w.empty()) + off *= w.at(i); + + if (!b.empty()) + biasvec[i] += (int)std::round(b.at(i)/off); + + outputMultiplier.at(i) = off/new_sc; + } + biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1]; + } + + class ParallelConv : public cv::ParallelLoopBody + { + public: + enum { BLK_SIZE = 32, BLK_SIZE_CN = 64 }; + + const Mat* input_; + const Mat* weights_; + Mat* output_; + int outShape[4]; // used only for conv2d + std::vector kernel_size, pads_begin, pads_end, strides, dilations; + int ngroups_, nstripes_; + std::vector ofstab_; + const std::vector* biasvec_; + const Mat* activLUT_; + const ActivationLayerInt8* activ_; + bool is1x1_; + bool useAVX2; + bool useAVX512; + int blk_size_cn; + int inpZp, outZp; + const float* multiplier; + + ParallelConv() + : input_(0), weights_(0), output_(0), ngroups_(0), nstripes_(0), + biasvec_(0), activLUT_(0), activ_(0), is1x1_(false), useAVX2(false), useAVX512(false) + , blk_size_cn(0), inpZp(0), outZp(0), multiplier(0) + {} + + static void run( const Mat& input, Mat& output, const Mat& weights, const Mat& multipliers, + const std::vector& biasvec, const Mat& activLUT, + const std::vector& kernel_size, const std::vector& strides, + const std::vector& pads_begin, const std::vector& pads_end, + const std::vector& dilations, + const ActivationLayerInt8* activ, int ngroups, int nstripes, int inp_Zp, int out_Zp) + { + size_t karea = std::accumulate(kernel_size.begin(), kernel_size.end(), + 1, std::multiplies()); + bool isConv1D = input.dims == 3; + bool isConv2D = input.dims == 4; + bool isConv3D = input.dims == 5; + CV_CheckEQ(static_cast(kernel_size.size()), input.dims - 2, ""); + CV_Assert_N(input.dims == output.dims, + input.size[0] == output.size[0], + 
weights.rows == output.size[1], + weights.cols == (input.size[1]/ngroups)*karea, + input.type() == CV_8SC1, + output.type() == CV_32SC1, + input.type() == weights.type(), + input.isContinuous(), + output.isContinuous(), + biasvec.size() == (size_t)output.size[1]+2); + CV_Check(weights.step1(), weights.step1() % VEC_ALIGN == 0, ""); + ParallelConv p; + + p.input_ = &input; + p.weights_ = &weights; + p.output_ = &output; + int max_ind = isConv1D? 3: 4; + for( int i = 0; i < max_ind; i++ ) p.outShape[i] = output.size[i]; + p.outShape[1] /= ngroups; + + p.kernel_size = kernel_size; p.strides = strides; p.dilations = dilations; + p.pads_begin = pads_begin; p.pads_end = pads_end; + + p.ngroups_ = ngroups; + p.nstripes_ = nstripes; + + int inpCnAll = input.size[1]; + int depth = (input.dims == 5) ? input.size[2] : 1; + int width = input.size[input.dims - 1]; + int height = isConv1D? 1 : input.size[input.dims - 2]; + int inpCn = inpCnAll / ngroups; + + p.is1x1_ = (isConv2D && kernel_size[0] == 1 && kernel_size[1] == 1 && + pads_begin[0] == 0 && pads_begin[1] == 0) || + (isConv1D && pads_begin[0] == 0 && kernel_size[0] == 1); + + p.useAVX2 = checkHardwareSupport(CPU_AVX2) && isConv2D; + p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX && isConv2D; + + int kernel_d = isConv3D? kernel_size[0] : 1; + int kernel_h = isConv1D? 1 : kernel_size[kernel_size.size() - 2]; + int kernel_w = kernel_size.back(); + + int blk_size_cn0 = cvCeil(1600./(kernel_w*kernel_h)); + int ncn = 32; + while (ncn*2 < blk_size_cn0 && ncn < inpCn) + ncn *= 2; + ncn = std::min(ncn, inpCn); + p.blk_size_cn = ncn; + + int dil_d = isConv3D? dilations[0] : 1; + int dil_h = isConv1D? 
1 : dilations[dilations.size() - 2]; + int dil_w = dilations.back(); + + p.inpZp = inp_Zp; + p.outZp = out_Zp; + p.multiplier = multipliers.ptr(0); + + p.ofstab_.resize(karea * ncn); + int* ofstab = &p.ofstab_[0]; + + if (isConv1D) + { + for( int k = 0; k < ncn; k++ ) + for( int k_c = 0; k_c < kernel_w; k_c++ ) + ofstab[k*kernel_w + k_c] = k*width + k_c*dil_w; + } + else if (isConv2D) + { + for( int k = 0; k < ncn; k++ ) + for( int k_r = 0; k_r < kernel_h; k_r++ ) + for( int k_c = 0; k_c < kernel_w; k_c++ ) + ofstab[(k*kernel_h + k_r)*kernel_w + k_c] = + (k*height + k_r*dil_h)*width + k_c*dil_w; + } + else + { + for( int k = 0; k < ncn; k++ ) + for (int k_d = 0; k_d < kernel_d; k_d++) + for( int k_r = 0; k_r < kernel_h; k_r++ ) + for( int k_c = 0; k_c < kernel_w; k_c++ ) + ofstab[(k*kernel_d*kernel_h + k_d*kernel_h + k_r)*kernel_w + k_c] = + (k*depth*height + k_d*dil_d*height + k_r*dil_h)*width + k_c*dil_w; + } + + p.biasvec_ = &biasvec; + p.activLUT_ = &activLUT; + p.activ_ = !activLUT.empty() ? activ : 0; + + parallel_for_(Range(0, nstripes), p, nstripes); + } + + virtual void operator ()(const Range &r0) const CV_OVERRIDE + { + const int valign = ConvolutionLayerInt8Impl::VEC_ALIGN; + int ngroups = ngroups_, batchSize = input_->size[0]*ngroups; + bool isConv1D = input_->dims == 3; + bool isConv2D = input_->dims == 4; + bool isConv3D = input_->dims == 5; + + int outW = output_->size[output_->dims - 1]; + int outH = isConv1D? 1 : output_->size[output_->dims - 2]; + int outCn = output_->size[1]/ngroups; + + int depth = isConv3D? input_->size[2] : 1; + int height = isConv1D? 1 : input_->size[input_->dims - 2]; + int width = input_->size[input_->dims - 1]; + int inpCn = input_->size[1]/ngroups; + + const int nstripes = nstripes_; + + int kernel_d = isConv3D? kernel_size[0] : 1; + int kernel_h = isConv1D? 1 : kernel_size[kernel_size.size() - 2]; + int kernel_w = kernel_size.back(); + int karea = kernel_w*kernel_h*kernel_d; + + int pad_d = isConv3D? 
pads_begin[0] : 0; + int pad_t = isConv1D? 0 : pads_begin[pads_begin.size() - 2]; + int pad_l = pads_begin.back(); + + int stride_d = isConv3D? strides[0] : 0; + int stride_h = isConv1D? 0 : strides[strides.size() - 2]; + int stride_w = strides.back(); + + int dilation_d = isConv3D? dilations[0] : 1; + int dilation_h = isConv1D? 1 : dilations[dilations.size() - 2]; + int dilation_w = dilations.back(); + + int i, j, k, d; + int inpPlaneSize = (int)input_->total(2); + int outPlaneSize = (int)output_->total(2); + bool is1x1 = is1x1_; + + int stripesPerSample; + int stripeSize; + Range r = r0; + bool depthWiseConvolution = !is1x1 && isConv2D && ngroups > 1 && inpCn == 1 && + outCn == 1 && kernel_d == 1 && dilation_d == 1 && stride_d == 0 && pad_d == 0 && + width >= 16 + dilation_w*(kernel_w - 1); + // for now only 3x3 depth-wise convolutions are supported + depthWiseConvolution = depthWiseConvolution && kernel_w == 3 && kernel_h == 3 && + // computing at most 1 pixel from each side can involve padding + max(stride_w, dilation_w) >= pad_l && max(stride_h, dilation_h) >= pad_t && + pad_l <= 1 && pad_t <= 1; + + if( !depthWiseConvolution && nstripes >= batchSize*2 ) + { + stripesPerSample = nstripes/batchSize; + stripeSize = (int)alignSize((outPlaneSize + stripesPerSample - 1)/stripesPerSample, 8); + stripeSize = std::min(stripeSize, outPlaneSize); + } + else + { + stripesPerSample = 1; + int samplesPerStripe = std::max((batchSize + nstripes - 1)/nstripes, 1); + r.start *= samplesPerStripe; + r.end *= samplesPerStripe; + stripeSize = outPlaneSize; + } + + const int8_t* data_inp0_ = input_->ptr(); + const int* ofstab = &ofstab_[0]; + const int8_t* wptr_orig_ = weights_->ptr(); + size_t wstep = weights_->step1(); + const int* biasptr_ = &biasvec_->at(0); + const int* lutptr_ = !activLUT_->empty() ? 
activLUT_->ptr() : 0; + int* data_out0_ = output_->ptr(); + AutoBuffer rowbuf0_; + int8_t* rowbuf0 = 0; + bool use_rowbuf = !depthWiseConvolution; + int blk_size = depthWiseConvolution ? outPlaneSize : min((int)BLK_SIZE, stripeSize); + + // im2row buffer is not used for depth-wise convolution + if(use_rowbuf) + { + size_t rowbufsz = alignSize(karea*blk_size_cn, valign)*min((int)BLK_SIZE, blk_size); + //printf("karea=%d, blk_size_cn=%d, rowbufsz=%d, stripeSize=%d\n", karea, blk_size_cn, (int)rowbufsz, stripeSize); + rowbuf0_.allocate(rowbufsz + valign); + rowbuf0 = alignPtr(rowbuf0_.data(), (int)(valign*sizeof(int8_t))); + // we clear the buffer once; ultimately, it lets us to avoid + // tail processing after running the unrolled/vectorized loop. + // the main idea is to make sure that the tail (a.k.a. padding) of each row + // (i.e. the elements with indices between vsz=karea*ncn and vsz_a) + // does not contain NaNs or Infs. Because the padding in the weights + // matrix is explicitly initialized with 0's, we handle all other + // cases nicely, i.e. we can skip expliciting re-initialization + // of the padding - we just retain elements from the previous iteration + // of the loop over channels (cn0). 
+ memset(rowbuf0, (int8_t)inpZp, rowbufsz*sizeof(rowbuf0[0]) ); + } + + for( int stripe = r.start; stripe < r.end; stripe++ ) + { + int subsampleIdx = stripe/stripesPerSample; + if( subsampleIdx >= batchSize ) + break; + int stripeStart = (int)((stripe - subsampleIdx*stripesPerSample)*stripeSize); + int stripeEnd = (int)std::min(stripeStart + stripeSize, outPlaneSize); + const int8_t* data_inp0 = data_inp0_ + subsampleIdx*inpPlaneSize*inpCn; + int* data_out0 = data_out0_ + subsampleIdx*outPlaneSize*outCn; + int startOutCn = (subsampleIdx % ngroups)*outCn; + const int8_t* wptr_orig = wptr_orig_ + wstep*startOutCn; + const int* biasptr = biasptr_ + startOutCn; + const float* multptr = multiplier + startOutCn; + + for( int cn0 = 0; cn0 < inpCn; cn0 += blk_size_cn ) + { + int cn1 = std::min(cn0 + blk_size_cn, inpCn); + int ncn = cn1 - cn0, vsz = karea*ncn; + int vsz_a = (int)alignSize(vsz, valign); + const int8_t* wptr = wptr_orig + cn0*karea; + + for( int ofs0 = stripeStart; ofs0 < stripeEnd; ofs0 += blk_size ) + { + int ofs, ofs1 = std::min(ofs0 + blk_size, stripeEnd); + int bsz = ofs1 - ofs0; + + int out_d = ofs0 / (outH * outW); + int out_i = (ofs0 - out_d * outH * outW) / outW; + int out_j = ofs0 % outW; + + if (depthWiseConvolution) + { + CV_Assert(out_i == 0 && out_j == 0); + int in_d = out_d * stride_d - pad_d; + const int8_t* inptr_ = data_inp0 + (cn0*depth*height + in_d*height)*width; + int* outptr_ = data_out0 + ofs0; + + #if CV_TRY_AVX2 + if(useAVX2) + opt_AVX2::fastDepthwiseConv(wptr, kernel_h, kernel_w, + stride_h, stride_w, dilation_h, dilation_w, pad_t, pad_l, + biasptr, multptr, inptr_, height, width, outptr_, out_d, outH, outW, inpZp, outZp); + else + #endif + { + const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], + w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], + w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; + int outW1 = min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); + int bias = biasptr[out_d], biasCopy; + float mult = 
multptr[out_d]; + + for (int out_i = 0; out_i < outH; out_i++) + { + int in_i = out_i * stride_h - pad_t, out_j = 0; + const int8_t* imgptr0 = inptr_ + in_i*width; + const int8_t* imgptr1 = imgptr0 + dilation_h*width; + const int8_t* imgptr2 = imgptr0 + (dilation_h*2)*width; + int8_t w00 = w00_, w01 = w01_, w02 = w02_; + int8_t w20 = w20_, w21 = w21_, w22 = w22_; + int out, out1; + // Bias has a fused offset component. bias = bias_quantized - input_zeropoint*sum_of_weights. + // In some cases below, certain weights are not used for convolution or set to zero. + // So we create a copy of bias at the start and remove the weight's components as necessary. + biasCopy = bias; + + if (in_i < 0) + { + biasCopy += inpZp * (w00 + w01 + w02); + w00 = w01 = w02 = 0; + imgptr0 = imgptr1; + } + else if (in_i + dilation_h*(kernel_h-1) >= height) + { + biasCopy += inpZp * (w20 + w21 + w22); + w20 = w21 = w22 = 0; + imgptr2 = imgptr1; + } + int* outptr = outptr_ + out_i*outW; + if (pad_l > 0) + { + out = (int)imgptr0[0]*w01 + (int)imgptr0[dilation_w]*w02 + + (int)imgptr1[0]*w11 + (int)imgptr1[dilation_w]*w12 + + (int)imgptr2[0]*w21 + (int)imgptr2[dilation_w]*w22 + + biasCopy + inpZp*(w00 + w10 + w20); + out1 = outZp + (int)std::round(out*mult); + outptr[0] = std::min(std::max(out1, -128), 127); + out_j = 1; + } + #if CV_SIMD + if( stride_w == 1 ) + { + const int out_delta = 16; + v_int8x16 vw00 = v_setall_s8(w00), vw01 = v_setall_s8(w01), vw02 = v_setall_s8(w02), + vw10 = v_setall_s8(w10), vw11 = v_setall_s8(w11), vw12 = v_setall_s8(w12), + vw20 = v_setall_s8(w20), vw21 = v_setall_s8(w21), vw22 = v_setall_s8(w22); + v_int32x4 vout0, vout1, vout2, vout3, vbias = v_setall_s32(biasCopy), voutzp = v_setall_s32(outZp), + outmin = v_setall_s32(-128), outmax = v_setall_s32(127); + v_float32x4 vmult = v_setall_f32(mult); + for( ; out_j < outW1; out_j += out_delta ) + { + if (out_j + out_delta > outW1) + { + if (out_j <= pad_l) + break; + out_j = outW1 - out_delta; + } + int in_j = out_j * 
stride_w - pad_l; + v_int8x16 v00 = v_load(imgptr0 + in_j), + v01 = v_load(imgptr0 + in_j + dilation_w), + v02 = v_load(imgptr0 + in_j + dilation_w*2), + v10 = v_load(imgptr1 + in_j), + v11 = v_load(imgptr1 + in_j + dilation_w), + v12 = v_load(imgptr1 + in_j + dilation_w*2), + v20 = v_load(imgptr2 + in_j), + v21 = v_load(imgptr2 + in_j + dilation_w), + v22 = v_load(imgptr2 + in_j + dilation_w*2); + + vout0 = vout1 = vout2 = vout3 = vbias; + v_expand_mul_add(v00, vw00, vout0, vout1, vout2, vout3); + v_expand_mul_add(v01, vw01, vout0, vout1, vout2, vout3); + v_expand_mul_add(v02, vw02, vout0, vout1, vout2, vout3); + v_expand_mul_add(v10, vw10, vout0, vout1, vout2, vout3); + v_expand_mul_add(v11, vw11, vout0, vout1, vout2, vout3); + v_expand_mul_add(v12, vw12, vout0, vout1, vout2, vout3); + v_expand_mul_add(v20, vw20, vout0, vout1, vout2, vout3); + v_expand_mul_add(v21, vw21, vout0, vout1, vout2, vout3); + v_expand_mul_add(v22, vw22, vout0, vout1, vout2, vout3); + + vout0 = voutzp + v_round(v_cvt_f32(vout0)*vmult); + vout1 = voutzp + v_round(v_cvt_f32(vout1)*vmult); + vout2 = voutzp + v_round(v_cvt_f32(vout2)*vmult); + vout3 = voutzp + v_round(v_cvt_f32(vout3)*vmult); + + vout0 = v_min(v_max(vout0, outmin), outmax); + vout1 = v_min(v_max(vout1, outmin), outmax); + vout2 = v_min(v_max(vout2, outmin), outmax); + vout3 = v_min(v_max(vout3, outmin), outmax); + + v_store(outptr + out_j, vout0); + v_store(outptr + out_j + 4, vout1); + v_store(outptr + out_j + 8, vout2); + v_store(outptr + out_j + 12, vout3); + } + } + #endif + for (; out_j < outW1; out_j++) + { + int in_j = out_j * stride_w - pad_l; + out = (int)imgptr0[in_j]*w00 + (int)imgptr0[in_j + dilation_w]*w01 + (int)imgptr0[in_j + dilation_w*2]*w02 + + (int)imgptr1[in_j]*w10 + (int)imgptr1[in_j + dilation_w]*w11 + (int)imgptr1[in_j + dilation_w*2]*w12 + + (int)imgptr2[in_j]*w20 + (int)imgptr2[in_j + dilation_w]*w21 + (int)imgptr2[in_j + dilation_w*2]*w22 + biasCopy; + out1 = outZp + (int)std::round(out*mult); + 
outptr[out_j] = std::min(std::max(out1, -128), 127); + } + + for (; out_j < outW; out_j++ ) + { + int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; + int s0 = 1, s1 = 1, s2 = 1; + if (in_j0 >= width) + { + in_j0 = 0; + s0 = 0; + biasCopy += inpZp*(w00 + w10 + w20); + } + if (in_j1 >= width) + { + in_j1 = 0; + s1 = 0; + biasCopy += inpZp*(w01 + w11 + w21); + } + if (in_j2 >= width) + { + in_j2 = 0; + s2 = 0; + biasCopy += inpZp*(w02 + w12 + w22); + } + out = (int)imgptr0[in_j0]*w00*s0 + (int)imgptr0[in_j1]*w01*s1 + (int)imgptr0[in_j2]*w02*s2 + + (int)imgptr1[in_j0]*w10*s0 + (int)imgptr1[in_j1]*w11*s1 + (int)imgptr1[in_j2]*w12*s2 + + (int)imgptr2[in_j0]*w20*s0 + (int)imgptr2[in_j1]*w21*s1 + (int)imgptr2[in_j2]*w22*s2 + biasCopy; + out1 = outZp + (int)std::round(out*mult); + outptr[out_j] = std::min(std::max(out1, -128), 127); + } + } + } + continue; + } + // do im2row for a part of input tensor + int8_t* rowbuf = rowbuf0; + + if (isConv1D) + { + for( ofs = ofs0; ofs < ofs1; out_j = 0, ++out_i ) + { + int delta = std::min(ofs1 - ofs, outW - out_j); + int out_j1 = out_j + delta; + + int in_j = out_j * stride_w - pad_l; + const int8_t* imgptr = data_inp0 + cn0*width + in_j; + ofs += delta; + + // do im2row for a part of input tensor + if( is1x1 ) + { + for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w ) + { + for( k = 0; k < vsz; k++ ) + rowbuf[k] = imgptr[k*inpPlaneSize]; + } + } + else + { + for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w, in_j += stride_w ) + { + // this condition should be true for most of the tensor elements, i.e. + // most of the time the kernel aperture is inside the tensor X-Y plane. 
+ if( out_j + 2 <= out_j1 && 0 <= in_j && in_j + stride_w*2 <= width - (kernel_w-1)*dilation_w ) + { + for( k = 0; k < vsz; k++ ) + { + int k1 = ofstab[k]; + int8_t v0 = imgptr[k1]; + int8_t v1 = imgptr[k1 + stride_w]; + rowbuf[k] = v0; + rowbuf[k+vsz_a] = v1; + } + out_j++; + rowbuf += vsz_a; + imgptr += stride_w; + in_j += stride_w; + } + else + { + int i0 = std::max(0, (-in_j + dilation_w-1)/dilation_w); + int i1 = std::min(kernel_w, (width - in_j + dilation_w-1)/dilation_w); + + // here some non-continuous sub-row of the row will not be + // filled from the tensor; we need to make sure that the uncovered + // elements are explicitly set to 0's. the easiest way is to + // set all the elements to 0's before the loop. + memset(rowbuf, (int8_t)inpZp, vsz*sizeof(rowbuf[0])); + for( k = 0; k < ncn; k++ ) + { + for( i = i0; i < i1; i++ ) + { + int imgofs = k*width + i*dilation_w; + rowbuf[k*kernel_w + i] = imgptr[imgofs]; + } + } + } + } + } + } + } + else if (isConv2D) + { + if( is1x1 && stride_w == 1 && stride_h == 1 ) + { + const int8_t* imgptr = data_inp0 + (cn0*height + out_i)*width + out_j; + for( int j = 0; j < bsz; j++, rowbuf += vsz_a ) + { + if( j + 4 <= bsz ) + { + k = 0; + for( ; k < vsz; k++ ) + { + const int8_t* inp = imgptr + j + k*inpPlaneSize; + int8_t v0 = inp[0], v1 = inp[1], v2 = inp[2], v3 = inp[3]; + rowbuf[k] = v0; + rowbuf[k + vsz_a] = v1; + rowbuf[k + vsz_a*2] = v2; + rowbuf[k + vsz_a*3] = v3; + } + j += 3; + rowbuf += vsz_a*3; + } + else + { + for( k = 0; k < vsz; k++ ) + { + rowbuf[k] = imgptr[j + k*inpPlaneSize]; + } + } + } + } + else + for( ofs = ofs0; ofs < ofs1; out_j = 0, ++out_i ) + { + int delta = std::min(ofs1 - ofs, outW - out_j); + int out_j1 = out_j + delta; + + int in_i = out_i * stride_h - pad_t; + int in_j = out_j * stride_w - pad_l; + const int8_t* imgptr = data_inp0 + (cn0*height + in_i)*width + in_j; + ofs += delta; + + // do im2row for a part of input tensor + if( is1x1 ) + { + for( ; out_j < out_j1; out_j++, rowbuf += 
vsz_a, imgptr += stride_w ) + { + for( k = 0; k < vsz; k++ ) + rowbuf[k] = imgptr[k*inpPlaneSize]; + } + } + else + { + bool ok_i = 0 <= in_i && in_i < height - (kernel_h-1)*dilation_h; + int i0 = std::max(0, (-in_i + dilation_h-1)/dilation_h); + int i1 = std::min(kernel_h, (height - in_i + dilation_h-1)/dilation_h); + + for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w, in_j += stride_w ) + { + // this condition should be true for most of the tensor elements, i.e. + // most of the time the kernel aperture is inside the tensor X-Y plane. + if( ok_i && out_j + 2 <= out_j1 && 0 <= in_j && in_j + stride_w*2 <= width - (kernel_w-1)*dilation_w ) + { + for( k = 0; k < vsz; k++ ) + { + int k1 = ofstab[k]; + int8_t v0 = imgptr[k1]; + int8_t v1 = imgptr[k1 + stride_w]; + rowbuf[k] = v0; + rowbuf[k+vsz_a] = v1; + } + out_j++; + rowbuf += vsz_a; + imgptr += stride_w; + in_j += stride_w; + } + else + { + int j0 = std::max(0, (-in_j + dilation_w-1)/dilation_w); + int j1 = std::min(kernel_w, (width - in_j + dilation_w-1)/dilation_w); + + // here some non-continuous sub-row of the row will not be + // filled from the tensor; we need to make sure that the uncovered + // elements are explicitly set to 0's. the easiest way is to + // set all the elements to 0's before the loop. 
+ memset(rowbuf, (int8_t)inpZp, vsz*sizeof(rowbuf[0])); + for( k = 0; k < ncn; k++ ) + { + for( i = i0; i < i1; i++ ) + { + for( j = j0; j < j1; j++ ) + { + int imgofs = k*(width*height) + i*(dilation_h*width) + j*dilation_w; + rowbuf[(k*kernel_h + i)*kernel_w + j] = imgptr[imgofs]; + } + } + } + } + } + } + } + } + else + { + for( ofs = ofs0; ofs < ofs1; out_d += (out_i + 1) / outH, out_i = (out_i + 1) % outH, out_j = 0 ) + { + int delta = std::min(ofs1 - ofs, outW - out_j); + int out_j1 = out_j + delta; + + int in_d = out_d * stride_d - pad_d; + int in_i = out_i * stride_h - pad_t; + int in_j = out_j * stride_w - pad_l; + const int8_t* imgptr = data_inp0 + (cn0*depth*height + in_d*height + in_i)*width + in_j; + ofs += delta; + + int d0 = std::max(0, (-in_d + dilation_d - 1) / dilation_d); + int d1 = std::min(kernel_d, (depth - in_d + dilation_d - 1) / dilation_d); + + int i0 = std::max(0, (-in_i + dilation_h-1)/dilation_h); + int i1 = std::min(kernel_h, (height - in_i + dilation_h-1)/dilation_h); + + for( ; out_j < out_j1; out_j++, rowbuf += vsz_a, imgptr += stride_w, in_j += stride_w ) + { + int j0 = std::max(0, (-in_j + dilation_w-1)/dilation_w); + int j1 = std::min(kernel_w, (width - in_j + dilation_w-1)/dilation_w); + + // here some non-continuous sub-row of the row will not be + // filled from the tensor; we need to make sure that the uncovered + // elements are explicitly set to 0's. the easiest way is to + // set all the elements to 0's before the loop. 
+ memset(rowbuf, (int8_t)inpZp, vsz*sizeof(rowbuf[0])); + for( k = 0; k < ncn; k++ ) + { + for ( d = d0; d < d1; d++) + { + for( i = i0; i < i1; i++ ) + { + for( j = j0; j < j1; j++ ) + { + int imgofs = k*(depth*width*height) + d*dilation_d*width*height + i*(dilation_h*width) + j*dilation_w; + rowbuf[(k*kernel_d*kernel_h + d*kernel_h + i)*kernel_w + j] = imgptr[imgofs]; + } + } + } + } + } + } + } + // now compute dot product of the weights + // and im2row-transformed part of the tensor + #if CV_TRY_AVX512_SKX + if(useAVX512) + opt_AVX2::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, + outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn); + else + #endif + #if CV_TRY_AVX2 + if(useAVX2) + opt_AVX2::fastConv(wptr, wstep, biasptr, rowbuf0, data_out0 + ofs0, + outShape, bsz, vsz, vsz_a, outZp, multptr, cn0 == 0, cn1 == inpCn); + else + #endif + for( int i = 0; i < outCn; i += 2 ) + { + const int8_t* wptr0 = wptr + i*wstep; + const int8_t* wptr1 = wptr0 + wstep; + int* outptr0 = data_out0 + ofs0 + i*outPlaneSize; + int* outptr1 = outptr0 + outPlaneSize; + int bias0 = biasptr[i], bias1 = biasptr[i+1]; + float mult0 = multptr[i], mult1 = multptr[i+1]; + + if( i+1 >= outCn ) + { + wptr1 = wptr0; + outptr1 = outptr0; + bias1 = bias0; + mult1 = mult0; + } + int j = 0; + #if CV_SIMD128 + v_int32x4 voutzp = v_setall_s32(outZp), outmin = v_setall_s32(-128), outmax = v_setall_s32(127); + v_float32x4 vmult0 = v_setall_f32(mult0), vmult1 = v_setall_f32(mult1); + for( ; j <= bsz - 4; j += 4 ) + { + const int8_t* rptr = rowbuf0 + j*vsz_a; + v_int32x4 s0, s1; + + if( cn0 == 0 ) + { + s0 = v_setall_s32(bias0); + s1 = v_setall_s32(bias1); + } + else + { + s0 = v_load(outptr0 + j); + s1 = v_load(outptr1 + j); + } + + v_int32x4 vs00 = v_setzero_s32(), vs01 = v_setzero_s32(), + vs02 = v_setzero_s32(), vs03 = v_setzero_s32(), + vs10 = v_setzero_s32(), vs11 = v_setzero_s32(), + vs12 = v_setzero_s32(), vs13 = v_setzero_s32(); + for( k = 0; k < vsz; k += 16, rptr += 
16 ) + { + v_int8x16 w0 = v_load_aligned(wptr0 + k); + v_int8x16 w1 = v_load_aligned(wptr1 + k); + v_int8x16 r0 = v_load_aligned(rptr); + v_int8x16 r1 = v_load_aligned(rptr + vsz_a); + v_int8x16 r2 = v_load_aligned(rptr + vsz_a*2); + v_int8x16 r3 = v_load_aligned(rptr + vsz_a*3); + + vs00 = v_dotprod_expand_fast(w0, r0, vs00); + vs01 = v_dotprod_expand_fast(w0, r1, vs01); + vs02 = v_dotprod_expand_fast(w0, r2, vs02); + vs03 = v_dotprod_expand_fast(w0, r3, vs03); + + vs10 = v_dotprod_expand_fast(w1, r0, vs10); + vs11 = v_dotprod_expand_fast(w1, r1, vs11); + vs12 = v_dotprod_expand_fast(w1, r2, vs12); + vs13 = v_dotprod_expand_fast(w1, r3, vs13); + } + s0 += v_int32x4(v_reduce_sum(vs00), v_reduce_sum(vs01), v_reduce_sum(vs02), v_reduce_sum(vs03)); + s1 += v_int32x4(v_reduce_sum(vs10), v_reduce_sum(vs11), v_reduce_sum(vs12), v_reduce_sum(vs13)); + if( cn1 == inpCn ) + { + s0 = voutzp + v_round(v_cvt_f32(s0)*vmult0); + s1 = voutzp + v_round(v_cvt_f32(s1)*vmult1); + + s0 = v_min(v_max(s0, outmin), outmax); + s1 = v_min(v_max(s1, outmin), outmax); + } + v_store(outptr0 + j, s0); + v_store(outptr1 + j, s1); + } + #endif + for( ; j < bsz; j++ ) + { + const int8_t* rptr = rowbuf0 + j*vsz_a; + int s00, s10; + + if( cn0 == 0 ) + { + s00 = bias0; + s10 = bias1; + } + else + { + s00 = outptr0[j]; + s10 = outptr1[j]; + } + + for( k = 0; k < vsz; k++ ) + { + int8_t r0 = rptr[k]; + s00 += (int)wptr0[k] * r0; + s10 += (int)wptr1[k] * r0; + } + if( cn1 == inpCn ) + { + int out0 = outZp + (int)std::round(s00*mult0); + int out1 = outZp + (int)std::round(s10*mult1); + + s00 = std::min(std::max(out0, -128), 127); + s10 = std::min(std::max(out1, -128), 127); + } + + outptr0[j] = s00; + outptr1[j] = s10; + } + } + } + } + if( activ_ ) + activ_->forwardSlice(data_out0 + stripeStart, lutptr_, + data_out0 + stripeStart, (int)(stripeEnd - stripeStart), + outPlaneSize, startOutCn, startOutCn + outCn); + } + } + }; + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, 
OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + +#if CV_SSE3 + uint32_t ftzMode = _MM_GET_FLUSH_ZERO_MODE(); + uint32_t dazMode = _MM_GET_DENORMALS_ZERO_MODE(); + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + /*if (inputs[0].dims > 3) { + printf("conv %s: input (%d x %d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", + name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], inputs[0].size[3], + kernel.width, kernel.height, pad.width, pad.height, + stride.width, stride.height, dilation.width, dilation.height); + } + else { + printf("conv %s: input (%d x %d x %d), kernel (%d x %d), pad (%d x %d), stride (%d x %d), dilation (%d x %d)\n", + name.c_str(), inputs[0].size[0], inputs[0].size[1], inputs[0].size[2], + kernel.width, kernel.height, pad.width, pad.height, + stride.width, stride.height, dilation.width, dilation.height); + }*/ + + int inpGroupCn = blobs[0].size[1]; + CV_Assert_N(inputs.size() == (size_t)1, inputs[0].size[1] % inpGroupCn == 0, + outputs.size() == 1, inputs[0].data != outputs[0].data); + + int ngroups = inputs[0].size[1] / inpGroupCn; + CV_Assert(outputs[0].size[1] % ngroups == 0); + + int nstripes = std::max(getNumThreads(), 1); + Mat outputInt32 = Mat(shape(outputs[0]), CV_32S); + + ParallelConv::run(inputs[0], outputInt32, weightsMat, outputMultiplier, biasvec, activationLUT, kernel_size, strides, + pads_begin, pads_end, dilations, activ.get(), ngroups, nstripes, input_zp, output_zp); + + outputInt32.convertTo(outputs[0], CV_8S); + +#if CV_SSE3 + _MM_SET_FLUSH_ZERO_MODE(ftzMode); + _MM_SET_DENORMALS_ZERO_MODE(dazMode); +#endif + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + 
CV_Assert(inputs.size() == outputs.size()); + + int64 flops = 0; + int karea = std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies()); + for (int i = 0; i < outputs.size(); i++) + { + flops += total(outputs[i])*(CV_BIG_INT(2)*karea*inputs[i][1] + 1); + } + return flops; + } +}; + +Ptr ConvolutionLayerInt8::create(const LayerParams ¶ms) +{ + return Ptr(new ConvolutionLayerInt8Impl(params)); +} + +} +} diff --git a/modules/dnn/src/int8layers/elementwise_layers.cpp b/modules/dnn/src/int8layers/elementwise_layers.cpp new file mode 100644 index 000000000000..75118b6bc123 --- /dev/null +++ b/modules/dnn/src/int8layers/elementwise_layers.cpp @@ -0,0 +1,190 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" +#include "layers_common.hpp" + +#include +#include + +namespace cv +{ +namespace dnn +{ + +class ActivationLayerInt8Impl CV_FINAL : public ActivationLayerInt8 +{ +public: + ActivationLayerInt8Impl(const LayerParams ¶ms) + { + setParamsFrom(params); + activationLUT = !blobs.empty() ? 
blobs[0] : Mat(); + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + return true; + } + + class Activation : public cv::ParallelLoopBody + { + public: + const Mat* src; + const Mat* lut; + Mat* dst; + int nstripes; + + Activation() : src(0), lut(0), dst(0), nstripes(0){} + + static void run(const Mat& src, const Mat& lut, Mat& dst, int nstripes) + { + Activation p; + + p.src = &src; + p.lut = &lut; + p.dst = &dst; + p.nstripes = nstripes; + + parallel_for_(Range(0, nstripes), p, nstripes); + } + + void operator()(const Range &r) const CV_OVERRIDE + { + const int8_t* table = lut->ptr(); + int nsamples = 1, outCn = 1; + size_t planeSize = 1; + + if (src->dims > 1) + { + nsamples = src->size[0]; + outCn = src->size[1]; + } + else + outCn = src->size[0]; + + for (int i = 2; i < src->dims; ++i) + planeSize *= src->size[i]; + + size_t stripeSize = (planeSize + nstripes - 1)/nstripes; + size_t stripeStart = r.start*stripeSize; + size_t stripeEnd = std::min(r.end*stripeSize, planeSize); + int len = (int)(stripeEnd - stripeStart); + + for( int i = 0; i < nsamples; i++ ) + { + const int8_t* srcptr = src->ptr(i) + stripeStart; + int8_t* dstptr = dst->ptr(i) + stripeStart; + for( int cn = 0; cn < outCn; cn++, srcptr += planeSize, dstptr += planeSize ) + { + int i = 0; +#if CV_SIMD128 + for( ; i <= len - 16; i += 16 ) + { + v_int8x16 out(table[srcptr[i] + 128], table[srcptr[i+1] + 128], table[srcptr[i+2] + 128], table[srcptr[i+3] + 128], + table[srcptr[i+4] + 128], table[srcptr[i+5] + 128], table[srcptr[i+6] + 128], table[srcptr[i+7] + 128], + table[srcptr[i+8] + 128], table[srcptr[i+9] + 128], table[srcptr[i+10] + 128], table[srcptr[i+11] + 128], + table[srcptr[i+12] + 128], table[srcptr[i+13] 
+ 128], table[srcptr[i+14] + 128], table[srcptr[i+15] + 128]); + v_store(dstptr + i, out); + } +#endif + for( ; i < len; i++ ) + { + dstptr[i] = table[srcptr[i] + 128]; + } + } + } + } + }; + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + for (size_t i = 0; i < inputs.size(); i++) + { + const Mat &src = inputs[i]; + if (!activationLUT.empty()) + { + const int nstripes = getNumThreads(); + Mat &dst = outputs[i]; + CV_Assert(src.size == dst.size && src.type() == dst.type() && + src.isContinuous() && dst.isContinuous() && src.type() == CV_8S); + + Activation::run(src, activationLUT, dst, nstripes); + } + else + { + src.copyTo(outputs[i]); + } + } + } + + void forwardSlice(const int8_t* src, const int8_t* lut, int8_t* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE + { + for( int cn = cn0; cn < cn1; cn++, src += planeSize, dst += planeSize ) + { + int i = 0; +#if CV_SIMD128 + for( ; i <= len - 16; i += 16 ) + { + v_int8x16 out(lut[src[i] + 128], lut[src[i+1] + 128], lut[src[i+2] + 128], lut[src[i+3] + 128], + lut[src[i+4] + 128], lut[src[i+5] + 128], lut[src[i+6] + 128], lut[src[i+7] + 128], + lut[src[i+8] + 128], lut[src[i+9] + 128], lut[src[i+10] + 128], lut[src[i+11] + 128], + lut[src[i+12] + 128], lut[src[i+13] + 128], lut[src[i+14] + 128], lut[src[i+15] + 128]); + v_store(dst + i, out); + } +#endif + for( ; i < len; i++ ) + dst[i] = lut[src[i] + 128]; + } + } + + void forwardSlice(const int* src, const int* lut, int* dst, int len, size_t planeSize, int cn0, int cn1) const CV_OVERRIDE + { + for( int cn = cn0; cn < cn1; cn++, src += planeSize, dst += planeSize ) + { + int i = 0; +#if CV_SIMD128 + for( ; i <= len - 16; i += 16 ) + { + v_int32x4 out0(lut[src[i] + 128], lut[src[i+1] + 128], lut[src[i+2] + 128], lut[src[i+3] + 128]); + 
v_int32x4 out1(lut[src[i+4] + 128], lut[src[i+5] + 128], lut[src[i+6] + 128], lut[src[i+7] + 128]); + v_int32x4 out2(lut[src[i+8] + 128], lut[src[i+9] + 128], lut[src[i+10] + 128], lut[src[i+11] + 128]); + v_int32x4 out3(lut[src[i+12] + 128], lut[src[i+13] + 128], lut[src[i+14] + 128], lut[src[i+15] + 128]); + + v_store(dst + i, out0); + v_store(dst + i + 4, out1); + v_store(dst + i + 8, out2); + v_store(dst + i + 12, out3); + } +#endif + for( ; i < len; i++ ) + dst[i] = lut[src[i] + 128]; + } + + } + + Mat activationLUT; +}; + +Ptr ActivationLayerInt8::create(const LayerParams& params) +{ + return Ptr(new ActivationLayerInt8Impl(params)); +} + +} +} diff --git a/modules/dnn/src/int8layers/eltwise_layer.cpp b/modules/dnn/src/int8layers/eltwise_layer.cpp new file mode 100644 index 000000000000..be7a32b1efd9 --- /dev/null +++ b/modules/dnn/src/int8layers/eltwise_layer.cpp @@ -0,0 +1,577 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include "layers_common.hpp" +#include + +namespace cv +{ +namespace dnn +{ + +class EltwiseLayerInt8Impl CV_FINAL : public EltwiseLayerInt8 +{ +public: + enum EltwiseOp + { + PROD = 0, + SUM = 1, + MAX = 2 + } op; + std::vector coeffs; + std::vector zeropoints; + + enum OutputChannelsMode + { + ELTWISE_CHANNNELS_SAME = 0, //!< number of channels from inputs must be the same and equal to output's number of channels + ELTWISE_CHANNNELS_INPUT_0, //!< number of channels from inputs may be different, + //!< output's number of channels is equal to number of channels of first input + //!< number of channels of other inputs should not be greater than number of channels of first input + ELTWISE_CHANNNELS_INPUT_0_TRUNCATE, //!< number of channels from inputs may be different, + //!< output's number of channels is equal to number of channels of first input + //!< there is restriction on number of channels of other inputs + //!< extra channels of other inputs is ignored + ELTWISE_CHANNNELS_USE_MAX, //!< number of channels from inputs may be different, + //!< output's number of channels is equal to maximal number of input channels + //!< @note supported operation: `SUM` + } channelsModeInput; + + + mutable OutputChannelsMode channelsMode; //!< "optimized" channels mode (switch to ELTWISE_CHANNNELS_SAME if number of input channels are equal) + mutable /*size_t*/int outputChannels; + + EltwiseLayerInt8Impl(const LayerParams& params) + : outputChannels(0) + { + setParamsFrom(params); + offset = params.get("offset", 0.f); + hasVecInput = false; + op = SUM; + if (params.has("operation")) + { + String operation = toLowerCase(params.get("operation")); + if (operation == "prod") + op = PROD; + else if (operation == "sum") + op = SUM; + else if (operation == "max") + op = MAX; + else + CV_Error(cv::Error::StsBadArg, "Unknown operation type \"" + operation + "\""); + } + + if (params.has("coeff")) + { + DictValue paramCoeff = params.get("coeff"); + int i, n 
= paramCoeff.size(); + coeffs.resize(n); + for (i = 0; i < n; i++) + { + coeffs[i] = paramCoeff.get(i); + } + } + + if (params.has("input_zeropoints")) + { + DictValue zp = params.get("input_zeropoints"); + int i, n = zp.size(); + zeropoints.resize(n); + for (i = 0; i < n; i++) + { + zeropoints[i] = zp.get(i); + } + } + + channelsModeInput = ELTWISE_CHANNNELS_SAME; + if (params.has("output_channels_mode")) + { + String v = toLowerCase(params.get("output_channels_mode")); + if (v == "same") + { + channelsModeInput = ELTWISE_CHANNNELS_SAME; + } + else if (v == "input_0") + { + channelsModeInput = ELTWISE_CHANNNELS_INPUT_0; + } + else if (v == "input_0_truncate") + { + channelsModeInput = ELTWISE_CHANNNELS_INPUT_0_TRUNCATE; + } + else if (v == "max_input_channels") + { + channelsModeInput = ELTWISE_CHANNNELS_USE_MAX; + if (op != SUM) + CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") 'max' channels mode is limited to SUM operation only"); + } + else + CV_Error(cv::Error::StsBadArg, "[" + type + "]:(" + name + ") unknown channels mode: \"" + v + "\""); + } + channelsMode = channelsModeInput; + + // TODO Must have checks for other unknown options + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() >= 2); + CV_Assert(inputs[0].size() >= 2); + CV_Assert(coeffs.size() == 0 || coeffs.size() == inputs.size()); + CV_Assert(op == SUM || op == PROD || coeffs.size() == 0); + + int dims = inputs[0].size(); + // Number of channels in output shape is determined by the first input tensor. 
+ bool variableChannels = false; + int numChannels = inputs[0][1]; + for (size_t i = 1; i < inputs.size(); i++) + { + CV_Assert(inputs[0][0] == inputs[i][0]); // batch sizes are equal + + int input_channels = inputs[i][1]; + if (numChannels != input_channels) + variableChannels = true; + + if (channelsModeInput == ELTWISE_CHANNNELS_SAME) + { + CV_Assert(numChannels == input_channels); + } + else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0) + { + CV_Assert(numChannels >= input_channels); + } + else if (channelsModeInput == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE) + { + // nothing to check + } + else if (channelsModeInput == ELTWISE_CHANNNELS_USE_MAX) + { + numChannels = std::max(numChannels, input_channels); + } + else + { + CV_Assert(0 && "Internal error"); + } + } + + channelsMode = variableChannels ? channelsModeInput : ELTWISE_CHANNNELS_SAME; + outputChannels = numChannels; + + outputs.assign(1, inputs[0]); + outputs[0][1] = numChannels; + + if (dims > 2) + { + size_t vecIdx = 0; + bool isVecFound = false; + for (size_t i = 0; i < inputs.size(); i++) + { + bool allOnes = isAllOnes(inputs[i], 2, dims); + if (!allOnes && !isVecFound) + { + vecIdx = i; + isVecFound = true; + } + + if (!allOnes && i != vecIdx) + { + for (size_t j = 2; j < dims; j++) + { + CV_Assert(inputs[vecIdx][j] == inputs[i][j]); + } + } + } + + if (channelsModeInput == ELTWISE_CHANNNELS_SAME && isVecFound) + { + for (size_t j = 2; j < dims; j++) + { + outputs[0][j] = inputs[vecIdx][j]; + } + } + } + + return false; + } + + void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE + { + std::vector inputs; + inputs_arr.getMatVector(inputs); + + for (size_t i = 0; i < inputs.size(); i++) + { + MatShape inpShape = shape(inputs[i].size); + if (isAllOnes(inpShape, 2, inputs[i].dims)) + { + hasVecInput = true; + return; + } + } + } + + class EltwiseInvoker : public ParallelLoopBody + { + EltwiseLayerInt8Impl& self; + std::vector srcs; + std::vector srcNumChannels; + int nsrcs; + 
Mat* dst; + Mat* buf; + std::vector coeffs; + std::vector zeropoints; + int nstripes; + const Mat* activLUT; + const ActivationLayerInt8* activ; + int channels; + size_t planeSize; + float offset; + + EltwiseInvoker(EltwiseLayerInt8Impl& self_) + : self(self_) + , nsrcs(0), dst(0), buf(0), nstripes(0), activ(0), channels(0) + , planeSize(0), offset(0) + {} + + public: + static void run(EltwiseLayerInt8Impl& self, + const Mat* srcs, int nsrcs, Mat& buf, Mat& dst, + int nstripes, float offset) + { + const EltwiseOp op = self.op; + CV_Check(dst.dims, 1 < dst.dims && dst.dims <= 5, ""); CV_CheckTypeEQ(dst.type(), CV_8SC1, ""); CV_Assert(dst.isContinuous()); + CV_Assert(self.coeffs.empty() || self.coeffs.size() == (size_t)nsrcs); + CV_CheckGE(nsrcs, 2, ""); + + CV_Assert(self.outputChannels == dst.size[1]); + + EltwiseInvoker p(self); + p.srcs.resize(nsrcs); + p.srcNumChannels.resize(nsrcs); + p.coeffs = self.coeffs; // can be sorted + p.zeropoints = self.zeropoints; + + bool sortInputs = false; + for( int i = 0; i < nsrcs; i++ ) + { + p.srcs[i] = &srcs[i]; + CV_CheckEQ(srcs[i].dims, dst.dims, ""); + CV_Assert(srcs[i].isContinuous()); + CV_Assert(srcs[i].type() == dst.type()); + p.srcNumChannels[i] = (srcs[i].dims >= 4) ? 
srcs[i].size[1] : 1; + + if (self.channelsMode == ELTWISE_CHANNNELS_SAME) + { + CV_Assert(srcs[i].size == dst.size); + } + else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0) + { + if (i == 0) + CV_Assert(srcs[0].size == dst.size); + CV_Assert(self.outputChannels >= p.srcNumChannels[i]); + sortInputs = true; + } + else if (self.channelsMode == ELTWISE_CHANNNELS_INPUT_0_TRUNCATE) + { + if (i == 0) + CV_Assert(srcs[0].size == dst.size); + sortInputs = true; + } + else if (self.channelsMode == ELTWISE_CHANNNELS_USE_MAX) + { + CV_Assert(op == SUM); + CV_Assert(self.outputChannels >= p.srcNumChannels[i]); + sortInputs = true; + } + else + { + CV_Assert(0 && "Internal error"); + } + + if (sortInputs) + { + // Sort srcs and coefficients in the desc order by number of channels + for (int j = i; j >= 1; j--) + { + if (std::min(self.outputChannels, p.srcs[j - 1]->size[1]) < std::min(self.outputChannels, p.srcs[j]->size[1])) + { + std::swap(p.srcs[j - 1], p.srcs[j]); + std::swap(p.srcNumChannels[j - 1], p.srcNumChannels[j]); + if (!p.coeffs.empty()) + std::swap(p.coeffs[j - 1], p.coeffs[j]); + if (!p.zeropoints.empty()) + std::swap(p.zeropoints[j - 1], p.zeropoints[j]); + } + else + break; + } + } + } + + p.nsrcs = nsrcs; + p.dst = &dst; + p.buf = &buf; + p.nstripes = nstripes; + p.offset = offset; + p.channels = (dst.dims >= 4 ? dst.size[1] : 1); + + p.planeSize = dst.total(dst.dims >= 4 ? 2 : 1); + CV_CheckEQ(dst.total(), dst.size[0] * p.channels * p.planeSize, ""); + p.activLUT = &self.activationLUT; + p.activ = !self.activationLUT.empty() ? self.activ.get() : 0; + + parallel_for_(Range(0, nstripes), p, nstripes); + } + + void operator()(const Range& r) const CV_OVERRIDE + { + const EltwiseOp op = self.op; + size_t total = dst->size[0]*planeSize; + size_t stripeSize = (total + nstripes - 1)/nstripes; + size_t stripeStart = r.start*stripeSize; + size_t stripeEnd = std::min(r.end*stripeSize, total); + const float* coeffsptr = !coeffs.empty() ? 
&coeffs[0] : 0; + const int* zeropointsptr = !zeropoints.empty() ? &zeropoints[0] : 0; + const int8_t* lutptr = !activLUT->empty() ? activLUT->ptr() : 0; + int8_t* dstptr0 = dst->ptr(); + float* bufptr0 = buf->ptr(); + int blockSize0 = 1 << 12; + + for (size_t ofs = stripeStart; ofs < stripeEnd; ) + { + int sampleIdx = (int)(ofs / planeSize); + int delta = (int)ofs - sampleIdx * planeSize; + int blockSize = std::min(blockSize0, std::min((int)(stripeEnd - ofs), (int)planeSize - delta)); + if( blockSize <= 0 ) + break; + ofs += blockSize; + + for (int c = 0; c < channels; c++) + { + size_t dstIdx = delta + (sampleIdx*channels + c)*planeSize; + int8_t* dstptr = dstptr0 + dstIdx; + float* bufptr = bufptr0 + dstIdx; + + // process first two inputs + { + const int8_t* srcptr0 = srcs[0]->ptr() + dstIdx; + + const int inputIdx = 1; + int src1_channels = srcNumChannels[inputIdx]; + if (c >= src1_channels) + { + // no data from second input + if (!coeffsptr) + { + for (int j = 0; j < blockSize; j++) + { + dstptr[j] = srcptr0[j]; + } + } + else + { + float c0 = coeffsptr[0]; + int z0 = op == PROD ? 
zeropointsptr[0] : 0; + for (int j = 0; j < blockSize; j++) + { + bufptr[j] = c0 * (srcptr0[j] - z0); + } + } + } + else + { + size_t srcIdx = delta + (sampleIdx * src1_channels + c) * planeSize; + const int8_t* srcptrI = srcs[inputIdx]->ptr() + srcIdx; + + if (op == PROD) + { + float c0 = coeffsptr[0]; + float c1 = coeffsptr[1]; + int z0 = zeropointsptr[0]; + int z1 = zeropointsptr[1]; + for (int j = 0; j < blockSize; j++) + { + bufptr[j] = (c0*(srcptr0[j] - z0)) * (c1*(srcptrI[j] - z1)); + } + } + else if (op == MAX) + { + for (int j = 0; j < blockSize; j++) + { + dstptr[j] = std::max(srcptr0[j], srcptrI[j]); + } + } + else if (op == SUM) + { + float c0 = coeffsptr[0]; + float c1 = coeffsptr[1]; + for (int j = 0; j < blockSize; j++) + { + bufptr[j] = c0*srcptr0[j] + c1*srcptrI[j]; + } + } + else + CV_Error(Error::StsInternal, ""); + } + } + + // aggregate other inputs (3+) + for (size_t inputIdx = 2; inputIdx < nsrcs; inputIdx++) + { + int srcI_channels = srcNumChannels[inputIdx]; + if (c >= srcI_channels) + continue; // no data from second input + size_t srcIdx = delta + (sampleIdx * srcI_channels + c) * planeSize; + const int8_t* srcptrI = srcs[inputIdx]->ptr() + srcIdx; + + if (op == PROD) + { + float cI = coeffsptr[inputIdx]; + int zI = zeropointsptr[inputIdx]; + for (int j = 0; j < blockSize; j++) + { + bufptr[j] *= cI*(srcptrI[j] - zI); + } + } + else if (op == MAX) + { + for (int j = 0; j < blockSize; j++) + { + dstptr[j] = std::max(dstptr[j], srcptrI[j]); + } + } + else if (op == SUM) + { + float cI = coeffsptr[inputIdx]; + for (int j = 0; j < blockSize; j++) + { + bufptr[j] += cI * srcptrI[j]; + } + } + else + CV_Error(Error::StsInternal, ""); + } + + // add offset and saturate cast to int8 + if (op == SUM || op == PROD) + { + for (int j = 0; j < blockSize; j++) + { + dstptr[j] = saturate_cast(std::round(bufptr[j] + offset)); + } + } + } + if( activ ) + { + int8_t* ptr = dstptr0 + delta + sampleIdx*channels*planeSize; + activ->forwardSlice(ptr, lutptr, 
ptr, blockSize, planeSize, 0, channels); + } + } + } + }; + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + CV_Assert(outputs.size() == 1); + const int nstripes = getNumThreads(); + + if (channelsModeInput == ELTWISE_CHANNNELS_SAME && inputs[0].dims > 2) + { + for (size_t i = 0; i < inputs.size(); i++) + { + MatShape inpShape = shape(inputs[i].size); + bool allOnes = isAllOnes(inpShape, 2, inputs[i].dims); + + if (allOnes) + { + Mat tmpInput = inputs[i]; + MatShape outShape = shape(outputs[0].size); + size_t xSize = outShape[2]; + for (size_t j = 3; j < outShape.size(); j++) + xSize *= outShape[j]; + + int dimVec[3] = {outShape[0], outShape[1], (int) xSize}; + std::vector matSizesVec(&dimVec[0], &dimVec[0] + 3); + inputs[i] = Mat(matSizesVec, tmpInput.type()); + + std::vector idx(outShape.size(), 0); + std::vector outIdx(inpShape.size(), 0); + + for (size_t j = 0; j < outShape[0]; j++) + { + outIdx[0] = idx[0] = j; + for(size_t k = 0; k < outShape[1]; k++) + { + outIdx[1] = idx[1] = k; + for (size_t x = 0; x < xSize; x++) + { + outIdx[2] = x; + inputs[i].at(outIdx.data()) = tmpInput.at(idx.data()); + } + } + } + inputs[i] = inputs[i].reshape(0, outShape); + } + } + } + + Mat buf = Mat(shape(outputs[0]), CV_32F); // to store intermediate results + EltwiseInvoker::run(*this, &inputs[0], (int)inputs.size(), buf, outputs[0], nstripes, offset); + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(outputs); // suppress unused variable warning + CV_Assert(inputs.size()); + + // FIXIT: handle inputs with different number of channels + long flops = inputs.size() * total(inputs[0]); + + return flops; + } + + bool setActivation(const Ptr& layer) 
CV_OVERRIDE + { + Ptr activ_int8 = layer.dynamicCast(); + if (!activ_int8.empty()) + { + activ = activ_int8; + if (!activ_int8->blobs.empty()) + activationLUT = activ_int8->blobs[0]; + return true; + } + return false; + } + + Mat activationLUT; + Ptr activ; + +private: + bool hasVecInput; + float offset; +}; + +Ptr EltwiseLayerInt8::create(const LayerParams& params) +{ + return Ptr(new EltwiseLayerInt8Impl(params)); +} + +} +} diff --git a/modules/dnn/src/int8layers/fully_connected_layer.cpp b/modules/dnn/src/int8layers/fully_connected_layer.cpp new file mode 100644 index 000000000000..83da677a47f6 --- /dev/null +++ b/modules/dnn/src/int8layers/fully_connected_layer.cpp @@ -0,0 +1,266 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" +#include "layers_common.hpp" + +#include + +namespace cv +{ +namespace dnn +{ + +class FullyConnectedLayerInt8Impl CV_FINAL : public InnerProductLayerInt8 +{ +public: + enum { VEC_ALIGN = 32 }; + FullyConnectedLayerInt8Impl(const LayerParams& params) + { + setParamsFrom(params); + output_zp = params.get("zeropoints"); + axis = params.get("axis", 1); + if (blobs.size() == 3) + { + // blobs[0] - Weights + // blobs[1] - Bias fused with offset + // blobs[2] - Multipliers for output stage + int numOutput = params.get("num_output"); + int innerSize = (int)blobs[0].total() / numOutput; + + CV_Assert(blobs[0].dims >= 2 && (size_t)(innerSize * numOutput) == blobs[0].total()); + CV_Assert((size_t)numOutput == blobs[1].total()); + + weightsMat = blobs[0] = blobs[0].reshape(1, numOutput); + int vecsize = weightsMat.cols; + if (vecsize % VEC_ALIGN != 0) + { + int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN); + Mat weightsBuf(weightsMat.rows, vecsize_aligned, weightsMat.type()); + Mat wpadding = weightsBuf.colRange(vecsize, vecsize_aligned); + 
wpadding.setTo(Scalar::all(0)); + weightsMat = weightsBuf.colRange(0, vecsize); + blobs[0].copyTo(weightsMat); + } + biasMat = blobs[1] = blobs[1].reshape(1, 1); + outputMultiplier = blobs[2]; + } + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &) const CV_OVERRIDE + { + int numOutput, cAxis; + CV_CheckEQ(inputs.size(), (size_t)1, ""); + CV_CheckEQ(blobs[0].dims, 2, ""); + numOutput = blobs[0].size[0]; + CV_Assert((size_t)numOutput == blobs[1].total()); + cAxis = normalize_axis(axis, inputs[0]); + + MatShape outShape(cAxis + 1); + for (int i = 0; i < cAxis; ++i) + outShape[i] = inputs[0][i]; + outShape.back() = numOutput; + + outputs.resize(1, outShape); + return false; + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + virtual bool setActivation(const Ptr& layer) CV_OVERRIDE + { + Ptr activ_int8 = layer.dynamicCast(); + if (!activ_int8.empty()) + { + activ = activ_int8; + if (!activ_int8->blobs.empty()) + activ_int8->blobs[0].convertTo(activationLUT, CV_32S); + return true; + } + return false; + } + + class FullyConnected : public ParallelLoopBody + { + public: + FullyConnected() : srcMat(0), weights(0), biasMat(0), outputMultiplier(0), activationLUT(0), activ(0), + dstMat(0), nstripes(0), outZp(0), useAVX2(false), useAVX512(false) {} + + static void run(const Mat& srcMat, const Mat& weights, const Mat& biasMat, const Mat& outputMultiplier, + const Mat& activationLUT, Mat& dstMat, const ActivationLayerInt8* activ, int nstripes, int outZp) + { + CV_Assert( srcMat.dims == 2 && srcMat.cols == weights.cols && + dstMat.rows == srcMat.rows && dstMat.cols == weights.rows && + srcMat.type() == weights.type() && srcMat.type() == CV_8S && + dstMat.type() == CV_32S && biasMat.type() == CV_32S && + biasMat.isContinuous() && (int)biasMat.total() == dstMat.cols ); + + FullyConnected p; + + p.srcMat = &srcMat; + p.weights = &weights; + 
p.biasMat = &biasMat; + p.outputMultiplier = &outputMultiplier; + p.activationLUT = &activationLUT; + p.dstMat = &dstMat; + p.nstripes = nstripes; + p.outZp = outZp; + p.activ = !activationLUT.empty() ? activ : 0; + p.useAVX2 = checkHardwareSupport(CPU_AVX2); + p.useAVX512 = CV_CPU_HAS_SUPPORT_AVX512_SKX; + + parallel_for_(Range(0, nstripes), p, nstripes); + } + + void operator()(const Range& r) const CV_OVERRIDE + { + int valign = FullyConnectedLayerInt8Impl::VEC_ALIGN; + int nsamples = srcMat->rows; + int nw0 = weights->rows; + int k, vecsize = srcMat->cols; + int vecsize_aligned = (int)alignSize(vecsize, VEC_ALIGN); + size_t total = (size_t)nsamples*nw0; + size_t stripeSize = (total + nstripes - 1)/nstripes; + size_t stripeStart = r.start*stripeSize; + size_t stripeEnd = r.end == nstripes ? total : std::min(r.end*stripeSize, total); + size_t wstep = weights->step1(); + AutoBuffer srcbuf(vecsize_aligned + valign); + int8_t* sptr = alignPtr(srcbuf.data(), (int)(valign*sizeof(int8_t))); + const int* lutptr = !activationLUT->empty() ? 
activationLUT->ptr() : 0; + + for( k = vecsize; k < vecsize_aligned; k++ ) + sptr[k] = 0; + + for( size_t ofs = stripeStart; ofs < stripeEnd; ) + { + int sampleIdx = (int)(ofs / nw0); + int delta = (int)(ofs - (size_t)sampleIdx*nw0); + const int8_t* sptr_ = srcMat->ptr(sampleIdx); + const int8_t* wptr = weights->ptr(delta); + int* dptr = dstMat->ptr(sampleIdx) + delta; + const int* biasptr = biasMat->ptr() + delta; + const float* multptr = outputMultiplier->ptr() + delta; + int nw = std::min(nw0 - delta, (int)(stripeEnd - ofs)); + + memcpy(sptr, sptr_, vecsize*sizeof(sptr[0])); + #if CV_TRY_AVX512_SKX + if( useAVX512 ) + opt_AVX512_SKX::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp ); + else + #endif + #if CV_TRY_AVX2 + if( useAVX2 ) + opt_AVX2::fastGEMM1T( sptr, wptr, wstep, biasptr, multptr, dptr, nw, vecsize, outZp ); + else + #endif + { + int i = 0; + #if CV_SIMD + for( ; i <= nw - 4; i += 4, wptr += 4*wstep ) + { + v_int32x4 vs0 = v_setzero_s32(), vs1 = v_setzero_s32(), + vs2 = v_setzero_s32(), vs3 = v_setzero_s32(); + v_int32x4 outzp = v_setall_s32(outZp), outmin = v_setall_s32(-128), outmax = v_setall_s32(127); + v_int32x4 s = v_load(biasptr + i); + v_float32x4 mult = v_load(multptr + i); + + for( k = 0; k < vecsize; k += 16 ) + { + v_int8x16 v = v_load_aligned(sptr + k); + vs0 = v_dotprod_expand_fast(v, v_load_aligned(wptr + k), vs0); + vs1 = v_dotprod_expand_fast(v, v_load_aligned(wptr + wstep + k), vs1); + vs2 = v_dotprod_expand_fast(v, v_load_aligned(wptr + wstep*2 + k), vs2); + vs3 = v_dotprod_expand_fast(v, v_load_aligned(wptr + wstep*3 + k), vs3); + } + + s += v_int32x4(v_reduce_sum(vs0), v_reduce_sum(vs1), v_reduce_sum(vs2), v_reduce_sum(vs3)); + v_int32x4 out = outzp + v_round(v_cvt_f32(s)*mult); + v_store(dptr + i, v_min(v_max(out, outmin), outmax)); + } + #endif + + for( ; i < nw; i++, wptr += wstep ) + { + int s0 = biasptr[i]; + float mult0 = multptr[i]; + + for( k = 0; k < vecsize; k++ ) + { + int8_t v = sptr[k]; + s0 
+= (int)v*wptr[k]; + } + int out0 = outZp + (int)std::round(s0*mult0); + dptr[i] = std::min(std::max(out0, -128), 127); + } + } + + if(activ) + activ->forwardSlice(dptr, lutptr, dptr, 1, 1, delta, delta + nw); + + ofs += nw; + } + } + + const Mat *srcMat, *weights, *biasMat, *outputMultiplier, *activationLUT; + const ActivationLayerInt8* activ; + Mat* dstMat; + int nstripes, outZp; + bool useAVX2; + bool useAVX512; + }; + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector input, output; + inputs_arr.getMatVector(input); + outputs_arr.getMatVector(output); + + int axisCan = normalize_axis(axis, input[0].dims); + int outerSize = input[0].total(0, axisCan); + Mat srcMat = input[0].reshape(1, outerSize); + + Mat dstMat = output[0].reshape(1, outerSize); + Mat dstMatInt32= Mat(shape(dstMat), CV_32S); + + const int nstripes = getNumThreads(); + FullyConnected::run(srcMat, weightsMat, biasMat, outputMultiplier, activationLUT, dstMatInt32, activ.get(), nstripes, output_zp); + dstMatInt32.convertTo(dstMat, CV_8S); + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(inputs); // suppress unused variable warning + long flops = 0; + + int innerSize = blobs[0].size[1]; + for(int i = 0; i < outputs.size(); i++) + { + flops += CV_BIG_INT(3)*innerSize*total(outputs[i]); + } + + return flops; + + } + + Mat weightsMat, biasMat, outputMultiplier, activationLUT; + Ptr activ; +}; + +Ptr InnerProductLayerInt8::create(const LayerParams& params) +{ + return Ptr(new FullyConnectedLayerInt8Impl(params)); +} + +} +} diff --git a/modules/dnn/src/int8layers/layers_common.hpp b/modules/dnn/src/int8layers/layers_common.hpp new file mode 100644 index 000000000000..cb185a9edaa4 --- /dev/null +++ b/modules/dnn/src/int8layers/layers_common.hpp @@ -0,0 +1,41 @@ +// 
This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__ +#define __OPENCV_DNN_LAYERS_LAYERS_COMMON_HPP__ +#include +#include + +#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY +// dispatched AVX/AVX2 optimizations +#include "./layers_common.simd.hpp" +#include "int8layers/layers_common.simd_declarations.hpp" +#undef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +#ifdef HAVE_OPENCL +#include "../ocl4dnn/include/ocl4dnn.hpp" +#endif + +namespace cv +{ +namespace dnn +{ +void getConvolutionKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& pads_begin, + std::vector& pads_end, std::vector& strides, std::vector& dilations, + cv::String &padMode, std::vector& adjust_pads); + +void getPoolingKernelParams(const LayerParams ¶ms, std::vector& kernel, std::vector& globalPooling, + std::vector& pads_begin, std::vector& pads_end, std::vector& strides, cv::String &padMode); + +void getConvPoolOutParams(const std::vector& inp, const std::vector& kernel, + const std::vector& stride, const String &padMode, + const std::vector& dilation, std::vector& out); + + void getConvPoolPaddings(const std::vector& inp, const std::vector& kernel, + const std::vector& strides, const String &padMode, + std::vector& pads_begin, std::vector& pads_end); +} +} + +#endif diff --git a/modules/dnn/src/int8layers/layers_common.simd.hpp b/modules/dnn/src/int8layers/layers_common.simd.hpp new file mode 100644 index 000000000000..bf6149e5c958 --- /dev/null +++ b/modules/dnn/src/int8layers/layers_common.simd.hpp @@ -0,0 +1,637 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "opencv2/core/hal/intrin.hpp" + +namespace cv { +namespace dnn { +CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN + +void fastConv( const int8_t* weights, size_t wstep, const int* bias, + const int8_t* rowbuf, int* output, const int* outShape, + int blockSize, int vecsize, int vecsize_aligned, int outZp, + const float* multiplier, bool initOutput, bool finalOutput ); +void fastDepthwiseConv( const int8_t* wptr, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int dilation_h, int dilation_w, + int pad_t, int pad_l, + const int* biasptr, const float* multptr, + const int8_t* inptr_, + int height, int width, + int* outptr_, + int out_d, int outH, int outW, + int inpZp, int outZp ); +void fastGEMM1T( const int8_t* vec, const int8_t* weights, + size_t wstep, const int* bias, const float* multiplier, + int* dst, int nvecs, int vecsize, int outZp ); + +#if !defined(CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY) && CV_AVX2 +#define OPENCV_FMADD_EPI8(_Tpvec, func) \ + inline _Tpvec _##func##_fmaddepi8_epi32(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \ + { \ + _Tpvec even_a = _##func##_srai_epi16(_##func##_bslli_epi128(a, 1), 8); \ + _Tpvec odd_a = _##func##_srai_epi16(a, 8); \ + \ + _Tpvec even_b = _##func##_srai_epi16(_##func##_bslli_epi128(b, 1), 8); \ + _Tpvec odd_b = _##func##_srai_epi16(b, 8); \ + \ + _Tpvec prod0 = _##func##_madd_epi16(even_a, even_b); \ + _Tpvec prod1 = _##func##_madd_epi16(odd_a, odd_b); \ + return _##func##_add_epi32(_##func##_add_epi32(prod0, prod1), c); \ + } +OPENCV_FMADD_EPI8(__m256i, mm256) +//OPENCV_FMADD_EPI8(__m512i, mm512) + +enum { FASCONV_BASE_VECSZ = 4 }; + +void fastConv( const int8_t* weights, size_t wstep, const int* bias, + const int8_t* rowbuf, int* output, const int* outShape, + int blockSize, int vecsize, int vecsize_aligned, int outZp, + const float* multiplier, bool initOutput, bool finalOutput ) +{ + int outCn = outShape[1]; + size_t outPlaneSize = outShape[2]*outShape[3]; + int CV_DECL_ALIGNED(16) 
maskbuf[FASCONV_BASE_VECSZ] = {0}; + int rsz = blockSize % FASCONV_BASE_VECSZ; + for( int i = 0; i < rsz; i++ ) + maskbuf[FASCONV_BASE_VECSZ - i - 1] = -1; + __m128 mask = _mm_loadu_ps((const float*)maskbuf); + + // now compute dot product of the weights + // and im2row-transformed part of the tensor + for( int i = 0; i < outCn; i += 3 ) + { + const int8_t* wptr0 = weights + i*wstep; + const int8_t* wptr1 = wptr0 + wstep; + const int8_t* wptr2 = wptr1 + wstep; + int* outptr0 = output + i*outPlaneSize; + int* outptr1 = outptr0 + outPlaneSize; + int* outptr2 = outptr1 + outPlaneSize; + int bias0 = bias[i], bias1 = bias[i+1], bias2 = bias[i+2]; + float mult0 = multiplier[i], mult1 = multiplier[i+1], mult2 = multiplier[i+2]; + + if( i+2 >= outCn ) + { + wptr2 = wptr1; + outptr2 = outptr1; + bias2 = bias1; + mult2 = mult1; + + if( i+1 >= outCn ) + { + wptr2 = wptr1 = wptr0; + outptr2 = outptr1 = outptr0; + bias2 = bias1 = bias0; + mult2 = mult1 = mult0; + } + } + int j = 0; + for( ; j < blockSize; j += FASCONV_BASE_VECSZ ) + { + bool tail = false; + if (j + FASCONV_BASE_VECSZ > blockSize) + { + if (j == 0) + break; + j = blockSize - FASCONV_BASE_VECSZ; + tail = true; + } + int k = 0; + const int8_t* rptr = rowbuf + j*vecsize_aligned; + + __m256i vs00 = _mm256_setzero_si256(), vs01 = _mm256_setzero_si256(), + vs02 = _mm256_setzero_si256(), vs03 = _mm256_setzero_si256(), + vs10 = _mm256_setzero_si256(), vs11 = _mm256_setzero_si256(), + vs12 = _mm256_setzero_si256(), vs13 = _mm256_setzero_si256(), + vs20 = _mm256_setzero_si256(), vs21 = _mm256_setzero_si256(), + vs22 = _mm256_setzero_si256(), vs23 = _mm256_setzero_si256(); + + /* TODO : Fix AVX-512 path. Segmentation fault in Conv2D Tests. 
+#if CV_AVX512_SKX // AVX512VL is necessary to avoid register spilling + if (vecsize >= 64) + { + __m512i vs00_5 = _mm512_setzero_si512(), vs01_5 = _mm512_setzero_si512(), + vs02_5 = _mm512_setzero_si512(), vs03_5 = _mm512_setzero_si512(), + vs10_5 = _mm512_setzero_si512(), vs11_5 = _mm512_setzero_si512(), + vs12_5 = _mm512_setzero_si512(), vs13_5 = _mm512_setzero_si512(), + vs20_5 = _mm512_setzero_si512(), vs21_5 = _mm512_setzero_si512(), + vs22_5 = _mm512_setzero_si512(), vs23_5 = _mm512_setzero_si512(); + + for (; k <= vecsize - 64; k += 64, rptr += 64) + { + __m512i w0 = _mm512_load_si512(wptr0 + k); + __m512i w1 = _mm512_load_si512(wptr1 + k); + __m512i w2 = _mm512_load_si512(wptr2 + k); + __m512i r0 = _mm512_load_si512(rptr); + + vs00_5 = _mm512_fmaddepi8_epi32(w0, r0, vs00_5); + vs10_5 = _mm512_fmaddepi8_epi32(w1, r0, vs10_5); + vs20_5 = _mm512_fmaddepi8_epi32(w2, r0, vs20_5); + + r0 = _mm512_load_si512(rptr + vecsize_aligned); + vs01_5 = _mm512_fmaddepi8_epi32(w0, r0, vs01_5); + vs11_5 = _mm512_fmaddepi8_epi32(w1, r0, vs11_5); + vs21_5 = _mm512_fmaddepi8_epi32(w2, r0, vs21_5); + + r0 = _mm512_load_si512(rptr + vecsize_aligned*2); + vs02_5 = _mm512_fmaddepi8_epi32(w0, r0, vs02_5); + vs12_5 = _mm512_fmaddepi8_epi32(w1, r0, vs12_5); + vs22_5 = _mm512_fmaddepi8_epi32(w2, r0, vs22_5); + + r0 = _mm512_load_si512(rptr + vecsize_aligned*3); + vs03_5 = _mm512_fmaddepi8_epi32(w0, r0, vs03_5); + vs13_5 = _mm512_fmaddepi8_epi32(w1, r0, vs13_5); + vs23_5 = _mm512_fmaddepi8_epi32(w2, r0, vs23_5); + } + + // now fold the 512 bit accumulator vectors into 256 bit vectors so that the AVX2 code can finish + // the tail of the vector + + vs00 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs00_5, 0), _mm512_extracti32x8_epi32(vs00_5, 1)); + vs10 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs10_5, 0), _mm512_extracti32x8_epi32(vs10_5, 1)); + vs20 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs20_5, 0), _mm512_extracti32x8_epi32(vs20_5, 1)); + + vs01 = _mm256_add_epi32( 
_mm512_extracti32x8_epi32(vs01_5, 0), _mm512_extracti32x8_epi32(vs01_5, 1)); + vs11 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs11_5, 0), _mm512_extracti32x8_epi32(vs11_5, 1)); + vs21 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs21_5, 0), _mm512_extracti32x8_epi32(vs21_5, 1)); + + vs02 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs02_5, 0), _mm512_extracti32x8_epi32(vs02_5, 1)); + vs12 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs12_5, 0), _mm512_extracti32x8_epi32(vs12_5, 1)); + vs22 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs22_5, 0), _mm512_extracti32x8_epi32(vs22_5, 1)); + + vs03 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs03_5, 0), _mm512_extracti32x8_epi32(vs03_5, 1)); + vs13 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs13_5, 0), _mm512_extracti32x8_epi32(vs13_5, 1)); + vs23 = _mm256_add_epi32( _mm512_extracti32x8_epi32(vs23_5, 0), _mm512_extracti32x8_epi32(vs23_5, 1)); + } +#endif + */ + for (; k < vecsize; k += 32, rptr += 32 ) + { + __m256i w0 = _mm256_load_si256((const __m256i*)(wptr0 + k)); + __m256i w1 = _mm256_load_si256((const __m256i*)(wptr1 + k)); + __m256i w2 = _mm256_load_si256((const __m256i*)(wptr2 + k)); + __m256i r0 = _mm256_load_si256((const __m256i*)rptr); + + vs00 = _mm256_fmaddepi8_epi32(w0, r0, vs00); + vs10 = _mm256_fmaddepi8_epi32(w1, r0, vs10); + vs20 = _mm256_fmaddepi8_epi32(w2, r0, vs20); + + r0 = _mm256_load_si256((const __m256i*)(rptr + vecsize_aligned)); + vs01 = _mm256_fmaddepi8_epi32(w0, r0, vs01); + vs11 = _mm256_fmaddepi8_epi32(w1, r0, vs11); + vs21 = _mm256_fmaddepi8_epi32(w2, r0, vs21); + + r0 = _mm256_load_si256((const __m256i*)(rptr + vecsize_aligned*2)); + vs02 = _mm256_fmaddepi8_epi32(w0, r0, vs02); + vs12 = _mm256_fmaddepi8_epi32(w1, r0, vs12); + vs22 = _mm256_fmaddepi8_epi32(w2, r0, vs22); + + r0 = _mm256_load_si256((const __m256i*)(rptr + vecsize_aligned*3)); + vs03 = _mm256_fmaddepi8_epi32(w0, r0, vs03); + vs13 = _mm256_fmaddepi8_epi32(w1, r0, vs13); + vs23 = _mm256_fmaddepi8_epi32(w2, r0, 
vs23); + } + + __m256i t0 = _mm256_hadd_epi32(_mm256_hadd_epi32(vs00, vs01), _mm256_hadd_epi32(vs02, vs03)); + __m256i t1 = _mm256_hadd_epi32(_mm256_hadd_epi32(vs10, vs11), _mm256_hadd_epi32(vs12, vs13)); + __m256i t2 = _mm256_hadd_epi32(_mm256_hadd_epi32(vs20, vs21), _mm256_hadd_epi32(vs22, vs23)); + + t0 = _mm256_add_epi32(t0, _mm256_permute2x128_si256(t0, t0, 1)); + t1 = _mm256_add_epi32(t1, _mm256_permute2x128_si256(t1, t1, 1)); + t2 = _mm256_add_epi32(t2, _mm256_permute2x128_si256(t2, t2, 1)); + + __m128i s0, s1, s2; + + if( initOutput ) + { + s0 = _mm_set1_epi32(bias0); + s1 = _mm_set1_epi32(bias1); + s2 = _mm_set1_epi32(bias2); + } + else + { + s0 = _mm_loadu_si128((__m128i*)(outptr0 + j)); + s1 = _mm_loadu_si128((__m128i*)(outptr1 + j)); + s2 = _mm_loadu_si128((__m128i*)(outptr2 + j)); + } + + s0 = _mm_add_epi32(s0, _mm256_castsi256_si128(t0)); + s1 = _mm_add_epi32(s1, _mm256_castsi256_si128(t1)); + s2 = _mm_add_epi32(s2, _mm256_castsi256_si128(t2)); + + if( finalOutput ) + { + __m128i voutzp = _mm_set1_epi32(outZp); + __m128i outmin = _mm_set1_epi32(-128), outmax = _mm_set1_epi32(127); + s0 = _mm_add_epi32(voutzp, _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(s0), _mm_set1_ps(mult0)))); + s1 = _mm_add_epi32(voutzp, _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(s1), _mm_set1_ps(mult1)))); + s2 = _mm_add_epi32(voutzp, _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(s2), _mm_set1_ps(mult2)))); + + s0 = _mm_min_epi32(_mm_max_epi32(s0, outmin), outmax); + s1 = _mm_min_epi32(_mm_max_epi32(s1, outmin), outmax); + s2 = _mm_min_epi32(_mm_max_epi32(s2, outmin), outmax); + } + if( tail ) + { + s0 = _mm_castps_si128(_mm_blendv_ps(_mm_loadu_ps((const float*)outptr0 + j), _mm_castsi128_ps(s0), mask)); + s1 = _mm_castps_si128(_mm_blendv_ps(_mm_loadu_ps((const float*)outptr1 + j), _mm_castsi128_ps(s1), mask)); + s2 = _mm_castps_si128(_mm_blendv_ps(_mm_loadu_ps((const float*)outptr2 + j), _mm_castsi128_ps(s2), mask)); + } + _mm_storeu_si128((__m128i*)(outptr0 + j), s0); + 
_mm_storeu_si128((__m128i*)(outptr1 + j), s1); + _mm_storeu_si128((__m128i*)(outptr2 + j), s2); + } + + for( ; j <= blockSize - 2; j += 2 ) + { + const int8_t* rptr0 = rowbuf + j*vecsize_aligned; + const int8_t* rptr1 = rowbuf + (j+1)*vecsize_aligned; + int s00, s01, s10, s11, s20, s21; + + if( initOutput ) + { + s00 = s01 = bias0; + s10 = s11 = bias1; + s20 = s21 = bias2; + } + else + { + s00 = outptr0[j]; s01 = outptr0[j+1]; + s10 = outptr1[j]; s11 = outptr1[j+1]; + s20 = outptr2[j]; s21 = outptr2[j+1]; + } + + for( int k = 0; k < vecsize; k++ ) + { + int8_t w0 = wptr0[k], w1 = wptr1[k], w2 = wptr2[k]; + int8_t r = rptr0[k]; + s00 += (int)w0*r; s10 += (int)w1*r; s20 += (int)w2*r; + r = rptr1[k]; + s01 += (int)w0*r; s11 += (int)w1*r; s21 += (int)w2*r; + } + + if( finalOutput ) + { + s00 = std::min(std::max(outZp + (int)std::round(s00*mult0), -128), 127); + s01 = std::min(std::max(outZp + (int)std::round(s01*mult0), -128), 127); + s10 = std::min(std::max(outZp + (int)std::round(s10*mult1), -128), 127); + s11 = std::min(std::max(outZp + (int)std::round(s11*mult1), -128), 127); + s20 = std::min(std::max(outZp + (int)std::round(s20*mult2), -128), 127); + s21 = std::min(std::max(outZp + (int)std::round(s21*mult2), -128), 127); + } + outptr0[j] = s00; + outptr0[j+1] = s01; + outptr1[j] = s10; + outptr1[j+1] = s11; + outptr2[j] = s20; + outptr2[j+1] = s21; + } + + for( ; j < blockSize; j++ ) + { + const int8_t* rptr0 = rowbuf + j*vecsize_aligned; + int s00, s10, s20; + + if( initOutput ) + { + s00 = bias0; + s10 = bias1; + s20 = bias2; + } + else + { + s00 = outptr0[j]; + s10 = outptr1[j]; + s20 = outptr2[j]; + } + + for( int k = 0; k < vecsize; k++ ) + { + int8_t w0 = wptr0[k], w1 = wptr1[k], w2 = wptr2[k]; + int8_t r = rptr0[k]; + s00 += (int)w0*r; s10 += (int)w1*r; s20 += (int)w2*r; + } + + if( finalOutput ) + { + s00 = std::min(std::max(outZp + (int)std::round(s00*mult0), -128), 127); + s10 = std::min(std::max(outZp + (int)std::round(s10*mult1), -128), 127); + s20 = 
std::min(std::max(outZp + (int)std::round(s20*mult2), -128), 127); + } + outptr0[j] = s00; + outptr1[j] = s10; + outptr2[j] = s20; + } + } + _mm256_zeroupper(); +} + +static inline void _mm256_expand_mul_add(const __m256i& a, const __m256i& b, + __m256i& out0, __m256i& out1, __m256i& out2, __m256i& out3) +{ + __m256i a0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(a)); + __m256i a1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(a, 1)); + + __m256i b0 = _mm256_cvtepi8_epi16(_mm256_castsi256_si128(b)); + __m256i b1 = _mm256_cvtepi8_epi16(_mm256_extracti128_si256(b, 1)); + + __m256i a0b0 = _mm256_mullo_epi16(a0, b0); + __m256i a1b1 = _mm256_mullo_epi16(a1, b1); + + out0 = _mm256_add_epi32(out0, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(a0b0))); + out1 = _mm256_add_epi32(out1, _mm256_cvtepi16_epi32(_mm256_extracti128_si256(a0b0, 1))); + out2 = _mm256_add_epi32(out2, _mm256_cvtepi16_epi32(_mm256_castsi256_si128(a1b1))); + out3 = _mm256_add_epi32(out3, _mm256_cvtepi16_epi32(_mm256_extracti128_si256(a1b1, 1))); +} + +static inline void _mm256_load_deinterleave(const int8_t* ptr, __m256i& a, __m256i& b) +{ + __m256i t0 = _mm256_loadu_si256((const __m256i*)ptr); + __m256i t1 = _mm256_loadu_si256((const __m256i*)(ptr + 32)); + + const __m256i sh = _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15, + 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15); + __m256i p0 = _mm256_shuffle_epi8(t0, sh); + __m256i p1 = _mm256_shuffle_epi8(t1, sh); + __m256i lo = _mm256_permute2x128_si256(p0, p1, 0 + 2*16); + __m256i hi = _mm256_permute2x128_si256(p0, p1, 1 + 3*16); + a = _mm256_unpacklo_epi64(lo, hi); + b = _mm256_unpackhi_epi64(lo, hi); +} + +void fastDepthwiseConv( const int8_t* wptr, + int kernel_h, int kernel_w, + int stride_h, int stride_w, + int dilation_h, int dilation_w, + int pad_t, int pad_l, + const int* biasptr, const float* multptr, + const int8_t* inptr_, + int height, int width, + int* outptr_, + int out_d, int outH, int outW, + int inpZp, int 
outZp) +{ + const int8_t w00_ = wptr[0], w01_ = wptr[1], w02_ = wptr[2], + w10 = wptr[3], w11 = wptr[4], w12 = wptr[5], + w20_ = wptr[6], w21_ = wptr[7], w22_ = wptr[8]; + int outW1 = min(outW, (width - dilation_w*(kernel_w - 1) + pad_l)/stride_w); + float mult = multptr[out_d]; + int bias = biasptr[out_d]; + int biasCopy; + + for (int out_i = 0; out_i < outH; out_i++) + { + int in_i = out_i * stride_h - pad_t, out_j = 0; + const int8_t* imgptr0 = inptr_ + in_i*width; + const int8_t* imgptr1 = imgptr0 + dilation_h*width; + const int8_t* imgptr2 = imgptr0 + (dilation_h*2)*width; + int8_t w00 = w00_, w01 = w01_, w02 = w02_; + int8_t w20 = w20_, w21 = w21_, w22 = w22_; + int out; + biasCopy = bias; + if (in_i < 0) + { + biasCopy += inpZp * (w00 + w01 + w02); + w00 = w01 = w02 = 0; + imgptr0 = imgptr1; + } + else if (in_i + dilation_h*(kernel_h-1) >= height) + { + biasCopy += inpZp * (w20 + w21 + w22); + w20 = w21 = w22 = 0; + imgptr2 = imgptr1; + } + int* outptr = outptr_ + out_i*outW; + if (pad_l > 0) + { + out = (int)imgptr0[0]*w01 + (int)imgptr0[dilation_w]*w02 + + (int)imgptr1[0]*w11 + (int)imgptr1[dilation_w]*w12 + + (int)imgptr2[0]*w21 + (int)imgptr2[dilation_w]*w22 + + biasCopy + inpZp*(w00 + w10 + w20); + outptr[0] = std::min(std::max(outZp + (int)std::round(out*mult), -128), 127); + out_j = 1; + } + + if (stride_w == 1 || (stride_w == 2 && dilation_w == 1)) + { + const int VECSZ = 32; + __m256i vw00 = _mm256_set1_epi8(w00), vw01 = _mm256_set1_epi8(w01), vw02 = _mm256_set1_epi8(w02), + vw10 = _mm256_set1_epi8(w10), vw11 = _mm256_set1_epi8(w11), vw12 = _mm256_set1_epi8(w12), + vw20 = _mm256_set1_epi8(w20), vw21 = _mm256_set1_epi8(w21), vw22 = _mm256_set1_epi8(w22); + __m256i vbias = _mm256_set1_epi32(biasCopy), voutzp = _mm256_set1_epi32(outZp), + outmin = _mm256_set1_epi32(-128), outmax = _mm256_set1_epi32(127); + __m256 vmult = _mm256_set1_ps(mult); + __m256i vout0, vout1, vout2, vout3; + + if( stride_w == 1 ) + { + for( ; out_j < outW1; out_j += VECSZ ) + { 
+ if (out_j + VECSZ > outW1) + { + if (out_j <= pad_l) + break; + out_j = outW1 - VECSZ; + } + int in_j = out_j * stride_w - pad_l; + __m256i v00 = _mm256_loadu_si256((const __m256i*)(imgptr0 + in_j)), + v01 = _mm256_loadu_si256((const __m256i*)(imgptr0 + in_j + dilation_w)), + v02 = _mm256_loadu_si256((const __m256i*)(imgptr0 + in_j + dilation_w*2)), + v10 = _mm256_loadu_si256((const __m256i*)(imgptr1 + in_j)), + v11 = _mm256_loadu_si256((const __m256i*)(imgptr1 + in_j + dilation_w)), + v12 = _mm256_loadu_si256((const __m256i*)(imgptr1 + in_j + dilation_w*2)), + v20 = _mm256_loadu_si256((const __m256i*)(imgptr2 + in_j)), + v21 = _mm256_loadu_si256((const __m256i*)(imgptr2 + in_j + dilation_w)), + v22 = _mm256_loadu_si256((const __m256i*)(imgptr2 + in_j + dilation_w*2)); + + vout0 = vout1 = vout2 = vout3 = vbias; + _mm256_expand_mul_add(v00, vw00, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v01, vw01, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v02, vw02, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v10, vw10, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v11, vw11, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v12, vw12, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v20, vw20, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v21, vw21, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v22, vw22, vout0, vout1, vout2, vout3); + + vout0 = _mm256_add_epi32(voutzp, _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(vout0), vmult))); + vout1 = _mm256_add_epi32(voutzp, _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(vout1), vmult))); + vout2 = _mm256_add_epi32(voutzp, _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(vout2), vmult))); + vout3 = _mm256_add_epi32(voutzp, _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(vout3), vmult))); + + vout0 = _mm256_min_epi32(_mm256_max_epi32(vout0, outmin), outmax); + vout1 = _mm256_min_epi32(_mm256_max_epi32(vout1, outmin), outmax); + vout2 = 
_mm256_min_epi32(_mm256_max_epi32(vout2, outmin), outmax); + vout3 = _mm256_min_epi32(_mm256_max_epi32(vout3, outmin), outmax); + + _mm256_storeu_si256((__m256i*)(outptr + out_j), vout0); + _mm256_storeu_si256((__m256i*)(outptr + out_j + 8), vout1); + _mm256_storeu_si256((__m256i*)(outptr + out_j + 16), vout2); + _mm256_storeu_si256((__m256i*)(outptr + out_j + 24), vout3); + } + } + else + { + for( ; out_j < outW1; out_j += VECSZ ) + { + if (out_j + VECSZ > outW1) + { + if (out_j <= pad_l) + break; + out_j = outW1 - VECSZ; + } + int in_j = out_j * stride_w - pad_l; + __m256i v00, v01, v02, v10, v11, v12, v20, v21, v22, unused; + _mm256_load_deinterleave(imgptr0 + in_j, v00, v01); + _mm256_load_deinterleave(imgptr0 + in_j + 2, v02, unused); + _mm256_load_deinterleave(imgptr1 + in_j, v10, v11); + _mm256_load_deinterleave(imgptr1 + in_j + 2, v12, unused); + _mm256_load_deinterleave(imgptr2 + in_j, v20, v21); + _mm256_load_deinterleave(imgptr2 + in_j + 2, v22, unused); + + vout0 = vout1 = vout2 = vout3 = vbias; + _mm256_expand_mul_add(v00, vw00, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v01, vw01, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v02, vw02, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v10, vw10, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v11, vw11, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v12, vw12, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v20, vw20, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v21, vw21, vout0, vout1, vout2, vout3); + _mm256_expand_mul_add(v22, vw22, vout0, vout1, vout2, vout3); + + vout0 = _mm256_add_epi32(voutzp, _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(vout0), vmult))); + vout1 = _mm256_add_epi32(voutzp, _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(vout1), vmult))); + vout2 = _mm256_add_epi32(voutzp, _mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(vout2), vmult))); + vout3 = _mm256_add_epi32(voutzp, 
_mm256_cvtps_epi32(_mm256_mul_ps(_mm256_cvtepi32_ps(vout3), vmult))); + + vout0 = _mm256_min_epi32(_mm256_max_epi32(vout0, outmin), outmax); + vout1 = _mm256_min_epi32(_mm256_max_epi32(vout1, outmin), outmax); + vout2 = _mm256_min_epi32(_mm256_max_epi32(vout2, outmin), outmax); + vout3 = _mm256_min_epi32(_mm256_max_epi32(vout3, outmin), outmax); + + _mm256_storeu_si256((__m256i*)(outptr + out_j), vout0); + _mm256_storeu_si256((__m256i*)(outptr + out_j + 8), vout1); + _mm256_storeu_si256((__m256i*)(outptr + out_j + 16), vout2); + _mm256_storeu_si256((__m256i*)(outptr + out_j + 24), vout3); + } + } + } + + for (; out_j < outW1; out_j++) + { + int in_j = out_j * stride_w - pad_l; + out = (int)imgptr0[in_j]*w00 + (int)imgptr0[in_j + dilation_w]*w01 + (int)imgptr0[in_j + dilation_w*2]*w02 + + (int)imgptr1[in_j]*w10 + (int)imgptr1[in_j + dilation_w]*w11 + (int)imgptr1[in_j + dilation_w*2]*w12 + + (int)imgptr2[in_j]*w20 + (int)imgptr2[in_j + dilation_w]*w21 + (int)imgptr2[in_j + dilation_w*2]*w22 + biasCopy; + outptr[out_j] = std::min(std::max(outZp + (int)std::round(out*mult), -128), 127); + } + + for (; out_j < outW; out_j++ ) + { + int in_j0 = out_j * stride_w - pad_l, in_j1 = in_j0 + dilation_w, in_j2 = in_j0 + dilation_w*2; + int s0 = 1, s1 = 1, s2 = 1; + if (in_j0 >= width) + { + in_j0 = 0; + s0 = 0; + biasCopy += inpZp*(w00 + w10 + w20); + } + if (in_j1 >= width) + { + in_j1 = 0; + s1 = 0; + biasCopy += inpZp*(w01 + w11 + w21); + } + if (in_j2 >= width) + { + in_j2 = 0; + s2 = 0; + biasCopy += inpZp*(w02 + w12 + w22); + } + out = (int)imgptr0[in_j0]*w00*s0 + (int)imgptr0[in_j1]*w01*s1 + (int)imgptr0[in_j2]*w02*s2 + + (int)imgptr1[in_j0]*w10*s0 + (int)imgptr1[in_j1]*w11*s1 + (int)imgptr1[in_j2]*w12*s2 + + (int)imgptr2[in_j0]*w20*s0 + (int)imgptr2[in_j1]*w21*s1 + (int)imgptr2[in_j2]*w22*s2 + biasCopy; + outptr[out_j] = std::min(std::max(outZp + (int)std::round(out*mult), -128), 127); + } + } + _mm256_zeroupper(); +} + +// dst = vec * weights^t + bias +void 
fastGEMM1T( const int8_t* vec, const int8_t* weights, + size_t wstep, const int* bias, const float* multiplier, + int* dst, int nvecs, int vecsize, int outZp ) +{ + int i = 0; + + for( ; i <= nvecs - 8; i += 8 ) + { + const int8_t* wptr = weights + i*wstep; + __m256i vs0 = _mm256_setzero_si256(), vs1 = _mm256_setzero_si256(), + vs2 = _mm256_setzero_si256(), vs3 = _mm256_setzero_si256(), + vs4 = _mm256_setzero_si256(), vs5 = _mm256_setzero_si256(), + vs6 = _mm256_setzero_si256(), vs7 = _mm256_setzero_si256(); + + __m128i voutzp = _mm_set1_epi32(outZp); + __m128i outmin = _mm_set1_epi32(-128), outmax = _mm_set1_epi32(127); + + for( int k = 0; k < vecsize; k += 32, wptr += 32 ) + { + __m256i v = _mm256_load_si256((const __m256i*)(vec + k)); + + vs0 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)wptr), v, vs0); + vs1 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)(wptr + wstep)), v, vs1); + vs2 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)(wptr + wstep*2)), v, vs2); + vs3 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)(wptr + wstep*3)), v, vs3); + vs4 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)(wptr + wstep*4)), v, vs4); + vs5 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)(wptr + wstep*5)), v, vs5); + vs6 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)(wptr + wstep*6)), v, vs6); + vs7 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)(wptr + wstep*7)), v, vs7); + } + + __m256i s0 = _mm256_hadd_epi32(_mm256_hadd_epi32(vs0, vs1), _mm256_hadd_epi32(vs2, vs3)); + __m256i s1 = _mm256_hadd_epi32(_mm256_hadd_epi32(vs4, vs5), _mm256_hadd_epi32(vs6, vs7)); + + s0 = _mm256_add_epi32(s0, _mm256_permute2x128_si256(s0, s0, 1)); + s1 = _mm256_add_epi32(s1, _mm256_permute2x128_si256(s1, s1, 1)); + + __m128i t0 = _mm_add_epi32(_mm256_castsi256_si128(s0), _mm_loadu_si128((__m128i*)(bias + i))); + __m128i t1 = _mm_add_epi32(_mm256_castsi256_si128(s1), _mm_loadu_si128((__m128i*)(bias + i + 
4))); + + t0 = _mm_add_epi32(voutzp, _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(t0), _mm_loadu_ps(multiplier + i)))); + t1 = _mm_add_epi32(voutzp, _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(t1), _mm_loadu_ps(multiplier + i + 4)))); + + t0 = _mm_min_epi32(_mm_max_epi32(t0, outmin), outmax); + t1 = _mm_min_epi32(_mm_max_epi32(t1, outmin), outmax); + + _mm_storeu_si128((__m128i*)(dst + i), t0); + _mm_storeu_si128((__m128i*)(dst + i + 4), t1); + } + + for( ; i < nvecs; i++ ) + { + const int8_t* wptr = weights + i*wstep; + __m256i vs0 = _mm256_setzero_si256(); + + for( int k = 0; k < vecsize; k += 32, wptr += 32 ) + { + __m256i v = _mm256_load_si256((const __m256i*)(vec + k)); + vs0 = _mm256_fmaddepi8_epi32(_mm256_load_si256((const __m256i*)wptr), v, vs0); + } + + __m256i s0 = _mm256_hadd_epi32(_mm256_hadd_epi32(vs0, vs0), vs0); + s0 = _mm256_add_epi32(s0, _mm256_permute2x128_si256(s0, s0, 1)); + int temp = _mm_extract_epi32(_mm256_castsi256_si128(s0), 0); + dst[i] = outZp + (int)std::round((temp + bias[i]) * multiplier[i]); + } + + _mm256_zeroupper(); +} +#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY + +CV_CPU_OPTIMIZATION_NAMESPACE_END +}} // namespace diff --git a/modules/dnn/src/int8layers/pooling_layer.cpp b/modules/dnn/src/int8layers/pooling_layer.cpp new file mode 100644 index 000000000000..20a0486a4625 --- /dev/null +++ b/modules/dnn/src/int8layers/pooling_layer.cpp @@ -0,0 +1,595 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include "layers_common.hpp" +#include "opencv2/core/hal/intrin.hpp" + +#include +#include +#include +using std::max; +using std::min; + +namespace cv +{ +namespace dnn +{ + +class PoolingLayerInt8Impl CV_FINAL : public PoolingLayerInt8 +{ +public: + PoolingLayerInt8Impl(const LayerParams& params) + { + computeMaxIdx = false; + globalPooling = false; + isGlobalPooling = std::vector(3, false); + output_zp = params.get("zeropoints"); + input_zp = params.get("input_zeropoint", 0); + multiplier = params.get("multiplier", 1.f); + + hasDynamicShapes = params.get("has_dynamic_shapes", false); + shapesInitialized = !hasDynamicShapes; + + if (params.has("pool") || params.has("kernel_size") || + params.has("kernel_w") || params.has("kernel_h")) + { + String pool = toLowerCase(params.get("pool", "max")); + if (pool == "max") + type = MAX; + else if (pool == "ave") + type = AVE; + else if (pool == "sum") + type = SUM; + else + CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\""); + + getPoolingKernelParams(params, kernel_size, isGlobalPooling, pads_begin, pads_end, strides, padMode); + globalPooling = isGlobalPooling[0] || isGlobalPooling[1] || isGlobalPooling[2]; + } + else + CV_Error(Error::StsBadArg, "Cannot determine pooling type"); + setParamsFrom(params); + ceilMode = params.get("ceil_mode", true); + spatialScale = params.get("spatial_scale", 1); + avePoolPaddedArea = params.get("ave_pool_padded_area", true); + } + + void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + { + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + CV_Assert(!inputs.empty()); + CV_Assert(outputs.size() == 1); + + std::vector inp; + std::vector out; + for (int i = 2; i < inputs[0].dims; i++) { + inp.push_back(inputs[0].size[i]); + out.push_back(outputs[0].size[i]); + } + if (globalPooling) { + std::vector finalKernel; + for (int i = 0; i < inp.size(); i++) { + 
int idx = isGlobalPooling.size() - inp.size() + i; + finalKernel.push_back(isGlobalPooling[idx] ? inp[i] : kernel_size[idx]); + } + kernel_size = finalKernel; + } + + getConvPoolPaddings(inp, kernel_size, strides, padMode, pads_begin, pads_end); + + if (inputs[0].dims == 3) + { + // Pool1D + kernel_size.assign(1, kernel_size[0]); + strides.assign(1, strides[0]); + pads_begin.assign(1, pads_begin[0]); + pads_end.assign(1, pads_end[0]); + } + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + if (backendId == DNN_BACKEND_OPENCV) + { + if (kernel_size.size() == 3) + return preferableTarget == DNN_TARGET_CPU; + if (kernel_size.size() <= 2) + return true; + else + return false; + } + return false; + } + + bool setActivation(const Ptr& layer) CV_OVERRIDE + { + Ptr activ_int8 = layer.dynamicCast(); + if (!activ_int8.empty()) + { + return activ_int8->blobs.empty(); + } + return false; + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + switch (type) + { + case MAX: + { + CV_Assert_N(inputs.size() == 1, outputs.size() == 1); + maxPooling(inputs[0], outputs[0]); + break; + } + case AVE: case SUM: + CV_Assert_N(inputs.size() == 1, outputs.size() == 1); + avePooling(inputs[0], outputs[0]); + break; + default: + CV_Error(Error::StsNotImplemented, "Not implemented"); + break; + } + } + + class PoolingInvoker : public ParallelLoopBody + { + public: + const Mat* src, *rois; + Mat *dst; + int pad_l, pad_t, pad_r, pad_b; + bool avePoolPaddedArea; + int nstripes, inpZp, outZp; + std::vector ofsbuf; + int poolingType; + float multiplier; + float spatialScale; + + std::vector pads_begin, pads_end; + std::vector kernel_size; + std::vector strides; + + PoolingInvoker() : src(0), rois(0), dst(0), pad_l(0), 
pad_t(0), pad_r(0), pad_b(0), + avePoolPaddedArea(false), nstripes(0), inpZp(0), outZp(0), + poolingType(MAX), multiplier(1), spatialScale(0){} + + static void run(const Mat& src, const Mat& rois, Mat& dst, + std::vector kernel_size, std::vector strides, + std::vector pads_begin, std::vector pads_end, + bool avePoolPaddedArea, int poolingType, float spatialScale, + float multiplier, int inpZp, int outZp, int nstripes) + { + CV_Assert_N( + src.isContinuous(), dst.isContinuous(), + src.type() == CV_8S, src.type() == dst.type(), + src.dims == 3 || src.dims == 4 || src.dims == 5, dst.dims == 3 || dst.dims == 4 || dst.dims == 5, + src.size[0] == dst.size[0], src.size[1] == dst.size[1], rois.empty()); + + PoolingInvoker p; + + bool isPool1D = src.dims == 3; + bool isPool3D = src.dims == 5; + + p.src = &src; + p.rois = &rois; + p.dst = &dst; + + p.kernel_size = kernel_size; + p.strides = strides; + p.pads_begin = pads_begin; + p.pads_end = pads_end; + + p.pad_l = pads_begin.back(); + p.pad_t = isPool1D ? 0 : pads_begin[pads_begin.size() - 2]; + p.pad_r = pads_end.back(); + p.pad_b = isPool1D ? 0 : pads_end[pads_end.size() - 2]; + + p.avePoolPaddedArea = avePoolPaddedArea; + p.nstripes = nstripes; + p.inpZp = inpZp; + p.outZp = outZp; + p.poolingType = poolingType; + p.spatialScale = spatialScale; + p.multiplier = multiplier; + + int height = isPool1D ? 1 : src.size[src.dims - 2]; + int width = src.size[src.dims - 1]; + + int kernel_d = isPool3D ? kernel_size[0] : 1; + int kernel_h = isPool1D ? 
1 : kernel_size[kernel_size.size() - 2]; + int kernel_w = kernel_size.back(); + + p.ofsbuf.resize(kernel_d * kernel_h * kernel_w); + for (int i = 0; i < kernel_d; ++i) { + for (int j = 0; j < kernel_h; ++j) { + for (int k = 0; k < kernel_w; ++k) { + p.ofsbuf[i * kernel_h * kernel_w + j * kernel_w + k] = width * height * i + width * j + k; + } + } + } + + parallel_for_(Range(0, nstripes), p, nstripes); + } + + void operator()(const Range& r) const CV_OVERRIDE + { + int channels = dst->size[1]; + + bool isPool3D = src->dims == 5; + bool isPool2D = src->dims == 4; + bool isPool1D = src->dims == 3; + int depth = isPool3D? dst->size[2] : 1; + int height = isPool1D? 1 : dst->size[dst->dims - 2]; + int width = dst->size[dst->dims - 1]; + + int inp_depth = isPool3D? src->size[2] : 1; + int inp_height = isPool1D? 1 : src->size[src->dims - 2]; + int inp_width = src->size[src->dims - 1]; + + size_t total = dst->total(); + size_t stripeSize = (total + nstripes - 1)/nstripes; + size_t stripeStart = r.start*stripeSize; + size_t stripeEnd = std::min(r.end*stripeSize, total); + + int kernel_d = isPool3D? kernel_size[0] : 1; + int kernel_h = isPool1D? 1 : kernel_size[kernel_size.size() - 2]; + int kernel_w = kernel_size.back(); + + int stride_d = isPool3D? strides[0] : 0; + int stride_h = isPool1D? 1 :strides[strides.size() - 2]; + int stride_w = strides.back(); + +#if CV_SIMD128 + const int* ofsptr = (const int*)&ofsbuf[0]; + if (poolingType == MAX && !ofsptr) + CV_Error(Error::StsBadArg, "ofsbuf should be initialized in this mode"); +#endif + + for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; ) + { + size_t ofs = ofs0; + int x0 = (int)(ofs % width); + ofs /= width; + int y0 = (int)(ofs % height); + ofs /= height; + + int d0 = (int)(ofs % depth); + ofs /= depth; + + int c = (int)(ofs % channels); + int n = (int)(ofs / channels); + int ystart, yend; + int dstart = 0, dend = 1; + + const int8_t *srcData = 0; + int pad_d_begin = (pads_begin.size() == 3) ? 
pads_begin[0] : 0; + dstart = d0 * stride_d - pad_d_begin; + dend = min(dstart + kernel_d, (int)(inp_depth + pads_end[0])); + + ystart = y0 * stride_h - pad_t; + yend = min(ystart + kernel_h, inp_height + pad_b); + srcData = src->ptr(n, c); + + int ddelta = dend - dstart; + dstart = max(dstart, 0); + dend = min(dend, inp_depth); + int ydelta = yend - ystart; + ystart = max(ystart, 0); + yend = min(yend, inp_height); + int8_t *dstData = &dst->ptr(n, c, d0)[y0 * width]; + + int delta = std::min((int)(stripeEnd - ofs0), width - x0); + ofs0 += delta; + int x1 = x0 + delta; + + if( poolingType == MAX ) + for( ; x0 < x1; x0++ ) + { + int xstart = x0 * stride_w - pad_l; + int xend = min(xstart + kernel_w, inp_width); + xstart = max(xstart, 0); + if (xstart >= xend || ystart >= yend) + { + dstData[x0] = (int8_t)outZp; + continue; + } +#if CV_SIMD128 + if( isPool2D && xstart > 0 && x0 + 15 < x1 && (x0 + 15) * stride_w - pad_l + kernel_w < inp_width ) + { + v_int8x16 max_val0 = v_setall_s8(-128); + if( yend - ystart == kernel_h ) + { + const int8_t* srcData1 = srcData + ystart*inp_width + xstart; + if( stride_w == 1 ) + for (int k = 0; k < kernel_w*kernel_h; k++) + { + int index = ofsptr[k]; + v_int8x16 v0 = v_load(srcData1 + index); + max_val0 = v_max(max_val0, v0); + } + else if( stride_w == 2 ) + for (int k = 0; k < kernel_w*kernel_h; k++) + { + int index = ofsptr[k]; + v_int8x16 v0, dummy; + v_load_deinterleave(srcData1 + index, v0, dummy); + max_val0 = v_max(max_val0, v0); + } + else + for (int k = 0; k < kernel_w*kernel_h; k++) + { + int index = ofsptr[k]; + v_int8x16 v0(srcData1[index], srcData1[index + stride_w], + srcData1[index + stride_w*2], srcData1[index + stride_w*3], + srcData1[index + stride_w*4], srcData1[index + stride_w*5], + srcData1[index + stride_w*6], srcData1[index + stride_w*7], + srcData1[index + stride_w*8], srcData1[index + stride_w*9], + srcData1[index + stride_w*10], srcData1[index + stride_w*11], + srcData1[index + stride_w*12], srcData1[index 
+ stride_w*13], + srcData1[index + stride_w*14], srcData1[index + stride_w*15]); + max_val0 = v_max(max_val0, v0); + } + } + else + { + for (int y = ystart; y < yend; ++y) + { + for (int x = xstart; x < xend; ++x) + { + const int index = y * inp_width + x; + v_int8x16 v0(srcData[index], srcData[index + stride_w], + srcData[index + stride_w*2], srcData[index + stride_w*3], + srcData[index + stride_w*4], srcData[index + stride_w*5], + srcData[index + stride_w*6], srcData[index + stride_w*7], + srcData[index + stride_w*8], srcData[index + stride_w*9], + srcData[index + stride_w*10], srcData[index + stride_w*11], + srcData[index + stride_w*12], srcData[index + stride_w*13], + srcData[index + stride_w*14], srcData[index + stride_w*15]); + max_val0 = v_max(max_val0, v0); + } + } + } + v_store(dstData + x0, max_val0); + x0 += 15; + } + else +#else + CV_UNUSED(isPool2D); +#endif + if( isPool1D ) + { + const int8_t* first = srcData + xstart; + const int8_t* last = srcData + xend; + const int8_t* max_elem = std::max_element(first, last); + if (max_elem != last) + dstData[x0] = *max_elem; + } + else + { + int8_t max_val = -128; + for (int d = dstart; d < dend; ++d) { + for (int y = ystart; y < yend; ++y) { + for (int x = xstart; x < xend; ++x) { + const int index = d * inp_width * inp_height + y * inp_width + x; + int8_t val = srcData[index]; + max_val = std::max(max_val, val); + } + } + } + dstData[x0] = max_val; + } + } + else if (poolingType == AVE || poolingType == SUM) + { + for( ; x0 < x1; ++x0) + { + int xstart = x0 * stride_w - pad_l; + int xend = min(xstart + kernel_w, inp_width + pad_r); + int xdelta = xend - xstart; + xstart = max(xstart, 0); + xend = min(xend, inp_width); + + int real_kernel_area = (dend - dstart) * (yend - ystart) * (xend - xstart); + int padded_kernel_area = xdelta * ydelta * ddelta; + int kernel_area = avePoolPaddedArea ? padded_kernel_area : real_kernel_area; + + int bias = (avePoolPaddedArea ? 
(padded_kernel_area - real_kernel_area) * inpZp : 0) + - (inpZp * kernel_area); + float inv_kernel_area = poolingType == AVE ? multiplier / kernel_area : multiplier; +#if CV_SIMD128 + if( isPool2D && xstart > 0 && x0 + 15 < x1 && (x0 + 15) * stride_w - pad_l + kernel_w < inp_width ) + { + v_int32x4 sum_val0 = v_setall_s32(bias), sum_val1 = v_setall_s32(bias), + sum_val2 = v_setall_s32(bias), sum_val3 = v_setall_s32(bias), + voutzp = v_setall_s32(outZp); + v_float32x4 ikarea = v_setall_f32(inv_kernel_area); + + for (int y = ystart; y < yend; ++y) + { + for (int x = xstart; x < xend; ++x) + { + const int index = y * inp_width + x; + v_int32x4 v0((int)srcData[index], (int)srcData[index + stride_w], + (int)srcData[index + stride_w*2], (int)srcData[index + stride_w*3]); + v_int32x4 v1((int)srcData[index + stride_w*4], (int)srcData[index + stride_w*5], + (int)srcData[index + stride_w*6], (int)srcData[index + stride_w*7]); + v_int32x4 v2((int)srcData[index + stride_w*8], (int)srcData[index + stride_w*9], + (int)srcData[index + stride_w*10], (int)srcData[index + stride_w*11]); + v_int32x4 v3((int)srcData[index + stride_w*12], (int)srcData[index + stride_w*13], + (int)srcData[index + stride_w*14], (int)srcData[index + stride_w*15]); + sum_val0 += v0; + sum_val1 += v1; + sum_val2 += v2; + sum_val3 += v3; + } + } + + sum_val0 = v_round(v_cvt_f32(sum_val0)*ikarea) + voutzp; + sum_val1 = v_round(v_cvt_f32(sum_val1)*ikarea) + voutzp; + sum_val2 = v_round(v_cvt_f32(sum_val2)*ikarea) + voutzp; + sum_val3 = v_round(v_cvt_f32(sum_val3)*ikarea) + voutzp; + + v_store(dstData + x0, v_pack(v_pack(sum_val0, sum_val1), v_pack(sum_val2, sum_val3))); + x0 += 15; + } + else +#endif + if( isPool1D ) + { + const int8_t* first = srcData + xstart; + const int8_t* last = srcData + xend; + int sum_val = bias + std::accumulate(first, last, 0); + dstData[x0] = saturate_cast(outZp + std::round(sum_val*inv_kernel_area)); + } + else + { + int sum_val = bias; + for (int d = dstart; d < dend; ++d) { + 
for (int y = ystart; y < yend; ++y) { + for (int x = xstart; x < xend; ++x) { + const int index = d * inp_width * inp_height + y * inp_width + x; + int8_t val = srcData[index]; + sum_val += (int)val; + } + } + } + dstData[x0] = saturate_cast(outZp + std::round(sum_val*inv_kernel_area)); + } + } + } + } + } + }; + + void maxPooling(Mat &src, Mat &dst) + { + const int nstripes = getNumThreads(); + Mat rois; + PoolingInvoker::run(src, rois, dst, kernel_size, strides, pads_begin, pads_end, avePoolPaddedArea, type, + spatialScale, multiplier, input_zp, output_zp, nstripes); + } + + void avePooling(Mat &src, Mat &dst) + { + const int nstripes = getNumThreads(); + Mat rois; + PoolingInvoker::run(src, rois, dst, kernel_size, strides, pads_begin, pads_end, avePoolPaddedArea, type, + spatialScale, multiplier, input_zp, output_zp, nstripes); + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() != 0); + + bool isPool1D = inputs[0].size() == 3; + std::vector inpShape(inputs[0].begin() + 2, inputs[0].end()); + std::vector outShape(inputs[0].begin(), inputs[0].begin() + 2); + + std::vector local_kernel; + if (globalPooling) { + for (int i = 0; i < inpShape.size(); i++) { + int idx = isGlobalPooling.size() - inpShape.size() + i; + local_kernel.push_back(isGlobalPooling[idx] ? inpShape[i] : kernel_size[idx]); + } + } else { + local_kernel = kernel_size; + } + + if (hasDynamicShapes && !shapesInitialized) + { + //Just copy input shapes for width and height to prevent errors on loading stage + for (int i = 0; i < inpShape.size(); i++) + outShape.push_back(inpShape[i]); + } + else if (padMode.empty()) + { + int addedDims = isPool1D? inpShape.size() : local_kernel.size(); + for (int i = 0; i < addedDims; i++) { + float dst = (float) (inpShape[i] + pads_begin[i] + pads_end[i] - local_kernel[i]) / strides[i]; + outShape.push_back(1 + (ceilMode ? 
ceil(dst) : floor(dst))); + } + + // If we have padding, ensure that the last pooling starts strictly + // inside the image (instead of at the padding); otherwise clip the last. + for (int i = 0; i < addedDims; i++) { + if (pads_end[i] && (outShape[2 + i] - 1) * strides[i] >= inpShape[i] + pads_end[i]) { + --outShape[2 + i]; + CV_Assert((outShape[2 + i] - 1) * strides[i] < inpShape[i] + pads_end[i]); + } + } + } + else { + getConvPoolOutParams(inpShape, local_kernel, strides, padMode, + std::vector(local_kernel.size(), 1), outShape); + } + + outputs.assign(1, outShape); + return false; + } + + bool updateMemoryShapes(const std::vector &inputs) CV_OVERRIDE + { + int dims = inputs[0].size(); + CV_Assert(inputs[0][dims - 1] > 0 && inputs[0][dims - 2] > 0); + shapesInitialized = true; + return true; + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(inputs); // suppress unused variable warning + long flops = 0; + bool isPool1D = inputs[0].size() == 3; + size_t karea = std::accumulate(kernel_size.begin(), isPool1D? 
kernel_size.begin() + 1 : kernel_size.end(), + 1, std::multiplies()); + for(int i = 0; i < outputs.size(); i++) + { + if (type == MAX) + { + if (i%2 == 0) + flops += total(outputs[i])*karea; + } + else + { + flops += total(outputs[i])*(karea + 1); + } + } + return flops; + } +private: + enum Type + { + MAX, + AVE, + STOCHASTIC, + SUM, + ROI, // RoI pooling, https://arxiv.org/pdf/1504.08083.pdf + PSROI // Position-sensitive RoI pooling, https://arxiv.org/pdf/1605.06409.pdf + }; + bool hasDynamicShapes; + bool shapesInitialized; + float multiplier; +}; + +Ptr PoolingLayerInt8::create(const LayerParams& params) +{ + return Ptr(new PoolingLayerInt8Impl(params)); +} + +} +} diff --git a/modules/dnn/src/int8layers/quantize_dequantize_layer.cpp b/modules/dnn/src/int8layers/quantize_dequantize_layer.cpp new file mode 100644 index 000000000000..2ddb76a0e80d --- /dev/null +++ b/modules/dnn/src/int8layers/quantize_dequantize_layer.cpp @@ -0,0 +1,157 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include "layers_common.hpp" + +namespace cv +{ +namespace dnn +{ + +class QuantizeLayerImpl CV_FINAL : public QuantizeLayer +{ +public: + QuantizeLayerImpl(const LayerParams& params) + { + scale = params.get("scales", 1.0f); + zeropoint = params.get("zeropoints", 0); + setParamsFrom(params); + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() == 1); + Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + return false; + } + + virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + { + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + } + +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + { + std::vector inputs, outputs; + inputs_.getUMatVector(inputs); + outputs_.getUMatVector(outputs); + + if (inputs_.depth() == CV_16S) + { + UMat inputFp32(shape(inputs[0]), CV_32F); + convertFp16(inputs[0], inputFp32); + inputFp32.copyTo(inputs[0]); + } + + inputs[0].convertTo(outputs[0], CV_8S, 1.f/scale, zeropoint); + return true; + } +#endif + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + inputs[0].convertTo(outputs[0], CV_8S, 1.f/scale, zeropoint); + } +}; + +class DequantizeLayerImpl CV_FINAL : public DequantizeLayer +{ +public: + 
DequantizeLayerImpl(const LayerParams& params) + { + scale = params.get("scales", 1.0f); + zeropoint = params.get("zeropoints", 0); + setParamsFrom(params); + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + CV_Assert(inputs.size() == 1); + Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + return false; + } + + virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr) CV_OVERRIDE + { + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + } + +#ifdef HAVE_OPENCL + bool forward_ocl(InputArrayOfArrays inputs_, OutputArrayOfArrays outputs_, OutputArrayOfArrays internals_) + { + std::vector inputs, outputs; + inputs_.getUMatVector(inputs); + outputs_.getUMatVector(outputs); + + UMat outputFp32(shape(outputs[0]), CV_32F); + inputs[0].convertTo(outputFp32, CV_32F, scale, -(scale*zeropoint)); + + if (outputs_.depth() == CV_16S) + convertFp16(outputFp32, outputs[0]); + else + outputFp32.copyTo(outputs[0]); + return true; + } +#endif + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), + forward_ocl(inputs_arr, outputs_arr, internals_arr)) + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + inputs[0].convertTo(outputs[0], CV_32F, scale, -(scale*zeropoint)); + } +}; + +Ptr QuantizeLayer::create(const LayerParams& params) +{ + return Ptr(new QuantizeLayerImpl(params)); +} + +Ptr DequantizeLayer::create(const LayerParams& params) +{ + return Ptr(new DequantizeLayerImpl(params)); +} + +} +} diff --git 
a/modules/dnn/src/int8layers/scale_layer.cpp b/modules/dnn/src/int8layers/scale_layer.cpp new file mode 100644 index 000000000000..d7f676d047ab --- /dev/null +++ b/modules/dnn/src/int8layers/scale_layer.cpp @@ -0,0 +1,211 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "../precomp.hpp" +#include "layers_common.hpp" +#include +#include + +namespace cv +{ +namespace dnn +{ + +class ScaleLayerInt8Impl CV_FINAL : public ScaleLayerInt8 +{ +public: + Mat weights, bias; + ScaleLayerInt8Impl(const LayerParams& params) + { + setParamsFrom(params); + hasBias = params.get("bias_term", false); + axis = params.get("axis", 1); + hasWeights = false; + + output_sc = params.get("scales"); + output_zp = params.get("zeropoints"); + + DictValue inpSc = params.get("input_scales"); + DictValue inpZp = params.get("input_zeropoints"); + + for (int i = 0; i < inpSc.size(); i++) + { + inp_sc.push_back(inpSc.get(i)); + inp_zp.push_back(inpZp.get(i)); + } + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + outputs.assign(1, inputs[0]); + return true; + } + + virtual void finalize(InputArrayOfArrays inputs_arr, OutputArrayOfArrays) CV_OVERRIDE + { + std::vector inputs; + inputs_arr.getMatVector(inputs); + hasWeights = blobs.size() == 2 || (blobs.size() <= 1 && !hasBias); + CV_Assert((inputs.size() == 2 && blobs.empty()) || blobs.size() == (int)hasWeights + (int)hasBias); + + if (!blobs.empty()) + { + Mat w = hasWeights ? blobs[0] : Mat::ones(blobs[0].size(), CV_32F); + Mat b = hasBias ? 
blobs.back() : Mat::zeros(blobs.back().size(), CV_32F); + + w = w.reshape(1, 1); + b = b.reshape(1, 1); + + w.convertTo(weights, CV_32F, inp_sc[0]/output_sc); + addWeighted(b, 1.0/output_sc, weights, -inp_zp[0], output_zp, bias, CV_32F); + } + else + { + // initialized during forward() + weights = Mat(); bias = Mat(); + } + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + bool setActivation(const Ptr& layer) CV_OVERRIDE + { + Ptr activ_int8 = layer.dynamicCast(); + if (!activ_int8.empty()) + { + return activ_int8->blobs.empty(); + } + return false; + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector inputs, outputs; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + + Mat &inpBlob = inputs[0]; + Mat &outBlob = outputs[0]; + + if (blobs.empty()) + { + CV_Assert(inp_sc.size() == 2 && inp_zp.size() == 2); + Mat inp_dequantized, w, b; + inputs[1].reshape(1, 1).convertTo(inp_dequantized, CV_32F, inp_sc[1], -(inp_sc[1]*inp_zp[1])); + w = hasWeights ? inp_dequantized : Mat::ones(inp_dequantized.size(), CV_32F); + b = hasBias ? 
inp_dequantized : Mat::zeros(inp_dequantized.size(), CV_32F); + + w.convertTo(weights, CV_32F, inp_sc[0]/output_sc); + addWeighted(b, 1.0/output_sc, weights, -inp_zp[0], output_zp, bias, CV_32F); + } + + MatShape inpShape = shape(inpBlob); + const int numWeights = weights.total(); + CV_Assert(numWeights != 0); + CV_CheckEQ(weights.total(), bias.total(), "Incompatible weights/bias blobs"); + + int endAxis; + for (endAxis = axis + 1; endAxis <= inpBlob.dims; ++endAxis) + { + if (total(inpShape, axis, endAxis) == numWeights) + break; + } + CV_Assert(total(inpShape, axis, endAxis) == numWeights); + CV_CheckTypeEQ(inpBlob.type(), CV_8SC1, ""); CV_CheckTypeEQ(outBlob.type(), CV_8SC1, ""); + + int numSlices = total(inpShape, 0, axis); + int8_t* inpData = (int8_t*)inpBlob.data; + int8_t* outData = (int8_t*)outBlob.data; + + if (endAxis != inpBlob.dims) + { + float* weightsData = (float*)weights.data; + float* biasesData = (float*)bias.data; + int spatialSize = total(inpShape, endAxis); // spatialSize != 1 + for (int i = 0; i < numSlices; ++i) + { + for (int j = 0; j < numWeights; ++j) + { + float w = weightsData[j]; + float b = biasesData[j]; + Mat inpSlice(1, spatialSize, CV_8S, inpData); + Mat outSlice(1, spatialSize, CV_8S, outData); + inpSlice.convertTo(outSlice, CV_8S, w, b); + inpData += spatialSize; + outData += spatialSize; + } + } + } + else + { + for (int i = 0; i < numSlices; ++i) + { + Mat inpSlice(1, numWeights, CV_8S, inpData); + Mat outSlice(1, numWeights, CV_8S, outData); + + multiply(inpSlice, weights, outSlice, 1.0, CV_8S); + add(outSlice, bias, outSlice, Mat(), CV_8S); + + inpData += numWeights; + outData += numWeights; + } + } + } + + void getScaleShift(Mat& scale, Mat& shift) const CV_OVERRIDE + { + scale = (hasWeights && !blobs.empty()) ? blobs[0] : Mat(); + shift = (hasBias && !blobs.empty()) ? 
blobs.back() : Mat(); + } + + void getScaleZeropoint(float& scale, int& zeropoint) const CV_OVERRIDE + { + scale = output_sc; + zeropoint = output_zp; + } + + virtual int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(outputs); // suppress unused variable warning + long flops = 0; + for(int i = 0; i < inputs.size(); i++) + { + flops += 2*total(inputs[i]); + } + return flops; + } + +private: + bool hasWeights; + std::vector inp_sc; + std::vector inp_zp; +}; + + +Ptr ScaleLayerInt8::create(const LayerParams& params) +{ + return Ptr(new ScaleLayerInt8Impl(params)); +} + +Ptr ShiftLayerInt8::create(const LayerParams& params) +{ + LayerParams scaleParams = params; + scaleParams.type = "ScaleInt8"; + scaleParams.set("bias_term", true); + scaleParams.set("axis", 0); + return Ptr(new ScaleLayerInt8Impl(scaleParams)); +} + +} // namespace dnn +} // namespace cv diff --git a/modules/dnn/src/int8layers/softmax_layer.cpp b/modules/dnn/src/int8layers/softmax_layer.cpp new file mode 100644 index 000000000000..7e3c82bc21ab --- /dev/null +++ b/modules/dnn/src/int8layers/softmax_layer.cpp @@ -0,0 +1,176 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "../precomp.hpp" +#include "layers_common.hpp" + +#include +#include + +namespace cv +{ +namespace dnn +{ + +class SoftMaxLayerInt8Impl CV_FINAL : public SoftmaxLayerInt8 +{ +public: + + SoftMaxLayerInt8Impl(const LayerParams& params) + { + axisRaw = params.get("axis", 1); + logSoftMax = params.get("log_softmax", false); + output_sc = params.get("scales"); + output_zp = params.get("zeropoints"); + setParamsFrom(params); + } + + bool getMemoryShapes(const std::vector &inputs, + const int requiredOutputs, + std::vector &outputs, + std::vector &internals) const CV_OVERRIDE + { + bool inplace = Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals); + MatShape shape = inputs[0]; + int cAxis = normalize_axis(axisRaw, shape.size()); + shape[cAxis] = 1; + internals.assign(1, shape); + return inplace; + } + + virtual bool supportBackend(int backendId) CV_OVERRIDE + { + return backendId == DNN_BACKEND_OPENCV; + } + + virtual bool tryFuse(Ptr& top) CV_OVERRIDE + { + Ptr dequantize_layer = top.dynamicCast(); + return !dequantize_layer.empty() && preferableTarget != DNN_TARGET_OPENCL_FP16; + } + + void forward(InputArrayOfArrays inputs_arr, OutputArrayOfArrays outputs_arr, OutputArrayOfArrays internals_arr) CV_OVERRIDE + { + CV_TRACE_FUNCTION(); + CV_TRACE_ARG_VALUE(name, "name", name.c_str()); + + std::vector inputs, outputs, internals; + inputs_arr.getMatVector(inputs); + outputs_arr.getMatVector(outputs); + internals_arr.getMatVector(internals); + + const Mat &src = inputs[0]; + Mat &dst = outputs[0]; + + int axis = normalize_axis(axisRaw, src.dims); + size_t outerSize = src.total(0, axis), channels = src.size[axis], + innerSize = src.total(axis + 1); + + CV_Assert(src.type() == CV_8S && (dst.type() == CV_8S || dst.type() == CV_32F)); + CV_Assert(src.isContinuous() && dst.isContinuous()); + + size_t outerStep = src.total(axis); + size_t cnStep = src.total(axis + 1); + const int8_t *srcPtr = src.ptr(); + const float *expPtr = blobs[0].ptr(); + + if 
(dst.type() == CV_32F) + { + float *dstPtr = dst.ptr(); + for (size_t outerDim = 0; outerDim < outerSize; outerDim++) + { + size_t srcOffset = outerDim * outerStep; + std::vector expSum(innerSize, 0.f); + + // sum exp along axis + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + const int offset = srcOffset + cnDim * cnStep; + for (size_t i = 0; i < innerSize; i++) + expSum[i] += expPtr[srcPtr[offset + i] + 128]; + } + + // divide by computed sum + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + const int offset = srcOffset + cnDim * cnStep; + for (size_t i = 0; i < innerSize; i++) + dstPtr[offset + i] = expPtr[srcPtr[offset + i] + 128]/expSum[i]; + } + + if (logSoftMax) + { + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + const int offset = srcOffset + cnDim * cnStep; + for (size_t i = 0; i < innerSize; i++) + dstPtr[offset + i] = log(dstPtr[offset + i]); + } + } + } + } + else + { + const float inv_scale = 1.f/output_sc; + int8_t *dstPtr = dst.ptr(); + for (size_t outerDim = 0; outerDim < outerSize; outerDim++) + { + size_t srcOffset = outerDim * outerStep; + std::vector expSum(innerSize, 0.f); + + // sum exp along axis + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + const int offset = srcOffset + cnDim * cnStep; + for (size_t i = 0; i < innerSize; i++) + expSum[i] += expPtr[srcPtr[offset + i] + 128]; + } + + // divide by computed sum and quantize to int8 + if (logSoftMax) + { + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + const int offset = srcOffset + cnDim * cnStep; + for (size_t i = 0; i < innerSize; i++) + dstPtr[offset + i] = saturate_cast(output_zp + std::round(inv_scale*log(expPtr[srcPtr[offset + i] + 128]/expSum[i]))); + } + } + else + { + for (size_t cnDim = 0; cnDim < channels; cnDim++) + { + const int offset = srcOffset + cnDim * cnStep; + for (size_t i = 0; i < innerSize; i++) + dstPtr[offset + i] = saturate_cast(output_zp + std::round(inv_scale*(expPtr[srcPtr[offset + i] + 128]/expSum[i]))); + } + } + } + 
} + } + + int64 getFLOPS(const std::vector &inputs, + const std::vector &outputs) const CV_OVERRIDE + { + CV_UNUSED(outputs); // suppress unused variable warning + int64 flops = 0; + + for (int i = 0; i < inputs.size(); i++) + { + flops += 4*total(inputs[i]); + } + + return flops; + } + + int axisRaw; +}; + +Ptr SoftmaxLayerInt8::create(const LayerParams& params) +{ + return Ptr(new SoftMaxLayerInt8Impl(params)); +} + +} +} diff --git a/modules/dnn/src/layers/batch_norm_layer.cpp b/modules/dnn/src/layers/batch_norm_layer.cpp index e28c964689b9..49804c5c13e9 100644 --- a/modules/dnn/src/layers/batch_norm_layer.cpp +++ b/modules/dnn/src/layers/batch_norm_layer.cpp @@ -409,6 +409,18 @@ class BatchNormLayerImpl CV_FINAL : public BatchNormLayer } #endif // HAVE_DNN_NGRAPH + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + params.set("input_scale", scales[0][0]); + params.set("input_zeropoint", zeropoints[0][0]); + + params.blobs.clear(); + params.blobs.push_back(origin_weights); + params.blobs.push_back(origin_bias); + return true; + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/blank_layer.cpp b/modules/dnn/src/layers/blank_layer.cpp index 5f93b458869d..59548a9c0c51 100644 --- a/modules/dnn/src/layers/blank_layer.cpp +++ b/modules/dnn/src/layers/blank_layer.cpp @@ -166,6 +166,11 @@ class BlankLayerImpl CV_FINAL : public BlankLayer } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return true; + } }; Ptr BlankLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/concat_layer.cpp b/modules/dnn/src/layers/concat_layer.cpp index a950c56167fd..536114fcd772 100644 --- a/modules/dnn/src/layers/concat_layer.cpp +++ b/modules/dnn/src/layers/concat_layer.cpp @@ -70,6 +70,7 @@ class ConcatLayerImpl 
CV_FINAL : public ConcatLayer setParamsFrom(params); axis = params.get("axis", 1); padding = params.get("padding", false); + paddingValue = params.get("padding_value", 0); } virtual bool getMemoryShapes(const std::vector &inputs, @@ -119,13 +120,14 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer (backendId == DNN_BACKEND_VKCOM && haveVulkan() && !padding); } + template class ChannelConcatInvoker : public ParallelLoopBody { public: std::vector* inputs; Mat* output; int nstripes; - std::vector chptrs; + std::vector chptrs; static void run(std::vector& inputs, Mat& output, int nstripes) { @@ -139,14 +141,14 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer for( i = 0; i < ninputs; i++ ) { Mat& inp = inputs[i]; - CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S) && + CV_Assert( inp.isContinuous() && (inp.type() == CV_32F || inp.type() == CV_16S || inp.type() == CV_8S) && inp.dims == 4 && inp.size[0] == output.size[0] && inp.size[2] == output.size[2] && inp.size[3] == output.size[3] ); nchannels += inp.size[1]; } CV_Assert( nchannels == output.size[1] ); - CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S) ); + CV_Assert( output.isContinuous() && (output.type() == CV_32F || output.type() == CV_16S || output.type() == CV_8S) ); cc.chptrs.resize(nchannels*batchsz); @@ -157,7 +159,7 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer for( int j = 0; j < batchsz; j++ ) for( int k = 0; k < inp.size[1]; k++ ) { - const float* ptr = inp.ptr(j, k); + const T* ptr = inp.ptr(j, k); cc.chptrs[ofs + j*nchannels + k] = ptr; } ofs += inp.size[1]; @@ -176,8 +178,8 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer size_t stripeSize = (total + nstripes - 1)/nstripes; size_t stripeStart = r.start*stripeSize; size_t stripeEnd = std::min(total, r.end*stripeSize); - const float** ptrs = (const float**)&chptrs[0]; - float* outptr = output->ptr(); + const T** ptrs = (const T**)&chptrs[0]; + T* outptr = 
output->ptr(); size_t blockSize0 = 1 << 16; for( size_t ofs0 = stripeStart; ofs0 < stripeEnd; ) @@ -248,7 +250,8 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && + inputs_arr.depth() != CV_8S, forward_ocl(inputs_arr, outputs_arr, internals_arr)) std::vector inputs, outputs; @@ -259,12 +262,15 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer Mat& outMat = outputs[0]; if (padding) - outMat.setTo(0); + outMat.setTo(paddingValue); if( cAxis == 1 && outMat.dims == 4 && !padding) { int nstripes = getNumThreads(); - ChannelConcatInvoker::run(inputs, outMat, nstripes); + if (outMat.type() == CV_8S) + ChannelConcatInvoker::run(inputs, outMat, nstripes); + else + ChannelConcatInvoker::run(inputs, outMat, nstripes); } else { @@ -394,6 +400,14 @@ class ConcatLayerImpl CV_FINAL : public ConcatLayer return Ptr(new InfEngineNgraphNode(concat)); } #endif // HAVE_DNN_NGRAPH + + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + if (padding) + params.set("padding_value", zeropoints[1][0]); + return true; + } }; Ptr ConcatLayer::create(const LayerParams& params) diff --git a/modules/dnn/src/layers/const_layer.cpp b/modules/dnn/src/layers/const_layer.cpp index bbea3e3f2c72..18f190b36b9a 100644 --- a/modules/dnn/src/layers/const_layer.cpp +++ b/modules/dnn/src/layers/const_layer.cpp @@ -112,6 +112,15 @@ class ConstLayerImpl CV_FINAL : public ConstLayer } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + Mat quantizedBlob; + blobs[0].convertTo(quantizedBlob, CV_8S, 1.f/scales[1][0], zeropoints[1][0]); + params.blobs.clear(); + params.blobs.push_back(quantizedBlob); + return true; + } }; Ptr ConstLayer::create(const LayerParams& params) diff 
--git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index 68c543be2477..ec2904ee69be 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -2083,6 +2083,48 @@ class ConvolutionLayerImpl CV_FINAL : public BaseConvolutionLayerImpl } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + // References - https://arxiv.org/pdf/1712.05877.pdf + + // Quantized convolution with variable weights is not supported. + if (blobs.empty()) + return false; + + float inputScale = scales[0][0], outputScale = scales[1][0]; + int inputZp = zeropoints[0][0]; + params.set("input_zeropoint", inputZp); + + Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S); + Mat biasQuantized(1, numOutput, CV_32S); + Mat outputMultiplier(1, numOutput, CV_32F); + double realMin, realMax, weightsScale; + + for( int i = 0; i < numOutput; i++ ) + { + // Quantize weights + cv::minMaxIdx(weightsMat.row(i), &realMin, &realMax); + realMin = std::min(realMin, 0.0); + realMax = std::max(realMax, 0.0); + weightsScale = (realMax == realMin) ? 
1.0 : std::max(-realMin, realMax)/127; + weightsMat.row(i).convertTo(weightsQuantized.row(i), CV_8S, 1.f/weightsScale); + + // Quantize biases + float biasScale = inputScale * weightsScale; + biasQuantized.at(i) = (int)std::round(biasvec[i]/biasScale) - inputZp*(cv::sum(weightsQuantized.row(i))[0]); + + // Store multiplier + outputMultiplier.at(i) = biasScale / outputScale; + } + + params.blobs.clear(); + params.blobs.push_back(weightsQuantized.reshape(1, shape(blobs[0]))); + params.blobs.push_back(biasQuantized); + params.blobs.push_back(outputMultiplier); + return true; + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/elementwise_layers.cpp b/modules/dnn/src/layers/elementwise_layers.cpp index 9bb5be342f76..6dc1813c8bee 100644 --- a/modules/dnn/src/layers/elementwise_layers.cpp +++ b/modules/dnn/src/layers/elementwise_layers.cpp @@ -255,6 +255,12 @@ class ElementWiseLayer : public Func::Layer } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return func.tryQuantize(scales, zeropoints, params); + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { @@ -288,6 +294,8 @@ struct BaseFunctor bool tryFuse(Ptr&) { return false; } void getScaleShift(Mat&, Mat&) const {} + + bool tryQuantize(const std::vector>&, const std::vector>&, LayerParams&) { return false; } }; struct ReLUFunctor : public BaseFunctor @@ -436,6 +444,29 @@ struct ReLUFunctor : public BaseFunctor } #endif // HAVE_VULKAN + bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) + { + if (slope != 0.f) + { + float inpScale = scales[0][0], outScale = scales[1][0]; + int inpZp = zeropoints[0][0], outZp = zeropoints[1][0]; + + Mat lookUpTable(1, 256, CV_8S); + int8_t* table = lookUpTable.ptr(); + for (int i = -128; i < 128; i++) + { + 
float x = inpScale*(i - inpZp); + float y = x >= 0.f ? x : slope*x; + int quantized = outZp + (int)std::round(y/outScale); + table[i+128] = saturate_cast(quantized); + } + params.blobs.clear(); + params.blobs.push_back(lookUpTable); + } + return true; + } + int64 getFLOPSPerElement() const { return 1; } }; @@ -559,6 +590,12 @@ struct ReLU6Functor : public BaseFunctor } #endif // HAVE_VULKAN + bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) + { + return true; + } + int64 getFLOPSPerElement() const { return 2; } }; @@ -651,6 +688,26 @@ struct TanHFunctor : public BaseFunctor } #endif // HAVE_VULKAN + bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) + { + float inpScale = scales[0][0], outScale = scales[1][0]; + int inpZp = zeropoints[0][0], outZp = zeropoints[1][0]; + + Mat lookUpTable(1, 256, CV_8S); + int8_t* table = lookUpTable.ptr(); + for (int i = -128; i < 128; i++) + { + float x = inpScale*(i - inpZp); + float y = tanh(x); + int quantized = outZp + (int)std::round(y/outScale); + table[i+128] = saturate_cast(quantized); + } + params.blobs.clear(); + params.blobs.push_back(lookUpTable); + return true; + } + int64 getFLOPSPerElement() const { return 1; } }; @@ -743,6 +800,26 @@ struct SwishFunctor : public BaseFunctor } #endif // HAVE_VULKAN + bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) + { + float inpScale = scales[0][0], outScale = scales[1][0]; + int inpZp = zeropoints[0][0], outZp = zeropoints[1][0]; + + Mat lookUpTable(1, 256, CV_8S); + int8_t* table = lookUpTable.ptr(); + for (int i = -128; i < 128; i++) + { + float x = inpScale*(i - inpZp); + float y = x / (1.0f + exp(-x)); + int quantized = outZp + (int)std::round(y/outScale); + table[i+128] = saturate_cast(quantized); + } + params.blobs.clear(); + params.blobs.push_back(lookUpTable); + return true; + } + int64 getFLOPSPerElement() const { 
return 3; }
 };
 
@@ -848,6 +925,28 @@ struct MishFunctor : public BaseFunctor
     }
 #endif // HAVE_VULKAN
 
+    bool tryQuantize(const std::vector<std::vector<float> > &scales,
+                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
+    {
+        float inpScale = scales[0][0], outScale = scales[1][0];
+        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
+
+        Mat lookUpTable(1, 256, CV_8S); // one int8 output code per possible int8 input code
+        int8_t* table = lookUpTable.ptr<int8_t>();
+        for (int i = -128; i < 128; i++)
+        {
+            float x = inpScale*(i - inpZp); // dequantized input value
+            float eX = exp(x);
+            float n = (eX + 2) * eX;
+            float y = x >= 8.f ? x : (x * n) / (n + 2); // mish(x) ~= x for x >= 8; avoids inf/inf = NaN once n overflows
+            int quantized = outZp + (int)std::round(y/outScale);
+            table[i+128] = saturate_cast<int8_t>(quantized); // index shifted so that input -128 lands in slot 0
+        }
+        params.blobs.clear();
+        params.blobs.push_back(lookUpTable);
+        return true;
+    }
+
     int64 getFLOPSPerElement() const { return 3; }
 };
 
@@ -940,6 +1039,26 @@ struct SigmoidFunctor : public BaseFunctor
     }
 #endif // HAVE_VULKAN
 
+    bool tryQuantize(const std::vector<std::vector<float> > &scales,
+                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
+    {
+        float inpScale = scales[0][0], outScale = scales[1][0];
+        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
+
+        Mat lookUpTable(1, 256, CV_8S); // one int8 output code per possible int8 input code
+        int8_t* table = lookUpTable.ptr<int8_t>();
+        for (int i = -128; i < 128; i++)
+        {
+            float x = inpScale*(i - inpZp); // dequantized input value
+            float y = 1.f/(1.f + exp(-x));
+            int quantized = outZp + (int)std::round(y/outScale);
+            table[i+128] = saturate_cast<int8_t>(quantized); // index shifted so that input -128 lands in slot 0
+        }
+        params.blobs.clear();
+        params.blobs.push_back(lookUpTable);
+        return true;
+    }
+
     int64 getFLOPSPerElement() const { return 3; }
 };
 
@@ -1032,6 +1151,26 @@ struct ELUFunctor : public BaseFunctor
     }
 #endif // HAVE_VULKAN
 
+    bool tryQuantize(const std::vector<std::vector<float> > &scales,
+                     const std::vector<std::vector<int> > &zeropoints, LayerParams& params)
+    {
+        float inpScale = scales[0][0], outScale = scales[1][0];
+        int inpZp = zeropoints[0][0], outZp = zeropoints[1][0];
+
+        Mat lookUpTable(1, 256, CV_8S);
+        int8_t* table = lookUpTable.ptr<int8_t>();
+        for (int i = -128; i < 128; i++)
+        {
+            float x = inpScale*(i - inpZp);
+            float y = x >= 0.f ?
x : exp(x) - 1; + int quantized = outZp + (int)std::round(y/outScale); + table[i+128] = saturate_cast(quantized); + } + params.blobs.clear(); + params.blobs.push_back(lookUpTable); + return true; + } + int64 getFLOPSPerElement() const { return 2; } }; @@ -1130,6 +1269,26 @@ struct AbsValFunctor : public BaseFunctor } #endif // HAVE_VULKAN + bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) + { + float inpScale = scales[0][0], outScale = scales[1][0]; + int inpZp = zeropoints[0][0], outZp = zeropoints[1][0]; + + Mat lookUpTable(1, 256, CV_8S); + int8_t* table = lookUpTable.ptr(); + for (int i = -128; i < 128; i++) + { + float x = inpScale*(i - inpZp); + float y = abs(x); + int quantized = outZp + (int)std::round(y/outScale); + table[i+128] = saturate_cast(quantized); + } + params.blobs.clear(); + params.blobs.push_back(lookUpTable); + return true; + } + int64 getFLOPSPerElement() const { return 1; } }; @@ -1223,6 +1382,26 @@ struct BNLLFunctor : public BaseFunctor } #endif // HAVE_VULKAN + bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) + { + float inpScale = scales[0][0], outScale = scales[1][0]; + int inpZp = zeropoints[0][0], outZp = zeropoints[1][0]; + + Mat lookUpTable(1, 256, CV_8S); + int8_t* table = lookUpTable.ptr(); + for (int i = -128; i < 128; i++) + { + float x = inpScale*(i - inpZp); + float y = x > 0 ? x + log(1. + exp(-x)) : log(1. 
+ exp(x)); + int quantized = outZp + (int)std::round(y/outScale); + table[i+128] = saturate_cast(quantized); + } + params.blobs.clear(); + params.blobs.push_back(lookUpTable); + return true; + } + int64 getFLOPSPerElement() const { return 5; } }; diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp index a337c48d9e69..860560213d92 100644 --- a/modules/dnn/src/layers/eltwise_layer.cpp +++ b/modules/dnn/src/layers/eltwise_layer.cpp @@ -864,6 +864,37 @@ class EltwiseLayerImpl CV_FINAL : public EltwiseLayer } #endif // HAVE_DNN_NGRAPH + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + if (op == SUM) + { + std::vector newCoeffs; + float offset = zeropoints[1][0]; + float out_sc = scales[1][0]; + for (int i = 0; i < scales[0].size(); i++) + { + float coeff = coeffs.empty() ? 1.f : coeffs[i]; + float newcoeff = (scales[0][i] * coeff) / out_sc; + newCoeffs.push_back(newcoeff); + offset -= (newcoeff * zeropoints[0][i]); + } + params.set("coeff", DictValue::arrayReal(newCoeffs.data(), newCoeffs.size())); + params.set("offset", offset); + return true; + } + else if (op == PROD) + { + std::vector newCoeffs = scales[0]; + newCoeffs[0] /= scales[1][0]; + params.set("coeff", DictValue::arrayReal(newCoeffs.data(), newCoeffs.size())); + params.set("offset", zeropoints[1][0]); + params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); + return true; + } + return op == MAX; + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/flatten_layer.cpp b/modules/dnn/src/layers/flatten_layer.cpp index 7cf01a14fa33..8ff862fab030 100644 --- a/modules/dnn/src/layers/flatten_layer.cpp +++ b/modules/dnn/src/layers/flatten_layer.cpp @@ -227,6 +227,11 @@ virtual Ptr initNgraph(const std::vector >& inp } #endif + virtual bool tryQuantize(const std::vector 
> &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return true; + } int _startAxis; int _endAxis; diff --git a/modules/dnn/src/layers/fully_connected_layer.cpp b/modules/dnn/src/layers/fully_connected_layer.cpp index 529f3c04fdef..28ea7f347fef 100644 --- a/modules/dnn/src/layers/fully_connected_layer.cpp +++ b/modules/dnn/src/layers/fully_connected_layer.cpp @@ -618,6 +618,45 @@ class FullyConnectedLayerImpl CV_FINAL : public InnerProductLayer } #endif // HAVE_DNN_NGRAPH + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + if (blobs.empty()) + return false; + + int numOutput = blobs[0].size[0]; + float inputScale = scales[0][0], outputScale = scales[1][0]; + int inputZp = zeropoints[0][0]; + + Mat weightsQuantized(weightsMat.rows, weightsMat.cols, CV_8S); + Mat biasQuantized(1, numOutput, CV_32S); + Mat outputMultiplier(1, numOutput, CV_32F); + + double realMin, realMax, weightsScale; + for( int i = 0; i < numOutput; i++ ) + { + // Quantize weights + cv::minMaxIdx(weightsMat.row(i), &realMin, &realMax); + realMin = std::min(realMin, 0.0); + realMax = std::max(realMax, 0.0); + weightsScale = (realMax == realMin) ? 
1.0 : std::max(-realMin, realMax)/127; + weightsMat.row(i).convertTo(weightsQuantized.row(i), CV_8S, 1.f/weightsScale); + + // Quantize biases + float biasScale = inputScale * weightsScale; + biasQuantized.at(i) = (int)std::round(biasMat.at(i)/biasScale) - inputZp*(cv::sum(weightsQuantized.row(i))[0]); + + // Store multiplier + outputMultiplier.at(i) = biasScale / outputScale; + } + + params.blobs.clear(); + params.blobs.push_back(weightsQuantized.reshape(1, shape(blobs[0]))); + params.blobs.push_back(biasQuantized); + params.blobs.push_back(outputMultiplier); + return true; + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/padding_layer.cpp b/modules/dnn/src/layers/padding_layer.cpp index d18256879580..c1979ce701ac 100644 --- a/modules/dnn/src/layers/padding_layer.cpp +++ b/modules/dnn/src/layers/padding_layer.cpp @@ -134,6 +134,8 @@ class PaddingLayerImpl CV_FINAL : public PaddingLayer cv::convertFp16(paddingValue_fp32, paddingValue_fp16); outputs[0].setTo(paddingValue_fp16[0]); } + else if (inputs_arr.depth() == CV_8S) + outputs[0].setTo(saturate_cast(paddingValue)); else outputs[0].setTo(paddingValue); inputs[0].copyTo(outputs[0](dstRanges)); @@ -264,6 +266,16 @@ class PaddingLayerImpl CV_FINAL : public PaddingLayer } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + float outputScale = scales[1][0]; + int outputZp = zeropoints[1][0]; + float padValue = outputZp + std::round(params.get("value", 0)/outputScale); + params.set("value", padValue); + return true; + } + private: std::vector > paddings; // Pairs pad before, pad after. 
std::vector dstRanges; diff --git a/modules/dnn/src/layers/permute_layer.cpp b/modules/dnn/src/layers/permute_layer.cpp index c525c3f82f78..77c2469c050f 100644 --- a/modules/dnn/src/layers/permute_layer.cpp +++ b/modules/dnn/src/layers/permute_layer.cpp @@ -194,6 +194,7 @@ class PermuteLayerImpl CV_FINAL : public PermuteLayer #endif } + template class PermuteInvoker : public ParallelLoopBody { public: @@ -229,7 +230,7 @@ class PermuteLayerImpl CV_FINAL : public PermuteLayer size_t stripeStart = r.start*stripeSize; size_t stripeEnd = std::min(r.end*stripeSize, orows); - const size_t esz = sizeof(float); + const size_t esz = sizeof(T); size_t ostep0 = out->step[0]/esz, ostep1 = out->step[1]/esz, ostep2 = out->step[2]/esz; const size_t* ord = &order->at(0); size_t istep0 = inp->step[ord[0]]/esz, istep1 = inp->step[ord[1]]/esz, @@ -241,13 +242,13 @@ class PermuteLayerImpl CV_FINAL : public PermuteLayer int i1 = (int)(val % n1); int i0 = (int)(val / n1); - const float* inptr_orig = inp->ptr(); - float* outptr_orig = out->ptr(); + const T* inptr_orig = inp->ptr(); + T* outptr_orig = out->ptr(); for( size_t ofs = stripeStart; ofs < stripeEnd; ofs++ ) { - const float* inptr = inptr_orig + i0*istep0 + i1*istep1 + i2*istep2; - float* outptr = outptr_orig + i0*ostep0 + i1*ostep1 + i2*ostep2; + const T* inptr = inptr_orig + i0*istep0 + i1*istep1 + i2*istep2; + T* outptr = outptr_orig + i0*ostep0 + i1*ostep1 + i2*ostep2; for( int i3 = 0; i3 < n3; i3++ ) outptr[i3] = inptr[i3*istep3]; @@ -321,7 +322,8 @@ class PermuteLayerImpl CV_FINAL : public PermuteLayer CV_TRACE_FUNCTION(); CV_TRACE_ARG_VALUE(name, "name", name.c_str()); - CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget), + CV_OCL_RUN(IS_DNN_OPENCL_TARGET(preferableTarget) && + inputs_arr.depth() != CV_8S, forward_ocl(inputs_arr, outputs_arr, internals_arr)) if (inputs_arr.depth() == CV_16S) @@ -365,24 +367,48 @@ class PermuteLayerImpl CV_FINAL : public PermuteLayer if( numAxes == 4 ) { int nstripes = getNumThreads(); - 
PermuteInvoker::run(inp, out, _order, nstripes); + if (inp.type() == CV_8S) + PermuteInvoker::run(inp, out, _order, nstripes); + else + PermuteInvoker::run(inp, out, _order, nstripes); } else { - const float *srcData = inp.ptr(); - float *dstData = out.ptr(); + if (inp.type() == CV_8S) + { + const int8_t *srcData = inp.ptr(); + int8_t *dstData = out.ptr(); - for (i = 0; i < count; ++i) + for (i = 0; i < count; ++i) + { + size_t oldPosition = 0; + size_t newPosition = i; + + for (j = 0; j < numAxes; ++j) + { + oldPosition += (newPosition / newStride[j]) * oldStride[order[j]]; + newPosition %= newStride[j]; + } + dstData[i] = srcData[oldPosition]; + } + } + else { - size_t oldPosition = 0; - size_t newPosition = i; + const float *srcData = inp.ptr(); + float *dstData = out.ptr(); - for (j = 0; j < numAxes; ++j) + for (i = 0; i < count; ++i) { - oldPosition += (newPosition / newStride[j]) * oldStride[order[j]]; - newPosition %= newStride[j]; + size_t oldPosition = 0; + size_t newPosition = i; + + for (j = 0; j < numAxes; ++j) + { + oldPosition += (newPosition / newStride[j]) * oldStride[order[j]]; + newPosition %= newStride[j]; + } + dstData[i] = srcData[oldPosition]; } - dstData[i] = srcData[oldPosition]; } } } @@ -436,6 +462,11 @@ class PermuteLayerImpl CV_FINAL : public PermuteLayer } #endif // HAVE_VULKAN + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return true; + } size_t _count; std::vector _order; diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp index b8e2cfdf8f84..7653e536680c 100644 --- a/modules/dnn/src/layers/pooling_layer.cpp +++ b/modules/dnn/src/layers/pooling_layer.cpp @@ -1327,6 +1327,23 @@ class PoolingLayerImpl CV_FINAL : public PoolingLayer return true; } + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + if (type == MAX && !computeMaxIdx) + 
{ + return true; + } + else if (type == AVE || type == SUM) + { + float multiplier = scales[0][0] / scales[1][0]; + params.set("multiplier", multiplier); + params.set("input_zeropoint", zeropoints[0][0]); + return true; + } + return false; + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/reorg_layer.cpp b/modules/dnn/src/layers/reorg_layer.cpp index da1c61adac0e..797df4819d9e 100644 --- a/modules/dnn/src/layers/reorg_layer.cpp +++ b/modules/dnn/src/layers/reorg_layer.cpp @@ -231,6 +231,11 @@ class ReorgLayerImpl CV_FINAL : public ReorgLayer } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return true; + } virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE diff --git a/modules/dnn/src/layers/reshape_layer.cpp b/modules/dnn/src/layers/reshape_layer.cpp index ab8f41c7b6dd..4c10d155c8ae 100644 --- a/modules/dnn/src/layers/reshape_layer.cpp +++ b/modules/dnn/src/layers/reshape_layer.cpp @@ -343,6 +343,11 @@ class ReshapeLayerImpl CV_FINAL : public ReshapeLayer } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return true; + } private: std::vector outShapes; diff --git a/modules/dnn/src/layers/scale_layer.cpp b/modules/dnn/src/layers/scale_layer.cpp index a5c268214e86..001db24a2df8 100644 --- a/modules/dnn/src/layers/scale_layer.cpp +++ b/modules/dnn/src/layers/scale_layer.cpp @@ -344,6 +344,14 @@ class ScaleLayerImpl CV_FINAL : public ScaleLayer shift = (hasBias && !blobs.empty()) ? 
blobs.back() : Mat(); } + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + params.set("input_scales", DictValue::arrayReal(scales[0].data(), scales[0].size())); + params.set("input_zeropoints", DictValue::arrayInt(zeropoints[0].data(), zeropoints[0].size())); + return true; + } + virtual int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/shuffle_channel_layer.cpp b/modules/dnn/src/layers/shuffle_channel_layer.cpp index 6db74d1abda7..2a698d270fa8 100644 --- a/modules/dnn/src/layers/shuffle_channel_layer.cpp +++ b/modules/dnn/src/layers/shuffle_channel_layer.cpp @@ -147,6 +147,12 @@ class ShuffleChannelLayerImpl CV_FINAL : public ShuffleChannelLayer } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + return true; + } + private: Ptr permute; std::vector permuteInpShape, permuteOutShape; diff --git a/modules/dnn/src/layers/slice_layer.cpp b/modules/dnn/src/layers/slice_layer.cpp index 54e234038710..9efd95cf48df 100644 --- a/modules/dnn/src/layers/slice_layer.cpp +++ b/modules/dnn/src/layers/slice_layer.cpp @@ -531,7 +531,12 @@ class SliceLayerImpl : public SliceLayer { std::vector inpIdx(dimsNum, 0); std::vector outIdx(dimsNum, 0); - getSliceRecursive(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx); + if (inpMat.type() == CV_16S) + getSliceRecursive(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx); + else if (inpMat.type() == CV_8S) + getSliceRecursive(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx); + else + getSliceRecursive(inpMat, inpIdx, finalSliceRanges[i], sliceSteps[i], 0, dimsNum, outputs[i], outIdx); } } } @@ -647,8 +652,20 @@ class SliceLayerImpl : public SliceLayer } #endif + virtual bool tryQuantize(const 
std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + const int numOutputs = scales[1].size(); + for (int i = 0; i < numOutputs; i++) + { + if (scales[1][i] != scales[0][0]) + return false; + } + return true; + } private: + template void getSliceRecursive(const Mat &inpMat, std::vector &inpIdx, const std::vector &sliceRanges, const std::vector &sliceSteps, int dim, int dimsNum, @@ -658,8 +675,6 @@ class SliceLayerImpl : public SliceLayer int end = sliceRanges[dim].end; int step = !sliceSteps.empty() ? sliceSteps[dim] : 1; - const bool is32F = inpMat.depth() == CV_32F; - // TODO optimization is required (for 2D tail case at least) for (int k = begin, j = 0; k < end; k += step, j++) { @@ -667,14 +682,9 @@ class SliceLayerImpl : public SliceLayer outIdx[dim] = j; if (dim + 1 < dimsNum) - getSliceRecursive(inpMat, inpIdx, sliceRanges, sliceSteps, dim + 1, dimsNum, outputs, outIdx); + getSliceRecursive(inpMat, inpIdx, sliceRanges, sliceSteps, dim + 1, dimsNum, outputs, outIdx); else - { - if (is32F) - outputs.at(outIdx.data()) = inpMat.at(inpIdx.data()); - else - outputs.at(outIdx.data()) = inpMat.at(inpIdx.data()); // 16F emulation - } + outputs.at(outIdx.data()) = inpMat.at(inpIdx.data()); } } diff --git a/modules/dnn/src/layers/softmax_layer.cpp b/modules/dnn/src/layers/softmax_layer.cpp index 546c1017add8..e937e98f8c77 100644 --- a/modules/dnn/src/layers/softmax_layer.cpp +++ b/modules/dnn/src/layers/softmax_layer.cpp @@ -374,6 +374,22 @@ class SoftMaxLayerImpl CV_FINAL : public SoftmaxLayer } #endif // HAVE_DNN_NGRAPH + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + float inpScale = scales[0][0]; + Mat lookUpTable(1, 256, CV_32F); + float* table = lookUpTable.ptr(); + for (int i = -128; i < 128; i++) + { + float x = inpScale*(i - 127); // ensures exp(x) is always between (0, 1) + table[i+128] = std::exp(x); + } + params.blobs.clear(); + 
params.blobs.push_back(lookUpTable); + return true; + } + int64 getFLOPS(const std::vector &inputs, const std::vector &outputs) const CV_OVERRIDE { diff --git a/modules/dnn/src/layers/split_layer.cpp b/modules/dnn/src/layers/split_layer.cpp index b025d5ff1e49..2a4417615264 100644 --- a/modules/dnn/src/layers/split_layer.cpp +++ b/modules/dnn/src/layers/split_layer.cpp @@ -117,6 +117,17 @@ class SplitLayerImpl CV_FINAL : public SplitLayer } #endif + virtual bool tryQuantize(const std::vector > &scales, + const std::vector > &zeropoints, LayerParams& params) CV_OVERRIDE + { + const int numOutputs = scales[1].size(); + for (int i = 0; i < numOutputs; i++) + { + if (scales[1][i] != scales[0][0]) + return false; + } + return true; + } }; Ptr SplitLayer::create(const LayerParams& params) diff --git a/modules/dnn/test/test_int8_layers.cpp b/modules/dnn/test/test_int8_layers.cpp new file mode 100644 index 000000000000..1fcb1d0dba7c --- /dev/null +++ b/modules/dnn/test/test_int8_layers.cpp @@ -0,0 +1,1220 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+ +#include "test_precomp.hpp" +#include "npy_blob.hpp" +#include +#include +namespace opencv_test { namespace { + +template +static std::string _tf(TString filename) +{ + return (getOpenCVExtraDir() + "dnn/") + filename; +} + +class Test_Int8_layers : public DNNTestLayer +{ +public: + void testLayer(const String& basename, const String& importer, double l1, double lInf, + int numInps = 1, int numOuts = 1, bool useCaffeModel = false, + bool useCommonInputBlob = true, bool hasText = false) + { + CV_Assert_N(numInps >= 1, numInps <= 10, numOuts >= 1, numOuts <= 10); + std::vector inps(numInps), inps_int8(numInps); + std::vector refs(numOuts), outs_int8(numOuts), outs_dequantized(numOuts); + std::vector inputScale, outputScale; + std::vector inputZp, outputZp; + String inpPath, outPath; + Net net, qnet; + + if (importer == "Caffe") + { + String prototxt = _tf("layers/" + basename + ".prototxt"); + String caffemodel = _tf("layers/" + basename + ".caffemodel"); + net = readNetFromCaffe(prototxt, useCaffeModel ? caffemodel : String()); + + inpPath = _tf("layers/" + (useCommonInputBlob ? "blob" : basename + ".input")); + outPath = _tf("layers/" + basename); + } + else if (importer == "TensorFlow") + { + String netPath = _tf("tensorflow/" + basename + "_net.pb"); + String netConfig = hasText ? _tf("tensorflow/" + basename + "_net.pbtxt") : ""; + net = readNetFromTensorflow(netPath, netConfig); + + inpPath = _tf("tensorflow/" + basename + "_in"); + outPath = _tf("tensorflow/" + basename + "_out"); + } + else if (importer == "ONNX") + { + String onnxmodel = _tf("onnx/models/" + basename + ".onnx"); + net = readNetFromONNX(onnxmodel); + + inpPath = _tf("onnx/data/input_" + basename); + outPath = _tf("onnx/data/output_" + basename); + } + ASSERT_FALSE(net.empty()); + net.setPreferableBackend(backend); + net.setPreferableTarget(target); + + for (int i = 0; i < numInps; i++) + inps[i] = blobFromNPY(inpPath + ((numInps > 1) ? 
cv::format("_%d.npy", i) : ".npy")); + + for (int i = 0; i < numOuts; i++) + refs[i] = blobFromNPY(outPath + ((numOuts > 1) ? cv::format("_%d.npy", i) : ".npy")); + + qnet = net.quantize(inps, CV_8S, CV_8S); + qnet.getInputDetails(inputScale, inputZp); + qnet.getOutputDetails(outputScale, outputZp); + + // Quantize inputs to int8 + // int8_value = float_value/scale + zero-point + for (int i = 0; i < numInps; i++) + { + inps[i].convertTo(inps_int8[i], CV_8S, 1.f/inputScale[i], inputZp[i]); + String inp_name = numInps > 1 ? (importer == "Caffe" ? cv::format("input_%d", i) : cv::format("%d", i)) : ""; + qnet.setInput(inps_int8[i], inp_name); + } + qnet.forward(outs_int8); + + // Dequantize outputs and compare with reference outputs + // float_value = scale*(int8_value - zero-point) + for (int i = 0; i < numOuts; i++) + { + outs_int8[i].convertTo(outs_dequantized[i], CV_32F, outputScale[i], -(outputScale[i] * outputZp[i])); + normAssert(refs[i], outs_dequantized[i], "", l1, lInf); + } + } +}; + +TEST_P(Test_Int8_layers, Convolution1D) +{ + testLayer("conv1d", "ONNX", 0.00302, 0.00909); + testLayer("conv1d_bias", "ONNX", 0.00306, 0.00948); +} + +TEST_P(Test_Int8_layers, Convolution2D) +{ + testLayer("layer_convolution", "Caffe", 0.0174, 0.0758, 1, 1, true); + testLayer("single_conv", "TensorFlow", 0.00413, 0.02201); + testLayer("depthwise_conv2d", "TensorFlow", 0.0388, 0.169); + testLayer("atrous_conv2d_valid", "TensorFlow", 0.0193, 0.0633); + testLayer("atrous_conv2d_same", "TensorFlow", 0.0185, 0.1322); + testLayer("keras_atrous_conv2d_same", "TensorFlow", 0.0056, 0.0244); + testLayer("convolution", "ONNX", 0.0052, 0.01516); + testLayer("two_convolution", "ONNX", 0.00295, 0.00840); +} + +TEST_P(Test_Int8_layers, Convolution3D) +{ + testLayer("conv3d", "TensorFlow", 0.00734, 0.02434); + testLayer("conv3d", "ONNX", 0.00353, 0.00941); + testLayer("conv3d_bias", "ONNX", 0.00129, 0.00249); +} + +TEST_P(Test_Int8_layers, Flatten) +{ + testLayer("flatten", "TensorFlow", 
0.0036, 0.0069, 1, 1, false, true, true); + testLayer("unfused_flatten", "TensorFlow", 0.0014, 0.0028); + testLayer("unfused_flatten_unknown_batch", "TensorFlow", 0.0043, 0.0051); +} + +TEST_P(Test_Int8_layers, Padding) +{ + testLayer("padding_valid", "TensorFlow", 0.0026, 0.0064); + testLayer("padding_same", "TensorFlow", 0.0081, 0.032); + testLayer("spatial_padding", "TensorFlow", 0.0078, 0.028); + testLayer("mirror_pad", "TensorFlow", 0.0064, 0.013); + testLayer("pad_and_concat", "TensorFlow", 0.0021, 0.0098); + testLayer("padding", "ONNX", 0.0005, 0.0069); + testLayer("ReflectionPad2d", "ONNX", 0.00062, 0.0018); + testLayer("ZeroPad2d", "ONNX", 0.00037, 0.0018); +} + +TEST_P(Test_Int8_layers, AvePooling) +{ + testLayer("layer_pooling_ave", "Caffe", 0.0021, 0.0075); + testLayer("ave_pool_same", "TensorFlow", 0.00153, 0.0041); + testLayer("average_pooling_1d", "ONNX", 0.002, 0.0048); + testLayer("average_pooling", "ONNX", 0.0014, 0.0032); + testLayer("average_pooling_dynamic_axes", "ONNX", 0.0014, 0.006); + + if (target != DNN_TARGET_CPU) + throw SkipTestException("Only CPU is supported"); + testLayer("ave_pool3d", "TensorFlow", 0.00175, 0.0047); + testLayer("ave_pool3d", "ONNX", 0.00063, 0.0016); +} + +TEST_P(Test_Int8_layers, MaxPooling) +{ + testLayer("pool_conv_1d", "ONNX", 0.0006, 0.0015); + if (target != DNN_TARGET_CPU) + throw SkipTestException("Only CPU is supported"); + testLayer("pool_conv_3d", "ONNX", 0.0033, 0.0124); + + /* All the below tests have MaxPooling as last layer, so computeMaxIdx is set to true + which is not supported by int8 maxpooling + testLayer("layer_pooling_max", "Caffe", 0.0021, 0.004); + testLayer("max_pool_even", "TensorFlow", 0.0048, 0.0139); + testLayer("max_pool_odd_valid", "TensorFlow", 0.0043, 0.012); + testLayer("conv_pool_nchw", "TensorFlow", 0.007, 0.025); + testLayer("max_pool3d", "TensorFlow", 0.0025, 0.0058); + testLayer("maxpooling_1d", "ONNX", 0.0018, 0.0037); + testLayer("two_maxpooling_1d", "ONNX", 0.0037, 0.0052); 
+ testLayer("maxpooling", "ONNX", 0.0034, 0.0065); + testLayer("two_maxpooling", "ONNX", 0.0025, 0.0052); + testLayer("max_pool3d", "ONNX", 0.0028, 0.0069);*/ +} + +TEST_P(Test_Int8_layers, Reduce) +{ + testLayer("reduce_mean", "TensorFlow", 0.0005, 0.0014); + testLayer("reduce_mean", "ONNX", 0.00062, 0.0014); + testLayer("reduce_mean_axis1", "ONNX", 0.00032, 0.0007); + testLayer("reduce_mean_axis2", "ONNX", 0.00033, 0.001); + + testLayer("reduce_sum", "TensorFlow", 0.015, 0.031); + testLayer("reduce_sum_channel", "TensorFlow", 0.008, 0.019); + testLayer("sum_pool_by_axis", "TensorFlow", 0.012, 0.032); + testLayer("reduce_sum", "ONNX", 0.0025, 0.0048); + + testLayer("reduce_max", "ONNX", 0, 0); + testLayer("reduce_max_axis_0", "ONNX", 0.0042, 0.007); + testLayer("reduce_max_axis_1", "ONNX", 0.0018, 0.0036); + + if (target != DNN_TARGET_CPU) + throw SkipTestException("Only CPU is supported"); + testLayer("reduce_mean3d", "ONNX", 0.00048, 0.0016); +} + +TEST_P(Test_Int8_layers, ReLU) +{ + testLayer("layer_relu", "Caffe", 0.0005, 0.002); + testLayer("ReLU", "ONNX", 0.0012, 0.0047); +} + +TEST_P(Test_Int8_layers, LeakyReLU) +{ + testLayer("leaky_relu", "TensorFlow", 0.0002, 0.0004); +} + +TEST_P(Test_Int8_layers, ReLU6) +{ + testLayer("keras_relu6", "TensorFlow", 0.0018, 0.0062); + testLayer("keras_relu6", "TensorFlow", 0.0018, 0.0062, 1, 1, false, true, true); + testLayer("clip_by_value", "TensorFlow", 0.0009, 0.002); + testLayer("clip", "ONNX", 0.00006, 0.00037); +} + +TEST_P(Test_Int8_layers, Sigmoid) +{ + testLayer("maxpooling_sigmoid", "ONNX", 0.0011, 0.0032); + testLayer("maxpooling_sigmoid_dynamic_axes", "ONNX", 0.0011, 0.0032); + testLayer("maxpooling_sigmoid_1d", "ONNX", 0.0011, 0.0037); +} + +TEST_P(Test_Int8_layers, Mish) +{ + testLayer("mish", "ONNX", 0.0015, 0.0025); +} + +TEST_P(Test_Int8_layers, Softmax) +{ + testLayer("layer_softmax", "Caffe", 0.0011, 0.0036); + testLayer("keras_softmax", "TensorFlow", 0.00093, 0.0027); + testLayer("slim_softmax", 
"TensorFlow", 0.0016, 0.0034); + testLayer("slim_softmax_v2", "TensorFlow", 0.0029, 0.017); + testLayer("softmax", "ONNX", 0.0016, 0.0028); + testLayer("log_softmax", "ONNX", 0.014, 0.025); + testLayer("softmax_unfused", "ONNX", 0.0009, 0.0021); +} + +TEST_P(Test_Int8_layers, Concat) +{ + testLayer("layer_concat_shared_input", "Caffe", 0.0076, 0.029, 1, 1, true, false); + testLayer("concat_axis_1", "TensorFlow", 0.0056, 0.017); + testLayer("keras_pad_concat", "TensorFlow", 0.0032, 0.0089); + testLayer("concat_3d", "TensorFlow", 0.005, 0.014); + testLayer("concatenation", "ONNX", 0.0032, 0.009); +} + +TEST_P(Test_Int8_layers, BatchNorm) +{ + testLayer("layer_batch_norm", "Caffe", 0.0061, 0.019, 1, 1, true); + testLayer("fused_batch_norm", "TensorFlow", 0.0063, 0.02); + testLayer("batch_norm_text", "TensorFlow", 0.0048, 0.013, 1, 1, false, true, true); + testLayer("unfused_batch_norm", "TensorFlow", 0.0076, 0.019); + testLayer("fused_batch_norm_no_gamma", "TensorFlow", 0.0067, 0.015); + testLayer("unfused_batch_norm_no_gamma", "TensorFlow", 0.0123, 0.044); + testLayer("switch_identity", "TensorFlow", 0.0035, 0.011); + testLayer("batch_norm3d", "TensorFlow", 0.0077, 0.02); + testLayer("batch_norm", "ONNX", 0.0012, 0.0049); + testLayer("batch_norm_3d", "ONNX", 0.0039, 0.012); + testLayer("frozenBatchNorm2d", "ONNX", 0.001, 0.0018); + testLayer("batch_norm_subgraph", "ONNX", 0.0049, 0.0098); +} + +TEST_P(Test_Int8_layers, Scale) +{ + testLayer("batch_norm", "TensorFlow", 0.0028, 0.0098); + testLayer("scale", "ONNX", 0.0025, 0.0071); + testLayer("expand_hw", "ONNX", 0.0012, 0.0012); + testLayer("flatten_const", "ONNX", 0.0024, 0.0048); +} + +TEST_P(Test_Int8_layers, InnerProduct) +{ + testLayer("layer_inner_product", "Caffe", 0.005, 0.02, 1, 1, true); + testLayer("matmul", "TensorFlow", 0.0061, 0.019); + testLayer("nhwc_transpose_reshape_matmul", "TensorFlow", 0.0009, 0.0091); + testLayer("nhwc_reshape_matmul", "TensorFlow", 0.03, 0.071); + testLayer("matmul_layout", 
"TensorFlow", 0.035, 0.06); + testLayer("tf2_dense", "TensorFlow", 0, 0); + testLayer("matmul_add", "ONNX", 0.041, 0.082); + testLayer("linear", "ONNX", 0.0018, 0.0029); + testLayer("constant", "ONNX", 0.00021, 0.0006); + testLayer("lin_with_constant", "ONNX", 0.0011, 0.0016); +} + +TEST_P(Test_Int8_layers, Reshape) +{ + testLayer("reshape_layer", "TensorFlow", 0.0032, 0.0082); + testLayer("reshape_nchw", "TensorFlow", 0.0089, 0.029); + testLayer("reshape_conv", "TensorFlow", 0.035, 0.054); + testLayer("reshape_reduce", "TensorFlow", 0.0042, 0.0078); + testLayer("reshape_as_shape", "TensorFlow", 0.0014, 0.0028); + testLayer("reshape_no_reorder", "TensorFlow", 0.0014, 0.0028); + testLayer("shift_reshape_no_reorder", "TensorFlow", 0.0063, 0.014); + testLayer("dynamic_reshape", "ONNX", 0.0047, 0.0079); + testLayer("dynamic_reshape_opset_11", "ONNX", 0.0048, 0.0081); + testLayer("flatten_by_prod", "ONNX", 0.0048, 0.0081); + testLayer("squeeze", "ONNX", 0.0048, 0.0081); + testLayer("unsqueeze", "ONNX", 0.0033, 0.0053); + testLayer("squeeze_and_conv_dynamic_axes", "ONNX", 0.0054, 0.0154); + testLayer("unsqueeze_and_conv_dynamic_axes", "ONNX", 0.0037, 0.0151); +} + +TEST_P(Test_Int8_layers, Permute) +{ + testLayer("tf2_permute_nhwc_ncwh", "TensorFlow", 0.0028, 0.006); + testLayer("transpose", "ONNX", 0.0015, 0.0046); +} + +TEST_P(Test_Int8_layers, Identity) +{ + testLayer("expand_batch", "ONNX", 0.0027, 0.0036); + testLayer("expand_channels", "ONNX", 0.0013, 0.0019); + testLayer("expand_neg_batch", "ONNX", 0.00071, 0.0019); +} + +TEST_P(Test_Int8_layers, Slice) +{ + testLayer("split", "TensorFlow", 0.0033, 0.0056); + testLayer("slice_4d", "TensorFlow", 0.003, 0.0073); + testLayer("strided_slice", "TensorFlow", 0.008, 0.0142); + testLayer("slice", "ONNX", 0.0046, 0.0077); + testLayer("slice_dynamic_axes", "ONNX", 0.0039, 0.0084); + testLayer("slice_opset_11_steps_2d", "ONNX", 0.0052, 0.0124); + testLayer("slice_opset_11_steps_3d", "ONNX", 0.0068, 0.014); + 
testLayer("slice_opset_11_steps_4d", "ONNX", 0.0041, 0.008); + testLayer("slice_opset_11_steps_5d", "ONNX", 0.0085, 0.021); +} + +TEST_P(Test_Int8_layers, Dropout) +{ + testLayer("layer_dropout", "Caffe", 0.0021, 0.004); + testLayer("dropout", "ONNX", 0.0029, 0.004); +} + +TEST_P(Test_Int8_layers, Eltwise) +{ + testLayer("layer_eltwise", "Caffe", 0.062, 0.15); + testLayer("conv_2_inps", "Caffe", 0.0086, 0.0232, 2, 1, true, false); + testLayer("eltwise_sub", "TensorFlow", 0.015, 0.047); + testLayer("eltwise_add_vec", "TensorFlow", 0.037, 0.21); // tflite 0.0095, 0.0365 + testLayer("eltwise_mul_vec", "TensorFlow", 0.173, 1.14); // tflite 0.0028, 0.017 + testLayer("channel_broadcast", "TensorFlow", 0.0025, 0.0063); + testLayer("split_equals", "TensorFlow", 0.02, 0.065); + testLayer("mul", "ONNX", 0.0039, 0.014); + testLayer("split_max", "ONNX", 0.004, 0.012); +} + +INSTANTIATE_TEST_CASE_P(/**/, Test_Int8_layers, dnnBackendsAndTargets()); + +class Test_Int8_nets : public DNNTestLayer +{ +public: + void testClassificationNet(Net baseNet, const Mat& blob, const Mat& ref, double l1, double lInf) + { + Net qnet = baseNet.quantize(blob, CV_32F, CV_32F); + qnet.setPreferableBackend(backend); + qnet.setPreferableTarget(target); + + qnet.setInput(blob); + Mat out = qnet.forward(); + normAssert(ref, out, "", l1, lInf); + } + + void testDetectionNet(Net baseNet, const Mat& blob, const Mat& ref, + double confThreshold, double scoreDiff, double iouDiff) + { + Net qnet = baseNet.quantize(blob, CV_32F, CV_32F); + qnet.setPreferableBackend(backend); + qnet.setPreferableTarget(target); + + qnet.setInput(blob); + Mat out = qnet.forward(); + normAssertDetections(ref, out, "", confThreshold, scoreDiff, iouDiff); + } + + void testFaster(Net baseNet, const Mat& ref, double confThreshold, double scoreDiff, double iouDiff) + { + Mat inp = imread(_tf("dog416.png")); + resize(inp, inp, Size(800, 600)); + Mat blob = blobFromImage(inp, 1.0, Size(), Scalar(102.9801, 115.9465, 122.7717), false, 
false); + Mat imInfo = (Mat_(1, 3) << inp.rows, inp.cols, 1.6f); + + Net qnet = baseNet.quantize(std::vector{blob, imInfo}, CV_32F, CV_32F); + qnet.setPreferableBackend(backend); + qnet.setPreferableTarget(target); + + qnet.setInput(blob, "data"); + qnet.setInput(imInfo, "im_info"); + Mat out = qnet.forward(); + normAssertDetections(ref, out, "", confThreshold, scoreDiff, iouDiff); + } + + void testONNXNet(const String& basename, double l1, double lInf, bool useSoftmax = false) + { + String onnxmodel = findDataFile("dnn/onnx/models/" + basename + ".onnx", false); + + Mat blob = readTensorFromONNX(findDataFile("dnn/onnx/data/input_" + basename + ".pb")); + Mat ref = readTensorFromONNX(findDataFile("dnn/onnx/data/output_" + basename + ".pb")); + Net baseNet = readNetFromONNX(onnxmodel); + baseNet.setPreferableBackend(backend); + baseNet.setPreferableTarget(target); + + Net qnet = baseNet.quantize(blob, CV_32F, CV_32F); + qnet.setInput(blob); + Mat out = qnet.forward(); + + if (useSoftmax) + { + LayerParams lp; + Net netSoftmax; + netSoftmax.addLayerToPrev("softmaxLayer", "Softmax", lp); + netSoftmax.setPreferableBackend(DNN_BACKEND_OPENCV); + + netSoftmax.setInput(out); + out = netSoftmax.forward(); + + netSoftmax.setInput(ref); + ref = netSoftmax.forward(); + } + + normAssert(ref, out, "", l1, lInf); + } + + void testDarknetModel(const std::string& cfg, const std::string& weights, + const cv::Mat& ref, double scoreDiff, double iouDiff, + float confThreshold = 0.24, float nmsThreshold = 0.4) + { + CV_Assert(ref.cols == 7); + std::vector > refClassIds; + std::vector > refScores; + std::vector > refBoxes; + for (int i = 0; i < ref.rows; ++i) + { + int batchId = static_cast(ref.at(i, 0)); + int classId = static_cast(ref.at(i, 1)); + float score = ref.at(i, 2); + float left = ref.at(i, 3); + float top = ref.at(i, 4); + float right = ref.at(i, 5); + float bottom = ref.at(i, 6); + Rect2d box(left, top, right - left, bottom - top); + if (batchId >= refClassIds.size()) + { + 
refClassIds.resize(batchId + 1); + refScores.resize(batchId + 1); + refBoxes.resize(batchId + 1); + } + refClassIds[batchId].push_back(classId); + refScores[batchId].push_back(score); + refBoxes[batchId].push_back(box); + } + + Mat img1 = imread(_tf("dog416.png")); + Mat img2 = imread(_tf("street.png")); + std::vector samples(2); + samples[0] = img1; samples[1] = img2; + + // determine test type, whether batch or single img + int batch_size = refClassIds.size(); + CV_Assert(batch_size == 1 || batch_size == 2); + samples.resize(batch_size); + + Mat inp = blobFromImages(samples, 1.0/255, Size(416, 416), Scalar(), true, false); + + Net baseNet = readNetFromDarknet(findDataFile("dnn/" + cfg), findDataFile("dnn/" + weights, false)); + Net qnet = baseNet.quantize(inp, CV_32F, CV_32F); + qnet.setPreferableBackend(backend); + qnet.setPreferableTarget(target); + qnet.setInput(inp); + std::vector outs; + qnet.forward(outs, qnet.getUnconnectedOutLayersNames()); + + for (int b = 0; b < batch_size; ++b) + { + std::vector classIds; + std::vector confidences; + std::vector boxes; + for (int i = 0; i < outs.size(); ++i) + { + Mat out; + if (batch_size > 1){ + // get the sample slice from 3D matrix (batch, box, classes+5) + Range ranges[3] = {Range(b, b+1), Range::all(), Range::all()}; + out = outs[i](ranges).reshape(1, outs[i].size[1]); + }else{ + out = outs[i]; + } + for (int j = 0; j < out.rows; ++j) + { + Mat scores = out.row(j).colRange(5, out.cols); + double confidence; + Point maxLoc; + minMaxLoc(scores, 0, &confidence, 0, &maxLoc); + + if (confidence > confThreshold) { + float* detection = out.ptr(j); + double centerX = detection[0]; + double centerY = detection[1]; + double width = detection[2]; + double height = detection[3]; + boxes.push_back(Rect2d(centerX - 0.5 * width, centerY - 0.5 * height, + width, height)); + confidences.push_back(confidence); + classIds.push_back(maxLoc.x); + } + } + } + + // here we need NMS of boxes + std::vector indices; + NMSBoxes(boxes, 
confidences, confThreshold, nmsThreshold, indices); + + std::vector nms_classIds; + std::vector nms_confidences; + std::vector nms_boxes; + + for (size_t i = 0; i < indices.size(); ++i) + { + int idx = indices[i]; + Rect2d box = boxes[idx]; + float conf = confidences[idx]; + int class_id = classIds[idx]; + nms_boxes.push_back(box); + nms_confidences.push_back(conf); + nms_classIds.push_back(class_id); + } + + if (cvIsNaN(iouDiff)) + { + if (b == 0) + std::cout << "Skip accuracy checks" << std::endl; + continue; + } + + normAssertDetections(refClassIds[b], refScores[b], refBoxes[b], nms_classIds, nms_confidences, nms_boxes, + format("batch size %d, sample %d\n", batch_size, b).c_str(), confThreshold, scoreDiff, iouDiff); + } + } +}; + +TEST_P(Test_Int8_nets, AlexNet) +{ +#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL) + applyTestTag(CV_TEST_TAG_MEMORY_2GB); +#else + applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB); +#endif + if (backend != DNN_BACKEND_OPENCV) + throw SkipTestException("Only OpenCV backend is supported"); + + Net net = readNetFromCaffe(findDataFile("dnn/bvlc_alexnet.prototxt"), + findDataFile("dnn/bvlc_alexnet.caffemodel", false)); + + Mat inp = imread(_tf("grace_hopper_227.png")); + Mat blob = blobFromImage(inp, 1.0, Size(227, 227), Scalar(), false); + Mat ref = blobFromNPY(_tf("caffe_alexnet_prob.npy")); + + float l1 = 1e-4, lInf = 0.003; + testClassificationNet(net, blob, ref, l1, lInf); +} + +TEST_P(Test_Int8_nets, GoogLeNet) +{ + Net net = readNetFromCaffe(findDataFile("dnn/bvlc_googlenet.prototxt"), + findDataFile("dnn/bvlc_googlenet.caffemodel", false)); + + std::vector inpMats; + inpMats.push_back( imread(_tf("googlenet_0.png")) ); + inpMats.push_back( imread(_tf("googlenet_1.png")) ); + Mat blob = blobFromImages(inpMats, 1.0, Size(224, 224), Scalar(), false); + Mat ref = blobFromNPY(_tf("googlenet_prob.npy")); + + float l1 = 2e-4, lInf = 0.06; + testClassificationNet(net, blob, ref, 
l1, lInf); +} + +TEST_P(Test_Int8_nets, ResNet50) +{ + applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB); + if (backend != DNN_BACKEND_OPENCV) + throw SkipTestException("Only OpenCV backend is supported"); + + Net net = readNetFromCaffe(findDataFile("dnn/ResNet-50-deploy.prototxt"), + findDataFile("dnn/ResNet-50-model.caffemodel", false)); + + Mat inp = imread(_tf("googlenet_0.png")); + Mat blob = blobFromImage(inp, 1.0, Size(224, 224), Scalar(), false); + Mat ref = blobFromNPY(_tf("resnet50_prob.npy")); + + float l1 = 3e-4, lInf = 0.035; + testClassificationNet(net, blob, ref, l1, lInf); +} + +TEST_P(Test_Int8_nets, DenseNet121) +{ + applyTestTag(CV_TEST_TAG_MEMORY_512MB); + + Net net = readNetFromCaffe(findDataFile("dnn/DenseNet_121.prototxt", false), + findDataFile("dnn/DenseNet_121.caffemodel", false)); + + Mat inp = imread(_tf("dog416.png")); + Mat blob = blobFromImage(inp, 1.0 / 255.0, Size(224, 224), Scalar(), true, true); + Mat ref = blobFromNPY(_tf("densenet_121_output.npy")); + + float l1 = 0.76, lInf = 3.31; // seems wrong + testClassificationNet(net, blob, ref, l1, lInf); +} + +TEST_P(Test_Int8_nets, SqueezeNet_v1_1) +{ + if(target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + + Net net = readNetFromCaffe(findDataFile("dnn/squeezenet_v1.1.prototxt"), + findDataFile("dnn/squeezenet_v1.1.caffemodel", false)); + + Mat inp = imread(_tf("googlenet_0.png")); + Mat blob = blobFromImage(inp, 1.0, Size(227, 227), Scalar(), false, true); + Mat ref = blobFromNPY(_tf("squeezenet_v1.1_prob.npy")); + + float l1 = 3e-4, lInf = 0.056; + testClassificationNet(net, blob, ref, l1, lInf); +} + +TEST_P(Test_Int8_nets, CaffeNet) +{ +#if defined(OPENCV_32BIT_CONFIGURATION) && (defined(HAVE_OPENCL) || defined(_WIN32)) + applyTestTag(CV_TEST_TAG_MEMORY_2GB); +#else + applyTestTag(target == DNN_TARGET_CPU ? 
CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB); +#endif + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD + && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + float l1 = 4e-5, lInf = 0.0025; + testONNXNet("caffenet", l1, lInf); +} + +TEST_P(Test_Int8_nets, RCNN_ILSVRC13) +{ +#if defined(OPENCV_32BIT_CONFIGURATION) && (defined(HAVE_OPENCL) || defined(_WIN32)) + applyTestTag(CV_TEST_TAG_MEMORY_2GB); +#else + applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB); +#endif + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2019030000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD + && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + float l1 = 0.02, lInf = 0.042; + testONNXNet("rcnn_ilsvrc13", l1, lInf); +} + +TEST_P(Test_Int8_nets, Inception_v2) +{ + testONNXNet("inception_v2", default_l1, default_lInf, true); +} + +TEST_P(Test_Int8_nets, MobileNet_v2) +{ + testONNXNet("mobilenetv2", default_l1, default_lInf, true); +} + +TEST_P(Test_Int8_nets, Shufflenet) +{ + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER); + } + testONNXNet("shufflenet", default_l1, default_lInf); +} + 
+TEST_P(Test_Int8_nets, MobileNet_SSD) +{ + Net net = readNetFromCaffe(findDataFile("dnn/MobileNetSSD_deploy.prototxt", false), + findDataFile("dnn/MobileNetSSD_deploy.caffemodel", false)); + + Mat inp = imread(_tf("street.png")); + Mat blob = blobFromImage(inp, 1.0 / 127.5, Size(300, 300), Scalar(127.5, 127.5, 127.5), false); + Mat ref = blobFromNPY(_tf("mobilenet_ssd_caffe_out.npy")); + + float confThreshold = FLT_MIN, scoreDiff = 0.059, iouDiff = 0.11; + testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, MobileNet_v1_SSD) +{ + Net net = readNetFromTensorflow(findDataFile("dnn/ssd_mobilenet_v1_coco_2017_11_17.pb", false), + findDataFile("dnn/ssd_mobilenet_v1_coco_2017_11_17.pbtxt")); + + Mat inp = imread(_tf("dog416.png")); + Mat blob = blobFromImage(inp, 1.0, Size(300, 300), Scalar(), true, false); + Mat ref = blobFromNPY(_tf("tensorflow/ssd_mobilenet_v1_coco_2017_11_17.detection_out.npy")); + + float confThreshold = 0.5, scoreDiff = 0.034, iouDiff = 0.13; + testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, MobileNet_v1_SSD_PPN) +{ +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2018050000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? 
CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, + CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + Net net = readNetFromTensorflow(findDataFile("dnn/ssd_mobilenet_v1_ppn_coco.pb", false), + findDataFile("dnn/ssd_mobilenet_v1_ppn_coco.pbtxt")); + + Mat inp = imread(_tf("dog416.png")); + Mat blob = blobFromImage(inp, 1.0, Size(300, 300), Scalar(), true, false); + Mat ref = blobFromNPY(_tf("tensorflow/ssd_mobilenet_v1_ppn_coco.detection_out.npy")); + + float confThreshold = 0.51, scoreDiff = 0.04, iouDiff = 0.06; + testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, Inception_v2_SSD) +{ + applyTestTag(target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB); +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_LE(2019010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X, CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + Net net = readNetFromTensorflow(findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pb", false), + findDataFile("dnn/ssd_inception_v2_coco_2017_11_17.pbtxt")); + + Mat inp = imread(_tf("street.png")); + Mat blob = blobFromImage(inp, 1.0, Size(300, 300), Scalar(), true, false); + Mat ref = (Mat_(5, 7) << 0, 1, 0.90176028, 0.19872092, 0.36311883, 0.26461923, 0.63498729, + 0, 3, 0.93569964, 0.64865261, 0.45906419, 0.80675775, 0.65708131, + 0, 3, 0.75838411, 0.44668293, 0.45907149, 0.49459291, 0.52197015, + 0, 10, 0.95932811, 0.38349164, 0.32528657, 0.40387636, 0.39165527, + 0, 10, 0.93973452, 0.66561931, 0.37841269, 0.68074018, 0.42907384); + + float confThreshold = 0.5, scoreDiff = 0.0114, iouDiff = 0.22; + testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, opencv_face_detector) +{ + 
Net net = readNetFromCaffe(findDataFile("dnn/opencv_face_detector.prototxt"), + findDataFile("dnn/opencv_face_detector.caffemodel", false)); + + Mat inp = imread(findDataFile("gpu/lbpcascade/er.png")); + Mat blob = blobFromImage(inp, 1.0, Size(), Scalar(104.0, 177.0, 123.0), false, false); + Mat ref = (Mat_(6, 7) << 0, 1, 0.99520785, 0.80997437, 0.16379407, 0.87996572, 0.26685631, + 0, 1, 0.9934696, 0.2831718, 0.50738752, 0.345781, 0.5985168, + 0, 1, 0.99096733, 0.13629119, 0.24892329, 0.19756334, 0.3310290, + 0, 1, 0.98977017, 0.23901358, 0.09084064, 0.29902688, 0.1769477, + 0, 1, 0.97203469, 0.67965847, 0.06876482, 0.73999709, 0.1513494, + 0, 1, 0.95097077, 0.51901293, 0.45863652, 0.5777427, 0.5347801); + + float confThreshold = 0.5, scoreDiff = 0.002, iouDiff = 0.21; + testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, EfficientDet) +{ + if (target != DNN_TARGET_CPU) + { + if (target == DNN_TARGET_OPENCL_FP16) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + if (target == DNN_TARGET_OPENCL) applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL); + if (target == DNN_TARGET_MYRIAD) applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); + } + Net net = readNetFromTensorflow(findDataFile("dnn/efficientdet-d0.pb", false), + findDataFile("dnn/efficientdet-d0.pbtxt")); + + Mat inp = imread(_tf("dog416.png")); + Mat blob = blobFromImage(inp, 1.0/255, Size(512, 512), Scalar(123.675, 116.28, 103.53)); + Mat ref = (Mat_(3, 7) << 0, 1, 0.8437444, 0.153996080160141, 0.20534580945968628, 0.7463544607162476, 0.7414066195487976, + 0, 17, 0.8245924, 0.16657517850399017, 0.3996818959712982, 0.4111558794975281, 0.9306337833404541, + 0, 7, 0.8039304, 0.6118435263633728, 0.13175517320632935, 0.9065558314323425, 0.2943994700908661); + + float confThreshold = 0.65, scoreDiff = 0.17, iouDiff = 0.18; + testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, FasterRCNN_resnet50) +{ + applyTestTag( + (target == 
DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB), + CV_TEST_TAG_LONG, + CV_TEST_TAG_DEBUG_VERYLONG + ); + +#ifdef INF_ENGINE_RELEASE + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && + (INF_ENGINE_VER_MAJOR_LT(2019020000) || target != DNN_TARGET_CPU)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + + if (INF_ENGINE_VER_MAJOR_GT(2019030000) && + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#endif + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + + if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); + + Net net = readNetFromTensorflow(findDataFile("dnn/faster_rcnn_resnet50_coco_2018_01_28.pb", false), + findDataFile("dnn/faster_rcnn_resnet50_coco_2018_01_28.pbtxt")); + + Mat inp = imread(_tf("dog416.png")); + Mat blob = blobFromImage(inp, 1.0, Size(800, 600), Scalar(), true, false); + Mat ref = blobFromNPY(_tf("tensorflow/faster_rcnn_resnet50_coco_2018_01_28.detection_out.npy")); + + float confThreshold = 0.5, scoreDiff = 0.025, iouDiff = 0.15; + testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, FasterRCNN_inceptionv2) +{ + applyTestTag( + (target == DNN_TARGET_CPU ? 
CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB), + CV_TEST_TAG_LONG, + CV_TEST_TAG_DEBUG_VERYLONG + ); + +#ifdef INF_ENGINE_RELEASE + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && + (INF_ENGINE_VER_MAJOR_LT(2019020000) || target != DNN_TARGET_CPU)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NN_BUILDER, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + + if (INF_ENGINE_VER_MAJOR_GT(2019030000) && + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); +#endif + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + + if (backend == DNN_BACKEND_OPENCV && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_OPENCL_FP16); + + if (backend == DNN_BACKEND_CUDA && target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); + + Net net = readNetFromTensorflow(findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", false), + findDataFile("dnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt")); + + Mat inp = imread(_tf("dog416.png")); + Mat blob = blobFromImage(inp, 1.0, Size(800, 600), Scalar(), true, false); + Mat ref = blobFromNPY(_tf("tensorflow/faster_rcnn_inception_v2_coco_2018_01_28.detection_out.npy")); + + float confThreshold = 0.5, scoreDiff = 0.21, iouDiff = 0.1; + testDetectionNet(net, blob, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, FasterRCNN_vgg16) +{ + applyTestTag( +#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL) + CV_TEST_TAG_MEMORY_2GB, +#else + (target == DNN_TARGET_CPU ? 
CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB), +#endif + CV_TEST_TAG_LONG, + CV_TEST_TAG_DEBUG_VERYLONG + ); + +#if defined(INF_ENGINE_RELEASE) + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && (target == DNN_TARGET_OPENCL || target == DNN_TARGET_OPENCL_FP16)) + applyTestTag(target == DNN_TARGET_OPENCL ? CV_TEST_TAG_DNN_SKIP_IE_OPENCL : CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_NGRAPH, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); +#endif + + Net net = readNetFromCaffe(findDataFile("dnn/faster_rcnn_vgg16.prototxt"), + findDataFile("dnn/VGG16_faster_rcnn_final.caffemodel", false)); + + Mat ref = (Mat_(3, 7) << 0, 2, 0.949398, 99.2454, 210.141, 601.205, 462.849, + 0, 7, 0.997022, 481.841, 92.3218, 722.685, 175.953, + 0, 12, 0.993028, 133.221, 189.377, 350.994, 563.166); + + float confThreshold = 0.8, scoreDiff = 0.024, iouDiff = 0.35; + testFaster(net, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, FasterRCNN_zf) +{ + applyTestTag( +#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL) + CV_TEST_TAG_MEMORY_2GB, +#else + (target == DNN_TARGET_CPU ? 
CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB), +#endif + CV_TEST_TAG_DEBUG_LONG + ); + + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); + + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); + + if (target == DNN_TARGET_CUDA_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_CUDA_FP16); + + Net net = readNetFromCaffe(findDataFile("dnn/faster_rcnn_zf.prototxt"), + findDataFile("dnn/ZF_faster_rcnn_final.caffemodel", false)); + + Mat ref = (Mat_(3, 7) << 0, 2, 0.90121, 120.407, 115.83, 570.586, 528.395, + 0, 7, 0.988779, 469.849, 75.1756, 718.64, 186.762, + 0, 12, 0.967198, 138.588, 206.843, 329.766, 553.176); + + float confThreshold = 0.8, scoreDiff = 0.021, iouDiff = 0.1; + testFaster(net, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, RFCN) +{ + applyTestTag( + (target == DNN_TARGET_CPU ? 
CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_2GB), + CV_TEST_TAG_LONG, + CV_TEST_TAG_DEBUG_VERYLONG + ); + + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); + + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD); + + Net net = readNetFromCaffe(findDataFile("dnn/rfcn_pascal_voc_resnet50.prototxt"), + findDataFile("dnn/resnet50_rfcn_final.caffemodel", false)); + + Mat ref = (Mat_(2, 7) << 0, 7, 0.991359, 491.822, 81.1668, 702.573, 178.234, + 0, 12, 0.94786, 132.093, 223.903, 338.077, 566.16); + + float confThreshold = 0.8, scoreDiff = 0.017, iouDiff = 0.11; + testFaster(net, ref, confThreshold, scoreDiff, iouDiff); +} + +TEST_P(Test_Int8_nets, YoloVoc) +{ + applyTestTag( +#if defined(OPENCV_32BIT_CONFIGURATION) && defined(HAVE_OPENCL) + CV_TEST_TAG_MEMORY_2GB, +#else + CV_TEST_TAG_MEMORY_1GB, +#endif + CV_TEST_TAG_LONG + ); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2019010000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16); +#endif +#if defined(INF_ENGINE_RELEASE) + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && + target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() 
== CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); +#endif + + Mat ref = (Mat_(6, 7) << 0, 6, 0.750469f, 0.577374f, 0.127391f, 0.902949f, 0.300809f, + 0, 1, 0.780879f, 0.270762f, 0.264102f, 0.732475f, 0.745412f, + 0, 11, 0.901615f, 0.1386f, 0.338509f, 0.421337f, 0.938789f, + 1, 14, 0.623813f, 0.183179f, 0.381921f, 0.247726f, 0.625847f, + 1, 6, 0.667770f, 0.446555f, 0.453578f, 0.499986f, 0.519167f, + 1, 6, 0.844947f, 0.637058f, 0.460398f, 0.828508f, 0.66427f); + + std::string config_file = "yolo-voc.cfg"; + std::string weights_file = "yolo-voc.weights"; + + double scoreDiff = 0.1, iouDiff = 0.3; + { + SCOPED_TRACE("batch size 1"); + testDarknetModel(config_file, weights_file, ref.rowRange(0, 3), scoreDiff, iouDiff); + } + + { + SCOPED_TRACE("batch size 2"); + testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff); + } +} + +TEST_P(Test_Int8_nets, TinyYoloVoc) +{ + applyTestTag(CV_TEST_TAG_MEMORY_512MB); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif +#if defined(INF_ENGINE_RELEASE) + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && + target == DNN_TARGET_MYRIAD && getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); +#endif + + Mat ref = (Mat_(4, 7) << 0, 6, 0.761967f, 0.579042f, 0.159161f, 0.894482f, 0.31994f, + 0, 11, 0.780595f, 0.129696f, 0.386467f, 0.445275f, 0.920994f, + 1, 6, 0.651450f, 0.460526f, 0.458019f, 0.522527f, 0.5341f, + 1, 6, 0.928758f, 0.651024f, 0.463539f, 0.823784f, 0.654998f); + + 
std::string config_file = "tiny-yolo-voc.cfg"; + std::string weights_file = "tiny-yolo-voc.weights"; + + double scoreDiff = 0.043, iouDiff = 0.12; + { + SCOPED_TRACE("batch size 1"); + testDarknetModel(config_file, weights_file, ref.rowRange(0, 2), scoreDiff, iouDiff); + } + + { + SCOPED_TRACE("batch size 2"); + testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff); + } +} + +TEST_P(Test_Int8_nets, YOLOv3) +{ + applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB)); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_NGRAPH); + + const int N0 = 3; + const int N1 = 6; + static const float ref_[/* (N0 + N1) * 7 */] = { +0, 16, 0.998836f, 0.160024f, 0.389964f, 0.417885f, 0.943716f, +0, 1, 0.987908f, 0.150913f, 0.221933f, 0.742255f, 0.746261f, +0, 7, 0.952983f, 0.614621f, 0.150257f, 0.901368f, 0.289251f, + +1, 2, 0.997412f, 0.647584f, 0.459939f, 0.821037f, 0.663947f, +1, 2, 0.989633f, 0.450719f, 0.463353f, 0.496306f, 0.522258f, +1, 0, 0.980053f, 0.195856f, 0.378454f, 0.258626f, 0.629257f, +1, 9, 0.785341f, 0.665503f, 0.373543f, 0.688893f, 0.439244f, +1, 9, 0.733275f, 0.376029f, 0.315694f, 0.401776f, 0.395165f, +1, 9, 0.384815f, 0.659824f, 0.372389f, 0.673927f, 0.429412f, + }; + Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_); + + std::string config_file = "yolov3.cfg"; + std::string weights_file = "yolov3.weights"; + + double scoreDiff = 0.08, iouDiff = 0.21, confThreshold = 0.25; + { + 
SCOPED_TRACE("batch size 1"); + testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff, confThreshold); + } + +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + else if (target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); + } +#endif + + { + SCOPED_TRACE("batch size 2"); + testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, confThreshold); + } +} + +TEST_P(Test_Int8_nets, YOLOv4) +{ + applyTestTag(CV_TEST_TAG_LONG, (target == DNN_TARGET_CPU ? CV_TEST_TAG_MEMORY_1GB : CV_TEST_TAG_MEMORY_2GB)); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_EQ(2020040000) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + const int N0 = 3; + const int N1 = 7; + static const float ref_[/* (N0 + N1) * 7 */] = { +0, 16, 0.992194f, 0.172375f, 0.402458f, 0.403918f, 0.932801f, +0, 1, 0.988326f, 0.166708f, 0.228236f, 0.737208f, 0.735803f, +0, 7, 0.94639f, 0.602523f, 0.130399f, 0.901623f, 0.298452f, + +1, 2, 0.99761f, 0.646556f, 0.45985f, 0.816041f, 0.659067f, +1, 0, 0.988913f, 0.201726f, 0.360282f, 0.266181f, 0.631728f, +1, 2, 0.98233f, 0.452007f, 
0.462217f, 0.495612f, 0.521687f, +1, 9, 0.919195f, 0.374642f, 0.316524f, 0.398126f, 0.393714f, +1, 9, 0.856303f, 0.666842f, 0.372215f, 0.685539f, 0.44141f, +1, 9, 0.313516f, 0.656791f, 0.374734f, 0.671959f, 0.438371f, +1, 9, 0.256625f, 0.940232f, 0.326931f, 0.967586f, 0.374002f, + }; + Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_); + + std::string config_file = "yolov4.cfg"; + std::string weights_file = "yolov4.weights"; + double scoreDiff = 0.1, iouDiff = 0.17; + { + SCOPED_TRACE("batch size 1"); + testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff); + } + + { + SCOPED_TRACE("batch size 2"); + +#if defined(INF_ENGINE_RELEASE) + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019) + { + if (target == DNN_TARGET_OPENCL) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + else if (target == DNN_TARGET_OPENCL_FP16 && INF_ENGINE_VER_MAJOR_LE(202010000)) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + else if (target == DNN_TARGET_MYRIAD && + getInferenceEngineVPUType() == CV_DNN_INFERENCE_ENGINE_VPU_TYPE_MYRIAD_X) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD_X); + } +#endif + + testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff); + } +} + +TEST_P(Test_Int8_nets, YOLOv4_tiny) +{ + applyTestTag( + target == DNN_TARGET_CPU ? 
CV_TEST_TAG_MEMORY_512MB : CV_TEST_TAG_MEMORY_1GB + ); + +#if defined(INF_ENGINE_RELEASE) && INF_ENGINE_VER_MAJOR_GE(2021010000) + if (target == DNN_TARGET_MYRIAD) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif + + const float confThreshold = 0.6; + + const int N0 = 2; + const int N1 = 3; + static const float ref_[/* (N0 + N1) * 7 */] = { +0, 7, 0.85935f, 0.593484f, 0.141211f, 0.920356f, 0.291593f, +0, 16, 0.795188f, 0.169207f, 0.386886f, 0.423753f, 0.933004f, + +1, 2, 0.996832f, 0.653802f, 0.464573f, 0.815193f, 0.653292f, +1, 2, 0.963325f, 0.451151f, 0.458915f, 0.496255f, 0.52241f, +1, 0, 0.926244f, 0.194851f, 0.361743f, 0.260277f, 0.632364f, + }; + Mat ref(N0 + N1, 7, CV_32FC1, (void*)ref_); + + std::string config_file = "yolov4-tiny.cfg"; + std::string weights_file = "yolov4-tiny.weights"; + double scoreDiff = 0.12; + double iouDiff = target == DNN_TARGET_OPENCL_FP16 ? 0.2 : 0.082; + +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD) // bad accuracy + iouDiff = std::numeric_limits::quiet_NaN(); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL) + iouDiff = std::numeric_limits::quiet_NaN(); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) + iouDiff = std::numeric_limits::quiet_NaN(); +#endif + + { + SCOPED_TRACE("batch size 1"); + testDarknetModel(config_file, weights_file, ref.rowRange(0, N0), scoreDiff, iouDiff, confThreshold); + } + + /* bad accuracy on second image + { + SCOPED_TRACE("batch size 2"); + testDarknetModel(config_file, weights_file, ref, scoreDiff, iouDiff, confThreshold); + } + */ + +#if defined(INF_ENGINE_RELEASE) + if (target == DNN_TARGET_MYRIAD) // bad accuracy + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_MYRIAD, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if (backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 && target == DNN_TARGET_OPENCL) + 
applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL, CV_TEST_TAG_DNN_SKIP_IE_VERSION); + if ((backend == DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 || + backend == DNN_BACKEND_INFERENCE_ENGINE_NGRAPH) && target == DNN_TARGET_OPENCL_FP16) + applyTestTag(CV_TEST_TAG_DNN_SKIP_IE_OPENCL_FP16, CV_TEST_TAG_DNN_SKIP_IE_VERSION); +#endif +} + +INSTANTIATE_TEST_CASE_P(/**/, Test_Int8_nets, dnnBackendsAndTargets()); +}} // namespace From 9cfa84313c5833d7295fcf57be93d5d2aaadfd88 Mon Sep 17 00:00:00 2001 From: Vincent Rabaud Date: Sat, 10 Jul 2021 00:21:52 +0200 Subject: [PATCH 124/128] Use the one argument version of SetTotalBytesLimit. The two argument versions has been deprecated, cf https://developers.google.com/protocol-buffers/docs/reference/cpp/google.protobuf.io.coded_stream --- modules/dnn/src/caffe/caffe_io.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modules/dnn/src/caffe/caffe_io.cpp b/modules/dnn/src/caffe/caffe_io.cpp index 2fc4d84f4604..ebecf95eea3a 100644 --- a/modules/dnn/src/caffe/caffe_io.cpp +++ b/modules/dnn/src/caffe/caffe_io.cpp @@ -92,6 +92,7 @@ #ifdef HAVE_PROTOBUF #include #include +#include #include #include @@ -1111,7 +1112,11 @@ static const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 by bool ReadProtoFromBinary(ZeroCopyInputStream* input, Message *proto) { CodedInputStream coded_input(input); +#if GOOGLE_PROTOBUF_VERSION >= 3006000 + coded_input.SetTotalBytesLimit(kProtoReadBytesLimit); +#else coded_input.SetTotalBytesLimit(kProtoReadBytesLimit, 536870912); +#endif return proto->ParseFromCodedStream(&coded_input); } From c08897cd106dbeca97c4591a8af088c563fe1444 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 19 Aug 2021 20:06:41 +0000 Subject: [PATCH 125/128] cmake: handle empty CVPY_SUFFIX --- modules/python/common.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/python/common.cmake b/modules/python/common.cmake index 6a438fd1a267..ebbb2e2f655d 100644 --- 
a/modules/python/common.cmake +++ b/modules/python/common.cmake @@ -86,7 +86,7 @@ set_target_properties(${the_module} PROPERTIES ARCHIVE_OUTPUT_NAME ${the_module} # prevent name conflict for python2/3 outputs PREFIX "" OUTPUT_NAME cv2 - SUFFIX ${CVPY_SUFFIX}) + SUFFIX "${CVPY_SUFFIX}") if(ENABLE_SOLUTION_FOLDERS) set_target_properties(${the_module} PROPERTIES FOLDER "bindings") From d6306f8ccbcd214dd806a8fed739eff03978ae40 Mon Sep 17 00:00:00 2001 From: Alexander Panov Date: Fri, 20 Aug 2021 13:57:05 +0300 Subject: [PATCH 126/128] Merge pull request #20564 from AleksandrPanov:update_kalman_sample Update kalman sample * updated view and comments, fixed dims * updated view and comments, added statePost --- samples/cpp/kalman.cpp | 62 ++++++++++++++----------- samples/python/kalman.py | 97 ++++++++++++++++++++-------------------- 2 files changed, 85 insertions(+), 74 deletions(-) diff --git a/samples/cpp/kalman.cpp b/samples/cpp/kalman.cpp index 501a749124c6..daf0ba5a7150 100644 --- a/samples/cpp/kalman.cpp +++ b/samples/cpp/kalman.cpp @@ -1,6 +1,6 @@ #include "opencv2/video/tracking.hpp" #include "opencv2/highgui.hpp" - +#include "opencv2/core/cvdef.h" #include using namespace cv; @@ -14,15 +14,19 @@ static void help() { printf( "\nExample of c calls to OpenCV's Kalman filter.\n" " Tracking of rotating point.\n" -" Rotation speed is constant.\n" +" Point moves in a circle and is characterized by a 1D state.\n" +" state_k+1 = state_k + speed + process_noise N(0, 1e-5)\n" +" The speed is constant.\n" " Both state and measurements vectors are 1D (a point angle),\n" -" Measurement is the real point angle + gaussian noise.\n" -" The real and the estimated points are connected with yellow line segment,\n" -" the real and the measured points are connected with red line segment.\n" +" Measurement is the real state + gaussian noise N(0, 1e-1).\n" +" The real and the measured points are connected with red line segment,\n" +" the real and the estimated points are connected with 
yellow line segment,\n" +" the real and the corrected estimated points are connected with green line segment.\n" " (if Kalman filter works correctly,\n" -" the yellow segment should be shorter than the red one).\n" +" the yellow segment should be shorter than the red one and\n" +" the green segment should be shorter than the yellow one)." "\n" -" Pressing any key (except ESC) will reset the tracking with a different speed.\n" +" Pressing any key (except ESC) will reset the tracking.\n" " Pressing ESC will stop the program.\n" ); } @@ -39,7 +43,9 @@ int main(int, char**) for(;;) { - randn( state, Scalar::all(0), Scalar::all(0.1) ); + img = Scalar::all(0); + state.at(0) = 0.0f; + state.at(1) = 2.f * (float)CV_PI / 6; KF.transitionMatrix = (Mat_(2, 2) << 1, 1, 0, 1); setIdentity(KF.measurementMatrix); @@ -60,36 +66,40 @@ int main(int, char**) double predictAngle = prediction.at(0); Point predictPt = calcPoint(center, R, predictAngle); - randn( measurement, Scalar::all(0), Scalar::all(KF.measurementNoiseCov.at(0))); - // generate measurement + randn( measurement, Scalar::all(0), Scalar::all(KF.measurementNoiseCov.at(0))); measurement += KF.measurementMatrix*state; double measAngle = measurement.at(0); Point measPt = calcPoint(center, R, measAngle); + // correct the state estimates based on measurements + // updates statePost & errorCovPost + KF.correct(measurement); + double improvedAngle = KF.statePost.at(0); + Point improvedPt = calcPoint(center, R, improvedAngle); + // plot points - #define drawCross( center, color, d ) \ - line( img, Point( center.x - d, center.y - d ), \ - Point( center.x + d, center.y + d ), color, 1, LINE_AA, 0); \ - line( img, Point( center.x + d, center.y - d ), \ - Point( center.x - d, center.y + d ), color, 1, LINE_AA, 0 ) - - img = Scalar::all(0); - drawCross( statePt, Scalar(255,255,255), 3 ); - drawCross( measPt, Scalar(0,0,255), 3 ); - drawCross( predictPt, Scalar(0,255,0), 3 ); - line( img, statePt, measPt, Scalar(0,0,255), 3, LINE_AA, 
0 ); - line( img, statePt, predictPt, Scalar(0,255,255), 3, LINE_AA, 0 ); - - if(theRNG().uniform(0,4) != 0) - KF.correct(measurement); + img = img * 0.2; + drawMarker(img, measPt, Scalar(0, 0, 255), cv::MARKER_SQUARE, 5, 2); + drawMarker(img, predictPt, Scalar(0, 255, 255), cv::MARKER_SQUARE, 5, 2); + drawMarker(img, improvedPt, Scalar(0, 255, 0), cv::MARKER_SQUARE, 5, 2); + drawMarker(img, statePt, Scalar(255, 255, 255), cv::MARKER_STAR, 10, 1); + // forecast one step + Mat test = Mat(KF.transitionMatrix*KF.statePost); + drawMarker(img, calcPoint(center, R, Mat(KF.transitionMatrix*KF.statePost).at(0)), + Scalar(255, 255, 0), cv::MARKER_SQUARE, 12, 1); + + line( img, statePt, measPt, Scalar(0,0,255), 1, LINE_AA, 0 ); + line( img, statePt, predictPt, Scalar(0,255,255), 1, LINE_AA, 0 ); + line( img, statePt, improvedPt, Scalar(0,255,0), 1, LINE_AA, 0 ); + randn( processNoise, Scalar(0), Scalar::all(sqrt(KF.processNoiseCov.at(0, 0)))); state = KF.transitionMatrix*state + processNoise; imshow( "Kalman", img ); - code = (char)waitKey(100); + code = (char)waitKey(1000); if( code > 0 ) break; diff --git a/samples/python/kalman.py b/samples/python/kalman.py index 654e3de3da0d..cf152a8700fd 100755 --- a/samples/python/kalman.py +++ b/samples/python/kalman.py @@ -1,14 +1,18 @@ #!/usr/bin/env python """ Tracking of rotating point. - Rotation speed is constant. + Point moves in a circle and is characterized by a 1D state. + state_k+1 = state_k + speed + process_noise N(0, 1e-5) + The speed is constant. Both state and measurements vectors are 1D (a point angle), - Measurement is the real point angle + gaussian noise. - The real and the estimated points are connected with yellow line segment, - the real and the measured points are connected with red line segment. + Measurement is the real state + gaussian noise N(0, 1e-1). 
+ The real and the measured points are connected with red line segment, + the real and the estimated points are connected with yellow line segment, + the real and the corrected estimated points are connected with green line segment. (if Kalman filter works correctly, - the yellow segment should be shorter than the red one). - Pressing any key (except ESC) will reset the tracking with a different speed. + the yellow segment should be shorter than the red one and + the green segment should be shorter than the yellow one). + Pressing any key (except ESC) will reset the tracking. Pressing ESC will stop the program. """ # Python 2/3 compatibility @@ -21,8 +25,7 @@ import numpy as np import cv2 as cv -from math import cos, sin, sqrt -import numpy as np +from math import cos, sin, sqrt, pi def main(): img_height = 500 @@ -30,64 +33,62 @@ def main(): kalman = cv.KalmanFilter(2, 1, 0) code = long(-1) - - cv.namedWindow("Kalman") - + num_circle_steps = 12 while True: - state = 0.1 * np.random.randn(2, 1) - - kalman.transitionMatrix = np.array([[1., 1.], [0., 1.]]) - kalman.measurementMatrix = 1. * np.ones((1, 2)) - kalman.processNoiseCov = 1e-5 * np.eye(2) - kalman.measurementNoiseCov = 1e-1 * np.ones((1, 1)) - kalman.errorCovPost = 1. * np.ones((2, 2)) - kalman.statePost = 0.1 * np.random.randn(2, 1) + img = np.zeros((img_height, img_width, 3), np.uint8) + state = np.array([[0.0],[(2 * pi) / num_circle_steps]]) # start state + kalman.transitionMatrix = np.array([[1., 1.], [0., 1.]]) # F. input + kalman.measurementMatrix = 1. * np.eye(1, 2) # H. input + kalman.processNoiseCov = 1e-5 * np.eye(2) # Q. input + kalman.measurementNoiseCov = 1e-1 * np.ones((1, 1)) # R. input + kalman.errorCovPost = 1. 
* np.eye(2, 2) # P._k|k KF state var + kalman.statePost = 0.1 * np.random.randn(2, 1) # x^_k|k KF state var while True: def calc_point(angle): - return (np.around(img_width/2 + img_width/3*cos(angle), 0).astype(int), - np.around(img_height/2 - img_width/3*sin(angle), 1).astype(int)) - + return (np.around(img_width / 2. + img_width / 3.0 * cos(angle), 0).astype(int), + np.around(img_height / 2. - img_width / 3.0 * sin(angle), 1).astype(int)) + img = img * 1e-3 state_angle = state[0, 0] state_pt = calc_point(state_angle) - + # advance Kalman filter to next timestep + # updates statePre, statePost, errorCovPre, errorCovPost + # k-> k+1, x'(k) = A*x(k) + # P'(k) = temp1*At + Q prediction = kalman.predict() - predict_angle = prediction[0, 0] - predict_pt = calc_point(predict_angle) - - measurement = kalman.measurementNoiseCov * np.random.randn(1, 1) + predict_pt = calc_point(prediction[0, 0]) # equivalent to calc_point(kalman.statePre[0,0]) # generate measurement + measurement = kalman.measurementNoiseCov * np.random.randn(1, 1) measurement = np.dot(kalman.measurementMatrix, state) + measurement measurement_angle = measurement[0, 0] measurement_pt = calc_point(measurement_angle) - # plot points - def draw_cross(center, color, d): - cv.line(img, - (center[0] - d, center[1] - d), (center[0] + d, center[1] + d), - color, 1, cv.LINE_AA, 0) - cv.line(img, - (center[0] + d, center[1] - d), (center[0] - d, center[1] + d), - color, 1, cv.LINE_AA, 0) - - img = np.zeros((img_height, img_width, 3), np.uint8) - draw_cross(np.int32(state_pt), (255, 255, 255), 3) - draw_cross(np.int32(measurement_pt), (0, 0, 255), 3) - draw_cross(np.int32(predict_pt), (0, 255, 0), 3) - - cv.line(img, state_pt, measurement_pt, (0, 0, 255), 3, cv.LINE_AA, 0) - cv.line(img, state_pt, predict_pt, (0, 255, 255), 3, cv.LINE_AA, 0) - + # correct the state estimates based on measurements + # updates statePost & errorCovPost kalman.correct(measurement) + improved_pt = calc_point(kalman.statePost[0, 0]) - 
process_noise = sqrt(kalman.processNoiseCov[0,0]) * np.random.randn(2, 1) - state = np.dot(kalman.transitionMatrix, state) + process_noise + # plot points + cv.drawMarker(img, measurement_pt, (0, 0, 255), cv.MARKER_SQUARE, 5, 2) + cv.drawMarker(img, predict_pt, (0, 255, 255), cv.MARKER_SQUARE, 5, 2) + cv.drawMarker(img, improved_pt, (0, 255, 0), cv.MARKER_SQUARE, 5, 2) + cv.drawMarker(img, state_pt, (255, 255, 255), cv.MARKER_STAR, 10, 1) + # forecast one step + cv.drawMarker(img, calc_point(np.dot(kalman.transitionMatrix, kalman.statePost)[0, 0]), + (255, 255, 0), cv.MARKER_SQUARE, 12, 1) + + cv.line(img, state_pt, measurement_pt, (0, 0, 255), 1, cv.LINE_AA, 0) # red measurement error + cv.line(img, state_pt, predict_pt, (0, 255, 255), 1, cv.LINE_AA, 0) # yellow pre-meas error + cv.line(img, state_pt, improved_pt, (0, 255, 0), 1, cv.LINE_AA, 0) # green post-meas error + + # update the real process + process_noise = sqrt(kalman.processNoiseCov[0, 0]) * np.random.randn(2, 1) + state = np.dot(kalman.transitionMatrix, state) + process_noise # x_k+1 = F x_k + w_k cv.imshow("Kalman", img) - - code = cv.waitKey(100) + code = cv.waitKey(1000) if code != -1: break From 6801dd043de53030a6f8e36893580951a20dd842 Mon Sep 17 00:00:00 2001 From: rogday Date: Fri, 20 Aug 2021 17:43:47 +0300 Subject: [PATCH 127/128] Merge pull request #20494 from rogday:onnx_diagnostic_fix fix ONNXImporter diagnostic mode layer registration issue * fix layer registration, thread unsafe access and align the behavior of DNN_DIAGNOSTICS_RUN between onnx and tf importers * move skipModelInput * print all missing layers * address TF issue --- apps/model-diagnostics/model_diagnostics.cpp | 2 + .../include/opencv2/dnn/layer_reg.private.hpp | 8 +- .../include/opencv2/dnn/utils/debug_utils.hpp | 24 ++ modules/dnn/src/debug_utils.cpp | 91 ++++++ modules/dnn/src/dnn.cpp | 18 +- modules/dnn/src/dnn_common.hpp | 43 +++ modules/dnn/src/onnx/onnx_importer.cpp | 308 ++++++------------ 
modules/dnn/src/tensorflow/tf_importer.cpp | 97 +++--- modules/dnn/test/test_tf_importer.cpp | 3 + 9 files changed, 321 insertions(+), 273 deletions(-) create mode 100644 modules/dnn/include/opencv2/dnn/utils/debug_utils.hpp create mode 100644 modules/dnn/src/debug_utils.cpp diff --git a/apps/model-diagnostics/model_diagnostics.cpp b/apps/model-diagnostics/model_diagnostics.cpp index d3934577aec6..6970c8507108 100644 --- a/apps/model-diagnostics/model_diagnostics.cpp +++ b/apps/model-diagnostics/model_diagnostics.cpp @@ -4,6 +4,7 @@ **************************************************/ #include #include +#include #include @@ -57,6 +58,7 @@ int main( int argc, const char** argv ) CV_Assert(!model.empty()); enableModelDiagnostics(true); + skipModelImport(true); redirectError(diagnosticsErrorCallback, NULL); Net ocvNet = readNet(model, config, frameworkId); diff --git a/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp b/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp index 46a58f09bc8b..e944644f8f21 100644 --- a/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp +++ b/modules/dnn/include/opencv2/dnn/layer_reg.private.hpp @@ -12,10 +12,16 @@ CV__DNN_INLINE_NS_BEGIN //! @addtogroup dnn //! @{ -//! Register layer types of DNN model. typedef std::map > LayerFactory_Impl; + +//! Register layer types of DNN model. +//! +//! @note In order to thread-safely access the factory, see getLayerFactoryMutex() function. LayerFactory_Impl& getLayerFactoryImpl(); +//! Get the mutex guarding @ref LayerFactory_Impl, see getLayerFactoryImpl() function. +Mutex& getLayerFactoryMutex(); + //! @} CV__DNN_INLINE_NS_END } diff --git a/modules/dnn/include/opencv2/dnn/utils/debug_utils.hpp b/modules/dnn/include/opencv2/dnn/utils/debug_utils.hpp new file mode 100644 index 000000000000..71dd3ab8d670 --- /dev/null +++ b/modules/dnn/include/opencv2/dnn/utils/debug_utils.hpp @@ -0,0 +1,24 @@ +// This file is part of OpenCV project. 
+// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#ifndef OPENCV_DNN_UTILS_DEBUG_UTILS_HPP +#define OPENCV_DNN_UTILS_DEBUG_UTILS_HPP + +#include "../dnn.hpp" + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +/** + * @brief Skip model import after diagnostic run in readNet() functions. + * @param[in] skip Indicates whether to skip the import. + * + * This is an internal OpenCV function not intended for users. + */ +CV_EXPORTS void skipModelImport(bool skip); + +CV__DNN_INLINE_NS_END +}} // namespace + +#endif // OPENCV_DNN_UTILS_DEBUG_UTILS_HPP diff --git a/modules/dnn/src/debug_utils.cpp b/modules/dnn/src/debug_utils.cpp new file mode 100644 index 000000000000..d951205bd876 --- /dev/null +++ b/modules/dnn/src/debug_utils.cpp @@ -0,0 +1,91 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. + +#include "precomp.hpp" + +#include + +#include +#include +#include + +namespace cv { namespace dnn { +CV__DNN_INLINE_NS_BEGIN + +bool DNN_DIAGNOSTICS_RUN = false; +bool DNN_SKIP_REAL_IMPORT = false; + +void enableModelDiagnostics(bool isDiagnosticsMode) +{ + DNN_DIAGNOSTICS_RUN = isDiagnosticsMode; + + if (DNN_DIAGNOSTICS_RUN) + { + detail::NotImplemented::Register(); + } + else + { + detail::NotImplemented::unRegister(); + } +} + +void skipModelImport(bool skip) +{ + DNN_SKIP_REAL_IMPORT = skip; +} + +void detail::LayerHandler::addMissing(const std::string& name, const std::string& type) +{ + cv::AutoLock lock(getLayerFactoryMutex()); + auto& registeredLayers = getLayerFactoryImpl(); + + // If we didn't add it, but can create it, it's custom and not missing. 
+ if (layers.find(type) == layers.end() && registeredLayers.find(type) != registeredLayers.end()) + { + return; + } + + layers[type].insert(name); +} + +bool detail::LayerHandler::contains(const std::string& type) const +{ + return layers.find(type) != layers.end(); +} + +void detail::LayerHandler::printMissing() +{ + if (layers.empty()) + { + return; + } + + std::stringstream ss; + ss << "DNN: Not supported types:\n"; + for (const auto& type_names : layers) + { + const auto& type = type_names.first; + ss << "Type='" << type << "', affected nodes:\n["; + for (const auto& name : type_names.second) + { + ss << "'" << name << "', "; + } + ss.seekp(-2, std::ios_base::end); + ss << "]\n"; + } + CV_LOG_ERROR(NULL, ss.str()); +} + +LayerParams detail::LayerHandler::getNotImplementedParams(const std::string& name, const std::string& op) +{ + LayerParams lp; + lp.name = name; + lp.type = "NotImplemented"; + lp.set("type", op); + + return lp; +} + +CV__DNN_INLINE_NS_END +}} // namespace diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 492ad166d038..4e38b0374f00 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -94,22 +94,6 @@ static bool DNN_CHECK_NAN_INF = utils::getConfigurationParameterBool("OPENCV_DNN static bool DNN_CHECK_NAN_INF_DUMP = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_DUMP", false); static bool DNN_CHECK_NAN_INF_RAISE_ERROR = utils::getConfigurationParameterBool("OPENCV_DNN_CHECK_NAN_INF_RAISE_ERROR", false); -bool DNN_DIAGNOSTICS_RUN = false; - -void enableModelDiagnostics(bool isDiagnosticsMode) -{ - DNN_DIAGNOSTICS_RUN = isDiagnosticsMode; - - if (DNN_DIAGNOSTICS_RUN) - { - detail::NotImplemented::Register(); - } - else - { - detail::NotImplemented::unRegister(); - } -} - using std::vector; using std::map; using std::make_pair; @@ -5662,7 +5646,7 @@ bool Layer::updateMemoryShapes(const std::vector &inputs) } ////////////////////////////////////////////////////////////////////////// -static Mutex& 
getLayerFactoryMutex() +Mutex& getLayerFactoryMutex() { static Mutex* volatile instance = NULL; if (instance == NULL) diff --git a/modules/dnn/src/dnn_common.hpp b/modules/dnn/src/dnn_common.hpp index 591be88079f3..3c68322e098c 100644 --- a/modules/dnn/src/dnn_common.hpp +++ b/modules/dnn/src/dnn_common.hpp @@ -5,6 +5,9 @@ #ifndef __OPENCV_DNN_COMMON_HPP__ #define __OPENCV_DNN_COMMON_HPP__ +#include +#include + #include namespace cv { namespace dnn { @@ -13,6 +16,9 @@ CV__DNN_INLINE_NS_BEGIN Mutex& getInitializationMutex(); void initializeLayerFactory(); +extern bool DNN_DIAGNOSTICS_RUN; +extern bool DNN_SKIP_REAL_IMPORT; + namespace detail { #define CALL_MEMBER_FN(object, ptrToMemFn) ((object).*(ptrToMemFn)) @@ -25,6 +31,43 @@ class NotImplemented : public Layer static void unRegister(); }; +template +Net readNet(Args&& ... args) +{ + Net net; + Importer importer(net, std::forward(args)...); + return net; +} + +template +Net readNetDiagnostic(Args&& ... args) +{ + Net maybeDebugNet = readNet(std::forward(args)...); + if (DNN_DIAGNOSTICS_RUN && !DNN_SKIP_REAL_IMPORT) + { + // if we just imported the net in diagnostic mode, disable it and import again + enableModelDiagnostics(false); + Net releaseNet = readNet(std::forward(args)...); + enableModelDiagnostics(true); + return releaseNet; + } + return maybeDebugNet; +} + +class LayerHandler +{ +public: + void addMissing(const std::string& name, const std::string& type); + bool contains(const std::string& type) const; + void printMissing(); + +protected: + LayerParams getNotImplementedParams(const std::string& name, const std::string& op); + +private: + std::unordered_map> layers; +}; + struct NetImplBase { const int networkId; // network global identifier diff --git a/modules/dnn/src/onnx/onnx_importer.cpp b/modules/dnn/src/onnx/onnx_importer.cpp index 3379ea3a0bb6..8fc179037cde 100644 --- a/modules/dnn/src/onnx/onnx_importer.cpp +++ b/modules/dnn/src/onnx/onnx_importer.cpp @@ -41,6 +41,8 @@ CV__DNN_INLINE_NS_BEGIN 
extern bool DNN_DIAGNOSTICS_RUN; +class ONNXLayerHandler; + class ONNXImporter { opencv_onnx::ModelProto model_proto; @@ -61,60 +63,16 @@ class ONNXImporter void addConstant(const std::string& name, const Mat& blob); void addLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); - static const std::set& getSupportedTypes(); public: - - ONNXImporter(Net& net, const char *onnxFile) - : dstNet(net), utilNet(), dispatch(buildDispatchMap()) - { - hasDynamicShapes = false; - CV_Assert(onnxFile); - CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile); - - std::fstream input(onnxFile, std::ios::in | std::ios::binary); - if (!input) - { - CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile)); - } - - if (!model_proto.ParseFromIstream(&input)) - { - CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile)); - } - - populateNet(); - } - - ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) - : dstNet(net), utilNet(), dispatch(buildDispatchMap()) - { - hasDynamicShapes = false; - CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); - - struct _Buf : public std::streambuf - { - _Buf(const char* buffer, size_t sizeBuffer) - { - char* p = const_cast(buffer); - setg(p, p, p + sizeBuffer); - } - }; - - _Buf buf(buffer, sizeBuffer); - std::istream input(&buf); - - if (!model_proto.ParseFromIstream(&input)) - CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array."); - - populateNet(); - } + ONNXImporter(Net& net, const char *onnxFile); + ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer); void populateNet(); protected: + std::unique_ptr layerHandler; Net& dstNet; - Net utilNet; opencv_onnx::GraphProto graph_proto; std::string framework_name; @@ -131,9 +89,13 @@ class ONNXImporter void handleNode(const opencv_onnx::NodeProto& node_proto); private: + friend class ONNXLayerHandler; 
typedef void (ONNXImporter::*ONNXImporterNodeParser)(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); typedef std::map DispatchMap; + const DispatchMap dispatch; + static const DispatchMap buildDispatchMap(); + void parseMaxPool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseAveragePool (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseReduce (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); @@ -178,12 +140,84 @@ class ONNXImporter void parseSoftMax (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseDetectionOutput (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); void parseCumSum (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); - void parseCustom (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); - const DispatchMap dispatch; - static const DispatchMap buildDispatchMap(); + void parseCustomLayer (LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto); +}; + +class ONNXLayerHandler : public detail::LayerHandler +{ +public: + explicit ONNXLayerHandler(ONNXImporter* importer_); + + void fillRegistry(const opencv_onnx::GraphProto& net); + +protected: + ONNXImporter* importer; }; +ONNXLayerHandler::ONNXLayerHandler(ONNXImporter* importer_) : importer(importer_){} + +void ONNXLayerHandler::fillRegistry(const opencv_onnx::GraphProto &net) +{ + int layersSize = net.node_size(); + for (int li = 0; li < layersSize; li++) { + const opencv_onnx::NodeProto &node_proto = net.node(li); + const std::string& name = node_proto.output(0); + const std::string& type = node_proto.op_type(); + if (importer->dispatch.find(type) == importer->dispatch.end()) + { + addMissing(name, type); + } + } + printMissing(); +} + +ONNXImporter::ONNXImporter(Net& net, const char *onnxFile) + : layerHandler(DNN_DIAGNOSTICS_RUN ? 
new ONNXLayerHandler(this) : nullptr), + dstNet(net), dispatch(buildDispatchMap()) +{ + hasDynamicShapes = false; + CV_Assert(onnxFile); + CV_LOG_DEBUG(NULL, "DNN/ONNX: processing ONNX model from file: " << onnxFile); + + std::fstream input(onnxFile, std::ios::in | std::ios::binary); + if (!input) + { + CV_Error(Error::StsBadArg, cv::format("Can't read ONNX file: %s", onnxFile)); + } + + if (!model_proto.ParseFromIstream(&input)) + { + CV_Error(Error::StsUnsupportedFormat, cv::format("Failed to parse ONNX model: %s", onnxFile)); + } + + populateNet(); +} + +ONNXImporter::ONNXImporter(Net& net, const char* buffer, size_t sizeBuffer) + : layerHandler(DNN_DIAGNOSTICS_RUN ? new ONNXLayerHandler(this) : nullptr), dstNet(net), dispatch(buildDispatchMap()) +{ + hasDynamicShapes = false; + CV_LOG_DEBUG(NULL, "DNN/ONNX: processing in-memory ONNX model (" << sizeBuffer << " bytes)"); + + struct _Buf : public std::streambuf + { + _Buf(const char* buffer, size_t sizeBuffer) + { + char* p = const_cast(buffer); + setg(p, p, p + sizeBuffer); + } + }; + + _Buf buf(buffer, sizeBuffer); + std::istream input(&buf); + + if (!model_proto.ParseFromIstream(&input)) + CV_Error(Error::StsUnsupportedFormat, "Failed to parse onnx model from in-memory byte array."); + + populateNet(); +} + inline void replaceLayerParam(LayerParams& layerParams, const String& oldKey, const String& newKey) { if (layerParams.has(oldKey)) { @@ -422,11 +456,7 @@ Mat ONNXImporter::getBlob(const std::string& input_name) void ONNXImporter::addLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { - int id; - if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(layerParams.name, layerParams.type, layerParams); - else - id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); + int id = dstNet.addLayer(layerParams.name, layerParams.type, layerParams); for (int i = 0; i < node_proto.output_size(); ++i) { layer_id.insert(std::make_pair(node_proto.output(i), LayerInfo(id, i))); @@ -439,10 
+469,7 @@ void ONNXImporter::addLayer(LayerParams& layerParams, const std::string& input_name = node_proto.input(j); IterLayerId_t layerId = layer_id.find(input_name); if (layerId != layer_id.end()) { - if (DNN_DIAGNOSTICS_RUN) - utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); - else - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, inpNum); ++inpNum; // Collect input shapes. IterShape_t shapeIt = outShapes.find(input_name); @@ -451,11 +478,7 @@ void ONNXImporter::addLayer(LayerParams& layerParams, } } // Compute shape of output blob for this layer. - Ptr layer; - if (DNN_DIAGNOSTICS_RUN) - layer = utilNet.getLayer(id); - else - layer = dstNet.getLayer(id); // FIXIT: avoid instantiation of layers during the import stage + Ptr layer = dstNet.getLayer(id); // FIXIT: avoid instantiation of layers during the import stage layer->getMemoryShapes(layerInpShapes, 0, layerOutShapes, layerInternalShapes); for (int i = 0; i < node_proto.output_size() && i < (int)layerOutShapes.size(); ++i) { @@ -532,35 +555,11 @@ void ONNXImporter::populateNet() layer_id.insert(std::make_pair(name, LayerInfo(0, netInputs.size() - 1))); } } - utilNet.setInputsNames(netInputs); dstNet.setInputsNames(netInputs); if (DNN_DIAGNOSTICS_RUN) { - auto &supportedTypes = getSupportedTypes(); - for (int li = 0; li < layersSize; li++) { - const opencv_onnx::NodeProto &node_proto = graph_proto.node(li); - std::string name = node_proto.output(0); - std::string layer_type = node_proto.op_type(); - auto registered = supportedTypes.find(layer_type); - if (registered == supportedTypes.end()) { - CV_LOG_ERROR(NULL, "DNN/ONNX: NOTE: Potential problem with creating node " << name<< " with type " << layer_type << ".\n Type " - << layer_type << " IS NOT SUPPORTED!\n" - ); - } - } - auto oldConstBlobs = constBlobs; - auto oldOutShapes = outShapes; - auto oldLayerId = layer_id; 
CV_LOG_INFO(NULL, "DNN/ONNX: start diagnostic run!"); - for (int li = 0; li < layersSize; li++) { - const opencv_onnx::NodeProto &node_proto = graph_proto.node(li); - handleNode(node_proto); - } - CV_LOG_INFO(NULL, "DNN/ONNX: diagnostic run completed!"); - constBlobs = oldConstBlobs; - outShapes = oldOutShapes; - layer_id = oldLayerId; - enableModelDiagnostics(false); + layerHandler->fillRegistry(graph_proto); } for(int li = 0; li < layersSize; li++) @@ -569,83 +568,7 @@ void ONNXImporter::populateNet() handleNode(node_proto); } - CV_LOG_DEBUG(NULL, "DNN/ONNX: import completed!"); -} - -const std::set& ONNXImporter::getSupportedTypes() -{ - static const std::set layerTypes = { - "MaxPool", - "AveragePool", - "GlobalAveragePool", - "GlobalMaxPool", - "ReduceMean", - "ReduceSum", - "ReduceMax", - "Slice", - "Split", - "Add", - "Sum", - "Sub", - "Pow", - "Max", - "Neg", - "Constant", - "LSTM", - "GRU", - "ImageScaler", - "Clip", - "LeakyRelu", - "Relu", - "Elu", - "Tanh", - "PRelu", - "LRN", - "InstanceNormalization", - "BatchNormalization", - "Gemm", - "MatMul", - "Mul", - "Div", - "Conv", - "ConvTranspose", - "Transpose", - "Squeeze", - "Flatten", - "Unsqueeze", - "Expand", - "Reshape", - "Pad", - "Shape", - "Cast", - "ConstantOfShape", - "ConstantFill", - "Gather", - "Concat", - "Resize", - "Upsample", - "SoftMax", - "Softmax", - "LogSoftmax", - "DetectionOutput", - "Interp", - "CropAndResize", - "ROIPooling", - "PSROIPooling", - "ChannelsPReLU", - "Sigmoid", - "Swish", - "Mish", - "AbsVal", - "BNLL", - "MaxUnpool", - "Dropout", - "Identity", - "Crop", - "Normalize", - "CumSum" - }; - return layerTypes; + CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN ? "DNN/ONNX: diagnostic run completed!" 
: "DNN/ONNX: import completed!")); } void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) @@ -673,7 +596,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) } else { - parseCustom(layerParams, node_proto); + parseCustomLayer(layerParams, node_proto); } } catch (const cv::Exception& e) @@ -683,6 +606,7 @@ void ONNXImporter::handleNode(const opencv_onnx::NodeProto& node_proto) CV_LOG_ERROR(NULL, "DNN/ONNX: Potential problem during processing node with " << node_proto.input_size() << " inputs and " << node_proto.output_size() << " outputs: " << cv::format("[%s]:(%s)", layer_type.c_str(), name.c_str()) << "\n" << e.msg ); + cv::AutoLock lock(getLayerFactoryMutex()); auto registeredLayers = getLayerFactoryImpl(); if (registeredLayers.find(layerParams.type) != registeredLayers.end()) { @@ -1068,11 +992,7 @@ void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodePr constParams.name = layerParams.name + "/const"; constParams.type = "Const"; constParams.blobs.push_back((isSub ? 
-1 : 1) * blob); - int id; - if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(constParams.name, constParams.type, constParams); - else - id = dstNet.addLayer(constParams.name, constParams.type, constParams); + int id = dstNet.addLayer(constParams.name, constParams.type, constParams); layer_id.insert(std::make_pair(constParams.name, LayerInfo(id, 0))); outShapes[constParams.name] = shape(blob); @@ -1117,19 +1037,12 @@ void ONNXImporter::parseBias(LayerParams& layerParams, const opencv_onnx::NodePr powerParams.type = "Power"; powerParams.set("scale", -1); - int id; //Create Power layer - if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(powerParams.name, powerParams.type, powerParams); - else - id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); + int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); //Connect to input IterLayerId_t layerId = layer_id.find(node_proto.input(1)); CV_Assert(layerId != layer_id.end()); - if (DNN_DIAGNOSTICS_RUN) - utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); - else - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); outShapes[powerParams.name] = outShapes[node_proto.input(1)]; @@ -1404,18 +1317,11 @@ void ONNXImporter::parseInstanceNormalization(LayerParams& layerParams, const op layerParams.erase("epsilon"); //Create MVN layer - int id; - if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); - else - id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); + int id = dstNet.addLayer(mvnParams.name, mvnParams.type, mvnParams); //Connect to input IterLayerId_t layerId = layer_id.find(node_proto.input(0)); CV_Assert(layerId != layer_id.end()); - if (DNN_DIAGNOSTICS_RUN) - utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); - 
else - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape layer_id.insert(std::make_pair(mvnParams.name, LayerInfo(id, 0))); outShapes[mvnParams.name] = outShapes[node_proto.input(0)]; @@ -1621,19 +1527,12 @@ void ONNXImporter::parseMul(LayerParams& layerParams, const opencv_onnx::NodePro powerParams.type = "Power"; powerParams.set("power", -1); - int id; //Create Power layer - if (DNN_DIAGNOSTICS_RUN) - id = utilNet.addLayer(powerParams.name, powerParams.type, powerParams); - else - id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); + int id = dstNet.addLayer(powerParams.name, powerParams.type, powerParams); //Connect to input IterLayerId_t layerId = layer_id.find(node_proto.input(1)); CV_Assert(layerId != layer_id.end()); - if (DNN_DIAGNOSTICS_RUN) - utilNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); - else - dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); + dstNet.connect(layerId->second.layerId, layerId->second.outputId, id, 0); //Add shape layer_id.insert(std::make_pair(powerParams.name, LayerInfo(id, 0))); outShapes[powerParams.name] = outShapes[node_proto.input(1)]; @@ -2418,7 +2317,7 @@ void ONNXImporter::parseCumSum(LayerParams& layerParams, const opencv_onnx::Node addLayer(layerParams, node_proto); } -void ONNXImporter::parseCustom(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) +void ONNXImporter::parseCustomLayer(LayerParams& layerParams, const opencv_onnx::NodeProto& node_proto) { for (int j = 0; j < node_proto.input_size(); j++) { if (layer_id.find(node_proto.input(j)) == layer_id.end()) @@ -2476,23 +2375,18 @@ const ONNXImporter::DispatchMap ONNXImporter::buildDispatchMap() dispatch["SoftMax"] = dispatch["LogSoftmax"] = &ONNXImporter::parseSoftMax; dispatch["DetectionOutput"] = &ONNXImporter::parseDetectionOutput; dispatch["CumSum"] = 
&ONNXImporter::parseCumSum; - dispatch["Custom"] = &ONNXImporter::parseCustom; return dispatch; } Net readNetFromONNX(const String& onnxFile) { - Net net; - ONNXImporter onnxImporter(net, onnxFile.c_str()); - return net; + return detail::readNetDiagnostic(onnxFile.c_str()); } Net readNetFromONNX(const char* buffer, size_t sizeBuffer) { - Net net; - ONNXImporter onnxImporter(net, buffer, sizeBuffer); - return net; + return detail::readNetDiagnostic(buffer, sizeBuffer); } Net readNetFromONNX(const std::vector& buffer) diff --git a/modules/dnn/src/tensorflow/tf_importer.cpp b/modules/dnn/src/tensorflow/tf_importer.cpp index de7ec3dfccb1..f87988d0a117 100644 --- a/modules/dnn/src/tensorflow/tf_importer.cpp +++ b/modules/dnn/src/tensorflow/tf_importer.cpp @@ -507,7 +507,7 @@ void ExcludeLayer(tensorflow::GraphDef& net, const int layer_index, const int in net.mutable_node()->DeleteSubrange(layer_index, 1); } -class LayerHandler; +class TFLayerHandler; class TFImporter { @@ -516,8 +516,7 @@ class TFImporter TFImporter(Net& net, const char *dataModel, size_t lenModel, const char *dataConfig = NULL, size_t lenConfig = 0); protected: - std::unique_ptr layerHandler; - std::unique_ptr utilNet; + std::unique_ptr layerHandler; Net& dstNet; void populateNet(); @@ -559,7 +558,7 @@ class TFImporter void addPermuteLayer(const int* order, const std::string& permName, Pin& inpId); void setPadding(LayerParams &layerParams, const tensorflow::NodeDef &layer, std::string& inputName, float value = 0.); - friend class LayerHandler; + friend class TFLayerHandler; typedef void (TFImporter::*TFImporterNodeParser)(tensorflow::GraphDef&, const tensorflow::NodeDef&, LayerParams&); typedef std::map DispatchMap; @@ -625,18 +624,17 @@ void TFImporter::setPadding(LayerParams &layerParams, const tensorflow::NodeDef layerParams.set("pad_mode", "VALID"); } -class LayerHandler +class TFLayerHandler : public detail::LayerHandler { public: - LayerHandler(TFImporter* importer_); - ~LayerHandler() = default; 
+ explicit TFLayerHandler(TFImporter* importer_); - bool handleMissing(const opencv_tensorflow::NodeDef& layer); - void handleFailed(const opencv_tensorflow::NodeDef& layer); + void fillRegistry(const tensorflow::GraphDef& net); + bool handleMissing(const tensorflow::NodeDef& layer); + void handleFailed(const tensorflow::NodeDef& layer); -private: +protected: TFImporter* importer; - std::set layers; }; const TFImporter::DispatchMap TFImporter::buildDispatchMap() @@ -2471,9 +2469,8 @@ void TFImporter::parseCustomLayer(tensorflow::GraphDef& net, const tensorflow::N } TFImporter::TFImporter(Net& net, const char *model, const char *config) - : layerHandler(DNN_DIAGNOSTICS_RUN ? new LayerHandler(this) : nullptr), - utilNet(DNN_DIAGNOSTICS_RUN ? new Net : nullptr), - dstNet(DNN_DIAGNOSTICS_RUN ? *utilNet : net), dispatch(buildDispatchMap()) + : layerHandler(DNN_DIAGNOSTICS_RUN ? new TFLayerHandler(this) : nullptr), + dstNet(net), dispatch(buildDispatchMap()) { if (model && model[0]) { @@ -2494,9 +2491,8 @@ TFImporter::TFImporter( const char *dataModel, size_t lenModel, const char *dataConfig, size_t lenConfig ) - : layerHandler(DNN_DIAGNOSTICS_RUN ? new LayerHandler(this) : nullptr), - utilNet(DNN_DIAGNOSTICS_RUN ? new Net : nullptr), - dstNet(DNN_DIAGNOSTICS_RUN ? *utilNet : net), dispatch(buildDispatchMap()) + : layerHandler(DNN_DIAGNOSTICS_RUN ? 
new TFLayerHandler(this) : nullptr), + dstNet(net), dispatch(buildDispatchMap()) { if (dataModel != NULL && lenModel > 0) { @@ -2855,6 +2851,11 @@ void TFImporter::populateNet() addConstNodes(netBin, value_id, layers_to_ignore); addConstNodes(netTxt, value_id, layers_to_ignore); + if (DNN_DIAGNOSTICS_RUN) { + CV_LOG_INFO(NULL, "DNN/TF: start diagnostic run!"); + layerHandler->fillRegistry(net); + } + for (int li = 0; li < layersSize; li++) { const tensorflow::NodeDef& layer = net.node(li); @@ -2873,7 +2874,7 @@ void TFImporter::populateNet() CV_Assert(!netInputsNames[i].empty()); } dstNet.setInputsNames(netInputsNames); - CV_LOG_DEBUG(NULL, "DNN/TF: ===================== Import completed ====================="); + CV_LOG_DEBUG(NULL, (DNN_DIAGNOSTICS_RUN? "DNN/TF: diagnostic run completed!" : "DNN/TF: import completed!")); } void TFImporter::addPermuteLayer(const int* order, const std::string& permName, Pin& inpId) @@ -2933,41 +2934,45 @@ void TFImporter::parseNode(const tensorflow::NodeDef& layer) } } -LayerHandler::LayerHandler(TFImporter* importer_) : importer(importer_) {} +TFLayerHandler::TFLayerHandler(TFImporter* importer_) : importer(importer_) {} -void LayerHandler::handleFailed(const opencv_tensorflow::NodeDef& layer) +void TFLayerHandler::fillRegistry(const tensorflow::GraphDef& net) { - LayerParams lp; - lp.name = layer.name(); - lp.type = "NotImplemented"; - lp.set("type", layer.op()); + for (int li = 0; li < net.node_size(); li++) { + const tensorflow::NodeDef& layer = net.node(li); - // the layer will be created or its params and type will be replaced - int id = importer->dstNet.addLayer(lp.name, "NotImplemented", lp); - if (id != -1) // internal layer failure before the call to addLayer() - { - importer->layer_id[lp.name] = id; + const std::string& name = layer.name(); + const std::string& type = layer.op(); + if (importer->dispatch.find(type) == importer->dispatch.end()) + { + addMissing(name, type); + } } -} + printMissing(); +}; -bool 
LayerHandler::handleMissing(const opencv_tensorflow::NodeDef& layer) +bool TFLayerHandler::handleMissing(const tensorflow::NodeDef& layer) { - LayerParams lp; - // If we didn't add it, but can create it, it's custom and not missing. - if (layers.find(layer.op()) == layers.end() && LayerFactory::createLayerInstance(layer.op(), lp)) - { - return false; - } + bool unsupported = contains(layer.op()); - if (layers.insert(layer.op()).second) + if (unsupported) { - CV_LOG_ERROR(NULL, "DNN/TF: Node='" << layer.name() << "' of type='"<< layer.op() - << "' is not supported. This error won't be displayed again."); + handleFailed(layer); } - handleFailed(layer); + return unsupported; +} - return true; +void TFLayerHandler::handleFailed(const tensorflow::NodeDef& layer) +{ + LayerParams lp = getNotImplementedParams(layer.name(), layer.op()); + + // the layer will be created or its params and type will be replaced + int id = importer->dstNet.addLayer(lp.name, lp.type, lp); + if (id != -1) // internal layer failure before the call to addLayer() + { + importer->layer_id[lp.name] = id; + } } } // namespace @@ -2976,17 +2981,13 @@ bool LayerHandler::handleMissing(const opencv_tensorflow::NodeDef& layer) Net readNetFromTensorflow(const String &model, const String &config) { - Net net; - TFImporter importer(net, model.c_str(), config.c_str()); - return net; + return detail::readNetDiagnostic(model.c_str(), config.c_str()); } Net readNetFromTensorflow(const char* bufferModel, size_t lenModel, const char* bufferConfig, size_t lenConfig) { - Net net; - TFImporter importer(net, bufferModel, lenModel, bufferConfig, lenConfig); - return net; + return detail::readNetDiagnostic(bufferModel, lenModel, bufferConfig, lenConfig); } Net readNetFromTensorflow(const std::vector& bufferModel, const std::vector& bufferConfig) diff --git a/modules/dnn/test/test_tf_importer.cpp b/modules/dnn/test/test_tf_importer.cpp index 3f33f16774b2..3d53ced0a450 100644 --- a/modules/dnn/test/test_tf_importer.cpp +++ 
b/modules/dnn/test/test_tf_importer.cpp @@ -13,6 +13,7 @@ Test for Tensorflow models loading #include "npy_blob.hpp" #include // CV_DNN_REGISTER_LAYER_CLASS +#include namespace opencv_test { @@ -605,11 +606,13 @@ class Test_TensorFlow_diagnostics : public DNNTestLayer { Test_TensorFlow_diagnostics() { enableModelDiagnostics(true); + skipModelImport(true); } ~Test_TensorFlow_diagnostics() { enableModelDiagnostics(false); + skipModelImport(false); } void runFailingTensorFlowNet(const std::string& prefix, bool hasText = false) From f28e4b86fbc6bf361c2e6c7a36fd96c552a2aab7 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Sat, 21 Aug 2021 16:04:13 +0000 Subject: [PATCH 128/128] dnn(ocl): fix top initialization in verifyResult --- modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp index fd989193431d..ef7c380c1be5 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp @@ -1257,8 +1257,11 @@ bool OCL4DNNConvSpatial::verifyResult(const UMat &bottom, else if (config->tested) return false; - int32_t sz[4] = {numImages, num_output_, output_h_, output_w_}; - top.zeros(4, sz, (use_half_) ? CV_16SC1 : CV_32FC1); + //int32_t sz[4] = {numImages, num_output_, output_h_, output_w_}; + CV_CheckEQ(top.total(), (size_t)numImages * num_output_ * output_h_ * output_w_, ""); + CV_CheckTypeEQ(top.type(), (use_half_) ? CV_16SC1 : CV_32FC1, ""); + top.setTo(Scalar::all(0)); + bool saved_tuned = tuned_; tuned_ = false; convolve(bottom, top, weight, bias, numImages, config);