From 05e5644715c678773abaf180222a33959ee0dadb Mon Sep 17 00:00:00 2001 From: Isabella Gottardi Date: Fri, 16 Nov 2018 11:26:52 +0000 Subject: COMPMID-1463: SSD support: Create Detection layer Change-Id: I8b59b9b94cbd132e1ff5157a4c59882719e12e3b Reviewed-on: https://review.mlplatform.org/327 Reviewed-by: Anthony Barbier Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- arm_compute/core/Types.h | 128 +++++++++++++++++++++ arm_compute/runtime/CPP/CPPFunctions.h | 1 + .../CPP/functions/CPPDetectionOutputLayer.h | 100 ++++++++++++++++ 3 files changed, 229 insertions(+) create mode 100644 arm_compute/runtime/CPP/functions/CPPDetectionOutputLayer.h (limited to 'arm_compute') diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 6833a66cd9..75b38c5cb8 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -995,6 +995,134 @@ private: std::array _steps; }; +/** Available Detection Output code types */ +enum class DetectionOutputLayerCodeType +{ + CORNER, /**< Use box corners */ + CENTER_SIZE, /**< Use box centers and size */ + CORNER_SIZE, /**< Use box corners and size */ + TF_CENTER /**< Use box centers and size but flip x and y co-ordinates */ +}; + +/** Detection Output layer info */ +class DetectionOutputLayerInfo final +{ +public: + /** Default Constructor */ + DetectionOutputLayerInfo() + : _num_classes(), + _share_location(), + _code_type(DetectionOutputLayerCodeType::CORNER), + _keep_top_k(), + _nms_threshold(), + _top_k(), + _background_label_id(), + _confidence_threshold(), + _variance_encoded_in_target(false), + _eta(), + _num_loc_classes() + { + _num_loc_classes = _share_location ? 1 : _num_classes; + } + /** Constructor + * + * @param[in] num_classes Number of classes to be predicted. + * @param[in] share_location If true, bounding boxes are shared among different classes. + * @param[in] code_type Type of coding method for bbox. + * @param[in] keep_top_k Number of total bounding boxes to be kept per image after NMS step. 
+ * @param[in] nms_threshold Threshold to be used in NMS. + * @param[in] top_k (Optional) Number of boxes per image with top confidence scores that are fed into the NMS algorithm. Default set to -1. + * @param[in] background_label_id (Optional) Background label ID. If there is no background class, set it as -1. + * @param[in] confidence_threshold (Optional) Only consider detections whose confidences are larger than a threshold. Default set to -FLT_MAX. + * @param[in] variance_encoded_in_target (Optional) If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly. Default set to false. + * @param[in] eta (Optional) Eta. + */ + DetectionOutputLayerInfo(int num_classes, bool share_location, DetectionOutputLayerCodeType code_type, int keep_top_k, float nms_threshold, int top_k = -1, int background_label_id = -1, + float confidence_threshold = std::numeric_limits<float>::lowest(), bool variance_encoded_in_target = false, float eta = 1) + : _num_classes(num_classes), + _share_location(share_location), + _code_type(code_type), + _keep_top_k(keep_top_k), + _nms_threshold(nms_threshold), + _top_k(top_k), + _background_label_id(background_label_id), + _confidence_threshold(confidence_threshold), + _variance_encoded_in_target(variance_encoded_in_target), + _eta(eta), + _num_loc_classes() + { + _num_loc_classes = _share_location ? 1 : _num_classes; + } + /** Get num classes. */ + int num_classes() const + { + return _num_classes; + } + /** Get share location. */ + bool share_location() const + { + return _share_location; + } + /** Get detection output code type. */ + DetectionOutputLayerCodeType code_type() const + { + return _code_type; + } + /** Get if variance encoded in target. */ + bool variance_encoded_in_target() const + { + return _variance_encoded_in_target; + } + /** Get the number of total bounding boxes to be kept per image. */ + int keep_top_k() const + { + return _keep_top_k; + } + /** Get nms threshold. 
*/ + float nms_threshold() const + { + return _nms_threshold; + } + /** Get eta. */ + float eta() const + { + return _eta; + } + /** Get background label ID. */ + int background_label_id() const + { + return _background_label_id; + } + /** Get confidence threshold. */ + float confidence_threshold() const + { + return _confidence_threshold; + } + /** Get top K. */ + int top_k() const + { + return _top_k; + } + /** Get number of location classes. */ + int num_loc_classes() const + { + return _num_loc_classes; + } + +private: + int _num_classes; + bool _share_location; + DetectionOutputLayerCodeType _code_type; + int _keep_top_k; + float _nms_threshold; + int _top_k; + int _background_label_id; + float _confidence_threshold; + bool _variance_encoded_in_target; + float _eta; + int _num_loc_classes; +}; + /** Pooling Layer Information class */ class PoolingLayerInfo { diff --git a/arm_compute/runtime/CPP/CPPFunctions.h b/arm_compute/runtime/CPP/CPPFunctions.h index 9c17d9e404..63df437d11 100644 --- a/arm_compute/runtime/CPP/CPPFunctions.h +++ b/arm_compute/runtime/CPP/CPPFunctions.h @@ -26,6 +26,7 @@ /* Header regrouping all the CPP functions */ #include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" +#include "arm_compute/runtime/CPP/functions/CPPDetectionOutputLayer.h" #include "arm_compute/runtime/CPP/functions/CPPPermute.h" #include "arm_compute/runtime/CPP/functions/CPPUpsample.h" diff --git a/arm_compute/runtime/CPP/functions/CPPDetectionOutputLayer.h b/arm_compute/runtime/CPP/functions/CPPDetectionOutputLayer.h new file mode 100644 index 0000000000..7f80948c81 --- /dev/null +++ b/arm_compute/runtime/CPP/functions/CPPDetectionOutputLayer.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CPP_DETECTION_OUTPUT_LAYER_H__ +#define __ARM_COMPUTE_CPP_DETECTION_OUTPUT_LAYER_H__ + +#include "arm_compute/runtime/CPP/ICPPSimpleFunction.h" + +#include "arm_compute/core/Types.h" + +#include <map> + +namespace arm_compute +{ +class ITensor; + +// Normalized Bounding Box [xmin, ymin, xmax, ymax] +using NormalizedBBox = std::array<float, 4>; +// LabelBBox used for map label and bounding box +using LabelBBox = std::map<int, std::vector<NormalizedBBox>>; + +/** CPP Function to generate the detection output based on location and confidence + * predictions by doing non maximum suppression. + * + * @note Intended for use with MultiBox detection method. 
+ */ +class CPPDetectionOutputLayer : public IFunction +{ +public: + /** Default constructor */ + CPPDetectionOutputLayer(); + /** Configure the detection output layer CPP kernel + * + * @param[in] input_loc The mbox location input tensor of size [C1, N]. Data types supported: F32. + * @param[in] input_conf The mbox confidence input tensor of size [C2, N]. Data types supported: F32. + * @param[in] input_priorbox The mbox prior box input tensor of size [C3, 2, N]. Data types supported: F32. + * @param[out] output The output tensor of size [7, M]. Data types supported: Same as @p input_loc + * @param[in] info (Optional) DetectionOutputLayerInfo information. + * + * @note Output contains all the detections. Of those, only the ones selected by the valid region are valid. + */ + void configure(const ITensor *input_loc, const ITensor *input_conf, const ITensor *input_priorbox, ITensor *output, DetectionOutputLayerInfo info = DetectionOutputLayerInfo()); + /** Static function to check if given info will lead to a valid configuration of @ref CPPDetectionOutputLayer + * + * @param[in] input_loc The mbox location input tensor info. Data types supported: F32. + * @param[in] input_conf The mbox confidence input tensor info. Data types supported: F32. + * @param[in] input_priorbox The mbox prior box input tensor info. Data types supported: F32. + * @param[in] output The output tensor info. Data types supported: Same as @p input_loc + * @param[in] info (Optional) DetectionOutputLayerInfo information. 
+ * + * @return a status + */ + static Status validate(const ITensorInfo *input_loc, const ITensorInfo *input_conf, const ITensorInfo *input_priorbox, const ITensorInfo *output, + DetectionOutputLayerInfo info = DetectionOutputLayerInfo()); + // Inherited methods overridden: + void run() override; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPDetectionOutputLayer(const CPPDetectionOutputLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CPPDetectionOutputLayer &operator=(const CPPDetectionOutputLayer &) = delete; + +private: + const ITensor *_input_loc; + const ITensor *_input_conf; + const ITensor *_input_priorbox; + ITensor *_output; + DetectionOutputLayerInfo _info; + + int _num_priors; + int _num; + + std::vector<LabelBBox> _all_location_predictions; + std::vector<std::map<int, std::vector<float>>> _all_confidence_scores; + std::vector<NormalizedBBox> _all_prior_bboxes; + std::vector<std::array<float, 4>> _all_prior_variances; + std::vector<LabelBBox> _all_decode_bboxes; + std::vector<std::map<int, std::vector<int>>> _all_indices; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CPP_DETECTION_OUTPUT_LAYER_H__ */ -- cgit v1.2.1