From 58c71efe07031fc7ba82e61e2cdca8ae5ea13a8a Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 30 Sep 2019 15:03:21 +0100 Subject: COMPMID-2257: Add support for QASYMM8 in NEGenerateProposals Change-Id: I7d9aa21ecac97847fce209f97dff0dea6e62790a Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2020 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Comments-Addressed: Arm Jenkins --- .../NEON/kernels/NEGenerateProposalsLayerKernel.h | 7 +++- .../core/NEON/kernels/NEStridedSliceKernel.h | 4 +- .../CL/functions/CLGenerateProposalsLayer.h | 2 +- .../NEON/functions/NEGenerateProposalsLayer.h | 43 ++++++++++++++-------- 4 files changed, 36 insertions(+), 20 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h index a7b2603648..9ee9d5dd08 100644 --- a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h @@ -53,7 +53,7 @@ public: /** Set the input and output tensors. * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: F16/F32 + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo * @@ -62,7 +62,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: F16/F32 + * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo * @@ -74,6 +74,9 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: + template + void internal_run(const Window &window, const ThreadInfo &info); + const ITensor *_anchors; ITensor *_all_anchors; ComputeAnchorsInfo _anchors_info; diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h index a272a8118b..12075207b1 100644 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h @@ -58,7 +58,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/U32/S32/F16/F32 * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -76,7 +76,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/U32/S32/F16/F32 * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h index 827f19d130..e14e195ec6 100644 --- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h +++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h @@ -50,7 +50,7 @@ class ICLTensor; * -# @ref CLReshapeLayer x 2 * -# @ref CLBoundingBoxTransform * -# @ref CLPadLayerKernel - * -# @ref CLDequantizationLayerKernel + * -# @ref CLDequantizationLayerKernel x 2 * -# @ref CLQuantizationLayerKernel * And the following CPP functions: * -# @ref CPPBoxWithNonMaximaSuppressionLimit diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index c6d3628e37..cd370a03dd 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -23,15 +23,16 @@ */ #ifndef __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__ #define __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__ -#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" #include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" +#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" +#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" +#include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" @@ -46,9 +47,10 @@ class ITensor; * -# @ref NEComputeAllAnchors * -# @ref NEPermute x 2 * -# @ref NEReshapeLayer x 2 - * -# @ref NEStridedSlice x 3 * -# @ref NEBoundingBoxTransform * -# @ref NEPadLayerKernel + * -# @ref NEDequantizationLayerKernel x 2 + * -# @ref NEQuantizationLayerKernel * And the following CPP kernels: * -# @ref CPPBoxWithNonMaximaSuppressionLimit */ @@ -71,10 +73,12 @@ public: /** Set the input and output tensors. * - * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. Data types supported: F16/F32 + * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. + * Data types supported: QASYMM8/F16/F32 * @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores - * @param[in] anchors Anchors tensor of size (4, A). Data types supported: Same as @p input - * @param[out] proposals Box proposals output tensor of size (5, W*H*A). Data types supported: Same as @p input + * @param[in] anchors Anchors tensor of size (4, A). Data types supported: QSYMM16 with scale of 0.125 if @p scores is QASYMM8, otherwise same as @p scores + * @param[out] proposals Box proposals output tensor of size (5, W*H*A). + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p scores is QASYMM8, otherwise same as @p scores * @param[out] scores_out Box scores output tensor of size (W*H*A). Data types supported: Same as @p input * @param[out] num_valid_proposals Scalar output tensor which says which of the first proposals are valid. Data types supported: U32 * @param[in] info Contains GenerateProposals operation information described in @ref GenerateProposalsInfo @@ -87,12 +91,14 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEGenerateProposalsLayer * - * @param[in] scores Scores info from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. Data types supported: F16/F32 + * @param[in] scores Scores info from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. + * Data types supported: QASYMM8/F16/F32 * @param[in] deltas Bounding box deltas info from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores - * @param[in] anchors Anchors tensor info of size (4, A). Data types supported: Same as @p input - * @param[in] proposals Box proposals info output tensor of size (5, W*H*A). Data types supported: Data types supported: U32 + * @param[in] anchors Anchors tensor info of size (4, A). Data types supported: QSYMM16 with scale of 0.125 if @p scores is QASYMM8, otherwise same as @p scores + * @param[in] proposals Box proposals info output tensor of size (5, W*H*A). + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p scores is QASYMM8, otherwise same as @p scores * @param[in] scores_out Box scores output tensor info of size (W*H*A). Data types supported: Same as @p input - * @param[in] num_valid_proposals Scalar output tensor info which says which of the first proposals are valid. Data types supported: Same as @p input + * @param[in] num_valid_proposals Scalar output tensor info which says which of the first proposals are valid. Data types supported: U32 * @param[in] info Contains GenerateProposals operation information described in @ref GenerateProposalsInfo * * @return a Status @@ -116,29 +122,36 @@ private: NEComputeAllAnchorsKernel _compute_anchors_kernel; NEBoundingBoxTransformKernel _bounding_box_kernel; NEPadLayerKernel _pad_kernel; + NEDequantizationLayerKernel _dequantize_anchors; + NEDequantizationLayerKernel _dequantize_deltas; + NEQuantizationLayerKernel _quantize_all_proposals; - // CPP kernels - CPPBoxWithNonMaximaSuppressionLimitKernel _cpp_nms_kernel; + // CPP functions + CPPBoxWithNonMaximaSuppressionLimit _cpp_nms; bool _is_nhwc; + bool _is_qasymm8; // Temporary tensors Tensor _deltas_permuted; Tensor _deltas_flattened; + Tensor _deltas_flattened_f32; Tensor _scores_permuted; Tensor _scores_flattened; Tensor _all_anchors; + Tensor _all_anchors_f32; Tensor _all_proposals; + Tensor _all_proposals_quantized; Tensor _keeps_nms_unused; Tensor _classes_nms_unused; Tensor _proposals_4_roi_values; + // Temporary tensor pointers + Tensor *_all_proposals_to_use; + // Output tensor pointers ITensor *_num_valid_proposals; ITensor *_scores_out; - - /** Internal function to run the CPP BoxWithNMS kernel */ - void run_cpp_nms_kernel(); }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__ */ -- cgit v1.2.1