diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2019-09-30 15:03:21 +0100 |
---|---|---|
committer | Michele Di Giorgio <michele.digiorgio@arm.com> | 2019-10-02 09:10:12 +0000 |
commit | 58c71efe07031fc7ba82e61e2cdca8ae5ea13a8a (patch) | |
tree | 58811e9b9f62fc937aba74352d9fcdef216bc0e0 /arm_compute | |
parent | d64a46c6dfa81ce4607fc3de57bc9d9ac7e01e4a (diff) | |
download | ComputeLibrary-58c71efe07031fc7ba82e61e2cdca8ae5ea13a8a.tar.gz |
COMPMID-2257: Add support for QASYMM8 in NEGenerateProposals
Change-Id: I7d9aa21ecac97847fce209f97dff0dea6e62790a
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2020
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
4 files changed, 36 insertions, 20 deletions
diff --git a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h index a7b2603648..9ee9d5dd08 100644 --- a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h @@ -53,7 +53,7 @@ public: /** Set the input and output tensors. * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: F16/F32 + * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo * @@ -62,7 +62,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: F16/F32 + * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo * @@ -74,6 +74,9 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: + template <typename T> + void internal_run(const Window &window, const ThreadInfo &info); + const ITensor *_anchors; ITensor *_all_anchors; ComputeAnchorsInfo _anchors_info; diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h index a272a8118b..12075207b1 100644 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h @@ -58,7 +58,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/U32/S32/F16/F32 * @param[out] output Destination tensor. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). @@ -76,7 +76,7 @@ public: * * @note Supported tensor rank: up to 4 * - * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 + * @param[in] input Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/QASYMM16/QSYMM16/U32/S32/F16/F32 * @param[in] output Destination tensor info. Data type supported: Same as @p input * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h index 827f19d130..e14e195ec6 100644 --- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h +++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h @@ -50,7 +50,7 @@ class ICLTensor; * -# @ref CLReshapeLayer x 2 * -# @ref CLBoundingBoxTransform * -# @ref CLPadLayerKernel - * -# @ref CLDequantizationLayerKernel + * -# @ref CLDequantizationLayerKernel x 2 * -# @ref CLQuantizationLayerKernel * And the following CPP functions: * -# @ref CPPBoxWithNonMaximaSuppressionLimit diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index c6d3628e37..cd370a03dd 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -23,15 +23,16 @@ */ #ifndef __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__ #define __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__ -#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" #include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" +#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" +#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" +#include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" @@ -46,9 +47,10 @@ class ITensor; * -# @ref NEComputeAllAnchors * -# @ref NEPermute x 2 * -# @ref NEReshapeLayer x 2 - * -# @ref NEStridedSlice x 3 * -# @ref NEBoundingBoxTransform * -# @ref NEPadLayerKernel + * -# @ref NEDequantizationLayerKernel x 2 + * -# @ref NEQuantizationLayerKernel * And the following CPP kernels: * -# @ref CPPBoxWithNonMaximaSuppressionLimit */ @@ -71,10 +73,12 @@ public: /** Set the input and output tensors. * - * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. Data types supported: F16/F32 + * @param[in] scores Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. + * Data types supported: QASYMM8/F16/F32 * @param[in] deltas Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores - * @param[in] anchors Anchors tensor of size (4, A). Data types supported: Same as @p input - * @param[out] proposals Box proposals output tensor of size (5, W*H*A). Data types supported: Same as @p input + * @param[in] anchors Anchors tensor of size (4, A). Data types supported: QSYMM16 with scale of 0.125 if @p scores is QASYMM8, otherwise same as @p scores + * @param[out] proposals Box proposals output tensor of size (5, W*H*A). + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p scores is QASYMM8, otherwise same as @p scores * @param[out] scores_out Box scores output tensor of size (W*H*A). Data types supported: Same as @p input * @param[out] num_valid_proposals Scalar output tensor which says which of the first proposals are valid. Data types supported: U32 * @param[in] info Contains GenerateProposals operation information described in @ref GenerateProposalsInfo @@ -87,12 +91,14 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref NEGenerateProposalsLayer * - * @param[in] scores Scores info from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. Data types supported: F16/F32 + * @param[in] scores Scores info from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. + * Data types supported: QASYMM8/F16/F32 * @param[in] deltas Bounding box deltas info from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores - * @param[in] anchors Anchors tensor info of size (4, A). Data types supported: Same as @p input - * @param[in] proposals Box proposals info output tensor of size (5, W*H*A). Data types supported: Data types supported: U32 + * @param[in] anchors Anchors tensor info of size (4, A). Data types supported: QSYMM16 with scale of 0.125 if @p scores is QASYMM8, otherwise same as @p scores + * @param[in] proposals Box proposals info output tensor of size (5, W*H*A). + * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p scores is QASYMM8, otherwise same as @p scores * @param[in] scores_out Box scores output tensor info of size (W*H*A). Data types supported: Same as @p input - * @param[in] num_valid_proposals Scalar output tensor info which says which of the first proposals are valid. Data types supported: Same as @p input + * @param[in] num_valid_proposals Scalar output tensor info which says which of the first proposals are valid. Data types supported: U32 * @param[in] info Contains GenerateProposals operation information described in @ref GenerateProposalsInfo * * @return a Status @@ -116,29 +122,36 @@ private: NEComputeAllAnchorsKernel _compute_anchors_kernel; NEBoundingBoxTransformKernel _bounding_box_kernel; NEPadLayerKernel _pad_kernel; + NEDequantizationLayerKernel _dequantize_anchors; + NEDequantizationLayerKernel _dequantize_deltas; + NEQuantizationLayerKernel _quantize_all_proposals; - // CPP kernels - CPPBoxWithNonMaximaSuppressionLimitKernel _cpp_nms_kernel; + // CPP functions + CPPBoxWithNonMaximaSuppressionLimit _cpp_nms; bool _is_nhwc; + bool _is_qasymm8; // Temporary tensors Tensor _deltas_permuted; Tensor _deltas_flattened; + Tensor _deltas_flattened_f32; Tensor _scores_permuted; Tensor _scores_flattened; Tensor _all_anchors; + Tensor _all_anchors_f32; Tensor _all_proposals; + Tensor _all_proposals_quantized; Tensor _keeps_nms_unused; Tensor _classes_nms_unused; Tensor _proposals_4_roi_values; + // Temporary tensor pointers + Tensor *_all_proposals_to_use; + // Output tensor pointers ITensor *_num_valid_proposals; ITensor *_scores_out; - - /** Internal function to run the CPP BoxWithNMS kernel */ - void run_cpp_nms_kernel(); }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__ */ |