From 7b9998d0fe1f98768b690ead10ebfa166d1b873d Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Mon, 21 Oct 2019 17:59:07 +0100 Subject: COMPMID-1816: Use parallel reduction on 0 axis in CL ARG_MIN/ARG_MAX Introducing new CLArgMinMax kernel Change-Id: I0b8254207cc3859d19ceef9b6429cf5c1c586db0 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/2202 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou --- .../runtime/CL/functions/CLArgMinMaxLayer.h | 27 ++++++++++++++-------- .../runtime/CL/functions/CLReductionOperation.h | 10 ++++---- 2 files changed, 22 insertions(+), 15 deletions(-) (limited to 'arm_compute/runtime/CL/functions') diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h index 1b465a4866..21cded0417 100644 --- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h @@ -21,10 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_CLARGMINMAXLAYER_H__ -#define __ARM_COMPUTE_CLARGMINMAXLAYER_H__ +#ifndef ARM_COMPUTE_CLARGMINMAXLAYER_H +#define ARM_COMPUTE_CLARGMINMAXLAYER_H +#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h" +#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -33,7 +36,6 @@ namespace arm_compute { class ITensorInfo; class ICLTensor; -class CLReductionOperation; /** Function to calculate the index of the minimum or maximum values in a * tensor based on an axis. @@ -53,19 +55,18 @@ public: CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. 
* - * @param[in] input Input source tensor, this could be written if @ref CLReductionOperation - * manipulates its border for better performance. Data types supported: F16/F32. + * @param[in] input Input source tensor. Data types supported: F16/F32. * @param[in] axis Axis to find max/min index. * @param[out] output Output source tensor. Data types supported: U32/S32. - * @param[in] op Operation to perform: min or max + * @param[in] op Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN */ - void configure(ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op); + void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op); /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayer * * @param[in] input Input source tensor info. Data types supported: F16/F32. * @param[in] axis Axis to find max/min index. * @param[in] output Output source tensor info. Data types supported: U32/S32. - * @param[in] op Operation to perform: min or max + * @param[in] op Reduction operation to perform. 
Operations supported: ARG_IDX_MAX, ARG_IDX_MIN * * @return a status */ @@ -75,7 +76,13 @@ public: void run() override; private: - std::unique_ptr<CLReductionOperation> _reduction_function; + MemoryGroup _memory_group; + std::vector<CLTensor> _results_vector; + CLTensor _not_reshaped_output; + std::vector<CLArgMinMaxLayerKernel> _reduction_kernels_vector; + CLReshapeLayerKernel _reshape_kernel; + unsigned int _num_of_stages; + unsigned int _reduction_axis; }; } // namespace arm_compute -#endif /* __ARM_COMPUTE_CLARGMINMAXLAYER_H__ */ +#endif /* ARM_COMPUTE_CLARGMINMAXLAYER_H */ diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index 405e1177fd..9e0bf03ffe 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_CLREDUCTIONOPERATION_H__ -#define __ARM_COMPUTE_CLREDUCTIONOPERATION_H__ +#ifndef ARM_COMPUTE_CLREDUCTIONOPERATION_H +#define ARM_COMPUTE_CLREDUCTIONOPERATION_H #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" @@ -57,7 +57,7 @@ public: * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 - * @param[in] op Reduction operation to perform. + * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. */ void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); @@ -67,7 +67,7 @@ public: * @param[in] input Source tensor info. 
Data types supported: QASYMM8/F16/F32. * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 - * @param[in] op Reduction operation to perform. + * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. * * @return a status @@ -92,4 +92,4 @@ private: bool _is_reshape_required; }; } // namespace arm_compute -#endif /*__ARM_COMPUTE_CLREDUCTIONOPERATION_H__ */ +#endif /* ARM_COMPUTE_CLREDUCTIONOPERATION_H */ \ No newline at end of file -- cgit v1.2.1