diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/Utils.h | 10 | ||||
-rw-r--r-- | arm_compute/core/utils/misc/ShapeCalculator.h | 17 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h | 25 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLReductionOperation.h | 28 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h | 8 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEReductionOperation.h | 32 |
6 files changed, 83 insertions, 37 deletions
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 3f04ed9963..3939491bb2 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -881,6 +881,16 @@ std::pair<unsigned int, unsigned int> scaled_dimensions(unsigned int width, unsi
                                                         const PadStrideInfo &pad_stride_info,
                                                         const Size2D &dilation = Size2D(1U, 1U));
 
+/** Check if the given reduction operation should be handled in a serial way.
+ *
+ * @param[in] op   Reduction operation to perform
+ * @param[in] dt   Data type
+ * @param[in] axis Axis along which to reduce
+ *
+ * @return True if the given reduction operation should be handled in a serial way.
+ */
+bool needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int axis);
+
 /** Convert a tensor format into a string.
  *
  * @param[in] format @ref Format to be translated to string.
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index c4c360842f..080d63f60d 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -1179,15 +1179,24 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul
 
 /** Calculate the reduced shape of a tensor given an axis
  *
- * @param[in] input Input tensor info
- * @param[in] axis  Axis on which to perform reduction
+ * @param[in] input     Input tensor info
+ * @param[in] axis      Axis on which to perform reduction
+ * @param[in] keep_dims (Optional) Whether to keep the dimension after reduction operation. Defaults to true.
  *
  * @return the calculated shape
  */
-inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis)
+inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true)
 {
     TensorShape output_shape{ input };
-    output_shape.set(axis, 1);
+
+    if(!keep_dims)
+    {
+        output_shape.remove_dimension(axis);
+    }
+    else
+    {
+        output_shape.set(axis, 1);
+    }
 
     return output_shape;
 }
diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index 2384ebcd37..28feee09ab 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
@@ -24,13 +24,16 @@
 #ifndef __ARM_COMPUTE_CLARGMINMAXLAYER_H__
 #define __ARM_COMPUTE_CLARGMINMAXLAYER_H__
 
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/MemoryGroup.h"
 
 namespace arm_compute
 {
+class ITensorInfo;
 class ICLTensor;
+class CLReductionOperation;
 
 /** Function to calculate the index of the minimum or maximum values in a
  * tensor based on an axis.
@@ -39,17 +42,23 @@ class ICLTensor;
  * responsibility to check that the results do not overflow in case the
  * output data type is set to signed 32-bit integer (S32).
  */
-class CLArgMinMaxLayer : public ICLSimpleFunction
+class CLArgMinMaxLayer : public IFunction
 {
 public:
+    /** Default Constructor.
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
+    CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Input source tensor. Data types supported: F16/F32.
+     * @param[in]  input  Input source tensor, this could be written if @ref CLReductionOperation
+     *                    manipulates its border for better performance. Data types supported: F16/F32.
      * @param[in]  axis   Axis to find max/min index.
      * @param[out] output Output source tensor. Data types supported: U32/S32.
      * @param[in]  op     Operation to perform: min or max
      */
-    void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
+    void configure(ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
     /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayer
      *
      * @param[in] input  Input source tensor info. Data types supported: F16/F32.
@@ -60,6 +69,12 @@ public:
      * @return a status
      */
     static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::unique_ptr<CLReductionOperation> _reduction_function;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_CLARGMINMAXLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index f71313f235..405e1177fd 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
 #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
+#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/IFunction.h"
@@ -53,35 +54,42 @@ public:
 
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/F16/F32.
-     * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
-     * @param[in]  axis   Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
-     * @param[in]  op     Reduction operation to perform.
+     * @param[in]  input     Source tensor. Data types supported: QASYMM8/F16/F32.
+     * @param[out] output    Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  axis      Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
+     * @param[in]  op        Reduction operation to perform.
+     * @param[in]  keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
      */
-    void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op);
+    void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation.
      *
-     * @param[in] input  Source tensor info. Data types supported: QASYMM8/F16/F32.
-     * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
-     * @param[in] axis   Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
-     * @param[in] op     Reduction operation to perform.
+     * @param[in] input     Source tensor info. Data types supported: QASYMM8/F16/F32.
+     * @param[in] output    Destination tensor info. Data types and data layouts supported: Same as @p input.
+     * @param[in] axis      Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
+     * @param[in] op        Reduction operation to perform.
+     * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
 
     // Inherited methods overridden:
     void run() override;
 
 private:
+    ICLTensor *configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output);
+
     MemoryGroup _memory_group;
     std::vector<CLTensor> _results_vector;
     std::vector<CLReductionOperationKernel> _reduction_kernels_vector;
     std::vector<CLFillBorderKernel> _border_handlers_vector;
+    CLReshapeLayerKernel _reshape_kernel;
+    ReductionOperation _op;
     unsigned int _num_of_stages;
     unsigned int _reduction_axis;
     bool _is_serial;
+    bool _is_reshape_required;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLREDUCTIONOPERATION_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index 85bf7d92c9..b0e2d783b3 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -24,8 +24,6 @@
 #ifndef __ARM_COMPUTE_NEARGMINMAXLAYER_H__
 #define __ARM_COMPUTE_NEARGMINMAXLAYER_H__
 
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/INESimpleFunction.h"
@@ -33,6 +31,7 @@
 namespace arm_compute
 {
 class ITensor;
+class NEReductionOperation;
 
 /** Function to calculate the index of the minimum or maximum values in a
  * tensor based on an axis.
@@ -74,10 +73,7 @@ public:
     void run() override;
 
 private:
-    MemoryGroup _memory_group;
-    NEReductionOperationKernel _reduction_kernel;
-    NEFillBorderKernel _fill_border_kernel;
-    bool _run_fill_border;
+    std::unique_ptr<NEReductionOperation> _reduction_function;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEARGMINMAXLAYER_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index 5bc7059b62..1e72c4f74d 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,7 +28,9 @@
 
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
+#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
 {
@@ -44,35 +46,41 @@ class NEReductionOperation : public IFunction
 {
 public:
     /** Default constructor */
-    NEReductionOperation();
+    NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
-     * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
-     * @param[in]  axis   Dimension along which to reduce. Supported reduction axis : 0
-     * @param[in]  op     Reduction operation to perform.
+     * @param[in]  input     Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+     * @param[out] output    Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]  axis      Dimension along which to reduce. Supported reduction axis : 0
+     * @param[in]  op        Reduction operation to perform.
+     * @param[in]  keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
      */
-    void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op);
+    void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation.
      *
-     * @param[in] input  Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
-     * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
-     * @param[in] axis   Dimension along which to reduce. Supported reduction axis : 0
-     * @param[in] op     Reduction operation to perform.
+     * @param[in] input     Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0)
+     * @param[in] output    Destination tensor info. Data types and data layouts supported: same as @p input.
+     * @param[in] axis      Dimension along which to reduce. Supported reduction axis : 0
+     * @param[in] op        Reduction operation to perform.
+     * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
 
     // Inherited methods overridden:
     void run() override;
 
 private:
+    MemoryGroup _memory_group;
     NEReductionOperationKernel _reduction_kernel;
     NEFillBorderKernel _fill_border_kernel;
+    NEReshapeLayerKernel _reshape_kernel;
+    Tensor _output_internal;
     size_t _window_split;
     int _reduction_axis;
+    bool _is_reshape_required;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEREDUCTIONOPERATION_H__ */