diff options
18 files changed, 437 insertions, 173 deletions
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 3f04ed9963..3939491bb2 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -881,6 +881,16 @@ std::pair<unsigned int, unsigned int> scaled_dimensions(unsigned int width, unsi const PadStrideInfo &pad_stride_info, const Size2D &dilation = Size2D(1U, 1U)); +/** Check if the given reduction operation should be handled in a serial way. + * + * @param[in] op Reduction operation to perform + * @param[in] dt Data type + * @param[in] axis Axis along which to reduce + * + * @return True if the given reduction operation should be handled in a serial way. + */ +bool needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int axis); + /** Convert a tensor format into a string. * * @param[in] format @ref Format to be translated to string. diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index c4c360842f..080d63f60d 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -1179,15 +1179,24 @@ inline TensorShape compute_tiled_shape(const TensorShape &input_shape, const Mul /** Calculate the reduced shape of a tensor given an axis * - * @param[in] input Input tensor info - * @param[in] axis Axis on which to perform reduction + * @param[in] input Input tensor info + * @param[in] axis Axis on which to perform reduction + * @param[in] keep_dims (Optional) Whether to keep the dimension after reduction operation. Defaults to true. * * @return the calculated shape */ -inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis) +inline TensorShape compute_reduced_shape(const TensorShape &input, unsigned int axis, bool keep_dims = true) { TensorShape output_shape{ input }; - output_shape.set(axis, 1); + + if(!keep_dims) + { + output_shape.remove_dimension(axis); + } + else + { + output_shape.set(axis, 1); + } return output_shape; } diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h index 2384ebcd37..28feee09ab 100644 --- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h @@ -24,13 +24,16 @@ #ifndef __ARM_COMPUTE_CLARGMINMAXLAYER_H__ #define __ARM_COMPUTE_CLARGMINMAXLAYER_H__ -#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" namespace arm_compute { +class ITensorInfo; class ICLTensor; +class CLReductionOperation; /** Function to calculate the index of the minimum or maximum values in a * tensor based on an axis. @@ -39,17 +42,23 @@ class ICLTensor; * responsibility to check that the results do not overflow in case the * output data type is set to signed 32-bit integer (S32). */ -class CLArgMinMaxLayer : public ICLSimpleFunction +class CLArgMinMaxLayer : public IFunction { public: + /** Default Constructor. + * + * @param[in] memory_manager (Optional) Memory manager. + */ + CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Input source tensor. Data types supported: F16/F32. + * @param[in] input Input source tensor, this could be written if @ref CLReductionOperation + * manipulates its border for better performance. Data types supported: F16/F32. * @param[in] axis Axis to find max/min index. * @param[out] output Output source tensor. Data types supported: U32/S32. * @param[in] op Operation to perform: min or max */ - void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op); + void configure(ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op); /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayer * * @param[in] input Input source tensor info. Data types supported: F16/F32. @@ -60,6 +69,12 @@ public: * @return a status */ static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<CLReductionOperation> _reduction_function; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_CLARGMINMAXLAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index f71313f235..405e1177fd 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -26,6 +26,7 @@ #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" +#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -53,35 +54,42 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. - * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 - * @param[in] op Reduction operation to perform. + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 + * @param[in] op Reduction operation to perform. + * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. */ - void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op); + void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation. * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 - * @param[in] op Reduction operation to perform. + * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3 + * @param[in] op Reduction operation to perform. + * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); // Inherited methods overridden: void run() override; private: + ICLTensor *configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output); + MemoryGroup _memory_group; std::vector<CLTensor> _results_vector; std::vector<CLReductionOperationKernel> _reduction_kernels_vector; std::vector<CLFillBorderKernel> _border_handlers_vector; + CLReshapeLayerKernel _reshape_kernel; + ReductionOperation _op; unsigned int _num_of_stages; unsigned int _reduction_axis; bool _is_serial; + bool _is_reshape_required; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_CLREDUCTIONOPERATION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h index 85bf7d92c9..b0e2d783b3 100644 --- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h @@ -24,8 +24,6 @@ #ifndef __ARM_COMPUTE_NEARGMINMAXLAYER_H__ #define __ARM_COMPUTE_NEARGMINMAXLAYER_H__ -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/INESimpleFunction.h" @@ -33,6 +31,7 @@ namespace arm_compute { class ITensor; +class NEReductionOperation; /** Function to calculate the index of the minimum or maximum values in a * tensor based on an axis. @@ -74,10 +73,7 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEReductionOperationKernel _reduction_kernel; - NEFillBorderKernel _fill_border_kernel; - bool _run_fill_border; + std::unique_ptr<NEReductionOperation> _reduction_function; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEARGMINMAXLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h index 5bc7059b62..1e72c4f74d 100644 --- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h +++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -28,7 +28,9 @@ #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" +#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" namespace arm_compute { @@ -44,35 +46,41 @@ class NEReductionOperation : public IFunction { public: /** Default constructor */ - NEReductionOperation(); + NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. + * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. + * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. */ - void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op); + void configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperation. * - * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 - * @param[in] op Reduction operation to perform. + * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32. Data layouts supported: NCHW. (Written to only for border_size != 0) + * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. + * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + * @param[in] keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims = true); // Inherited methods overridden: void run() override; private: + MemoryGroup _memory_group; NEReductionOperationKernel _reduction_kernel; NEFillBorderKernel _fill_border_kernel; + NEReshapeLayerKernel _reshape_kernel; + Tensor _output_internal; size_t _window_split; int _reduction_axis; + bool _is_reshape_required; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEREDUCTIONOPERATION_H__ */ diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 8e92b591d1..a085ab1683 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -33,6 +33,7 @@ #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "support/ToolchainSupport.h" @@ -80,17 +81,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, u std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int axis, ReductionOperation op) { // Output tensor auto initialization if not yet initialized - TensorShape output_shape{ input->tensor_shape() }; - output_shape.set(axis, 1); - const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX); - DataType output_data_type = is_arg_min_max ? DataType::U32 : input->data_type(); + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::ARG_IDX_MAX); + const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, !is_arg_min_max); + const DataType output_data_type = is_arg_min_max ? DataType::U32 : input->data_type(); auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true)); const unsigned int num_elems_processed_per_iteration = (is_data_type_quantized(input->data_type()) && (axis == 0)) ? 1 : 16; Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); bool window_changed = false; - const bool is_serial_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::MIN - || op == ReductionOperation::MAX || is_data_type_quantized(input->data_type())); + const bool is_serial_op = needs_serialized_reduction(op, input->data_type(), axis); switch(axis) { @@ -198,8 +197,8 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou // Create kernel cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange(); std::string kernel_axis_name; - const bool is_serial_op = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN || op == ReductionOperation::MIN || op == ReductionOperation::MAX - || is_data_type_quantized(input->info()->data_type())); + const bool is_serial_op = needs_serialized_reduction(_op, _input->info()->data_type(), _reduction_axis); + switch(axis) { case 0: @@ -264,8 +263,7 @@ void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &que ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - const bool is_serial_op = (_op == ReductionOperation::ARG_IDX_MAX || _op == ReductionOperation::ARG_IDX_MIN || _op == ReductionOperation::MIN || _op == ReductionOperation::MAX - || is_data_type_quantized(_input->info()->data_type())); + const bool is_serial_op = needs_serialized_reduction(_op, _input->info()->data_type(), _reduction_axis); switch(_reduction_axis) { case 0: diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index 7e1af0e27d..fa335d757b 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -427,6 +427,16 @@ std::pair<unsigned int, unsigned int> arm_compute::scaled_dimensions(unsigned in return std::make_pair(w, h); } +bool arm_compute::needs_serialized_reduction(ReductionOperation op, DataType dt, unsigned int axis) +{ + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX || op == ReductionOperation::ARG_IDX_MIN); + const bool is_min_max = (op == ReductionOperation::MAX || op == ReductionOperation::MIN); + const bool is_quantized_type = is_data_type_quantized(dt); + const bool is_first_dim = (axis == 0); + + return !is_first_dim || is_arg_min_max || is_min_max || is_quantized_type; +} + #ifdef ARM_COMPUTE_ASSERTS_ENABLED void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const uint8_t *ptr, unsigned int n, int stream_width, const std::string &element_delim) { diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp index a6393c57c1..fd172d5f2c 100644 --- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp +++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -23,26 +23,33 @@ */ #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h" +#include "arm_compute/runtime/CL/functions/CLReductionOperation.h" -#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/CL/CLScheduler.h" namespace arm_compute { -void CLArgMinMaxLayer::configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op) +CLArgMinMaxLayer::CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager) + : _reduction_function(support::cpp14::make_unique<CLReductionOperation>(std::move(memory_manager))) { - auto k = arm_compute::support::cpp14::make_unique<CLReductionOperationKernel>(); - k->configure(input, output, axis, op); - _kernel = std::move(k); +} + +void CLArgMinMaxLayer::configure(ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op) +{ + _reduction_function->configure(input, output, axis, op, false); } Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Invalid operation"); - return CLReductionOperationKernel::validate(input, output, axis, op); + return CLReductionOperation::validate(input, output, axis, op, false); +} + +void CLArgMinMaxLayer::run() +{ + _reduction_function->run(); } } // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp index 38f0a7523c..447c15b1e8 100644 --- a/src/runtime/CL/functions/CLReductionOperation.cpp +++ b/src/runtime/CL/functions/CLReductionOperation.cpp @@ -26,15 +26,17 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h" #include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/Tensor.h" #include "support/ToolchainSupport.h" -using namespace arm_compute; - +namespace arm_compute +{ namespace { unsigned int calculate_number_of_stages(const ITensorInfo *input, unsigned int axis) @@ -56,17 +58,52 @@ unsigned int calculate_number_of_stages(const ITensorInfo *input, unsigned int a } // namespace CLReductionOperation::CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _num_of_stages(), _reduction_axis(), _is_serial() + : _memory_group(std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _reshape_kernel(), _op(), _num_of_stages(), _reduction_axis(), _is_serial(), + _is_reshape_required(false) { } -Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) +Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims) { - const unsigned int num_of_stages = calculate_number_of_stages(input, axis); - bool is_serial = is_data_type_quantized(input->data_type()) || axis != 0; + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); + + const unsigned int num_of_stages = calculate_number_of_stages(input, axis); + const bool is_serial = needs_serialized_reduction(op, input->data_type(), axis); + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN); + const bool is_reshape_required = !keep_dims || is_arg_min_max; + + if(is_reshape_required) + { + const TensorInfo expected_output_shape = output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, keep_dims)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output); + } + + auto *output_internal = output; + + TensorInfo output_before_reshape; + const auto input_shape = input->tensor_shape(); + const auto input_data_type = input->data_type(); + const auto input_num_channles = input->num_channels(); + const auto input_qinfo = input->quantization_info(); + const auto output_data_type = is_arg_min_max ? DataType::U32 : output->data_type(); + + auto initialize_tensorinfo = [](TensorInfo & ti, TensorShape shape, DataType data_type, int num_channels, QuantizationInfo qinfo) + { + ti.set_data_type(data_type).set_tensor_shape(shape).set_num_channels(num_channels).set_quantization_info(qinfo); + }; + + if(is_reshape_required) + { + auto shape_before_reshape = input_shape; + shape_before_reshape.set(axis, 1); + initialize_tensorinfo(output_before_reshape, shape_before_reshape, output_data_type, input_num_channles, input_qinfo); + output_internal = &output_before_reshape; + } + if(is_serial) { - ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, output, axis, op)); + ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, output_internal, axis, op)); } else { @@ -74,14 +111,13 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf std::vector<TensorInfo> sums_vector(num_of_stages - 1); // Create intermediate tensor info - TensorShape shape{ input->tensor_shape() }; + TensorShape shape{ input_shape }; + + shape.set(0, ceil(shape.x() / 128.f)); for(unsigned int i = 0; i < num_of_stages - 1; i++) { - shape.set(0, ceil(shape.x() / 128.f)); - sums_vector[i].set_data_type(input->data_type()); - sums_vector[i].set_tensor_shape(shape); - sums_vector[i].set_num_channels(input->num_channels()); + initialize_tensorinfo(sums_vector[i], shape, input_data_type, input_num_channles, input_qinfo); } ReductionOperation first_kernel_op; @@ -130,17 +166,72 @@ Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInf // Validate ReductionOperation on the last stage const unsigned int last_stage = num_of_stages - 1; - ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(&sums_vector[last_stage - 1], output, axis, last_kernel_op, input->dimension(0))); + ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(&sums_vector[last_stage - 1], output_internal, axis, last_kernel_op, input->dimension(0))); + } + + if(is_reshape_required) + { + ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(output_internal, output)); } return Status{}; } -void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) +ICLTensor *CLReductionOperation::configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output) +{ + if(!_is_reshape_required && _is_serial) + { + return output; + } + + auto intermediate_result_vector_size = _is_serial ? 1 : _num_of_stages; + const auto is_arg_min_max = (_op == ReductionOperation::ARG_IDX_MAX || _op == ReductionOperation::ARG_IDX_MIN); + + if(!_is_reshape_required) + { + --intermediate_result_vector_size; + } + + _results_vector.resize(intermediate_result_vector_size); + auto shape = input->info()->tensor_shape(); + + shape.set(_reduction_axis, _is_serial ? 1 : ceil(shape.x() / 128.f)); + + for(auto &v : _results_vector) + { + if(&v == &_results_vector.back() && _is_reshape_required) + { + shape.set(_reduction_axis, 1); + } + v.allocator()->init(input->info()->clone()->set_tensor_shape(shape)); + } + + if(is_arg_min_max) + { + _results_vector.back().info()->set_data_type(DataType::U32).set_is_resizable(true).reset_padding(); + } + + return _is_reshape_required ? &_results_vector.back() : output; +} + +void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims) { - _num_of_stages = calculate_number_of_stages(input->info(), axis); - _reduction_axis = axis; - _is_serial = is_data_type_quantized(input->info()->data_type()) || axis != 0; + _op = op; + _num_of_stages = calculate_number_of_stages(input->info(), axis); + _reduction_axis = axis; + _is_serial = needs_serialized_reduction(op, input->info()->data_type(), axis); + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN); + _is_reshape_required = !keep_dims || is_arg_min_max; + + auto *output_internal = configure_intermediate_result_vector(input, output); + + // ArgMinMax might not give initialized output tensor, so initialize here. + if(_is_reshape_required) + { + const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false); + const auto output_data_type = is_arg_min_max ? DataType::U32 : input->info()->data_type(); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true)); + } // Configure reduction operation kernels _reduction_kernels_vector.resize(_num_of_stages); @@ -148,20 +239,16 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign // Create temporary tensors if(_is_serial) { - _reduction_kernels_vector[0].configure(input, output, axis, op, 0); + if(_is_reshape_required) + { + _memory_group.manage(&_results_vector.back()); + } + + _reduction_kernels_vector[0].configure(input, output_internal, axis, op, 0); } else { _border_handlers_vector.resize(_num_of_stages); - _results_vector.resize(_num_of_stages - 1); - TensorShape shape{ input->info()->tensor_shape() }; - for(unsigned int i = 0; i < _num_of_stages - 1; i++) - { - shape.set(0, ceil(shape.x() / 128.f)); - _results_vector[i].allocator()->init(input->info()->clone()->set_tensor_shape(shape)); - } - - // Apply ReductionOperation only on first kernel _memory_group.manage(&_results_vector[0]); ReductionOperation first_kernel_op; @@ -262,10 +349,22 @@ void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsign // Apply ReductionOperation on the last stage const unsigned int last_stage = _num_of_stages - 1; const unsigned int input_width = input->info()->dimension(0); - _reduction_kernels_vector[last_stage].configure(&_results_vector[last_stage - 1], output, axis, last_kernel_op, input_width); + + if(_is_reshape_required) + { + _memory_group.manage(&_results_vector.back()); + } + + _reduction_kernels_vector[last_stage].configure(&_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width); _border_handlers_vector[last_stage].configure(&_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue); _results_vector[last_stage - 1].allocator()->allocate(); } + + if(_is_reshape_required) + { + _reshape_kernel.configure(&_results_vector.back(), output); + _results_vector.back().allocator()->allocate(); + } } void CLReductionOperation::run() @@ -284,4 +383,10 @@ void CLReductionOperation::run() CLScheduler::get().enqueue(_reduction_kernels_vector[i], false); } } + + if(_is_reshape_required) + { + CLScheduler::get().enqueue(_reshape_kernel, false); + } } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp index 6863bb0b3b..ab2d6f0c1f 100644 --- a/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp +++ b/src/runtime/NEON/functions/NEArgMinMaxLayer.cpp @@ -23,47 +23,35 @@ */ #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h" +#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" namespace arm_compute { NEArgMinMaxLayer::NEArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager) - : _memory_group(std::move(memory_manager)), _reduction_kernel(), _fill_border_kernel(), _run_fill_border(false) + : _reduction_function(support::cpp14::make_unique<NEReductionOperation>()) { + ARM_COMPUTE_UNUSED(memory_manager); } void NEArgMinMaxLayer::configure(ITensor *input, int axis, ITensor *output, const ReductionOperation &op) { - _reduction_kernel.configure(input, output, axis, op); - - if(axis == 0) - { - _fill_border_kernel.configure(input, _reduction_kernel.border_size(), BorderMode::REPLICATE); - _run_fill_border = true; - } + _reduction_function->configure(input, output, axis, op, false); } Status NEArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(op != ReductionOperation::ARG_IDX_MAX && op != ReductionOperation::ARG_IDX_MIN, "Invalid operation"); - ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperationKernel::validate(input, output, axis, op)); - return Status{}; + return NEReductionOperation::validate(input, output, axis, op, false); } void NEArgMinMaxLayer::run() { - MemoryGroupResourceScope scope_mg(_memory_group); - - if(_run_fill_border) - { - NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); - } - NEScheduler::get().schedule(&_reduction_kernel, Window::DimY); + _reduction_function->run(); } } // namespace arm_compute
\ No newline at end of file diff --git a/src/runtime/NEON/functions/NEReductionOperation.cpp b/src/runtime/NEON/functions/NEReductionOperation.cpp index dc6cf59019..09cd765d4b 100644 --- a/src/runtime/NEON/functions/NEReductionOperation.cpp +++ b/src/runtime/NEON/functions/NEReductionOperation.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" namespace arm_compute @@ -52,25 +53,78 @@ size_t reduction_window_split_dimension(unsigned int axis) } } // namespace -NEReductionOperation::NEReductionOperation() - : _reduction_kernel(), _fill_border_kernel(), _window_split(0), _reduction_axis() +NEReductionOperation::NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager) + : _memory_group(memory_manager), _reduction_kernel(), _fill_border_kernel(), _reshape_kernel(), _output_internal(), _window_split(0), _reduction_axis(), _is_reshape_required(false) { } -Status NEReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) +Status NEReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims) { - ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperationKernel::validate(input, output, axis, op)); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis"); + + const auto is_reshape_required = !keep_dims; + + auto *output_internal = output; + + TensorInfo info_before_reshape; + + if(is_reshape_required) + { + const TensorInfo expected_output_shape = output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, keep_dims)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output); + + auto shape_before_reshape = input->tensor_shape(); + shape_before_reshape.set(axis, 1); + + const auto input_num_channles = input->num_channels(); + const auto input_qinfo = input->quantization_info(); + const auto is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN); + const auto output_data_type = is_arg_min_max ? DataType::U32 : output->data_type(); + + info_before_reshape.set_data_type(output_data_type).set_tensor_shape(shape_before_reshape).set_num_channels(input_num_channles).set_quantization_info(input_qinfo); + + output_internal = &info_before_reshape; + } + + ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperationKernel::validate(input, output_internal, axis, op)); + + if(is_reshape_required) + { + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(output_internal, output)); + } return Status{}; } -void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op) +void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op, bool keep_dims) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(NEReductionOperation::validate(input->info(), output->info(), axis, op)); + + _is_reshape_required = !keep_dims; + + auto *output_internal = output; + const auto is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN); + + if(_is_reshape_required) + { + const auto output_internal_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis); + const auto output_external_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false); + const auto output_data_type = is_arg_min_max ? DataType::U32 : input->info()->data_type(); + const auto num_channels = input->info()->num_channels(); + const auto qinfo = input->info()->quantization_info(); + + _output_internal.allocator()->init(input->info()->clone()->set_data_type(output_data_type).set_tensor_shape(output_internal_shape).reset_padding().set_is_resizable(true).set_num_channels( + num_channels).set_quantization_info(qinfo)); + _memory_group.manage(&_output_internal); + output_internal = &_output_internal; + auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(output_data_type).set_tensor_shape(output_external_shape).reset_padding().set_is_resizable(true)); + } + + ARM_COMPUTE_ERROR_THROW_ON(NEReductionOperation::validate(input->info(), output->info(), axis, op, keep_dims)); // Configure reduction kernel - _reduction_kernel.configure(input, output, axis, op); + _reduction_kernel.configure(input, output_internal, axis, op); _window_split = reduction_window_split_dimension(axis); _reduction_axis = axis; @@ -150,7 +204,13 @@ void NEReductionOperation::configure(ITensor *input, ITensor *output, unsigned i default: ARM_COMPUTE_ERROR("Reduction Operation unsupported"); } - _fill_border_kernel.configure(input, fill_border_size, BorderMode::CONSTANT, pixelValue); + _fill_border_kernel.configure(input, fill_border_size, (is_arg_min_max ? BorderMode::REPLICATE : BorderMode::CONSTANT), pixelValue); + } + + if(_is_reshape_required) + { + _reshape_kernel.configure(output_internal, output); + _output_internal.allocator()->allocate(); } } @@ -161,5 +221,9 @@ void NEReductionOperation::run() NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY); } NEScheduler::get().schedule(&_reduction_kernel, _window_split); + if(_is_reshape_required) + { + NEScheduler::get().schedule(&_reshape_kernel, Window::DimY); + } } } // namespace arm_compute diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp index 6de09bed25..845fdbf493 100644 --- a/tests/validation/CL/ArgMinMax.cpp +++ b/tests/validation/CL/ArgMinMax.cpp @@ -25,7 +25,9 @@ #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h" +#include "arm_compute/runtime/CL/functions/CLReductionOperation.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/datasets/SplitDataset.h" @@ -49,16 +51,18 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 3U, 16U, 2U), 1, DataType::F32), // Invalid axis TensorInfo(TensorShape(27U, 3U, 16U, 2U), 1, DataType::F32), // Invalid output shape TensorInfo(TensorShape(32U, 16U, 16U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 16U, 16U, 2U), 1, DataType::F32) // Invalid operation + TensorInfo(TensorShape(32U, 16U, 16U, 2U), 1, DataType::F32), // Invalid operation + TensorInfo(TensorShape(32U, 16U, 16U, 2U), 1, DataType::F32) // Not allowed keeping the dimension }), - framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(27U, 3U, 1U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(27U, 3U, 1U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::U32), - TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::F32) + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 3U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::U32), + TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::U32) })), - framework::dataset::make("Axis", { 4, 0, 2, 0 })), - framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MAX, ReductionOperation::ARG_IDX_MAX, ReductionOperation::ARG_IDX_MAX, ReductionOperation::MEAN_SUM })), - framework::dataset::make("Expected", { false, false, true, false })), + framework::dataset::make("Axis", { 4, 0, 2, 0, 2 })), + framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MAX, ReductionOperation::ARG_IDX_MAX, ReductionOperation::ARG_IDX_MAX, ReductionOperation::MEAN_SUM, ReductionOperation::ARG_IDX_MAX })), + framework::dataset::make("Expected", { false, false, true, false, false })), input_info, output_info, axis, operation, expected) { const Status status = CLArgMinMaxLayer::validate(&input_info.clone()->set_is_resizable(false), axis, &output_info.clone()->set_is_resizable(false), operation); @@ -76,13 +80,13 @@ DATA_TEST_CASE(Configuration, CLTensor ref_src = create_tensor<CLTensor>(shape, data_type); CLTensor dst; + constexpr int axis = 1; + // Create and Configure function CLArgMinMaxLayer arg_min_max_layer; - arg_min_max_layer.configure(&ref_src, 1, &dst, ReductionOperation::ARG_IDX_MAX); + arg_min_max_layer.configure(&ref_src, axis, &dst, ReductionOperation::ARG_IDX_MAX); - // Validate valid region - TensorShape output_shape = shape; - output_shape.set(1, 1); + const auto output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(shape, axis, false); const ValidRegion valid_region = shape_to_valid_region(output_shape); validate(dst.info()->valid_region(), valid_region); } diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp index 9a3cd996fa..1dec020d18 100644 --- a/tests/validation/CL/ReductionOperation.cpp +++ b/tests/validation/CL/ReductionOperation.cpp @@ -57,6 +57,7 @@ const auto ReductionOperations = framework::dataset::make("ReductionOperation", }); +const auto KeepDimensions = framework::dataset::make("KeepDims", { true, false }); } // namespace TEST_SUITE(CL) @@ -64,29 +65,34 @@ TEST_SUITE(ReductionOperation) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching data type input/output TensorInfo(TensorShape(128U, 64U), 3, DataType::F32), // Number of Input channels != 1 TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != QASYMM8/F16/F32 TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions TensorInfo(TensorShape(128U, 64U), 1, DataType::QASYMM8), // Axis == 0 and SUM_SQUARE and QASYMM8 - TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) // Kept Dimension when keep_dims = false + }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(1U, 64U), 1, DataType::F16), TensorInfo(TensorShape(1U, 64U), 1, DataType::F32), TensorInfo(TensorShape(1U, 64U), 1, DataType::S16), TensorInfo(TensorShape(1U, 64U), 1, DataType::F32), TensorInfo(TensorShape(1U, 64U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(1U, 64U), 1, DataType::F32), TensorInfo(TensorShape(1U, 64U), 1, DataType::F32) })), - framework::dataset::make("Axis", { 0U, 0U, 0U, static_cast<unsigned int>(TensorShape::num_max_dimensions), 1U, 0U })), - framework::dataset::make("Expected", { false, false, false, false, false, true })), - input_info, output_info, axis, expected) + framework::dataset::make("Axis", { 0U, 0U, 0U, static_cast<unsigned int>(TensorShape::num_max_dimensions), 1U, 0U, 0U })), + framework::dataset::make("KeepDims", { true, true, true, true, true, true, false })), + framework::dataset::make("Expected", { false, false, false, false, false, true , false })), + input_info, output_info, axis, keep_dims, expected) { bool is_valid = bool(CLReductionOperation::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(true), axis, - ReductionOperation::SUM_SQUARE)); + ReductionOperation::SUM_SQUARE, + keep_dims)); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } // clang-format on @@ -97,28 +103,54 @@ using CLReductionOperationFixture = ReductionOperationFixture<CLTensor, CLAccess TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations)) +FIXTURE_DATA_TEST_CASE(RunSmall2D, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1 })), ReductionOperations), KeepDimensions)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunSmall3D, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2 })), ReductionOperations), KeepDimensions)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunSmall4D, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations), + KeepDimensions)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); } FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations)) + combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations), KeepDimensions)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0, tolerance_f16); } TEST_SUITE_END() // F16 TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations)) +FIXTURE_DATA_TEST_CASE(RunSmall2D, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::Small2DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1 })), ReductionOperations), KeepDimensions)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmall3D, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::Small3DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2 })), ReductionOperations), KeepDimensions)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunSmall4D, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations), + KeepDimensions)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations)) + combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), ReductionOperations), KeepDimensions)) { // Validate output validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, tolerance_f32); diff --git a/tests/validation/NEON/ArgMinMax.cpp b/tests/validation/NEON/ArgMinMax.cpp index 71fb39a30d..642a69ba5f 100644 --- a/tests/validation/NEON/ArgMinMax.cpp +++ b/tests/validation/NEON/ArgMinMax.cpp @@ -24,9 +24,11 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" #include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h" +#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "tests/NEON/Accessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/datasets/SplitDataset.h" @@ -54,7 +56,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(27U, 3U, 1U, 2U), 1, DataType::F32), TensorInfo(TensorShape(27U, 3U, 1U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::U32), + TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::U32), TensorInfo(TensorShape(32U, 16U, 1U, 2U), 1, DataType::F32) })), framework::dataset::make("Axis", { 4, 0, 2, 0 })), @@ -74,17 +76,17 @@ DATA_TEST_CASE(Configuration, shape, data_type) { // Create tensors - Tensor ref_src = create_tensor<Tensor>(shape, data_type); - Tensor dst; + Tensor ref_src = create_tensor<Tensor>(shape, data_type); + Tensor dst; + const int axis = 1; // Create and Configure function NEArgMinMaxLayer arg_min_max_layer; - arg_min_max_layer.configure(&ref_src, 1, &dst, ReductionOperation::ARG_IDX_MAX); + arg_min_max_layer.configure(&ref_src, axis, &dst, ReductionOperation::ARG_IDX_MAX); // Validate valid region - TensorShape output_shape = shape; - output_shape.set(1, 1); - const ValidRegion valid_region = shape_to_valid_region(output_shape); + const auto expected_output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(shape, axis, false); + const ValidRegion valid_region = shape_to_valid_region(expected_output_shape); validate(dst.info()->valid_region(), valid_region); } diff --git a/tests/validation/NEON/ReductionOperation.cpp b/tests/validation/NEON/ReductionOperation.cpp index 5b697a5efa..3a7f707d23 100644 --- a/tests/validation/NEON/ReductionOperation.cpp +++ b/tests/validation/NEON/ReductionOperation.cpp @@ -66,6 +66,8 @@ const auto QuantizationInfos = framework::dataset::make("QuantizationInfo", const auto Axises = framework::dataset::make("Axis", { 0, 1, 2, 3 }); +const auto KeepDims = framework::dataset::make("KeepDims", { true, false }); + } // namespace TEST_SUITE(NEON) @@ -73,27 +75,31 @@ TEST_SUITE(ReductionOperation) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching data type input/output TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1 TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32 TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions - TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) // Kept dimension when keep_dims = false }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(1U, 64U), 1, DataType::F16), TensorInfo(TensorShape(1U, 64U), 1, DataType::F32), TensorInfo(TensorShape(1U, 64U), 1, DataType::S16), TensorInfo(TensorShape(1U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(1U, 64U), 1, DataType::F32), TensorInfo(TensorShape(1U, 64U), 1, DataType::F32) })), - framework::dataset::make("Axis", { 0U, 0U, 0U, static_cast<unsigned int>(TensorShape::num_max_dimensions), 0U })), - framework::dataset::make("Expected", { false, false, false, false, true })), - input_info, output_info, axis, expected) + framework::dataset::make("Axis", { 0U, 0U, 0U, static_cast<unsigned int>(TensorShape::num_max_dimensions), 0U, 0U })), + framework::dataset::make("KeepDims", { true, true, true, true, true, false})), + framework::dataset::make("Expected", { false, false, false, false, true, false })), + input_info, output_info, axis, keep_dims, expected) { bool is_valid = bool(NEReductionOperation::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(true), axis, - ReductionOperation::SUM_SQUARE)); + ReductionOperation::SUM_SQUARE, + keep_dims)); ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); } // clang-format on @@ -104,13 +110,13 @@ using NEReductionOperationFixture = ReductionOperationFixture<Tensor, Accessor, TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), Axises), ReductionOperations)) + combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), Axises), ReductionOperations), KeepDims)) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationFixture<float>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F32)), Axises), ReductionOperations)) + combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::F32)), Axises), ReductionOperations), KeepDims)) { // Validate output validate(Accessor(_target), _reference, rel_tolerance_f32, 0, tolerance_f32); @@ -122,17 +128,19 @@ using NEReductionOperationQuantizedFixture = ReductionOperationQuantizedFixture< TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEReductionOperationQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), Axises), - ReductionOperations), - QuantizationInfos)) + combine(combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), Axises), + ReductionOperations), + QuantizationInfos), + KeepDims)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, NEReductionOperationQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), Axises), - ReductionOperations), - QuantizationInfos)) + combine(combine(combine(combine(combine(datasets::Large4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), Axises), + ReductionOperations), + QuantizationInfos), + KeepDims)) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); diff --git a/tests/validation/fixtures/ArgMinMaxFixture.h b/tests/validation/fixtures/ArgMinMaxFixture.h index ed6b51abe5..f8fe4ff1ee 100644 --- a/tests/validation/fixtures/ArgMinMaxFixture.h +++ b/tests/validation/fixtures/ArgMinMaxFixture.h @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/Tensor.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" @@ -121,8 +122,7 @@ protected: // Fill reference fill(src); - TensorShape output_shape = src_shape; - output_shape.set(axis, 1); + TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(src_shape, axis, false); return reference::reduction_operation<T, uint32_t>(src, output_shape, axis, op); } diff --git a/tests/validation/fixtures/ReductionOperationFixture.h b/tests/validation/fixtures/ReductionOperationFixture.h index d01f41abf0..867c08ec3a 100644 --- a/tests/validation/fixtures/ReductionOperationFixture.h +++ b/tests/validation/fixtures/ReductionOperationFixture.h @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/Tensor.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" @@ -45,11 +46,15 @@ class ReductionOperationValidationFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info) + void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info, bool keep_dims = false) { - const TensorShape output_shape = get_output_shape(shape, axis); - _target = compute_target(shape, output_shape, data_type, axis, op, quantization_info); - _reference = compute_reference(shape, output_shape, data_type, axis, op, quantization_info); + const bool is_arg_min_max = (op == ReductionOperation::ARG_IDX_MAX) || (op == ReductionOperation::ARG_IDX_MIN); + _keep_dims = keep_dims && !is_arg_min_max; + + const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(shape, axis, _keep_dims); + + _target = compute_target(shape, data_type, axis, op, quantization_info); + _reference = compute_reference(shape, output_shape, data_type, axis, op, quantization_info); } protected: @@ -70,15 +75,15 @@ protected: } } - TensorType compute_target(const TensorShape &src_shape, const TensorShape &dst_shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info) + TensorType compute_target(const TensorShape &src_shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info) { // Create tensors TensorType src = create_tensor<TensorType>(src_shape, data_type, 1, quantization_info); - TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, quantization_info); + TensorType dst; // Create and configure function FunctionType reduction_func; - reduction_func.configure(&src, &dst, axis, op); + reduction_func.configure(&src, &dst, axis, op, _keep_dims); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -114,12 +119,7 @@ protected: SimpleTensor<T> _reference{}; private: - TensorShape get_output_shape(TensorShape shape, unsigned int axis) - { - TensorShape output_shape(shape); - output_shape.set(axis, 1); - return output_shape; - } + bool _keep_dims{ false }; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T> @@ -127,9 +127,9 @@ class ReductionOperationQuantizedFixture : public ReductionOperationValidationFi { public: template <typename...> - void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info = QuantizationInfo()) + void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, QuantizationInfo quantization_info = QuantizationInfo(), bool keep_dims = false) { - ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, quantization_info); + ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, quantization_info, keep_dims); } }; @@ -138,9 +138,9 @@ class ReductionOperationFixture : public ReductionOperationValidationFixture<Ten { public: template <typename...> - void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op) + void setup(TensorShape shape, DataType data_type, unsigned int axis, ReductionOperation op, bool keep_dims = false) { - ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, QuantizationInfo()); + ReductionOperationValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, axis, op, QuantizationInfo(), keep_dims); } }; } // namespace validation |