From 62385bce6baacfa194cff9e6be6d8eaa73bc3fab Mon Sep 17 00:00:00 2001 From: John Richardson Date: Fri, 20 Apr 2018 13:11:36 +0100 Subject: COMPMID-948: Add validation to CLL2NormalizeLayer Change-Id: I452a718a60b81da51cd3e98641fd99c86c4debab Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129451 Tested-by: Jenkins Reviewed-by: Gian Marco Iodice --- .../core/CL/kernels/CLL2NormalizeLayerKernel.h | 30 +++++++-- .../core/CL/kernels/CLReductionOperationKernel.h | 19 +++++- .../runtime/CL/functions/CLL2NormalizeLayer.h | 31 ++++++--- .../runtime/CL/functions/CLReductionOperation.h | 17 ++++- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 77 +++++++++++++++++----- src/core/CL/kernels/CLReductionOperationKernel.cpp | 70 ++++++++++++++++---- src/runtime/CL/functions/CLL2NormalizeLayer.cpp | 22 ++++++- src/runtime/CL/functions/CLReductionOperation.cpp | 61 ++++++++++++++--- tests/validation/CL/L2NormalizeLayer.cpp | 33 +++++++++- tests/validation/CL/ReductionOperation.cpp | 32 ++++++++- 10 files changed, 332 insertions(+), 60 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h index f7d717119b..dec4192fcd 100644 --- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__ -#define __ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__ +#ifndef __ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__ +#define __ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__ #include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/Types.h" @@ -31,7 +31,7 @@ namespace arm_compute { class ICLTensor; -/** Interface for the reduction operation kernel */ +/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ class CLL2NormalizeLayerKernel : public ICLKernel { public: @@ -50,14 +50,30 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8, QS16, F32. + * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW. * @param[in] sum Sum values tensor. Data types supported: same as @p input. - * @param[out] output Destination tensor. Data types supported: Same as @p input. + * Sum will have the same number of dimensions as input. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 * @param[in] epsilon Lower bound value for the normalization. */ void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon); + /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel. + * + * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] sum Sum values tensor info. Data types supported: same as @p input. + * Sum will have the same number of dimensions as input. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 + * @param[in] epsilon Lower bound value for the normalization. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon); + // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -69,4 +85,4 @@ private: float _epsilon; }; } // namespace arm_compute -#endif /*__ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__ */ +#endif /*__ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h index 0bb001d16d..56f75e5fb7 100644 --- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h +++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,13 +50,26 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. */ void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op); + /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel. + * + * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * Output will have the same number of dimensions as input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h index 8aea7a641b..d3d34f877b 100644 --- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h +++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_CLL2NORMALIZE_H__ -#define __ARM_COMPUTE_CLL2NORMALIZE_H__ +#ifndef __ARM_COMPUTE_CLL2NORMALIZELAYER_H__ +#define __ARM_COMPUTE_CLL2NORMALIZELAYER_H__ #include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" #include "arm_compute/core/Types.h" @@ -39,7 +39,11 @@ namespace arm_compute { class ICLTensor; -/** Perform reduction operation. +/** Basic function to perform a L2 normalization on a given axis. + * + * This function runs the following kernels: + * -# @ref CLReductionOperation + * -# @ref CLL2NormalizeLayerKernel */ class CLL2NormalizeLayer : public IFunction { @@ -49,13 +53,24 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8, QS16, F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 - * @param[in] epsilon Lower bound value for the normalization. + * @param[in] epsilon (Optional) Lower bound value for the normalization. */ void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon = 1e-12); + /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayer. + * + * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 + * @param[in] epsilon (Optional) Lower bound value for the normalization. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, float epsilon = 1e-12); + // Inherited methods overridden: void run() override; @@ -66,4 +81,4 @@ private: CLTensor _sumsq; }; } -#endif /*__ARM_COMPUTE_CLL2NORMALIZE_H__ */ +#endif /*__ARM_COMPUTE_CLL2NORMALIZELAYER_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h index abec9b8dc5..b8108b507b 100644 --- a/arm_compute/runtime/CL/functions/CLReductionOperation.h +++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h @@ -53,13 +53,24 @@ public: /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QS8, QS16, F16, F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW. + * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 * @param[in] op Reduction operation to perform. */ void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op); + /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation. + * + * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW. + * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. + * @param[in] axis Axis along which to reduce. Supported reduction axis : 0 + * @param[in] op Reduction operation to perform. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); + // Inherited methods overridden: void run() override; @@ -71,4 +82,4 @@ private: unsigned int _num_of_stages; }; } -#endif /*__ARM_COMPUTE_CLL2NORMALIZE_H__ */ +#endif /*__ARM_COMPUTE_CLREDUCTIONOPERATION_H__ */ diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index 36e351e048..3d30350c59 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,18 +42,60 @@ CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel() { } -void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) +{ + ARM_COMPUTE_UNUSED(epsilon); + + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + + // Reduce shape on axis + TensorShape sum_shape = input->tensor_shape(); + sum_shape.set(axis, 1); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(sum->tensor_shape(), sum_shape); + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape()); + ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW); + } + + return Status{}; +} + +std::tuple validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); + const unsigned int num_elems_processed_per_iteration = 16; + + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + + // Output tensor auto initialization if not yet initialized + auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type(), input->fixed_point_position()); - // Sum and output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); - ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + bool window_changed = update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, input->valid_region()); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + + return std::make_tuple(err, win); +} +} // namespace + +void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon)); _input = input; _sum = sum; @@ -76,15 +118,18 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor _kernel.setArg(idx, _epsilon); // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + auto win_config = validate_and_configure_window(_input->info(), _output->info()); + ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + ICLKernel::configure(std::get<1>(win_config)); +} - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); +Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, sum, output, axis, epsilon)); + ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); - ICLKernel::configure(win); + return Status{}; } void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp index 1dd5eb97ec..1347a9bc94 100644 --- a/src/core/CL/kernels/CLReductionOperationKernel.cpp +++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp @@ -38,6 +38,52 @@ using namespace arm_compute; +namespace +{ +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) +{ + ARM_COMPUTE_UNUSED(op); + + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW); + } + + return Status{}; +} + +std::tuple validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int axis) +{ + // Output tensor auto initialization if not yet initialized + TensorShape output_shape{ input->tensor_shape() }; + output_shape.set(axis, 1); + auto_init_if_empty(*output, output_shape, 1, input->data_type(), input->fixed_point_position()); + + const unsigned int num_elems_processed_per_iteration = 16; + + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + const unsigned int border_width = ((input->dimension(0) % 128) != 0) ? 128 - input->dimension(0) % 128 : 0; // TODO (COMPMID-1143): Fix padding (possible value 127!) + + AccessWindowStatic input_access(input, 0, 0, input->dimension(0) + border_width, 1); + AccessWindowHorizontal output_access(output, 0, 1); + + bool window_changed = update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, output->valid_region()); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + + return std::make_tuple(err, win); +} +} // namespace + CLReductionOperationKernel::CLReductionOperationKernel() : _input(nullptr), _output(nullptr), _reduction_axis(0), _op(ReductionOperation::SUM_SQUARE), _border_size() { @@ -50,17 +96,13 @@ BorderSize CLReductionOperationKernel::border_size() const void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Output tensor auto initialization if not yet initialized TensorShape output_shape{ input->info()->tensor_shape() }; output_shape.set(axis, 1); - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); - ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op)); const unsigned int num_elems_processed_per_iteration = 16; const unsigned int border_width = ((input->info()->dimension(0) % 128) != 0) ? 128 - input->info()->dimension(0) % 128 : 0; @@ -97,15 +139,19 @@ void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *ou _kernel = static_cast(CLKernelLibrary::get().create_kernel("reduction_operation", build_opts)); // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + auto win_config = validate_and_configure_window(_input->info(), _output->info(), axis); - AccessWindowStatic input_access(input->info(), 0, 0, input->info()->dimension(0) + border_width, 1); - AccessWindowHorizontal output_access(output->info(), 0, 1); + ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config)); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, output->info()->valid_region()); + ICLKernel::configure(std::get<1>(win_config)); +} + +Status CLReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op)); + ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), axis))); - ICLKernel::configure(win); + return Status{}; } void CLReductionOperationKernel::run(const Window &window, cl::CommandQueue &queue) diff --git a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp index d1bb65f1c9..a3010a73ea 100644 --- a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp +++ b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,6 +52,26 @@ void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, unsigned _sumsq.allocator()->allocate(); } +Status CLL2NormalizeLayer::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, float epsilon) +{ + TensorShape shape(input->tensor_shape()); + + // Create intermediate tensor info + TensorInfo sum_sq; + sum_sq.set_data_type(input->data_type()); + sum_sq.set_tensor_shape(shape); + + ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperation::validate(input, &sum_sq, axis, ReductionOperation::SUM_SQUARE)); + + // Reduce shape on axis (supported axis is 0) + shape.set(0, 1); + sum_sq.set_tensor_shape(shape); + + ARM_COMPUTE_RETURN_ON_ERROR(CLL2NormalizeLayerKernel::validate(input, &sum_sq, output, axis, epsilon)); + + return Status{}; +} + void CLL2NormalizeLayer::run() { _memory_group.acquire(); diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp index d02afb4e90..3a5133d91f 100644 --- a/src/runtime/CL/functions/CLReductionOperation.cpp +++ b/src/runtime/CL/functions/CLReductionOperation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -35,19 +35,64 @@ using namespace arm_compute; +namespace +{ +unsigned int calculate_number_of_stages(const ITensorInfo *input) +{ + // Calculate number of WGs. 16 elements per thread, 8 threads per WG + const unsigned int num_of_wg = ceil(input->dimension(0) / 128.f); + + // Calculate number of stages. First stage performs op and the rest reduction sum + // depending on the size of the input. Last stage should have only 1 WG. + const unsigned int num_of_stages = num_of_wg / 128 + 2; + + return num_of_stages; +} +} // namespace + CLReductionOperation::CLReductionOperation(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _sums_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _num_of_stages() { } -void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) +Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op) { - // Calculate number of WGs. 16 elements per thread, 8 threads per WG - unsigned int num_of_wg = ceil(input->info()->dimension(0) / 128.f); + const unsigned int num_of_stages = calculate_number_of_stages(input); - // Calculate number of stages. First stage performs op and the rest reduction sum - // depending on the size of the input. Last stage should have only 1 WG. - _num_of_stages = num_of_wg / 128 + 2; + // Create temporary tensor infos + auto sums_vector = arm_compute::support::cpp14::make_unique(num_of_stages - 1); + + // Create intermediate tensor info + TensorShape shape{ input->tensor_shape() }; + + for(unsigned int i = 0; i < num_of_stages - 1; i++) + { + shape.set(0, ceil(shape.x() / 128.f)); + sums_vector[i].set_data_type(input->data_type()); + sums_vector[i].set_tensor_shape(shape); + sums_vector[i].set_num_channels(input->num_channels()); + sums_vector[i].set_fixed_point_position(input->fixed_point_position()); + } + + // Validate ReductionOperation only on first kernel + ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, sums_vector.get(), axis, op)); + + // Validate ReductionOperation on intermediate stages + for(unsigned int i = 1; i < num_of_stages - 1; ++i) + { + ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(sums_vector.get() + i - 1, sums_vector.get() + i, axis, op)); + } + + // Validate ReductionOperation on the last stage + const unsigned int last_stage = num_of_stages - 1; + ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(sums_vector.get() + last_stage - 1, output, axis, op)); + + return Status{}; +} + +void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op) +{ + _num_of_stages = calculate_number_of_stages(input->info()); // Create temporary tensors _sums_vector = arm_compute::support::cpp14::make_unique(_num_of_stages - 1); @@ -95,4 +140,4 @@ void CLReductionOperation::run() } _memory_group.release(); -} \ No newline at end of file +} diff --git a/tests/validation/CL/L2NormalizeLayer.cpp b/tests/validation/CL/L2NormalizeLayer.cpp index bc2374bc68..3d121b079d 100644 --- a/tests/validation/CL/L2NormalizeLayer.cpp +++ b/tests/validation/CL/L2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,37 @@ constexpr AbsoluteTolerance tolerance_f32(0.00001f); TEST_SUITE(CL) TEST_SUITE(L2NormalizeLayer) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching data type input/output + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching shape input/output + TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1 + TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32 + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 0 + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) + }), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F16), + TensorInfo(TensorShape(256U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) + })), + framework::dataset::make("Axis", { 0U, 0U, 0U, 0U, static_cast(TensorShape::num_max_dimensions), 1U, 0U })), + framework::dataset::make("Expected", { false, false, false, false, false, false, true })), + input_info, output_info, axis, expected) +{ + bool is_valid = bool(CLL2NormalizeLayer::validate(&input_info.clone()->set_is_resizable(false), + &output_info.clone()->set_is_resizable(false), + axis)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + template using CLL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture; diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp index 684ed4694f..a2a5eff4de 100644 --- a/tests/validation/CL/ReductionOperation.cpp +++ b/tests/validation/CL/ReductionOperation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,6 +50,36 @@ RelativeTolerance tolerance_f32(0.00001f); TEST_SUITE(CL) TEST_SUITE(ReductionOperation) +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching data type input/output + TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1 + TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32 + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 0 + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) + }), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F16), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) + })), + framework::dataset::make("Axis", { 0U, 0U, 0U, static_cast(TensorShape::num_max_dimensions), 1U, 0U })), + framework::dataset::make("Expected", { false, false, false, false, false, true })), + input_info, output_info, axis, expected) +{ + bool is_valid = bool(CLReductionOperation::validate(&input_info.clone()->set_is_resizable(false), + &output_info.clone()->set_is_resizable(false), + axis, + ReductionOperation::SUM_SQUARE)); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + template using CLReductionOperationFixture = ReductionOperationValidationFixture; -- cgit v1.2.1