From 226e4b92b191491ffa57ede66eba1d5d6fcf3b76 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Thu, 23 Aug 2018 12:00:02 +0100
Subject: COMPMID-1470 Add auto-init of the output in NECol2im

The output of NECol2Im is already auto-initialized. This patch calls
ShapeCalculator instead of computing the shape inside the kernel, adds
validate_and_configure_window, and standardizes the way the convolved dims
are passed (NEON used Size2D while CL passed a pair of uint values; both
implementations now use Size2D).

Change-Id: I795696e1b6532f57847c3186c1b532c09f5a25da
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145345
Tested-by: Jenkins
Reviewed-by: Michele DiGiorgio
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/CL/kernels/CLCol2ImKernel.h  |  6 +--
 arm_compute/core/utils/misc/ShapeCalculator.h | 10 ++---
 src/core/CL/kernels/CLCol2ImKernel.cpp        | 14 +++----
 src/core/NEON/kernels/NECol2ImKernel.cpp      | 48 +++++++++++-----------
 .../CL/functions/CLGEMMConvolutionLayer.cpp   |  4 +-
 .../CL/functions/CLLocallyConnectedLayer.cpp  |  4 +-
 src/runtime/CL/tuners/BifrostTuner.cpp        |  2 +-
 tests/validation/CL/Col2Im.cpp                |  4 +-
 tests/validation/fixtures/Col2ImFixture.h     |  8 ++--
 9 files changed, 49 insertions(+), 51 deletions(-)

diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
index 2a18ae08c8..948b412ccd 100644
--- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h
+++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
@@ -71,7 +71,7 @@ public:
      * @param[in] convolved_dims Output convolved dimensions.
      * @param[in] num_groups     (Optional) Number of groups when performing a grouped convolution
      */
-    void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims, unsigned int num_groups = 1);
+    void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel
      *
      * @param[in] input          The input tensor to convert. Data types supported: QASYMM8/F16/F32
@@ -82,7 +82,7 @@ public:
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, std::pair<unsigned int, unsigned int> convolved_dims, unsigned int num_groups = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups = 1);

     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
@@ -90,7 +90,7 @@ public:
     const ICLTensor *_input;
     ICLTensor       *_output;
-    std::pair<unsigned int, unsigned int> _convolved_dims;
+    Size2D _convolved_dims;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_CLCOL2IMKERNEL_H__ */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index c40e7119b2..09f558d8b0 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -141,18 +141,18 @@ inline TensorShape compute_reductionB_shape(const ITensorInfo &a)
     return shape_vector_sum_row;
 }

-inline TensorShape compute_col2im_shape(const ITensorInfo &input, std::pair<unsigned int, unsigned int> convolved_dims, unsigned int num_groups = 1)
+inline TensorShape compute_col2im_shape(const ITensorInfo &input, const Size2D &convolved_dims, bool batch_size_on_z, unsigned int num_groups = 1)
 {
     ARM_COMPUTE_ERROR_ON(num_groups == 0);
-    ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.first * convolved_dims.second));
+    ARM_COMPUTE_ERROR_ON(input.tensor_shape()[1] != (convolved_dims.area()));
     ARM_COMPUTE_ERROR_ON((num_groups > 1) && input.tensor_shape()[2] != num_groups);

     TensorShape col2im_shape{ input.tensor_shape() };
-    col2im_shape.set(0, convolved_dims.first);
-    col2im_shape.set(1, convolved_dims.second);
+    col2im_shape.set(0, convolved_dims.width);
+    col2im_shape.set(1, convolved_dims.height);
     col2im_shape.set(2, input.tensor_shape()[0] * num_groups);
-    const unsigned int batch_idx = (num_groups == 1) ? 2 : 3;
+    const unsigned int batch_idx = (batch_size_on_z && num_groups == 1) ? 2 : 3;
     col2im_shape.set(3, input.tensor_shape()[batch_idx]);

     return col2im_shape;
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index 40032f97c4..74bbb9b4df 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -40,7 +40,7 @@ using namespace arm_compute::misc::shape_calculator;

 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, std::pair<unsigned int, unsigned int> convolved_dims, unsigned int num_groups)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
@@ -49,7 +49,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, s
     // Checks performed when output is configured
     if(output->total_size() != 0)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_col2im_shape(*input, convolved_dims, num_groups));
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_col2im_shape(*input, convolved_dims, true, num_groups));
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_layout() != DataLayout::NCHW, "Col2Im output's data layout must always be NCHW");
@@ -58,11 +58,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, s
     return Status{};
 }

-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, std::pair<unsigned int, unsigned int> convolved_dims, unsigned int num_groups)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_col2im_shape(*input, convolved_dims, num_groups)).set_data_layout(DataLayout::NCHW));
+    auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_col2im_shape(*input, convolved_dims, true, num_groups)).set_data_layout(DataLayout::NCHW));

     const unsigned int num_elems_read_per_iteration = 8;
@@ -87,7 +87,7 @@ CLCol2ImKernel::CLCol2ImKernel()
 {
 }

-void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims, unsigned int num_groups)
+void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
@@ -105,7 +105,7 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
     build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
     build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->info()->element_size()));
     build_opts.add_option("-DWIDTH_INPUT=" + support::cpp11::to_string(input->info()->dimension(0)));
-    build_opts.add_option("-DWIDTH_OUTPUT=" + support::cpp11::to_string(_convolved_dims.first));
+    build_opts.add_option("-DWIDTH_OUTPUT=" + support::cpp11::to_string(_convolved_dims.width));
     build_opts.add_option_if(num_groups > 1, "-DGROUPING");

     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("col2im", build_opts.options()));
@@ -130,7 +130,7 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p
     _config_id += support::cpp11::to_string(output->info()->dimension(1));
 }

-Status CLCol2ImKernel::validate(const ITensorInfo *input, const ITensorInfo *output, std::pair<unsigned int, unsigned int> convolved_dims, unsigned int num_groups)
+Status CLCol2ImKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, convolved_dims, num_groups));
diff --git a/src/core/NEON/kernels/NECol2ImKernel.cpp b/src/core/NEON/kernels/NECol2ImKernel.cpp
index bb8e758b78..d6517ac072 100644
--- a/src/core/NEON/kernels/NECol2ImKernel.cpp
+++ b/src/core/NEON/kernels/NECol2ImKernel.cpp
@@ -29,26 +29,17 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"

 #include
 #include
 #include

 using namespace arm_compute;
+using namespace misc::shape_calculator;

 namespace
 {
-TensorShape get_output_shape(const ITensorInfo *input, const Size2D &convolved_dims)
-{
-    TensorShape output_shape = input->tensor_shape();
-    output_shape.set(0, convolved_dims.width);
-    output_shape.set(1, convolved_dims.height);
-    output_shape.set(2, input->tensor_shape()[0]);
-    output_shape.set(3, input->tensor_shape()[3]); // For NEON the batch size is on the fourth dimension of the input tensor
-
-    return output_shape;
-}
-
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
 {
     //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
@@ -60,12 +51,28 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
     // Validate configured output
     if(output->total_size() != 0)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), get_output_shape(input, convolved_dims));
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), compute_col2im_shape(*input, convolved_dims, false));
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     }

     return Status{};
 }
+
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const Size2D &convolved_dims)
+{
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_col2im_shape(*input, convolved_dims, false)));
+
+    // Configure kernel window
+    Window win = calculate_max_window(*input, Steps());
+
+    // The NECol2ImKernel doesn't need padding so update_window_and_padding() can be skipped
+    Coordinates coord;
+    coord.set_num_dimensions(output->num_dimensions());
+    output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+
+    return std::make_pair(Status{}, win);
+}
 } // namespace

 template <typename T>
@@ -102,11 +109,6 @@ NECol2ImKernel::NECol2ImKernel()
 void NECol2ImKernel::configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(get_output_shape(input->info(), convolved_dims)));
-
-    // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), convolved_dims));

     _input  = input;
@@ -130,19 +132,15 @@ void NECol2ImKernel::configure(const ITensor *input, ITensor *output, const Size
     }

     // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps());
-
-    // The NECol2ImKernel doesn't need padding so update_window_and_padding() can be skipped
-    Coordinates coord;
-    coord.set_num_dimensions(output->info()->num_dimensions());
-    output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
-    INEKernel::configure(win);
+    auto win_config = validate_and_configure_window(input->info(), output->info(), convolved_dims);
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
 }

 Status NECol2ImKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims)
 {
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, convolved_dims));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), convolved_dims).first);
     return Status{};
 }
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index 782fe710e7..c9daea4169 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -285,7 +285,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *
     if(input->info()->data_layout() == DataLayout::NCHW)
     {
         // Configure and tune Col2Im
-        _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, std::make_pair(conv_w, conv_h), num_groups);
+        _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
         CLScheduler::get().tune_kernel_static(_col2im_kernel);
     }
     else
@@ -443,7 +443,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
     {
         ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output,
-                                                             std::make_pair(conv_w, conv_h), num_groups));
+                                                             Size2D(conv_w, conv_h), num_groups));
     }
 }
diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
index 40bf032d69..5c6bef995f 100644
--- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp
@@ -122,7 +122,7 @@ Status CLLocallyConnectedLayer::validate(const ITensorInfo *input, const ITensor
     ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias));
     ARM_COMPUTE_RETURN_ON_ERROR(CLWeightsReshapeKernel::validate(weights, biases, &weights_reshaped_info));
     ARM_COMPUTE_RETURN_ON_ERROR(CLLocallyConnectedMatrixMultiplyKernel::validate(&input_im2col_reshaped_info, &weights_reshaped_info, &gemm_output_info));
-    ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(&gemm_output_info, output, std::make_pair(conv_w, conv_h)));
+    ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));

     return Status{};
 }
@@ -163,7 +163,7 @@ void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor
     _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
     _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped);
     _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
-    _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h));
+    _output_col2im_kernel.configure(&_gemm_output, output, Size2D(conv_w, conv_h));

     // Allocate intermediate tensors
     _input_im2col_reshaped.allocator()->allocate();
diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp
index 2d52f3392e..59d73b4e79 100644
--- a/src/runtime/CL/tuners/BifrostTuner.cpp
+++ b/src/runtime/CL/tuners/BifrostTuner.cpp
@@ -134,7 +134,7 @@ void tune_col2im_kernel(CLCol2ImKernel &k)
     // via exhaustive autotuning over 30 representative tensor shapes.
     if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::G76))
     {
-        if((k._convolved_dims.first == 7) || (k._convolved_dims.first == 14))
+        if((k._convolved_dims.width == 7) || (k._convolved_dims.width == 14))
         {
             lws_hint = cl::NDRange(1, 7, 1);
         }
diff --git a/tests/validation/CL/Col2Im.cpp b/tests/validation/CL/Col2Im.cpp
index 6f1163c278..1fea5c1111 100644
--- a/tests/validation/CL/Col2Im.cpp
+++ b/tests/validation/CL/Col2Im.cpp
@@ -63,14 +63,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
                framework::dataset::make("Expected", { false, false, false, true })),
                input_info, output_info, convolved_width, convolved_height, num_groups, expected)
 {
-    bool status = bool(CLCol2Im::validate(&input_info, &output_info, std::make_pair(convolved_width, convolved_height), num_groups));
+    bool status = bool(CLCol2Im::validate(&input_info, &output_info, Size2D(convolved_width, convolved_height), num_groups));
     ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*

 template <typename T>
-using CLCol2ImFixture = Col2ImValidationFixture<CLTensor, CLAccessor, CLCol2Im, T>;
+using CLCol2ImFixture = Col2ImValidationFixture<CLTensor, CLAccessor, CLCol2Im, T, true>;

 TEST_SUITE(Float)
 TEST_SUITE(FP32)
diff --git a/tests/validation/fixtures/Col2ImFixture.h b/tests/validation/fixtures/Col2ImFixture.h
index ddc78a5032..5488f8a3ea 100644
--- a/tests/validation/fixtures/Col2ImFixture.h
+++ b/tests/validation/fixtures/Col2ImFixture.h
@@ -44,16 +44,16 @@ namespace validation
 {
 using namespace arm_compute::misc::shape_calculator;

-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool batch_size_on_z>
 class Col2ImValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
     void setup(TensorShape input_shape, const unsigned int convolved_width, unsigned int convolved_height, unsigned int num_groups, DataType data_type)
     {
-        const std::pair<unsigned int, unsigned int> convolved_dims(convolved_width, convolved_height);
+        const Size2D convolved_dims(convolved_width, convolved_height);

-        const TensorShape output_shape = compute_col2im_shape(TensorInfo(input_shape, 1, data_type), convolved_dims, num_groups);
+        const TensorShape output_shape = compute_col2im_shape(TensorInfo(input_shape, 1, data_type), convolved_dims, batch_size_on_z, num_groups);

         _target    = compute_target(input_shape, output_shape, convolved_dims, num_groups, data_type);
         _reference = compute_reference(input_shape, output_shape, num_groups, data_type);
@@ -66,7 +66,7 @@ protected:
         library->fill_tensor_uniform(tensor, seed);
     }

-    TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, std::pair<unsigned int, unsigned int> convolved_dims, unsigned int num_groups, DataType data_type)
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &output_shape, const Size2D &convolved_dims, unsigned int num_groups, DataType data_type)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(input_shape, data_type);
-- 
cgit v1.2.1
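
Note on the shared shape rule: the logic that compute_col2im_shape implements after this patch can be restated outside the library for readers who want to check it in isolation. The sketch below is an illustrative, standalone re-expression of that rule (Size2D dimensions plus the new batch_size_on_z flag); the names SimpleSize2D and col2im_output_shape are invented for the example and are not part of the Arm Compute Library API.

#include <array>
#include <cassert>
#include <cstddef>

// Illustrative re-statement of the shape rule used by compute_col2im_shape after this
// patch. Shapes are indexed with dimension 0 first, as in the library.
struct SimpleSize2D
{
    unsigned int width;
    unsigned int height;
    unsigned int area() const { return width * height; }
};

std::array<std::size_t, 4> col2im_output_shape(const std::array<std::size_t, 4> &input_shape,
                                               const SimpleSize2D &convolved_dims,
                                               bool batch_size_on_z,
                                               unsigned int num_groups = 1)
{
    assert(num_groups != 0);
    // Dimension 1 of the col2im input holds one element per output spatial location.
    assert(input_shape[1] == convolved_dims.area());

    std::array<std::size_t, 4> out = input_shape;
    out[0] = convolved_dims.width;          // convolved output width
    out[1] = convolved_dims.height;         // convolved output height
    out[2] = input_shape[0] * num_groups;   // channels (per-group channels * groups)
    // The CL kernel keeps the batch on dimension 2 of its input when there is a single
    // group (batch_size_on_z == true); the NEON kernel keeps it on dimension 3.
    const unsigned int batch_idx = (batch_size_on_z && num_groups == 1) ? 2 : 3;
    out[3] = input_shape[batch_idx];        // batch size
    return out;
}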
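Call sites now pass the convolved dimensions as a Size2D for both CL and NEON, and the NEON kernel auto-initializes an empty output the same way the CL kernel already did. A minimal usage sketch follows, assuming the usual Tensor/TensorInfo setup from the runtime API; the tensor sizes (64 channels, a 14x14 convolved output, batch of 2) are hypothetical and chosen only to satisfy the shape rule above.

#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void col2im_example()
{
    // Hypothetical convolved output size.
    const unsigned int conv_w = 14;
    const unsigned int conv_h = 14;

    Tensor gemm_out;
    Tensor conv_out;
    // NEON col2im input: [C, conv_w * conv_h, 1, N], batch on the fourth dimension.
    gemm_out.allocator()->init(TensorInfo(TensorShape(64U, conv_w * conv_h, 1U, 2U), 1, DataType::F32));

    NECol2ImKernel col2im;
    // conv_out is left uninitialized: configure() now auto-initializes it to [14, 14, 64, 2].
    col2im.configure(&gemm_out, &conv_out, Size2D(conv_w, conv_h));

    gemm_out.allocator()->allocate();
    conv_out.allocator()->allocate();
}

The static NECol2ImKernel::validate() can be called with the same Size2D before configuration; as the diff shows, it now also runs validate_and_configure_window on cloned tensor infos, so the pre-check does not modify the caller's tensors.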