From 13edbff0820c3b41e7dd766db5a9d6ff65fcda2a Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 26 Jun 2017 17:20:16 +0100 Subject: COMPMID-432 - Extended Convolution Layer to support rectangular kernels Change-Id: I99be1efede4de6dd63ce103fb11196c413757621 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79252 Tested-by: Kaizen Reviewed-by: Moritz Pflanzer --- src/core/CL/cl_kernels/convolution_layer.cl | 50 +++++++--------- src/core/CL/kernels/CLCol2ImKernel.cpp | 6 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 66 +++++++--------------- src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 17 +++--- src/core/NEON/kernels/NEIm2ColKernel.cpp | 31 +++++----- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 10 ++-- src/runtime/CL/functions/CLConvolutionLayer.cpp | 14 ++--- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 5 +- .../CL/functions/CLLocallyConnectedLayer.cpp | 2 +- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 7 +-- .../NEON/functions/NEFullyConnectedLayer.cpp | 5 +- .../NEON/functions/NELocallyConnectedLayer.cpp | 7 ++- 12 files changed, 97 insertions(+), 123 deletions(-) (limited to 'src') diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl index bd5dfaff68..837fdd70fe 100644 --- a/src/core/CL/cl_kernels/convolution_layer.cl +++ b/src/core/CL/cl_kernels/convolution_layer.cl @@ -27,7 +27,7 @@ * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -35,13 +35,13 @@ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Same as input + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] bias_ptr Pointer to the bias tensor. Same as input + * @param[in] bias_ptr Pointer to the bias tensor. Same as @p src_ptr * @param[in] bias_stride_x Stride of the bias tensor in X dimension (in bytes) * @param[in] bias_step_x bias_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] bias_offset_first_element_in_bytes The offset of the first element in the source tensor @@ -93,12 +93,13 @@ __kernel void reshape_to_columns( } } +#if(defined CONVOLVED_WIDTH && defined STRIDE_X && defined STRIDE_Y && defined PAD_X && defined PAD_Y && defined KERNEL_WIDTH && defined KERNEL_HEIGHT && defined KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT) /** This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM. * * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float * @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row. * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -106,48 +107,36 @@ __kernel void reshape_to_columns( * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: F16, F32 + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] kernel_size The convolution kernel size - * @param[in] kernel_depth The kernel depth - * @param[in] width The output tensor width - * @param[in] input_dims The input tensor dimensions - * @param[in] strides The strides of the im2col operation - * @param[in] paddings The input tensor paddings */ __kernel void im2col_generic( TENSOR3D_DECLARATION(src), - IMAGE_DECLARATION(dst), - int kernel_size, - int kernel_depth, - int width, - int2 input_dims, - int2 strides, - int2 paddings) + IMAGE_DECLARATION(dst)) { Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); Image dst = CONVERT_TO_IMAGE_STRUCT_NO_STEP(dst); // Determine output index - uint idx = (get_global_id(1) * width + get_global_id(0)) * dst.stride_y; + uint idx = (get_global_id(1) * CONVOLVED_WIDTH + get_global_id(0)) * dst.stride_y; __global uchar *output_ptr = dst.ptr + idx; // Determine current input index - const int top_left_x = get_global_id(0) * strides.x - paddings.x; - const int top_left_y = get_global_id(1) * strides.y - paddings.y; + const int top_left_x = get_global_id(0) * STRIDE_X - PAD_X; + const int top_left_y = get_global_id(1) * STRIDE_Y - PAD_Y; // Linearize convolution elements - for(int d = 0; d < kernel_depth; ++d) + for(int d = 0; d < KERNEL_DEPTH; ++d) { - for(int y = top_left_y, y_e = top_left_y + kernel_size; y < y_e; ++y) + for(int y = top_left_y, y_e = top_left_y + KERNEL_HEIGHT; y < y_e; ++y) { - for(int x = top_left_x, x_e = top_left_x + kernel_size; x < x_e; ++x, output_ptr += dst.stride_x) + for(int x = top_left_x, x_e = top_left_x + KERNEL_WIDTH; x < x_e; ++x, output_ptr += dst.stride_x) { - if(x < 0 || x >= input_dims.x || y < 0 || y >= input_dims.y) + if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT) { *((__global DATA_TYPE *)output_ptr) = 0; } @@ -160,21 +149,22 @@ __kernel void im2col_generic( } #if defined HAS_BIAS - *((__global DATA_TYPE *)output_ptr) = 1; + *((__global DATA_TYPE *)output_ptr) = (DATA_TYPE)1; #endif } +#endif //(defined CONVOLVED_WIDTH && defined STRIDE_X && defined STRIDE_Y && defined PAD_X && defined PAD_Y && defined KERNEL_WIDTH && defined KERNEL_HEIGHT && defined KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT) /** This kernel performs a reshaping of the output of the convolution layer. * * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: F16, F32 + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) @@ -202,7 +192,7 @@ __kernel void col2im( * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float * @note In case biases will be added in late stage, -DHAS_BIAS has to be passed to append the final matrix with 1 in each row. * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -210,7 +200,7 @@ __kernel void col2im( * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Same as input. + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index ad66c39483..679943ba3e 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -61,8 +61,12 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p // Configure window Window win = calculate_max_window(*input->info(), Steps()); + // The CLCol2ImKernel doesn't need padding so update_window_and_padding() can be skipped - output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + Coordinates coord; + coord.set_num_dimensions(output->info()->num_dimensions()); + output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + ICLKernel::configure(win); } diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 8c0fe26666..092f495f92 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -29,8 +29,10 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" #include #include @@ -38,14 +40,14 @@ using namespace arm_compute; CLIm2ColKernel::CLIm2ColKernel() - : _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_size(0), _num_elems_processed_per_iteration(1), _run_func(nullptr) + : _input(nullptr), _output(nullptr), _convolved_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr) { } -void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, std::pair convolved_dims, const PadStrideInfo &conv_info, bool has_bias) +void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); _input = input; _output = output; @@ -70,44 +72,23 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, std::p if(!run_img2col_reduced) { - _convolved_dims = convolved_dims; - _conv_info = conv_info; - _kernel_size = std::sqrt((output->info()->dimension(0) - (has_bias ? 1 : 0)) / input->info()->dimension(2)); + _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), + kernel_dims.width, kernel_dims.height, + conv_info); _num_elems_processed_per_iteration = output->info()->dimension(0); - _kernel = static_cast(CLKernelLibrary::get().create_kernel("im2col_generic", build_opts)); + build_opts.emplace("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width)); + build_opts.emplace("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height)); + build_opts.emplace("-DKERNEL_DEPTH=" + support::cpp11::to_string(input->info()->dimension(2))); + build_opts.emplace("-DCONVOLVED_WIDTH=" + support::cpp11::to_string(_convolved_dims.first)); + build_opts.emplace("-DSTRIDE_X=" + support::cpp11::to_string(conv_info.stride().first)); + build_opts.emplace("-DSTRIDE_Y=" + support::cpp11::to_string(conv_info.stride().second)); + build_opts.emplace("-DPAD_X=" + support::cpp11::to_string(conv_info.pad().first)); + build_opts.emplace("-DPAD_Y=" + support::cpp11::to_string(conv_info.pad().second)); + build_opts.emplace("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); + build_opts.emplace("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); - // Create static kernel arguments - const cl_int2 input_dims = - { - { - static_cast(input->info()->dimension(0)), - static_cast(input->info()->dimension(1)), - } - }; - const cl_int2 strides = - { - { - stride_x, - stride_y, - } - }; - const cl_int2 paddings = - { - { - pad_x, - pad_y, - } - }; - - // Set static kernel arguments - unsigned int idx = num_arguments_per_2D_tensor() + num_arguments_per_3D_tensor(); - _kernel.setArg(idx++, _kernel_size); - _kernel.setArg(idx++, input->info()->dimension(2) /* depth */); - _kernel.setArg(idx++, _convolved_dims.first /* output width */); - _kernel.setArg(idx++, input_dims); - _kernel.setArg(idx++, strides); - _kernel.setArg(idx++, paddings); + _kernel = static_cast(CLKernelLibrary::get().create_kernel("im2col_generic", build_opts)); _run_func = &CLIm2ColKernel::run_generic; } @@ -136,13 +117,6 @@ void CLIm2ColKernel::run_generic(const Window &window, cl::CommandQueue &queue) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - int pad_x = 0; - int pad_y = 0; - int stride_x = 0; - int stride_y = 0; - std::tie(pad_x, pad_y) = _conv_info.pad(); - std::tie(stride_x, stride_y) = _conv_info.stride(); - // Get initial windows Window slice = window.first_slice_window_3D(); Window slice_in = window.first_slice_window_3D(); diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 845bd3799d..82634164de 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -42,14 +42,7 @@ CLWeightsReshapeKernel::CLWeightsReshapeKernel() void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1)); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2)); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3])); - ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4])); ARM_COMPUTE_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != input->info()->dimension(1)); const DataType dt = input->info()->data_type(); const int fixed_point_position = input->info()->fixed_point_position(); @@ -67,6 +60,16 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); + ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->num_dimensions() != 1)); + ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->num_dimensions() != 2)); + ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 4) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3])); + ARM_COMPUTE_ERROR_ON((input->info()->num_dimensions() == 5) && (biases->info()->dimension(0) != input->info()->tensor_shape()[3] || biases->info()->dimension(1) != input->info()->tensor_shape()[4])); + } + _biases = biases; _output = output; _input = input; diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 875c08ed42..99daa2e5e7 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -27,6 +27,7 @@ #include "arm_compute/core/FixedPoint.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" @@ -47,7 +48,8 @@ inline void linearize_volume(const uint8_t *const in_ptr, bool has_bias, int top_left_x, int top_left_y, - int kernel_size, + int kernel_width, + int kernel_height, int kernel_depth, int input_w, int input_h, @@ -56,9 +58,9 @@ inline void linearize_volume(const uint8_t *const in_ptr, int input_stride_z, int fixed_point_position) { - const int kernel_size2 = kernel_size * kernel_size; - const int x_e = top_left_x + kernel_size; - const int y_e = top_left_y + kernel_size; + const int kernel_size2 = kernel_width * kernel_height; + const int x_e = top_left_x + kernel_width; + const int y_e = top_left_y + kernel_height; // Linearize volume int d = 0; @@ -109,8 +111,8 @@ inline void linearize_volume(const uint8_t *const in_ptr, if((y < 0 || y >= input_h) && has_pads) { // All the values will be zeros - memset(out_ptr, 0, kernel_size * sizeof(T)); - out_ptr += kernel_size; + memset(out_ptr, 0, kernel_width * sizeof(T)); + out_ptr += kernel_width; } else { @@ -199,7 +201,8 @@ void NEIm2ColKernel::run_generic(const Window &window) _has_bias, top_left_x, top_left_y, - static_cast(_kernel_size), + static_cast(_kernel_width), + static_cast(_kernel_height), kernel_depth, input_w, input_h, @@ -260,22 +263,24 @@ void NEIm2ColKernel::run_reduced(const Window &window) } NEIm2ColKernel::NEIm2ColKernel() - : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_size(0), _has_bias(false) + : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false) { } -void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, std::pair convolved_dims, const PadStrideInfo &conv_info, bool has_bias) +void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QS8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32, DataType::QS8); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); _input = input; _output = output; - _convolved_dims = convolved_dims; _conv_info = conv_info; - _kernel_size = std::sqrt((output->info()->dimension(0) - (has_bias ? 1 : 0)) / input->info()->dimension(2)); - _has_bias = has_bias; + _kernel_width = kernel_dims.width; + _kernel_height = kernel_dims.height, + _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), + _kernel_width, _kernel_height, + _conv_info); + _has_bias = has_bias; unsigned int pad_x, pad_y, stride_x, stride_y = 0; std::tie(pad_x, pad_y) = conv_info.pad(); diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index e9b76e7967..ac688e1381 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -97,13 +97,13 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != input->info()->dimension(1)); - const DataType dt = input->info()->data_type(); - const int fixed_point_position = input->info()->fixed_point_position(); - - TensorShape output_shape{ input->info()->tensor_shape() }; + const int fixed_point_position = input->info()->fixed_point_position(); + const DataType dt = input->info()->data_type(); + const TensorShape &input_shape = input->info()->tensor_shape(); + TensorShape output_shape{ input_shape }; output_shape.collapse(3); + const size_t tmp_dim = output_shape[0]; output_shape.set(0, output_shape[1]); output_shape.set(1, tmp_dim + (bias != nullptr ? 1 : 0)); diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 933d92bef7..b29bf8f136 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" #include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" @@ -40,16 +41,13 @@ CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights() void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases, output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); if(biases != nullptr) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3)); ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); @@ -98,8 +96,6 @@ CLConvolutionLayer::CLConvolutionLayer() void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2)); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); @@ -191,7 +187,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); if(_is_fully_connected_convolution) diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index 57d57d517f..b51e709927 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/CL/CLScheduler.h" @@ -126,7 +127,7 @@ void CLFullyConnectedLayer::configure_conv_fc_wb(const ICLTensor *input, const I _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); // Configure im2col kernel - _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure interleave4x4 kernel _interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output); @@ -176,7 +177,7 @@ void CLFullyConnectedLayer::configure_conv_fc_nb(const ICLTensor *input, const I _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); // Configure im2col kernel - _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure matrix multiply kernel _mm_kernel.configure(&_im2col_output, weights, output, 1.0f); diff --git a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp index 0e6d23e0d8..ef6fb50bbf 100644 --- a/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLLocallyConnectedLayer.cpp @@ -100,7 +100,7 @@ void CLLocallyConnectedLayer::configure(const ICLTensor *input, const ICLTensor _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(conv_w, conv_h), conv_info, _has_bias); _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index b38d6617d5..dc8652747f 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -24,6 +24,7 @@ #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" #include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -41,7 +42,6 @@ NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights() void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); @@ -97,8 +97,6 @@ NEConvolutionLayer::NEConvolutionLayer() void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, output); ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2)); @@ -106,7 +104,6 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, if(biases != nullptr) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && biases->info()->dimension(0) != weights->info()->dimension(3)); @@ -197,7 +194,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, dt, fixed_point_position)); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); if(_is_fully_connected_convolution) { _mm_kernel.configure(&_input_im2col_reshaped, weights, &_gemm_output, 1.0f); diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index abb41e9f70..6e27ed344a 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -126,7 +127,7 @@ void NEFullyConnectedLayer::configure_conv_fc_wb(const ITensor *input, const ITe _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, dt, fixed_point_position)); // Configure im2col kernel - _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure interleave4x4 kernel _interleave4x4_kernel.configure(&_im2col_output, &_interleave4x4_output); @@ -176,7 +177,7 @@ void NEFullyConnectedLayer::configure_conv_fc_nb(const ITensor *input, const ITe _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt, fixed_point_position)); // Configure im2col kernel - _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); // Configure matrix multiply kernel _mm_kernel.configure(&_im2col_output, weights, output, 1.0f); diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp index 3b095b4688..e7c71e04d1 100644 --- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp @@ -65,10 +65,13 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei std::tie(stride_x, stride_y) = conv_info.stride(); std::tie(pad_x, pad_y) = conv_info.pad(); + const unsigned int kernel_width = weights->info()->dimension(0); + const unsigned int kernel_height = weights->info()->dimension(1); + // Get convolved dimensions unsigned int conv_w = 0; unsigned int conv_h = 0; - std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1), + std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, conv_info); ARM_COMPUTE_ERROR_ON_MSG((output->info()->dimension(0) != conv_w) || (output->info()->dimension(1) != conv_h), "Output shape does not match the expected one"); @@ -100,7 +103,7 @@ void NELocallyConnectedLayer::configure(const ITensor *input, const ITensor *wei _gemm_output.allocator()->init(TensorInfo(shape_gemm, 1, input->info()->data_type())); // Configure kernels - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, std::make_pair(conv_w, conv_h), conv_info, _has_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias); _weights_reshape_kernel.configure(weights, biases, &_weights_reshaped); _mm_kernel.configure(&_input_im2col_reshaped, &_weights_reshaped, &_gemm_output); _output_col2im_kernel.configure(&_gemm_output, output, std::make_pair(conv_w, conv_h)); -- cgit v1.2.1