From a1ed41fe2427dfa2b5d0139444ceb77ad16a5a73 Mon Sep 17 00:00:00 2001 From: Jaroslaw Rzepecki Date: Fri, 13 Oct 2017 11:13:58 +0100 Subject: IVGCVSW-601: support for asymetric padding in cl conv and depthwise conv Change-Id: I5c6c95091ae77dba96459c0640f9f6167a988c8c Reviewed-on: http://mpd-gerrit.cambridge.arm.com/91700 Reviewed-by: Anthony Barbier Tested-by: Kaizen --- arm_compute/core/Types.h | 60 +++++++++++++++++++++-- src/core/CL/cl_kernels/convolution_layer.cl | 14 +++--- src/core/CL/cl_kernels/depthwise_convolution.cl | 12 ++--- src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 6 ++- src/core/CL/kernels/CLIm2ColKernel.cpp | 14 +++--- src/core/NEON/kernels/NEIm2ColKernel.cpp | 24 +++++---- src/core/Utils.cpp | 30 ++++++------ src/runtime/CL/functions/CLConvolutionLayer.cpp | 3 -- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 3 -- tests/datasets/DepthwiseConvolutionDataset.h | 7 +++ tests/datasets/SmallConvolutionLayerDataset.h | 8 +++ tests/validation/CPP/ConvolutionLayer.cpp | 30 +++++++----- tests/validation/CPP/DepthwiseConvolution.cpp | 38 ++++++++------ utils/TypePrinter.h | 3 +- 14 files changed, 166 insertions(+), 86 deletions(-) diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index f9766b39be..f52dd12597 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -418,7 +418,32 @@ public: unsigned int pad_x = 0, unsigned int pad_y = 0, DimensionRoundingType round = DimensionRoundingType::FLOOR) : _stride(std::make_pair(stride_x, stride_y)), - _pad(std::make_pair(pad_x, pad_y)), + _pad_left(pad_x), + _pad_top(pad_y), + _pad_right(pad_x), + _pad_bottom(pad_y), + _round_type(round) + { + } + /** Constructor + * + * @param[in] stride_x Stride, in elements, across x. + * @param[in] stride_y Stride, in elements, across y. + * @param[in] pad_left Padding across x on the left, in elements. + * @param[in] pad_top Padding across y on the top, in elements. + * @param[in] pad_right Padding across x on the right, in elements. + * @param[in] pad_bottom Padding across y on the bottom, in elements. + * @param[in] round Dimensions rounding. + */ + PadStrideInfo(unsigned int stride_x, unsigned int stride_y, + unsigned int pad_left, unsigned int pad_right, + unsigned int pad_top, unsigned int pad_bottom, + DimensionRoundingType round) + : _stride(std::make_pair(stride_x, stride_y)), + _pad_left(pad_left), + _pad_top(pad_top), + _pad_right(pad_right), + _pad_bottom(pad_bottom), _round_type(round) { } @@ -428,16 +453,45 @@ public: } std::pair pad() const { - return _pad; + //this accessor should be used only when padding is symmetric + ARM_COMPUTE_ERROR_ON(_pad_left != _pad_right || _pad_top != _pad_bottom); + return std::make_pair(_pad_left, _pad_top); } + + unsigned int pad_left() const + { + return _pad_left; + } + unsigned int pad_right() const + { + return _pad_right; + } + unsigned int pad_top() const + { + return _pad_top; + } + unsigned int pad_bottom() const + { + return _pad_bottom; + } + DimensionRoundingType round() const { return _round_type; } + bool has_padding() const + { + return (_pad_left != 0 || _pad_top != 0 || _pad_right != 0 || _pad_bottom != 0); + } + private: std::pair _stride; - std::pair _pad; + unsigned int _pad_left; + unsigned int _pad_top; + unsigned int _pad_right; + unsigned int _pad_bottom; + DimensionRoundingType _round_type; }; diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl index 9e9d0b0ccc..e3018461e3 100644 --- a/src/core/CL/cl_kernels/convolution_layer.cl +++ b/src/core/CL/cl_kernels/convolution_layer.cl @@ -97,7 +97,7 @@ __kernel void reshape_to_columns( } } -#if defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) +#if defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) /** This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM. * * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float @@ -134,8 +134,8 @@ __kernel void im2col_generic( const int batch = get_global_id(2) / filter_depth; // the batch // Calculate input indeces - const int xi = xc * STRIDE_X - PAD_X; - const int yi = yc * STRIDE_Y - PAD_Y; + const int xi = xc * STRIDE_X - PAD_LEFT; + const int yi = yc * STRIDE_Y - PAD_TOP; // Calculate output indeces const int xo = ch * KERNEL_WIDTH * KERNEL_HEIGHT; @@ -149,9 +149,9 @@ __kernel void im2col_generic( { for(int x = xi, x_e = xi + KERNEL_WIDTH; x < x_e; ++x, ++output_ptr) { -#if PAD_X == 0 && PAD_Y == 0 +#if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 *output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y)); -#else // PAD_X == 0 && PAD_Y == 0 +#else // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT) { *output_ptr = 0; @@ -160,7 +160,7 @@ __kernel void im2col_generic( { *output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y)); } -#endif // PAD_X == 0 && PAD_Y == 0 +#endif // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 } } @@ -245,7 +245,7 @@ __kernel void im2col_kernel3x3_padx0_pady0( } #endif // HAS_BIAS } -#endif //defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) +#endif //defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) #if defined(WIDTH_OUTPUT) /** This kernel performs a reshaping of the output of the convolution layer. diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index 9c2c3a5b37..081a4e6c44 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -226,11 +226,11 @@ __kernel void depthwise_weights_reshape(TENSOR3D_DECLARATION(src), IMAGE_DECLARA } #endif //defined(SRC_WIDTH) && defined(DATA_TYPE) -#if defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DATA_TYPE) +#if defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DATA_TYPE) /** This kernel performs a reshaping of the input tensor to a tensor used to perform depthwise convolution using vector to matrix multiplication. * * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float - * @note The convolution information must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y, -DPAD_X, -DPAD_Y, -DKERNEL_WIDHT, -DKERNEL_HEIGHT, -DSRC_WIDTH, -DSRC_HEIGHT + * @note The convolution information must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y, -DPAD_LEFT, -DPAD_TOP, -DPAD_RIGHT, -DPAD_BOTTOM, -DKERNEL_WIDHT, -DKERNEL_HEIGHT, -DSRC_WIDTH, -DSRC_HEIGHT * * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -255,11 +255,11 @@ __kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(d Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); const int src_pixel_linear = get_global_id(1) * STRIDE_X; - const int full_length = SRC_WIDTH + 2 * PAD_X; + const int full_length = SRC_WIDTH + PAD_LEFT + PAD_RIGHT; const int max_initial_x = STRIDE_X * (((full_length - KERNEL_WIDTH) / STRIDE_X) + 1); - const int src_x = -PAD_X + src_pixel_linear % max_initial_x; - const int src_y = -PAD_Y + src_pixel_linear / max_initial_x * STRIDE_Y; + const int src_x = -PAD_LEFT + src_pixel_linear % max_initial_x; + const int src_y = -PAD_TOP + src_pixel_linear / max_initial_x * STRIDE_Y; const int src_z = get_global_id(2); __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes + src_z * src_stride_z; @@ -281,7 +281,7 @@ __kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(d } } -#endif //defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(DATA_TYPE) +#endif //defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(DATA_TYPE) #if defined(CONV_WIDTH) && defined(CONV_HEIGHT) && defined(DATA_TYPE) diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index 0eaadb80c6..5c7fe7e0b4 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -58,8 +58,10 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.emplace("-DSTRIDE_X=" + support::cpp11::to_string(conv_info.stride().first)); build_opts.emplace("-DSTRIDE_Y=" + support::cpp11::to_string(conv_info.stride().second)); - build_opts.emplace("-DPAD_X=" + support::cpp11::to_string(conv_info.pad().first)); - build_opts.emplace("-DPAD_Y=" + support::cpp11::to_string(conv_info.pad().second)); + build_opts.emplace("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left())); + build_opts.emplace("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top())); + build_opts.emplace("-DPAD_RIGHT=" + support::cpp11::to_string(conv_info.pad_right())); + build_opts.emplace("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.emplace("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); build_opts.emplace("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); build_opts.emplace("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width)); diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 98a799f783..6cc45573d8 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -63,18 +63,16 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); } - int pad_x = 0; - int pad_y = 0; int stride_x = 0; int stride_y = 0; - std::tie(pad_x, pad_y) = conv_info.pad(); + std::tie(stride_x, stride_y) = conv_info.stride(); const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && (pad_x == 0) && (pad_y == 0)); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); if(!run_img2col_reduced) { @@ -90,12 +88,14 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const build_opts.emplace("-DCONVOLVED_HEIGHT=" + support::cpp11::to_string(_convolved_dims.second)); build_opts.emplace("-DSTRIDE_X=" + support::cpp11::to_string(conv_info.stride().first)); build_opts.emplace("-DSTRIDE_Y=" + support::cpp11::to_string(conv_info.stride().second)); - build_opts.emplace("-DPAD_X=" + support::cpp11::to_string(conv_info.pad().first)); - build_opts.emplace("-DPAD_Y=" + support::cpp11::to_string(conv_info.pad().second)); + build_opts.emplace("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left())); + build_opts.emplace("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top())); + build_opts.emplace("-DPAD_RIGHT=" + support::cpp11::to_string(conv_info.pad_right())); + build_opts.emplace("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.emplace("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); build_opts.emplace("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); - if(kernel_dims.width == 3 && kernel_dims.height == 3 && conv_info.pad().first == 0 && conv_info.pad().second == 0) + if(kernel_dims.width == 3 && kernel_dims.height == 3 && !conv_info.has_padding()) { _kernel = static_cast(CLKernelLibrary::get().create_kernel("im2col_kernel3x3_padx0_pady0", build_opts)); } diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 71910e3a69..1c018b269b 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -163,16 +163,17 @@ void NEIm2ColKernel::run_generic(const Window &window) const int input_stride_y = _input->info()->strides_in_bytes().y(); const int input_stride_z = _input->info()->strides_in_bytes().z(); - int pad_x = 0; - int pad_y = 0; + int pad_left = 0; + int pad_top = 0; int stride_x = 0; int stride_y = 0; - std::tie(pad_x, pad_y) = _conv_info.pad(); + pad_left = _conv_info.pad_left(); + pad_top = _conv_info.pad_top(); std::tie(stride_x, stride_y) = _conv_info.stride(); // Setup input window - const int start_x = -pad_x; - const int start_y = -pad_y; + const int start_x = -pad_left; + const int start_y = -pad_top; Window window_in(window); // The first three dimensions of the input are increased by the inner loops @@ -291,18 +292,15 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size _conv_info); _has_bias = has_bias; - unsigned int pad_x = 0; - unsigned int pad_y = 0; unsigned int stride_x = 0; unsigned int stride_y = 0; - std::tie(pad_x, pad_y) = conv_info.pad(); std::tie(stride_x, stride_y) = conv_info.stride(); bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && (pad_x == 0) && (pad_y == 0)); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); Window window = calculate_max_window(*input->info(), Steps()); @@ -334,18 +332,18 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size switch(_input->info()->data_type()) { case DataType::F32: - _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; + _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; #ifdef ARM_COMPUTE_ENABLE_FP16 case DataType::F16: - _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; + _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; #endif /* ARM_COMPUTE_ENABLE_FP16 */ case DataType::QS8: - _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; + _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; case DataType::QS16: - _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; + _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; default: ARM_COMPUTE_ERROR("Data type not supported"); diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index d5ce1ea027..0a35e07430 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -288,37 +288,39 @@ const std::pair arm_compute::scaled_dimensions(unsig unsigned int kernel_width, unsigned int kernel_height, const PadStrideInfo &pad_stride_info) { - const unsigned int pad_x = pad_stride_info.pad().first; - const unsigned int pad_y = pad_stride_info.pad().second; - const unsigned int stride_x = pad_stride_info.stride().first; - const unsigned int stride_y = pad_stride_info.stride().second; - unsigned int w = 0; - unsigned int h = 0; + const unsigned int pad_left = pad_stride_info.pad_left(); + const unsigned int pad_top = pad_stride_info.pad_top(); + const unsigned int pad_right = pad_stride_info.pad_right(); + const unsigned int pad_bottom = pad_stride_info.pad_bottom(); + const unsigned int stride_x = pad_stride_info.stride().first; + const unsigned int stride_y = pad_stride_info.stride().second; + unsigned int w = 0; + unsigned int h = 0; switch(pad_stride_info.round()) { case DimensionRoundingType::FLOOR: - w = static_cast(std::floor((static_cast(width + 2 * pad_x - kernel_width) / stride_x) + 1)); - h = static_cast(std::floor((static_cast(height + 2 * pad_y - kernel_height) / stride_y) + 1)); + w = static_cast(std::floor((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); + h = static_cast(std::floor((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); break; case DimensionRoundingType::CEIL: - w = static_cast(std::ceil((static_cast(width + 2 * pad_x - kernel_width) / stride_x) + 1)); - h = static_cast(std::ceil((static_cast(height + 2 * pad_y - kernel_height) / stride_y) + 1)); + w = static_cast(std::ceil((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); + h = static_cast(std::ceil((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); break; default: ARM_COMPUTE_ERROR("Unsupported rounding type"); } // Make sure that border operations will start from inside the input and not the padded area - if(((w - 1) * stride_x) >= (width + pad_x)) + if(((w - 1) * stride_x) >= (width + pad_left)) { --w; } - if(((h - 1) * stride_y) >= (height + pad_y)) + if(((h - 1) * stride_y) >= (height + pad_top)) { --h; } - ARM_COMPUTE_ERROR_ON(((w - 1) * stride_x) >= (width + pad_x)); - ARM_COMPUTE_ERROR_ON(((h - 1) * stride_y) >= (height + pad_y)); + ARM_COMPUTE_ERROR_ON(((w - 1) * stride_x) >= (width + pad_left)); + ARM_COMPUTE_ERROR_ON(((h - 1) * stride_y) >= (height + pad_top)); return std::make_pair(w, h); } diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 4b1bfd8b8f..a3be6f4144 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -128,10 +128,7 @@ void CLConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weig // Get parameters from conv_info unsigned int stride_x = 0; unsigned int stride_y = 0; - unsigned int pad_x = 0; - unsigned int pad_y = 0; std::tie(stride_x, stride_y) = conv_info.stride(); - std::tie(pad_x, pad_y) = conv_info.pad(); // Get convolved dimensions unsigned int conv_w = 0; diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index f34f497436..155f4e561a 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -136,10 +136,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, // Get parameters from conv_info unsigned int stride_x = 0; unsigned int stride_y = 0; - unsigned int pad_x = 0; - unsigned int pad_y = 0; std::tie(stride_x, stride_y) = conv_info.stride(); - std::tie(pad_x, pad_y) = conv_info.pad(); // Get convolved dimensions unsigned int conv_w = 0; diff --git a/tests/datasets/DepthwiseConvolutionDataset.h b/tests/datasets/DepthwiseConvolutionDataset.h index 593b8238d7..8cceae0083 100644 --- a/tests/datasets/DepthwiseConvolutionDataset.h +++ b/tests/datasets/DepthwiseConvolutionDataset.h @@ -125,6 +125,13 @@ public: add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1)); add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0)); add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1)); + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); } }; diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index 8eda2e87fe..aa9d9f8899 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -58,6 +58,14 @@ public: add_config(TensorShape(17U, 31U, 2U, 4U), TensorShape(5U, 3U, 2U, 19U), TensorShape(19U), TensorShape(15U, 16U, 19U, 4U), PadStrideInfo(1, 2, 1, 1)); // Arbitrary batch size add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(11U, 11U, 16U, 5U), PadStrideInfo(3, 2, 1, 0)); + + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(10U, 11U, 16U, 5U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(10U, 11U, 16U, 5U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); } }; } // namespace datasets diff --git a/tests/validation/CPP/ConvolutionLayer.cpp b/tests/validation/CPP/ConvolutionLayer.cpp index 656cd2ee26..ab3690a493 100644 --- a/tests/validation/CPP/ConvolutionLayer.cpp +++ b/tests/validation/CPP/ConvolutionLayer.cpp @@ -26,6 +26,8 @@ #include "tests/validation/FixedPoint.h" #include "tests/validation/Helpers.h" +#include "tests/framework/Asserts.h" + namespace arm_compute { namespace test @@ -149,21 +151,24 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor const int width_weights = weights.shape().x(); const int height_weights = weights.shape().y(); const int depth_weights = weights.shape().z(); - const int pad_xi = std::min(static_cast(info.pad().first), width_weights / 2); - const int pad_yi = std::min(static_cast(info.pad().second), height_weights / 2); - const int start_xi = width_weights / 2 - pad_xi; - const int start_yi = height_weights / 2 - pad_yi; - const int end_xi = width_in - start_xi; - const int end_yi = height_in - start_yi; - const int stride_xi = info.stride().first; - const int stride_yi = info.stride().second; - const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in); + const int pad_left = std::min(static_cast(info.pad_left()), width_weights / 2); + const int pad_top = std::min(static_cast(info.pad_top()), height_weights / 2); + const int pad_right = std::min(static_cast(info.pad_right()), width_weights / 2); + const int pad_bottom = std::min(static_cast(info.pad_bottom()), height_weights / 2); + + const int start_xi = width_weights / 2 - pad_left; + const int start_yi = height_weights / 2 - pad_top; + const int end_xi = width_in + pad_left - width_weights / 2 + pad_right - width_weights / 2; + const int end_yi = height_in + pad_top - height_weights / 2 + pad_bottom - height_weights / 2; + const int stride_xi = info.stride().first; + const int stride_yi = info.stride().second; + const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in); for(int r = 0; r < num_batches; ++r) { - for(int yi = start_yi; yi < end_yi; yi += stride_yi) + for(int yi = start_yi; yi < start_yi + end_yi; yi += stride_yi) { - for(int xi = start_xi; xi < end_xi; xi += stride_xi) + for(int xi = start_xi; xi < start_xi + end_xi; xi += stride_xi) { for(int ofm = 0; ofm < depth_out; ++ofm) { @@ -173,6 +178,9 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor const int yo = (yi - start_yi) / stride_yi; const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out; + ARM_COMPUTE_ASSERT(xo < width_out); + ARM_COMPUTE_ASSERT(yo < height_out); + // Compute 3D convolution convolution3d(src.data() + offset_in, weights.data() + ofm * width_weights * height_weights * depth_weights, diff --git a/tests/validation/CPP/DepthwiseConvolution.cpp b/tests/validation/CPP/DepthwiseConvolution.cpp index ae54494c03..b57c2686f6 100644 --- a/tests/validation/CPP/DepthwiseConvolution.cpp +++ b/tests/validation/CPP/DepthwiseConvolution.cpp @@ -51,29 +51,35 @@ SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTe SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; // Compute reference - const size_t filter_width = weights.shape().x(); - const size_t filter_height = weights.shape().y(); - const size_t filter_plane = filter_width * filter_height; - const size_t input_width = src.shape().x(); - const size_t input_height = src.shape().y(); - const size_t input_depth = src.shape().z(); - const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); + const int filter_width = weights.shape().x(); + const int filter_height = weights.shape().y(); + const int filter_plane = filter_width * filter_height; + const int input_width = src.shape().x(); + const int input_height = src.shape().y(); + const int input_depth = src.shape().z(); + const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); - const size_t filter_half_width = filter_width / 2; - const size_t filter_half_height = filter_height / 2; - const size_t pad_x = std::min(filter_half_width, static_cast(conv_info.pad().first)); - const size_t pad_y = std::min(filter_half_height, static_cast(conv_info.pad().second)); - const size_t minimum_x = -pad_x + filter_half_width; - const size_t minimum_y = -pad_y + filter_half_height; + const int filter_half_width = filter_width / 2; + const int filter_half_height = filter_height / 2; + + const int pad_left = std::min(static_cast(conv_info.pad_left()), filter_half_width); + const int pad_top = std::min(static_cast(conv_info.pad_top()), filter_half_height); + const int pad_right = std::min(static_cast(conv_info.pad_right()), filter_half_width); + const int pad_bottom = std::min(static_cast(conv_info.pad_bottom()), filter_half_height); + + const int minimum_x = -pad_left + filter_half_width; + const int minimum_y = -pad_top + filter_half_height; + const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width; + const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height; int out_pos = 0; for(int r = 0; r < num_batches; ++r) { - for(size_t z = 0; z < input_depth; ++z) + for(int z = 0; z < input_depth; ++z) { - for(size_t y = minimum_y; y < input_height - minimum_y; y += conv_info.stride().second) + for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) { - for(size_t x = minimum_x; x < input_width - minimum_x; x += conv_info.stride().first) + for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) { Coordinates coords(static_cast(x), static_cast(y), static_cast(z), static_cast(r)); size_t filter_offset = filter_plane * z; diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h index 2fa3d0ea4b..de7eaf8b3d 100644 --- a/utils/TypePrinter.h +++ b/utils/TypePrinter.h @@ -599,7 +599,8 @@ inline ::std::ostream &operator<<(::std::ostream &os, const PadStrideInfo &pad_s { os << pad_stride_info.stride().first << "," << pad_stride_info.stride().second; os << ";"; - os << pad_stride_info.pad().first << "," << pad_stride_info.pad().second; + os << pad_stride_info.pad_left() << "," << pad_stride_info.pad_right() << "," + << pad_stride_info.pad_top() << "," << pad_stride_info.pad_bottom(); return os; } -- cgit v1.2.1