From b6f182d3e5b69cc193d7e5ec397c4d61083572d5 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 29 Nov 2017 10:17:56 +0000 Subject: COMPMID-556: Fix CLDepthwiseConvolution3x3 Kernel. Kernel was not sliding the input window. Change-Id: Ia5903ceaed1243e86bee773a84102d8a1132dfa5 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111055 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier --- .../CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp | 16 ++++--- tests/datasets/DepthwiseConvolutionDataset.h | 10 ++--- tests/validation/CPP/DepthwiseConvolution.cpp | 52 ++++++++++++---------- 3 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp index 208d06d7cd..63586b0f0f 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp @@ -137,14 +137,16 @@ void CLDepthwiseConvolution3x3Kernel::run(const Window &window, cl::CommandQueue ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - Window slice_in = window.first_slice_window_3D(); + // Create input window and adjust + Window win_in = window; + win_in.adjust(Window::DimX, -_conv_pad_left, true); + win_in.adjust(Window::DimY, -_conv_pad_top, true); + win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); + win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); + + Window slice_in = win_in.first_slice_window_3D(); Window slice_out = window.first_slice_window_3D(); Window slice_weights = window.first_slice_window_3D(); - - slice_in.adjust(Window::DimX, -_conv_pad_left, true); - slice_in.adjust(Window::DimY, -_conv_pad_top, true); - slice_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); - slice_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); slice_weights.set_dimension_step(Window::DimX, 0); slice_weights.set_dimension_step(Window::DimY, 0); @@ -166,5 +168,5 @@ void CLDepthwiseConvolution3x3Kernel::run(const Window &window, cl::CommandQueue enqueue(queue, *this, slice_out); } - while(window.slide_window_slice_3D(slice_out)); + while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); } diff --git a/tests/datasets/DepthwiseConvolutionDataset.h b/tests/datasets/DepthwiseConvolutionDataset.h index 430d2c9aca..2c8347fc8c 100644 --- a/tests/datasets/DepthwiseConvolutionDataset.h +++ b/tests/datasets/DepthwiseConvolutionDataset.h @@ -161,10 +161,10 @@ class SmallDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDat public: SmallDepthwiseConvolutionDataset3x3() { - add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1)); - add_config(TensorShape(21U, 31U, 9U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U), PadStrideInfo(1, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1)); } }; @@ -173,11 +173,11 @@ class LargeDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDat public: LargeDepthwiseConvolutionDataset3x3() { - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0)); add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1)); add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); } }; diff --git a/tests/validation/CPP/DepthwiseConvolution.cpp b/tests/validation/CPP/DepthwiseConvolution.cpp index ad0653846b..229e044783 100644 --- a/tests/validation/CPP/DepthwiseConvolution.cpp +++ b/tests/validation/CPP/DepthwiseConvolution.cpp @@ -137,6 +137,7 @@ SimpleTensor depthwise_convolution(const SimpleTensor &src, co const int input_width = src.shape().x(); const int input_height = src.shape().y(); const int input_depth = src.shape().z(); + const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); const int filter_half_size = filter_width / 2; const int pad_x = std::min(filter_half_size, static_cast(conv_info.pad().first)); @@ -145,37 +146,40 @@ SimpleTensor depthwise_convolution(const SimpleTensor &src, co const int minimum_y = -pad_y + filter_half_size; int out_pos = 0; - for(int z = 0; z < input_depth; ++z) + for(int r = 0; r < num_batches; ++r) { - int32_t bias_val = *static_cast(biases(Coordinates(z))); - for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second) + for(int z = 0; z < input_depth; ++z) { - for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first) + int32_t bias_val = *static_cast(biases(Coordinates(z))); + for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second) { - Coordinates coords(x, y, z); - int filter_offset = filter_plane * z; - - uint32_t val = 0; - for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j) + for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first) { - for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i) + Coordinates coords(x, y, z); + int filter_offset = filter_plane * z; + + uint32_t val = 0; + for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j) { - coords.set(0, i); - coords.set(1, j); - auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, 0); - uint8_t w_val = *(weights.data() + filter_offset); - val += (in_val + input_offset) * (w_val + weights_offset); - ++filter_offset; + for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i) + { + coords.set(0, i); + coords.set(1, j); + auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, 0); + uint8_t w_val = *(weights.data() + filter_offset); + val += (in_val + input_offset) * (w_val + weights_offset); + ++filter_offset; + } } + val += bias_val; + val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); + val += output_offset; + val = std::max(val, 0); + val = std::min(val, 255); + + // Store the result + dst[out_pos++] = val; } - val += bias_val; - val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); - val += output_offset; - val = std::max(val, 0); - val = std::min(val, 255); - - // Store the result - dst[out_pos++] = val; } } } -- cgit v1.2.1