IVGCVSW-656 : added support (and unit tests) for asymmetric padding in

direct conv (CL) Change-Id: I4b8389376e675bfa93b4a1ae7c8e65b8db1f4c4b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111102 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
author: Jaroslaw Rzepecki <jaroslaw.rzepecki@arm.com> 2017-11-29 13:51:34 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:41:58 +0000
commit: 2ecbadada0d2b5e48eb4ffd0ae5e3390c0c96db5 (patch)
tree: b838193bc40254726afb242dcdda6ec9205f05f2
parent: b81fa60a6a78aea2f8e30dd7f2a495b510b4f918 (diff)
download: ComputeLibrary-2ecbadada0d2b5e48eb4ffd0ae5e3390c0c96db5.tar.gz
5 files changed, 92 insertions, 26 deletions
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index 0564553b45..8e3f3d15a2 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -93,8 +93,6 @@ private:
     const ICLTensor *_weights;
     ICLTensor       *_output;
     BorderSize       _border_size;
-    int              _conv_pad_x;
-    int              _conv_pad_y;
     int              _conv_stride_x;
     int              _conv_stride_y;
 };
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 3c5799f7ba..aea0161a1d 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -63,7 +63,7 @@ TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape,
 } // namespace
 
 CLDirectConvolutionLayerKernel::CLDirectConvolutionLayerKernel()
-    : _input(nullptr), _biases(nullptr), _weights(nullptr), _output(nullptr), _border_size(0), _conv_pad_x(0), _conv_pad_y(0), _conv_stride_x(0), _conv_stride_y(0)
+    : _input(nullptr), _biases(nullptr), _weights(nullptr), _output(nullptr), _border_size(0), _conv_stride_x(0), _conv_stride_y(0)
 {
 }
 
@@ -99,14 +99,17 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
 
     _conv_stride_x = std::get<0>(conv_info.stride());
     _conv_stride_y = std::get<1>(conv_info.stride());
-    _conv_pad_x    = std::min(std::get<0>(conv_info.pad()), kernel_size / 2);
-    _conv_pad_y    = std::min(std::get<1>(conv_info.pad()), kernel_size / 2);
 
-    _input       = input;
-    _weights     = weights;
-    _output      = output;
-    _biases      = biases;
-    _border_size = BorderSize(_conv_pad_y, _conv_pad_x);
+    _input   = input;
+    _weights = weights;
+    _output  = output;
+    _biases  = biases;
+
+    int conv_pad_left   = std::min(conv_info.pad_left(), kernel_size / 2);
+    int conv_pad_top    = std::min(conv_info.pad_top(), kernel_size / 2);
+    int conv_pad_right  = std::min(conv_info.pad_right(), kernel_size / 2);
+    int conv_pad_bottom = std::min(conv_info.pad_bottom(), kernel_size / 2);
+    _border_size        = BorderSize(conv_pad_top, conv_pad_right, conv_pad_bottom, conv_pad_left);
 
     const GPUTarget gpu_target = get_arch_from_target(get_target());
 
@@ -217,13 +220,13 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
         }
 
         // Calculate right and bottom border
-        const int input_width  = input->info()->dimension(0) - kernel_size / 2 + _conv_pad_x;
-        const int input_height = input->info()->dimension(1) - kernel_size / 2 + _conv_pad_y;
+        const int input_width  = input->info()->dimension(0) - kernel_size / 2 + conv_pad_right;
+        const int input_height = input->info()->dimension(1) - kernel_size / 2 + conv_pad_bottom;
 
         // Create window and update padding
         win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));
 
-        AccessWindowStatic    input_access(input->info(), -_conv_pad_x, -_conv_pad_y, input_width + num_elems_read_per_iteration_x, input_height + num_elems_read_per_iteration_y);
+        AccessWindowStatic    input_access(input->info(), -conv_pad_left, -conv_pad_top, input_width + num_elems_read_per_iteration_x, input_height + num_elems_read_per_iteration_y);
         AccessWindowStatic    weights_access(weights->info(), 0, 0, kernel_size, kernel_size);
         AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y);
 
@@ -262,13 +265,13 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
         const unsigned int num_elems_written_per_iteration_y = 1;
 
         // Calculate right and bottom border
-        const int input_width  = input->info()->dimension(0) - kernel_size / 2 + _conv_pad_x;
-        const int input_height = input->info()->dimension(1) - kernel_size / 2 + _conv_pad_y;
+        const int input_width  = input->info()->dimension(0) - kernel_size / 2 + conv_pad_right;
+        const int input_height = input->info()->dimension(1) - kernel_size / 2 + conv_pad_bottom;
 
         // Create window and update padding
         Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y));
 
-        AccessWindowStatic    input_access(input->info(), -_conv_pad_x, -_conv_pad_y, input_width + num_elems_read_per_iteration_x, input_height + num_elems_read_per_iteration_y);
+        AccessWindowStatic    input_access(input->info(), -conv_pad_left, -conv_pad_top, input_width + num_elems_read_per_iteration_x, input_height + num_elems_read_per_iteration_y);
         AccessWindowStatic    weights_access(weights->info(), 0, 0, kernel_size, kernel_size);
         AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration_x, num_elems_written_per_iteration_y);
 
@@ -302,9 +305,13 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
     _config_id += "_";
     _config_id += support::cpp11::to_string(kernel_size);
     _config_id += "_";
-    _config_id += support::cpp11::to_string(_conv_pad_x);
+    _config_id += support::cpp11::to_string(conv_pad_left);
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(conv_pad_top);
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(conv_pad_right);
     _config_id += "_";
-    _config_id += support::cpp11::to_string(_conv_pad_y);
+    _config_id += support::cpp11::to_string(conv_pad_bottom);
     _config_id += "_";
     _config_id += support::cpp11::to_string(_conv_stride_x);
     _config_id += "_";
@@ -371,8 +378,8 @@ void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue
     Window slice  = window.first_slice_window_3D();
     Window win_in = window;
 
-    win_in.adjust(Window::DimX, -_conv_pad_x, true);
-    win_in.adjust(Window::DimY, -_conv_pad_y, true);
+    win_in.adjust(Window::DimX, -_border_size.left, true);
+    win_in.adjust(Window::DimY, -_border_size.top, true);
     win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x);
     win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y);
 
diff --git a/tests/datasets/DirectConvolutionLayerDataset.h b/tests/datasets/DirectConvolutionLayerDataset.h
index 294f44fa42..4777ab289f 100644
--- a/tests/datasets/DirectConvolutionLayerDataset.h
+++ b/tests/datasets/DirectConvolutionLayerDataset.h
@@ -46,12 +46,11 @@ public:
         add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 3U), TensorShape(3U), TensorShape(13U, 13U, 3U), PadStrideInfo(1, 1, 1, 1));
         add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 4U), TensorShape(4U), TensorShape(13U, 13U, 4U), PadStrideInfo(1, 1, 1, 1));
         add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 5U), TensorShape(5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 1, 1, 1));
-        add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 3U), TensorShape(3U), TensorShape(13U, 13U, 3U), PadStrideInfo(2, 2, 1, 1));
-        add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 4U), TensorShape(4U), TensorShape(13U, 13U, 4U), PadStrideInfo(2, 2, 1, 1));
-        add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 5U), TensorShape(5U), TensorShape(13U, 13U, 5U), PadStrideInfo(2, 2, 1, 1));
-        add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 3U), TensorShape(3U), TensorShape(13U, 13U, 3U), PadStrideInfo(3, 3, 1, 1));
-        add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 4U), TensorShape(4U), TensorShape(13U, 13U, 4U), PadStrideInfo(3, 3, 1, 1));
-        add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 5U), TensorShape(5U), TensorShape(13U, 13U, 5U), PadStrideInfo(3, 3, 1, 1));
+        add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 3U), TensorShape(3U), TensorShape(7U, 7U, 3U), PadStrideInfo(2, 2, 1, 1));
+        add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 4U), TensorShape(4U), TensorShape(7U, 7U, 4U), PadStrideInfo(2, 2, 1, 1));
+        add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 5U), TensorShape(5U), TensorShape(7U, 7U, 5U), PadStrideInfo(2, 2, 1, 1));
+        add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 3U), TensorShape(3U), TensorShape(12U, 12U, 3U), PadStrideInfo(1, 1, 0, 1, 0, 1, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 5U), TensorShape(5U), TensorShape(12U, 12U, 5U), PadStrideInfo(1, 1, 1, 0, 1, 0, DimensionRoundingType::FLOOR));
     }
 };
 } // namespace datasets
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index e6a196ae46..84e1bca6a5 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -27,6 +27,7 @@
 #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/PaddingCalculator.h"
+#include "tests/datasets/DirectConvolutionLayerDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
@@ -148,6 +149,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
 
 template <typename T>
 using CLDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
+template <typename T>
+using CLDirectConvolutionValidationWithTensorShapesFixture = DirectConvolutionValidationWithTensorShapesFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
@@ -165,6 +168,15 @@ FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionLayerFixture<float>, framework::D
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
 TEST_SUITE_END()
+
+TEST_SUITE(FP32_CustomDataset)
+FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesFixture<float>, framework::DatasetMode::ALL, combine(datasets::DirectConvolutionLayerDataset(),
+                       framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END()
 TEST_SUITE_END()
 
 template <typename T>
@@ -192,6 +204,8 @@ TEST_SUITE_END()
 
 template <typename T>
 using CLDirectConvolutionLayerQuantizedFixture = DirectConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
+template <typename T>
+using CLDirectConvolutionValidationWithTensorShapesQuantizedFixture = DirectConvolutionValidationWithTensorShapesQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
@@ -202,6 +216,16 @@ FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, f
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END()
+
+TEST_SUITE(QASYMM8_CustomDataset)
+FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(datasets::DirectConvolutionLayerDataset(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
 TEST_SUITE_END()
 
 TEST_SUITE_END()
diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
index b78f13acd1..4916c3335b 100644
--- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h
@@ -65,6 +65,20 @@ public:
         _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
     }
 
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
+    {
+        _fractional_bits   = fractional_bits;
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+
+        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type, bias_data_type, fractional_bits, quantization_info);
+    }
+
 protected:
     template <typename U>
     void fill(U &&tensor, int i)
@@ -207,6 +221,30 @@ public:
     }
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationWithTensorShapesQuantizedFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type, QuantizationInfo quantization_info)
+    {
+        DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, quantization_info);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DirectConvolutionValidationWithTensorShapesFixture : public DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info,
+               DataType data_type)
+    {
+        DirectConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, QuantizationInfo());
+    }
+};
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
author	Jaroslaw Rzepecki <jaroslaw.rzepecki@arm.com>	2017-11-29 13:51:34 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:41:58 +0000
commit	2ecbadada0d2b5e48eb4ffd0ae5e3390c0c96db5 (patch)
tree	b838193bc40254726afb242dcdda6ec9205f05f2
parent	b81fa60a6a78aea2f8e30dd7f2a495b510b4f918 (diff)
download	ComputeLibrary-2ecbadada0d2b5e48eb4ffd0ae5e3390c0c96db5.tar.gz