From bc4e31113be0af320f44b338969d6972b64ca4de Mon Sep 17 00:00:00 2001 From: SiCongLi Date: Tue, 29 Jun 2021 13:18:30 +0100 Subject: Implement FP GPU depthwise convolution 1x1 kernel for in-place computation * Implement in-place graph node mutator for 1x1 depthwise convolution * Add in-place to validation fixture except for DepthwiseConvolutionLayerNativeValidationFixture as it would be a duplicate test otherwise (DepthwiseConvolutionLayerNative test tests the underlying kernel) Resolves: COMPMID-4432 Change-Id: Id7f10f5ebdce7d49f550c0b62dbaaab7f5b59d29 Signed-off-by: SiCongLi Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5874 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Georgios Pinitas --- tests/datasets/DepthwiseConvolutionLayerDataset.h | 19 +++++ tests/validation/CL/DepthwiseConvolutionLayer.cpp | 30 +++++++- .../fixtures/DepthwiseConvolutionLayerFixture.h | 83 +++++++++++++++------- 3 files changed, 106 insertions(+), 26 deletions(-) (limited to 'tests') diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h index a19e7ee8cf..53b5248374 100644 --- a/tests/datasets/DepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -262,6 +262,25 @@ public: add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 4, 4, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); } }; + +/** Dataset containing in-place 1x1 depthwise convolution shapes. + * + * For a depthwise convolution op to be in-place: + * * Output has the same shape as the input; + * * 1x1 filter + * * stride == 1 + * * dilations == 1 + * * No paddings +*/ +class SmallInPlaceDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + SmallInPlaceDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(7U, 7U, 1U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(11U, 13U, 16U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0)); + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index c88f7c1624..22922f41a2 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -156,6 +156,8 @@ template using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture; template using CLDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture; +template +using CLDepthwiseConvolutionLayerInPlaceFixture = DepthwiseConvolutionLayerValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP16) @@ -290,6 +292,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture, f } TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic + +TEST_SUITE(InPlace) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(), + framework::dataset::make("DepthMultiplier", { 1 })), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(CLAccessor(_src), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() // InPlace TEST_SUITE_END() // FP16 TEST_SUITE(FP32) @@ -355,7 +370,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerMixedD framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NHWC)), - framework::dataset::make("ActivationInfo", ActivationLayerInfo()))) + framework::dataset::make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f32); } @@ -436,6 +451,19 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture, } TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic + +TEST_SUITE(InPlace) +FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(), + framework::dataset::make("DepthMultiplier", { 1 })), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) +{ + validate(CLAccessor(_src), _reference, tolerance_f32); +} +TEST_SUITE_END() // InPlace TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index 19ec6b2560..c255cc5c13 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -59,8 +59,9 @@ public: void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType input_data_type, DataType weights_data_type, QuantizationInfo input_quantization_info, QuantizationInfo weights_quantization_info, QuantizationInfo output_quantization_info, - DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false) + DataLayout data_layout, ActivationLayerInfo act_info, bool mixed_layout = false, bool in_place = false) { + ARM_COMPUTE_ERROR_ON(mixed_layout && in_place); _mixed_layout = mixed_layout; _input_shape = in_shape; _input_data_type = input_data_type; @@ -73,6 +74,7 @@ public: _act_info = act_info; _depth_multiplier = depth_multiplier; _dilation = dilation; + _in_place = in_place; _bias_data_type = is_data_type_quantized(_input_data_type) ? DataType::S32 : _input_data_type; @@ -101,13 +103,18 @@ public: } // Create tensors - _src = create_tensor(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout); - _weights = create_tensor(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); - _biases = create_tensor(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); - _target = create_tensor(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout); + _src = create_tensor(input_shape, _input_data_type, 1, _input_quantization_info, _data_layout); + _weights = create_tensor(weights_shape, _weights_data_type, 1, _weights_quantization_info, _data_layout); + _biases = create_tensor(_biases_shape, _bias_data_type, 1, _input_quantization_info, _data_layout); + TensorType *target_to_use = nullptr; + if(!_in_place) + { + _target = create_tensor(output_shape, _input_data_type, 1, _output_quantization_info, _data_layout); + target_to_use = &_target; + } // Create Depthwise Convolution configure function - _dwc.configure(&_src, &_weights, &_biases, &_target, _pad_stride_info, _depth_multiplier, _act_info, _dilation); + _dwc.configure(&_src, &_weights, &_biases, target_to_use, _pad_stride_info, _depth_multiplier, _act_info, _dilation); ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); @@ -117,18 +124,26 @@ public: void allocate_and_run_target() { - add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout); + add_padding_x({ &_src, &_weights, &_biases }, _data_layout); + if(!_in_place) + { + add_padding_x({ &_target }, _data_layout); + } // Allocate tensors _src.allocator()->allocate(); _weights.allocator()->allocate(); _biases.allocator()->allocate(); - _target.allocator()->allocate(); ARM_COMPUTE_ASSERT(!_src.info()->is_resizable()); ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable()); ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + + if(!_in_place) + { + _target.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + } // Fill tensors fill(AccessorType(_src), 0); @@ -163,6 +178,7 @@ public: protected: void mix_layout(FunctionType &layer, TensorType &src, TensorType &dst) { + ARM_COMPUTE_ERROR_ON(_in_place); // Test Multi DataLayout graph cases, when the data layout changes after configure src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); dst.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); @@ -240,9 +256,10 @@ protected: unsigned int _depth_multiplier{}; Size2D _dilation{}; bool _mixed_layout{ false }; + bool _in_place{ false }; }; -template +template class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture { public: @@ -252,7 +269,7 @@ public: { DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type, QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), - data_layout, act_info, mixed_layout); + data_layout, act_info, mixed_layout, in_place); } }; @@ -393,7 +410,7 @@ protected: unsigned int _depth_multiplier{}; }; -template +template class DepthwiseConvolutionLayerNativeConfigurableValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture { public: @@ -407,6 +424,7 @@ public: _data_layout = data_layout; _act_info = act_info; _n0 = n0; + _in_place = in_place; _input_shape = TensorShape(width, height, channel, batch); _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier); @@ -434,10 +452,15 @@ public: } // Create tensors - _src = create_tensor(input_shape, _data_type, 1, QuantizationInfo(), _data_layout); - _weights = create_tensor(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout); - _biases = create_tensor(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); - _target = create_tensor(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); + _src = create_tensor(input_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _weights = create_tensor(weights_shape, _data_type, 1, QuantizationInfo(), _data_layout); + _biases = create_tensor(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); + TensorType *target_to_use = nullptr; + if(!_in_place) + { + _target = create_tensor(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); + target_to_use = &_target; + } DWCWeightsKernelInfo dwc_weights_info; dwc_weights_info.n0 = _n0; @@ -446,7 +469,7 @@ public: dwc_info.activation_info = _act_info; // Create Depthwise Convolution configure function - _dwc.configure(&_src, &_weights, &_biases, &_target, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation); + _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation); ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); @@ -462,12 +485,15 @@ public: _src.allocator()->allocate(); _weights.allocator()->allocate(); _biases.allocator()->allocate(); - _target.allocator()->allocate(); ARM_COMPUTE_ASSERT(!_src.info()->is_resizable()); ARM_COMPUTE_ASSERT(!_weights.info()->is_resizable()); ARM_COMPUTE_ASSERT(!_biases.info()->is_resizable()); - ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + if(!_in_place) + { + _target.allocator()->allocate(); + ARM_COMPUTE_ASSERT(!_target.info()->is_resizable()); + } // Fill tensors fill(AccessorType(_src), 0); @@ -476,13 +502,19 @@ public: // Test Multi DataLayout graph cases, when the data layout changes after configure _src.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); - _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + if(!_in_place) + { + _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); + } // Compute function _dwc.run(); // Reinstating original data layout for the test suite to properly check the values - _target.info()->set_data_layout(_data_layout); + if(!_in_place) + { + _target.info()->set_data_layout(_data_layout); + } } void compute_reference() @@ -541,9 +573,10 @@ protected: Size2D _dilation{}; unsigned int _depth_multiplier{}; unsigned int _n0{}; + bool _in_place{ false }; }; -template +template class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConvolutionLayerValidationGenericFixture { public: @@ -553,11 +586,11 @@ public: { DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, data_type, data_type, input_quantization_info, input_quantization_info, output_quantization_info, - data_layout, act_info, mixed_layout); + data_layout, act_info, mixed_layout, in_place); } }; -template +template class DepthwiseConvolutionLayerValidationQuantizedPerChannelFixture : public DepthwiseConvolutionLayerValidationGenericFixture { public: @@ -579,7 +612,7 @@ public: DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, input_data_type, weights_data_type, input_quantization_info, QuantizationInfo(weights_scales), output_quantization_info, - data_layout, act_info); + data_layout, act_info, false, in_place); } }; } // namespace validation -- cgit v1.2.1