From 561c176598cd14245e2e7918fdf136d1c888d1da Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 16 Apr 2021 15:08:59 +0100 Subject: Rework OpenCL Depthwise Convolution - Remove dedicated kernels for NCHW. Now we only use NHWC with permute - Remove specialized kernels for 3x3 NHWC - Simplify CLDepthwiseConvolutionLayer.cpp to call just the native implementation for both floating-point and quantized data types - Develop two parametric opencl kernels for depthwise convolution layer NHWC (floating-point and quantized) - Add support to export the weights to cl_image - Extend test for depthwise convolution on opencl Resolves COMPMID-4417 Change-Id: I253dd5d959a70783c82e62b1771a5e9f91621cb0 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5806 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Giorgio Arena --- tests/validation/CL/DepthwiseConvolutionLayer.cpp | 5 +- .../CL/DepthwiseConvolutionLayerNative.cpp | 179 ++++++++++++++++++--- .../fixtures/DepthwiseConvolutionLayerFixture.h | 58 ++++--- 3 files changed, 203 insertions(+), 39 deletions(-) (limited to 'tests/validation') diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index c88f7c1624..e6bf8801bb 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -48,13 +48,12 @@ constexpr RelativeTolerance tolerance_f32(0.01f); /**< constexpr AbsoluteTolerance tolerance_qasymm8(0); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ constexpr float tolerance_num = 0.05f; /**< Tolerance number */ -const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5 }); +const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 5 }); const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5, 8 }); //Activation Functions const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { - ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f, 0.f) }); @@ -355,7 +354,7 @@ FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerMixedD framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NHWC)), - framework::dataset::make("ActivationInfo", ActivationLayerInfo()))) + framework::dataset::make("ActivationInfo", ActivationLayerInfo()))) { validate(CLAccessor(_target), _reference, tolerance_f32); } diff --git a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp index f640ee2b18..f565255719 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp @@ -62,7 +62,7 @@ RelativeTolerance rel_tolerance_f16(half_float::half(0.01f)); constexpr float abs_tolerance_f16(0.03f); /** Width values to test - Precommit */ -const auto width_values_precommit = framework::dataset::make("width", { 1U, 17U, 32U } ); +const auto width_values_precommit = framework::dataset::make("width", { 1U, 33U } ); /** Width values to test - Nightly */ const auto width_values_nightly = framework::dataset::make("width", { 53U, 47U } ); @@ -79,6 +79,12 @@ const auto channel_values_precommit = framework::dataset::make("channels", { 15U /** Channel values to test - Nightly */ const auto channel_values_nightly = framework::dataset::make("channels", { 33U, 19U }); +/** Channel values to test with cl_image support - Precommit */ +const auto channel_values_export_to_cl_image_precommit = framework::dataset::make("channels", { 16U }); + +/** Channel values to test with cl_image support - Nightly */ +const auto channel_values_export_to_cl_image_nightly = framework::dataset::make("channels", { 32U }); + /** Batch values to test - Precommit */ const auto batch_values_precommit = framework::dataset::make("batch", { 1U, 2U }); @@ -115,11 +121,17 @@ const auto n0_values_precommit = framework::dataset::make("N0", {2, 4}); /** N0 values to test - Nightly */ const auto n0_values_nightly = framework::dataset::make("N0", {3, 8}); +/** N0 values to test with cl_image support - Precommit */ +const auto n0_values_export_to_cl_image_precommit = framework::dataset::make("N0", {4}); + +/** N0 values to test with cl_image support - Nightly */ +const auto n0_values_export_to_cl_image_nightly = framework::dataset::make("N0", {8}); + /** Activation values to test */ const auto act_values = framework::dataset::make("Activation", { ActivationLayerInfo(), - ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.0f, 0.5f), }); } // namespace @@ -129,7 +141,7 @@ TEST_SUITE(DepthwiseConvolutionLayerNative) TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_precommit, height_values_precommit), channel_values_precommit), @@ -142,14 +154,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_nightly, height_values_nightly), channel_values_nightly), @@ -162,16 +175,79 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + width_values_precommit, + height_values_precommit), + channel_values_export_to_cl_image_precommit), + batch_values_precommit), + kernel_sz_values_precommit), + framework::dataset::make("depth_multiplier", 1)), + dilation_values), + stride_values), + padding_valid_values), + framework::dataset::make("DataType", DataType::F32)), + data_layout_values), + act_values), + n0_values_export_to_cl_image_precommit), + framework::dataset::make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + width_values_nightly, + height_values_nightly), + channel_values_export_to_cl_image_nightly), + batch_values_nightly), + kernel_sz_values_nightly), + framework::dataset::make("depth_multiplier", 1)), + dilation_values), + stride_values), + padding_valid_values), + framework::dataset::make("DataType", DataType::F32)), + data_layout_values), + act_values), + n0_values_export_to_cl_image_nightly), + framework::dataset::make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +TEST_SUITE_END() // ExportWeightsToCLImage TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_precommit, height_values_precommit), channel_values_precommit), @@ -184,14 +260,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_nightly, height_values_nightly), channel_values_nightly), @@ -204,18 +281,80 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::ALL, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + width_values_precommit, + height_values_precommit), + channel_values_export_to_cl_image_precommit), + batch_values_precommit), + kernel_sz_values_precommit), + framework::dataset::make("depth_multiplier", 1)), + dilation_values), + stride_values), + padding_valid_values), + framework::dataset::make("DataType", DataType::F16)), + data_layout_values), + act_values), + n0_values_export_to_cl_image_precommit), + framework::dataset::make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} + +FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + width_values_nightly, + height_values_nightly), + channel_values_export_to_cl_image_nightly), + batch_values_nightly), + kernel_sz_values_nightly), + framework::dataset::make("depth_multiplier", 1)), + dilation_values), + stride_values), + padding_valid_values), + framework::dataset::make("DataType", DataType::F16)), + data_layout_values), + act_values), + n0_values_export_to_cl_image_nightly), + framework::dataset::make("ExportToCLImage", true))) +{ + // Validate output + if(_validate_output) + { + // Validate output + validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16); + } + else + { + ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped"); + framework::ARM_COMPUTE_PRINT_INFO(); + } +} +TEST_SUITE_END() // ExportWeightsToCLImage TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float TEST_SUITE(DepthMultiplier) TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_precommit, height_values_precommit), channel_values_precommit), @@ -228,14 +367,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_nightly, height_values_nightly), channel_values_nightly), @@ -248,7 +388,8 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_precommit, height_values_precommit), channel_values_precommit), @@ -270,14 +411,15 @@ FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( + combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine( width_values_nightly, height_values_nightly), channel_values_nightly), @@ -290,7 +432,8 @@ FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture void setup(size_t width, size_t height, size_t channel, size_t batch, Size2D kernel_size, size_t depth_multiplier, Size2D dilation, Size2D stride, bool padding_valid, DataType data_type, - DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0) + DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0, bool export_to_cl_image) { - _dilation = dilation; - _depth_multiplier = depth_multiplier; - _data_type = data_type; - _data_layout = data_layout; - _act_info = act_info; - _n0 = n0; + _dilation = dilation; + _depth_multiplier = depth_multiplier; + _data_type = data_type; + _data_layout = data_layout; + _act_info = act_info; + _n0 = n0; + _export_to_cl_image = export_to_cl_image; _input_shape = TensorShape(width, height, channel, batch); _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier); @@ -414,11 +415,11 @@ public: if(padding_valid) { - _conv_info = PadStrideInfo(); + _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation); } else { - _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation); + _conv_info = PadStrideInfo(stride.width, stride.height); } } @@ -439,14 +440,26 @@ public: _biases = create_tensor(_biases_shape, _data_type, 1, QuantizationInfo(), _data_layout); _target = create_tensor(TensorShape(), _data_type, 1, QuantizationInfo(), _data_layout); - DWCWeightsKernelInfo dwc_weights_info; - dwc_weights_info.n0 = _n0; + DWCComputeKernelInfo dwc_info; + dwc_info.n0 = _n0; + dwc_info.m0 = _conv_info.stride().first == 1 && _dilation.x() == 1 ? 8 : 1; + dwc_info.export_weights_to_cl_image = _export_to_cl_image; - DWCKernelInfo dwc_info; - dwc_info.activation_info = _act_info; +#if defined(ARM_COMPUTE_OPENCL_ENABLED) + if(_export_to_cl_image) + { + _validate_output |= image2d_from_buffer_supported(CLKernelLibrary::get().get_device()); + _validate_output |= (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) != 0); + } +#endif // ARM_COMPUTE_OPENCL_ENABLED + + const ConvolutionInfo conv_kernel_info + { + _conv_info, _depth_multiplier, _act_info, _dilation + }; // Create Depthwise Convolution configure function - _dwc.configure(&_src, &_weights, &_biases, &_target, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation); + _dwc.configure(&_src, &_weights, &_biases, &_target, dwc_info, conv_kernel_info); ARM_COMPUTE_ASSERT(_src.info()->is_resizable()); ARM_COMPUTE_ASSERT(_weights.info()->is_resizable()); @@ -456,7 +469,8 @@ public: void allocate_and_run_target() { - add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout); + add_padding_x({ &_src, &_biases, &_target }, _data_layout); + add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used // Allocate tensors _src.allocator()->allocate(); @@ -479,7 +493,10 @@ public: _target.info()->set_data_layout(_data_layout == DataLayout::NCHW ? DataLayout::NHWC : DataLayout::NCHW); // Compute function - _dwc.run(); + if(_validate_output) + { + _dwc.run(); + } // Reinstating original data layout for the test suite to properly check the values _target.info()->set_data_layout(_data_layout); @@ -497,7 +514,10 @@ public: const ConvolutionInfo info{ _conv_info, _depth_multiplier, _act_info, _dilation }; const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info); - _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info); + if(_validate_output) + { + _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info); + } } protected: @@ -541,6 +561,8 @@ protected: Size2D _dilation{}; unsigned int _depth_multiplier{}; unsigned int _n0{}; + bool _export_to_cl_image{}; + bool _validate_output{ true }; }; template -- cgit v1.2.1