From 8155c0253c00aa9e26651361460c66feb39829a6 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Fri, 16 Apr 2021 15:08:59 +0100
Subject: Rework OpenCL Depthwise Convolution

- Remove dedicated kernels for NCHW. Now we only use NHWC with permute
- Remove specialized kernels for 3x3 NHWC
- Simplify CLDepthwiseConvolutionLayer.cpp to call just the native
  implementation for both floating-point and quantized data types
- Develop two parametric opencl kernels for depthwise convolution layer
  NHWC (floating-point and quantized)
- Add support to export the weights to cl_image
- Extend test for depthwise convolution on opencl

Resolves COMPMID-4417

Change-Id: Ibe533f79c2860f9cac8e921895d5a8f947753a5c
Signed-off-by: Gian Marco Iodice
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5893
Reviewed-by: Giorgio Arena
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
---
 .../fixtures/DepthwiseConvolutionLayerFixture.h | 63 +++++++++++++++-------
 1 file changed, 43 insertions(+), 20 deletions(-)

(limited to 'tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h')

diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
index c255cc5c13..0e02ae28ca 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -198,7 +198,7 @@ protected:
         {
             case DataType::QASYMM8:
             {
-                std::uniform_int_distribution distribution(0, 10);
+                std::uniform_int_distribution distribution(0, 15);
                 library->fill(tensor, distribution, i);
                 break;
             }
@@ -292,7 +292,7 @@ public:
 
         if(padding_valid)
         {
-            _conv_info = PadStrideInfo();
+            _conv_info = PadStrideInfo(stride.width, stride.height);
         }
         else
         {
@@ -333,6 +333,7 @@ public:
     void allocate_and_run_target()
     {
         add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+        add_padding_y({ &_src, &_target }, _data_layout);
 
         // Allocate tensors
         _src.allocator()->allocate();
@@ -416,15 +417,16 @@ class DepthwiseConvolutionLayerNativeConfigurableValidationFixture : public Dept
 public:
     template
     void setup(size_t width, size_t height, size_t channel, size_t batch, Size2D kernel_size, size_t depth_multiplier, Size2D dilation, Size2D stride, bool padding_valid, DataType data_type,
-               DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0)
+               DataLayout data_layout, const ActivationLayerInfo &act_info, unsigned int n0, bool export_to_cl_image)
     {
-        _dilation         = dilation;
-        _depth_multiplier = depth_multiplier;
-        _data_type        = data_type;
-        _data_layout      = data_layout;
-        _act_info         = act_info;
-        _n0               = n0;
-        _in_place         = in_place;
+        _dilation           = dilation;
+        _depth_multiplier   = depth_multiplier;
+        _data_type          = data_type;
+        _data_layout        = data_layout;
+        _act_info           = act_info;
+        _n0                 = n0;
+        _export_to_cl_image = export_to_cl_image;
+        _in_place           = in_place;
 
         _input_shape   = TensorShape(width, height, channel, batch);
         _weights_shape = TensorShape(kernel_size.width, kernel_size.height, channel * _depth_multiplier);
@@ -432,11 +434,11 @@ public:
 
         if(padding_valid)
         {
-            _conv_info = PadStrideInfo();
+            _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation);
         }
         else
        {
-            _conv_info = calculate_same_pad(_input_shape, _weights_shape, PadStrideInfo(stride.width, stride.height), DataLayout::NCHW, _dilation);
+            _conv_info = PadStrideInfo(stride.width, stride.height);
         }
     }
 
@@ -462,14 +464,26 @@ public:
             target_to_use = &_target;
         }
 
-        DWCWeightsKernelInfo dwc_weights_info;
-        dwc_weights_info.n0 = _n0;
+        DWCComputeKernelInfo dwc_info;
+        dwc_info.n0                         = _n0;
+        dwc_info.m0                         = _conv_info.stride().first == 1 && _dilation.x() == 1 ? 8 : 1;
+        dwc_info.export_weights_to_cl_image = _export_to_cl_image;
 
-        DWCKernelInfo dwc_info;
-        dwc_info.activation_info = _act_info;
+#if defined(ARM_COMPUTE_OPENCL_ENABLED)
+        if(_export_to_cl_image)
+        {
+            _validate_output |= image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
+            _validate_output |= (get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) != 0);
+        }
+#endif // ARM_COMPUTE_OPENCL_ENABLED
+
+        const ConvolutionInfo conv_kernel_info
+        {
+            _conv_info, _depth_multiplier, _act_info, _dilation
+        };
 
         // Create Depthwise Convolution configure function
-        _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_weights_info, dwc_info, _conv_info, _depth_multiplier, _dilation);
+        _dwc.configure(&_src, &_weights, &_biases, target_to_use, dwc_info, conv_kernel_info);
 
         ARM_COMPUTE_ASSERT(_src.info()->is_resizable());
         ARM_COMPUTE_ASSERT(_weights.info()->is_resizable());
@@ -479,7 +493,8 @@ public:
 
     void allocate_and_run_target()
     {
-        add_padding_x({ &_src, &_weights, &_biases, &_target }, _data_layout);
+        add_padding_x({ &_src, &_biases, &_target }, _data_layout);
+        add_padding_x({ &_weights }, _data_layout, _export_to_cl_image); // Don't add left padding if cl image will be used
 
         // Allocate tensors
         _src.allocator()->allocate();
@@ -508,7 +523,10 @@ public:
         }
 
         // Compute function
-        _dwc.run();
+        if(_validate_output)
+        {
+            _dwc.run();
+        }
 
         // Reinstating original data layout for the test suite to properly check the values
         if(!_in_place)
@@ -529,7 +547,10 @@ public:
         const ConvolutionInfo info{ _conv_info, _depth_multiplier, _act_info, _dilation };
         const TensorShape dst_shape = compute_depthwise_convolution_shape(TensorInfo(_input_shape, 1, _data_type), TensorInfo(_weights_shape, 1, _data_type), info);
 
-        _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info);
+        if(_validate_output)
+        {
+            _reference = reference::activation_layer(reference::depthwise_convolution(src, weights, biases, dst_shape, _conv_info, _depth_multiplier, _dilation), _act_info);
+        }
     }
 
 protected:
@@ -573,6 +594,8 @@ protected:
     Size2D       _dilation{};
    unsigned int _depth_multiplier{};
     unsigned int _n0{};
+    bool         _export_to_cl_image{};
+    bool         _validate_output{ true };
     bool         _in_place{ false };
 };
 
--
cgit v1.2.1
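
Note on the configure path exercised above: after this rework the per-kernel compute options (the N0/M0 tile sizes and the cl_image weight-export flag) travel in a DWCComputeKernelInfo descriptor, while the convolution parameters are grouped in a ConvolutionInfo descriptor, so the function is configured with exactly two descriptors instead of the previous loose argument list. The sketch below only illustrates that call shape and is not part of the patch: the helper name configure_native_dwc and its template parameters are hypothetical, and the include paths are an assumption about where the two descriptors are declared; every field and call it uses follows identifiers visible in the diff.

    // Hypothetical helper mirroring the fixture's configure step (sketch, not library code).
    // Header locations are assumed; adjust to wherever DWCComputeKernelInfo and
    // ConvolutionInfo are declared in your tree.
    #include "arm_compute/core/KernelDescriptors.h"
    #include "arm_compute/core/Types.h"

    template <typename TensorT, typename DepthwiseFunctionT>
    void configure_native_dwc(DepthwiseFunctionT &dwc,
                              TensorT &src, TensorT &weights, TensorT &biases, TensorT &dst,
                              const arm_compute::PadStrideInfo       &conv_info,
                              unsigned int                            depth_multiplier,
                              const arm_compute::ActivationLayerInfo &act_info,
                              const arm_compute::Size2D              &dilation,
                              unsigned int                            n0,
                              bool                                    export_to_cl_image)
    {
        // Per-kernel compute options: N0 (columns), M0 (rows, kept at 1 unless the
        // stride is unit and there is no dilation) and the cl_image export flag.
        arm_compute::DWCComputeKernelInfo dwc_info{};
        dwc_info.n0                         = n0;
        dwc_info.m0                         = (conv_info.stride().first == 1 && dilation.x() == 1) ? 8 : 1;
        dwc_info.export_weights_to_cl_image = export_to_cl_image;

        // Convolution parameters grouped in a single descriptor, as in the fixture.
        const arm_compute::ConvolutionInfo conv_kernel_info{ conv_info, depth_multiplier, act_info, dilation };

        // Two-descriptor configure call, matching the new signature used by the test.
        dwc.configure(&src, &weights, &biases, &dst, dwc_info, conv_kernel_info);
    }

In the fixture itself, m0 is derived from the stride and dilation rather than exposed as a test parameter, so the test suite only varies n0 and the cl_image export flag.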