diff options
author | SiCongLi <sicong.li@arm.com> | 2021-06-29 13:18:30 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2021-07-02 12:05:37 +0000 |
commit | bc4e31113be0af320f44b338969d6972b64ca4de (patch) | |
tree | 21e44fad55a80794a9609e847188764cf42f13d7 /src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp | |
parent | 66831659fdef07c428993dccfa5d92416bae1ef9 (diff) | |
download | ComputeLibrary-bc4e31113be0af320f44b338969d6972b64ca4de.tar.gz |
Implement FP GPU depthwise convolution 1x1 kernel for in-place computation
* Implement in-place graph node mutator for 1x1 depthwise convolution
* Add in-place to validation fixture except for
DepthwiseConvolutionLayerNativeValidationFixture as it would be a
duplicate test otherwise (DepthwiseConvolutionLayerNative test tests
the underlying kernel)
Resolves: COMPMID-4432
Change-Id: Id7f10f5ebdce7d49f550c0b62dbaaab7f5b59d29
Signed-off-by: SiCongLi <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5874
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp | 17 |
1 files changed, 14 insertions, 3 deletions
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index c7520cd087..a826f85c5c 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -136,11 +136,11 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure( ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights); ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayer::validate(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, - output->info(), + output != nullptr ? output->info() : input->info(), conv_info, depth_multiplier, act_info, @@ -220,6 +220,11 @@ Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::validate const PadStrideInfo &conv_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation) { + const bool in_place = input == output || output == nullptr; + if(in_place) + { + output = input; + } ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH); const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT); @@ -254,6 +259,7 @@ Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::validate if(needs_permute) { + ARM_COMPUTE_RETURN_ERROR_ON_MSG(in_place, "In-place is supported only with NHWC data layout"); TensorShape permuted_input_shape = input->tensor_shape(); TensorShape permuted_weights_shape = weights->tensor_shape(); const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation }; @@ -309,7 +315,7 @@ void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::prepare() _output_shifts.map(); quantization::compute_quantized_multipliers_and_shifts(_input->info(), _original_weights->info(), - _output->info(), + _output != nullptr ? _output->info() : _input->info(), reinterpret_cast<int32_t *>(_output_multipliers.ptr_to_element(Coordinates(0))), reinterpret_cast<int32_t *>(_output_shifts.ptr_to_element(Coordinates(0)))); _output_multipliers.unmap(); @@ -549,6 +555,11 @@ void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_cont unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation) { + if(output == nullptr) + { + // In-place + output = input; + } _depth_conv_func = get_depthwiseconvolution_function(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info, dilation); switch(_depth_conv_func) |