diff options
author | SiCongLi <sicong.li@arm.com> | 2021-06-29 13:18:30 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2021-07-02 12:05:37 +0000 |
commit | bc4e31113be0af320f44b338969d6972b64ca4de (patch) | |
tree | 21e44fad55a80794a9609e847188764cf42f13d7 /src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp | |
parent | 66831659fdef07c428993dccfa5d92416bae1ef9 (diff) | |
download | ComputeLibrary-bc4e31113be0af320f44b338969d6972b64ca4de.tar.gz |
Implement FP GPU depthwise convolution 1x1 kernel for in-place computation
* Implement in-place graph node mutator for 1x1 depthwise convolution
* Add in-place to validation fixture except for
DepthwiseConvolutionLayerNativeValidationFixture as it would be a
duplicate test otherwise (DepthwiseConvolutionLayerNative test tests
the underlying kernel)
Resolves: COMPMID-4432
Change-Id: Id7f10f5ebdce7d49f550c0b62dbaaab7f5b59d29
Signed-off-by: SiCongLi <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5874
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp | 33 |
1 files changed, 28 insertions, 5 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp index 4cc0e462c4..eb1cf146af 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp @@ -46,7 +46,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts) { ARM_COMPUTE_UNUSED(dwc_info); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + bool in_place = false; + if(output == nullptr || output == input) + { + in_place = true; + output = input; + } + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32); @@ -58,6 +64,18 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, ARM_COMPUTE_UNUSED(idx_c); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_c) != (input->dimension(idx_c) * depth_multiplier)); + // In place restrictions + if(in_place) + { + const int weights_width_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); + const int weights_height_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); + ARM_COMPUTE_RETURN_ERROR_ON(weights->tensor_shape()[weights_width_idx] != 1U || weights->tensor_shape()[weights_height_idx] != 1U); + ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier != 1U); + ARM_COMPUTE_RETURN_ERROR_ON(conv_info.stride() != std::make_pair(1U, 1U)); + ARM_COMPUTE_RETURN_ERROR_ON(dilation != Size2D(1U, 1U)); + ARM_COMPUTE_RETURN_ERROR_ON(conv_info.has_padding()); // Note that in princple padding can be supported with in_place but we choose not to support it + } + const ConvolutionInfo info{ conv_info, depth_multiplier, ActivationLayerInfo(), dilation }; const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info); @@ -139,19 +157,24 @@ CLDepthwiseConvolutionLayerNativeKernel::CLDepthwiseConvolutionLayerNativeKernel _type = CLKernelType::DEPTHWISE; } -void CLDepthwiseConvolutionLayerNativeKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, - const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, +void CLDepthwiseConvolutionLayerNativeKernel::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, + const DWCWeightsKernelInfo &dwc_weights_info, const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const ICLTensor *output_multipliers, const ICLTensor *output_shifts) { configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation, output_multipliers, output_shifts); } -void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, +void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info, const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation, const ICLTensor *output_multipliers, const ICLTensor *output_shifts) { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights); + if(output == nullptr) + { + // In-place + output = input; + } ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation, (output_multipliers != nullptr) ? output_multipliers->info() : nullptr, (output_shifts != nullptr) ? output_shifts->info() : nullptr)); |