From ad0c7388f6261989a268ffb2d042f2bd80736e3f Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Mon, 23 Apr 2018 16:16:21 +0100 Subject: COMPMID-1068 Create validate method to CLDepthWiseConvolution Change-Id: I3301b66a8a072c6ecd0d7f2dabef350017b55ac4 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/128677 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- .../CL/functions/CLDepthwiseConvolutionLayer.cpp | 65 +++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp') diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index 88bb0c417d..676a121a76 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -24,6 +24,8 @@ #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" #include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" @@ -66,6 +68,21 @@ void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, zero_value); } +Status CLDepthwiseConvolutionLayer3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier, + ActivationLayerInfo act_info, GPUTarget gpu_target) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW && input->data_layout() != DataLayout::NHWC); + + if(input->data_layout() == DataLayout::NCHW) + { + return CLDepthwiseConvolutionLayer3x3NCHWKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info, gpu_target); + } + + return CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(input, weights, biases, output, conv_info, depth_multiplier, act_info); +} + void CLDepthwiseConvolutionLayer3x3::run() { CLScheduler::get().enqueue(_border_handler); @@ -82,7 +99,6 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != weights->info()->dimension(2)); const size_t weights_w = weights->info()->dimension(0); const size_t weights_h = weights->info()->dimension(1); @@ -168,6 +184,53 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w _v2mm_output.allocator()->allocate(); } +Status CLDepthwiseConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(2) * depth_multiplier) != weights->dimension(2)); + + const bool is_quantized = is_data_type_quantized_asymmetric(input->data_type()); + const bool append_bias = (biases != nullptr) && !is_quantized; + const TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier); + const size_t weights_w = weights->dimension(0); + const size_t weights_h = weights->dimension(1); + const size_t weights_z = weights->dimension(2); + const unsigned int conv_w = output_shape.x(); + const unsigned int conv_h = output_shape.y(); + const size_t patch_size = weights_w * weights_h + ((append_bias) ? 1 : 0); + const size_t conv_size = conv_w * conv_h; + + TensorShape shape_im2col = input->tensor_shape(); + shape_im2col.set(0, patch_size); + shape_im2col.set(1, conv_size); + shape_im2col.set(2, weights_z); + TensorInfo input_reshaped(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col)); + ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseIm2ColKernel::validate(input, &input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier)); + + const TensorShape shape_weights_reshape(patch_size, weights_z); + TensorInfo weights_reshaped(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_weights_reshape)); + ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseWeightsReshapeKernel::validate(weights, &weights_reshaped, append_bias ? biases : nullptr)); + + DataType v2mm_dt = (input->data_type() == DataType::QASYMM8) ? DataType::S32 : input->data_type(); + TensorShape shape_v2mm_out = input->tensor_shape(); + shape_v2mm_out.set(0, conv_size * weights_z); + shape_v2mm_out.set(1, 1); + shape_v2mm_out.set(2, 1); + TensorInfo v2mm_output(input->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out)); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixVectorMultiplyKernel::validate(&input_reshaped, &weights_reshaped, &v2mm_output)); + + TensorInfo output_reshaped(v2mm_output.clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape)); + ARM_COMPUTE_RETURN_ON_ERROR(CLDepthwiseVectorToTensorKernel::validate(&v2mm_output, (is_quantized) ? &output_reshaped : output, conv_w, conv_h)); + + if(is_quantized) + { + ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayerOutputStageKernel::validate(&output_reshaped, biases, output)); + } + + return Status{}; +} + void CLDepthwiseConvolutionLayer::run() { // Run weights reshaping (Runs once for every configure) -- cgit v1.2.1