From 7657224de2b697a8a92cccf26d98e53ccd7c1a03 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 4 Apr 2018 17:44:26 +0100 Subject: COMPMID-926 Add depth multiplier support to NEON/CL/GLES depthwise convolution Change-Id: I03f32c62350e5ea43e77bb15fc5a832d83719e3b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126657 Tested-by: Jenkins Reviewed-by: Michele DiGiorgio Reviewed-by: Georgios Pinitas --- .../cs_shaders/depthwise_convolution3x3.cs | 6 +++- .../GCDepthwiseConvolutionLayer3x3Kernel.cpp | 32 +++++----------------- 2 files changed, 12 insertions(+), 26 deletions(-) (limited to 'src/core/GLES_COMPUTE') diff --git a/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs index adfc126c95..134cc1060f 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -108,6 +108,8 @@ void main() uint z_index = gl_GlobalInvocationID.z; TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_attrs.stride_z); + src_iter.current_offset_in_bytes -= int((z_index - z_index / uint(DEPTH_MULTIPLIER)) * src_attrs.step_z); + vec4 w[3]; w[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(weights_ptr, weights_iter); w[1] = LOAD_UNPACK4_HALF(weights_ptr, TENSOR3D_OFFSET(weights_iter, 0, 1, 0)); @@ -263,6 +265,8 @@ void main() uint z_index = gl_GlobalInvocationID.z; TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_attrs.stride_z); + src_iter.current_offset_in_bytes -= int((z_index - z_index / uint(DEPTH_MULTIPLIER)) * src_attrs.step_z); + vec4 w[3]; w[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(weights_ptr, weights_iter); w[1] = LOAD_UNPACK4_HALF(weights_ptr, TENSOR3D_OFFSET(weights_iter, 0, 1, 0)); diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp index 9343268d9e..c2374096a2 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp @@ -33,31 +33,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" using namespace arm_compute; - -namespace -{ -/** Calculates expected output shape dimension - * - * @param[in] Input shape - * - * @return Expected output shape - */ -TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info) -{ - unsigned int output_width = 0; - unsigned int output_height = 0; - - std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info); - - TensorShape output_shape = input_shape; - output_shape.set(0, output_width); - output_shape.set(1, output_height); - - return output_shape; -} -} // namespace +using namespace arm_compute::misc::shape_calculator; GCDepthwiseConvolutionLayer3x3Kernel::GCDepthwiseConvolutionLayer3x3Kernel() : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0), _lws(gles::NDRange(1U, 1U, 1U)) @@ -69,7 +48,8 @@ BorderSize GCDepthwiseConvolutionLayer3x3Kernel::border_size() const return _border_size; } -void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) +void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -83,7 +63,7 @@ void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, con } // Get convolved dimensions - TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info); + const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier); // Output auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), @@ -93,6 +73,7 @@ void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, con input->info()->fixed_point_position()); ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(2) != weights->info()->dimension(2)); _input = input; _output = output; @@ -108,6 +89,7 @@ void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, con ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); std::set options; + options.emplace("#define DEPTH_MULTIPLIER " + support::cpp11::to_string(depth_multiplier)); options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); -- cgit v1.2.1