From 7657224de2b697a8a92cccf26d98e53ccd7c1a03 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 4 Apr 2018 17:44:26 +0100 Subject: COMPMID-926 Add depth multiplier support to NEON/CL/GLES depthwise convolution Change-Id: I03f32c62350e5ea43e77bb15fc5a832d83719e3b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126657 Tested-by: Jenkins Reviewed-by: Michele DiGiorgio Reviewed-by: Georgios Pinitas --- .../reference/DepthwiseConvolutionLayer.cpp | 135 ++++++++++----------- 1 file changed, 65 insertions(+), 70 deletions(-) (limited to 'tests/validation/reference/DepthwiseConvolutionLayer.cpp') diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp index d05da9140b..207e5fc45c 100644 --- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp @@ -51,7 +51,8 @@ namespace reference * */ template -void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, SimpleTensor &dst, const PadStrideInfo &conv_info) +void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, SimpleTensor &dst, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) { // Compute reference const int filter_width = weights.shape().x(); @@ -75,40 +76,47 @@ void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor(x), static_cast(y), static_cast(z), static_cast(r)); - size_t filter_offset = filter_plane * z; + const int out_z = z * depth_multiplier + m; - T val(0); - for(int j = y - filter_half_height; j <= static_cast(y + filter_half_height); ++j) + for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) + { + for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) { - for(int i = x - filter_half_width; i <= static_cast(x + filter_half_width); ++i) + Coordinates coords(static_cast(x), static_cast(y), static_cast(z), static_cast(r)); + size_t filter_offset = filter_plane * out_z; + + T val(0); + for(int j = y - filter_half_height; j <= static_cast(y + filter_half_height); ++j) { - coords.set(0, i); - coords.set(1, j); - T border_value(0); - val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value); - ++filter_offset; + for(int i = x - filter_half_width; i <= static_cast(x + filter_half_width); ++i) + { + coords.set(0, i); + coords.set(1, j); + + val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value); + ++filter_offset; + } } + + dst[out_pos++] = saturate_cast(val + *static_cast(biases(Coordinates(out_z)))); } - coords.set(0, x); - coords.set(1, y); - dst[out_pos++] = saturate_cast(val + *static_cast(biases(Coordinates(z)))); } } } } } -void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, SimpleTensor &dst, const PadStrideInfo &conv_info) +void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, SimpleTensor &dst, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) { // Create reference const int input_offset = -src.quantization_info().offset; @@ -150,89 +158,76 @@ void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTe { for(int z = 0; z < input_depth; ++z) { - int32_t bias_val = *static_cast(biases(Coordinates(z))); - for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) + for(unsigned int m = 0; m < depth_multiplier; ++m) { - for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) - { - Coordinates coords(x, y, z, r); - int filter_offset = filter_plane * z; + const int out_z = z * depth_multiplier + m; + const int32_t bias_val = *static_cast(biases(Coordinates(out_z))); - int32_t val = 0; - for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j) + for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) + { + for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) { - for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i) + Coordinates coords(x, y, z, r); + int filter_offset = filter_plane * out_z; + + int32_t val = 0; + for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j) { - coords.set(0, i); - coords.set(1, j); - const auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, -input_offset); - const uint8_t w_val = *(weights.data() + filter_offset); - val += (in_val + input_offset) * (w_val + weights_offset); - ++filter_offset; + for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i) + { + coords.set(0, i); + coords.set(1, j); + const auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, -input_offset); + const uint8_t w_val = *(weights.data() + filter_offset); + val += (in_val + input_offset) * (w_val + weights_offset); + ++filter_offset; + } } + val += bias_val; + val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); + val += output_offset; + val = std::max(val, 0); + val = std::min(val, 255); + + // Store the result + dst[out_pos++] = val; } - val += bias_val; - val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); - val += output_offset; - val = std::max(val, 0); - val = std::min(val, 255); - - // Store the result - dst[out_pos++] = val; } } } } } -template <> -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info) -{ - SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; - - if(src.data_layout() == DataLayout::NHWC) - { - SimpleTensor src_nchw = reference::permute(src, PermutationVector(1U, 2U, 0U)); - SimpleTensor weights_nchw = reference::permute(weights, PermutationVector(1U, 2U, 0U)); - SimpleTensor dst_nchw = reference::permute(dst, PermutationVector(1U, 2U, 0U)); - - depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info); - - return reference::permute(dst_nchw, PermutationVector(2U, 0U, 1U)); - } - - depthwise_convolution_nchw(src, weights, biases, dst, conv_info); - - return dst; -} - template -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) { - SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; + SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; - if(src.data_layout() == DataLayout::NHWC && src.data_type() == DataType::F32) + if(src.data_layout() == DataLayout::NHWC) { SimpleTensor src_nchw = reference::permute(src, PermutationVector(1U, 2U, 0U)); SimpleTensor weights_nchw = reference::permute(weights, PermutationVector(1U, 2U, 0U)); SimpleTensor dst_nchw = reference::permute(dst, PermutationVector(1U, 2U, 0U)); - depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info); + depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info, depth_multiplier); return reference::permute(dst_nchw, PermutationVector(2U, 0U, 1U)); } - depthwise_convolution_nchw(src, weights, biases, dst, conv_info); + depthwise_convolution_nchw(src, weights, biases, dst, conv_info, depth_multiplier); return dst; } +template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, + const PadStrideInfo &conv_info, unsigned int depth_multiplier); + template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info); + const PadStrideInfo &conv_info, unsigned int depth_multiplier); template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info); + const PadStrideInfo &conv_info, unsigned int depth_multiplier); } // namespace reference } // namespace validation } // namespace test -- cgit v1.2.1