From e6630e4063fc3aa4312a2c8d094318b09ad2c3f5 Mon Sep 17 00:00:00 2001 From: Isabella Gottardi Date: Thu, 18 Jan 2018 15:50:39 +0000 Subject: COMPMID-790 - NEON: Add QASYMM8 support to Convolution Change-Id: Iec82a91ad351cfe8d07d0976a24bd42f4703177a Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/116833 Tested-by: Jenkins Reviewed-by: Anthony Barbier Reviewed-by: Gian Marco Iodice --- src/core/CL/kernels/CLIm2ColKernel.cpp | 9 +++--- .../NEON/kernels/NEGEMMInterleave4x4Kernel.cpp | 10 +++---- .../kernels/NEGEMMLowpOffsetContributionKernel.cpp | 9 ++---- src/core/NEON/kernels/NEIm2ColKernel.cpp | 35 ++++++++++++++-------- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 5 ++-- 5 files changed, 37 insertions(+), 31 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 0e9f2c5344..4f693187bd 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,9 +41,10 @@ using namespace arm_compute; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); // Checks performed when output is configured @@ -67,7 +68,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias)); _input = input; _output = output; @@ -208,7 +209,7 @@ Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *out ARM_COMPUTE_UNUSED(kernel_dims); ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(has_bias); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias)); return Status{}; } diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp index 2f8afd8b06..12755a45f8 100644 --- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp +++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include #include @@ -37,6 +38,7 @@ #include using namespace arm_compute; +using namespace arm_compute::misc::shape_calculator; namespace { @@ -178,12 +180,8 @@ void NEGEMMInterleave4x4Kernel::configure(const ITensor *input, ITensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - TensorShape output_shape = input->info()->tensor_shape(); - output_shape.set(0, input->info()->dimension(0) * 4); - output_shape.set(1, std::ceil(input->info()->dimension(1) / 4.0f)); - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_interleaved_shape(*input->info()))); // Perform validate step ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); diff --git a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp index 3d41548a6a..ee334dfca0 100644 --- a/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -143,13 +143,10 @@ void NEGEMMLowpOffsetContributionKernel::configure(ITensor *mm_result, const ITe // If a_offset == 0, vector_sum_col can be a nullptr if(a_offset != 0) { - TensorShape vector_sum_col_shape = vector_sum_col->info()->tensor_shape(); // NOLINT - vector_sum_col_shape.collapse(1); - // Check if vector_sum_col_shape should be slidden or not // Don't slide vector_sum_col_shape along the y dimension if vector_sum_col_shape has just 1 dimension and vector_sum_row_shape more than 1 // This scenario can happen when the the matrix multiplication is used to perform a convolution operation - _slide_vector_sum_col = vector_sum_col_shape[1] != 1; + _slide_vector_sum_col = vector_sum_col->info()->tensor_shape().num_dimensions() > 1; } // Configure kernel window @@ -201,7 +198,7 @@ void NEGEMMLowpOffsetContributionKernel::run(const Window &window, const ThreadI Iterator vector_sum_row(_vector_sum_row, win_vector_sum_row); Iterator mm_result(_mm_result, window); - execute_window_loop(window, [&](const Coordinates & id) + execute_window_loop(collapsed_window, [&](const Coordinates & id) { // Compute the leftover term due to a_offset. int32x4x4_t a_offset_term_s32 = diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 8eb235b360..633f78de4b 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,7 +68,8 @@ inline void linearize_volume(const uint8_t *const in_ptr, int input_stride_x, int input_stride_y, int input_stride_z, - int fixed_point_position) + int fixed_point_position, + int pad_value) { const int kernel_size2 = kernel_width * kernel_height; const int x_e = top_left_x + kernel_width; @@ -85,12 +86,12 @@ inline void linearize_volume(const uint8_t *const in_ptr, { if((y < 0 || y >= input_h) && has_pads) { - // All the values will be zeros + // All the values will be the offset (will be zeros when not quantized) for(int x = top_left_x; x < x_e; ++x, ++out_ptr) { - *(out_ptr + 0 * kernel_size2) = 0; - *(out_ptr + 1 * kernel_size2) = 0; - *(out_ptr + 2 * kernel_size2) = 0; + *(out_ptr + 0 * kernel_size2) = pad_value; + *(out_ptr + 1 * kernel_size2) = pad_value; + *(out_ptr + 2 * kernel_size2) = pad_value; } } else @@ -99,9 +100,9 @@ inline void linearize_volume(const uint8_t *const in_ptr, { if((x < 0 || x >= input_w) && has_pads) { - *(out_ptr + 0 * kernel_size2) = 0; - *(out_ptr + 1 * kernel_size2) = 0; - *(out_ptr + 2 * kernel_size2) = 0; + *(out_ptr + 0 * kernel_size2) = pad_value; + *(out_ptr + 1 * kernel_size2) = pad_value; + *(out_ptr + 2 * kernel_size2) = pad_value; } else { @@ -122,8 +123,8 @@ inline void linearize_volume(const uint8_t *const in_ptr, { if((y < 0 || y >= input_h) && has_pads) { - // All the values will be zeros - memset(out_ptr, 0, kernel_width * sizeof(T)); + // All the values will be the offset (will be zeros when not quantized) + memset(out_ptr, pad_value, kernel_width * sizeof(T)); out_ptr += kernel_width; } else @@ -132,7 +133,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, { if((x < 0 || x >= input_w) && has_pads) { - *out_ptr = 0; + *out_ptr = pad_value; } else { @@ -174,6 +175,7 @@ void NEIm2ColKernel::run_generic(const Window &window) const int input_stride_x = _input->info()->strides_in_bytes().x(); const int input_stride_y = _input->info()->strides_in_bytes().y(); const int input_stride_z = _input->info()->strides_in_bytes().z(); + const int offset = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().offset : 0; int pad_left = 0; int pad_top = 0; @@ -226,7 +228,8 @@ void NEIm2ColKernel::run_generic(const Window &window) input_stride_x, input_stride_y, input_stride_z, - _input->info()->fixed_point_position()); + _input->info()->fixed_point_position(), + offset); }, in, out); } @@ -335,6 +338,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size case DataType::QS16: _func = &NEIm2ColKernel::run_reduced; break; + case DataType::QASYMM8: + _func = &NEIm2ColKernel::run_reduced; + break; default: ARM_COMPUTE_ERROR("Data type not supported"); break; @@ -358,6 +364,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size case DataType::QS16: _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; break; + case DataType::QASYMM8: + _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic : &NEIm2ColKernel::run_generic; + break; default: ARM_COMPUTE_ERROR("Data type not supported"); break; diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index 794c179277..150140271d 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -101,11 +101,12 @@ TensorShape get_output_shape(const ITensorInfo *input, bool has_bias) Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); if(biases != nullptr) { + ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_asymmetric(input->data_type())); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); ARM_COMPUTE_RETURN_ERROR_ON((input->num_dimensions() == 4) && (biases->num_dimensions() != 1)); -- cgit v1.2.1