From 8c837ca85c06c53ccca20937be9dfd74d00d597a Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Tue, 7 Jan 2020 15:06:41 +0000
Subject: COMPMID-2766: Add support for QASYMM8_SIGNED in NEDepthwiseConvolutionLayer

This patch also adds support for QASYMM8_SIGNED in the generic functions
that use NEDepthwiseConvolutionLayerNativeKernel.

Change-Id: I74a99e1476cb1ebd2078e076ab2bea703949527b
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/2552
Reviewed-by: Giorgio Arena
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 .../NEDepthwiseConvolutionLayerNativeKernel.cpp    | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
index aee13ee578..7626fda886 100644
--- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -299,7 +299,7 @@ void depthwise_loop_multiplier1_quantized(const ITensor *input, const ITensor *w
                 {
                     acc.at(i) = rounding_divide_by_exp2(saturating_doubling_high_mul(acc.at(i), out_mul), out_shift) + output_qoffset;
                 }
-                out_vals[i] = static_cast<T>(utility::clamp<int32_t, uint8_t>(acc.at(i)));
+                out_vals[i] = static_cast<T>(utility::clamp<int32_t, T>(acc.at(i)));
             }
 
             wrapper::vstore(reinterpret_cast<T *>(output_it.ptr()), out_vals);
@@ -403,7 +403,7 @@ void depthwise_loop_generic_quantized(const ITensor *input, const ITensor *weigh
                 {
                     acc.at(m) = rounding_divide_by_exp2(saturating_doubling_high_mul(acc.at(m), out_mul), out_shift) + output_qoffset;
                 }
-                *(reinterpret_cast<T *>(output_it.ptr() + m * sizeof(T))) = static_cast<T>(utility::clamp<int32_t, uint8_t>(acc.at(m)));
+                *(reinterpret_cast<T *>(output_it.ptr() + m * sizeof(T))) = static_cast<T>(utility::clamp<int32_t, T>(acc.at(m)));
             }
         },
         input_it, weights_it, biases_it, output_it);
@@ -415,7 +415,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier == 0);
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) + (weights->dimension(1) - 1) * (dilation.x() - 1) > input->dimension(1) + conv_info.pad_left() + conv_info.pad_right());
     ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(2) + (weights->dimension(2) - 1) * (dilation.y() - 1) > input->dimension(2) + conv_info.pad_top() + conv_info.pad_bottom());
@@ -553,9 +553,22 @@ void NEDepthwiseConvolutionLayerNativeKernel::configure(const ITensor *input, co
                     &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, uint8_t, false>;
             pad_vectors(_output_multiplier, _output_shift, 8);
             break;
+        case DataType::QASYMM8_SIGNED:
+            _func = (biases != nullptr) ? &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t, true> :
+                    &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t, false>;
+            pad_vectors(_output_multiplier, _output_shift, 8);
+            break;
         case DataType::QSYMM8_PER_CHANNEL:
-            _func = (biases != nullptr) ? &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t, true> :
-                    &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t, false>;
+            if(_input->info()->data_type() == DataType::QASYMM8)
+            {
+                _func = (biases != nullptr) ? &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t, true> :
+                        &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<uint8_t, int8_t, false>;
+            }
+            else
+            {
+                _func = (biases != nullptr) ? &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t, true> :
+                        &NEDepthwiseConvolutionLayerNativeKernel::run_depthwise<int8_t, int8_t, false>;
+            }
             pad_vectors(_output_multiplier, _output_shift, 8);
             break;
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -621,7 +634,7 @@ void NEDepthwiseConvolutionLayerNativeKernel::run_depthwise(const Window &window
     }
 }
 
-template <typename T, typename TW, bool has_biases, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type>
+template <typename T, typename TW, bool has_biases>
 void NEDepthwiseConvolutionLayerNativeKernel::run_depthwise(const Window &window)
 {
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
--
cgit v1.2.1
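
For reference, below is a minimal sketch of how the new QASYMM8_SIGNED path could be exercised through the public runtime function NEDepthwiseConvolutionLayer, which dispatches to this kernel. It is not part of the patch: the tensor shapes, quantization parameters, padding and the NHWC layout are illustrative assumptions chosen for the example.

// Illustrative sketch only: runs a depthwise convolution on QASYMM8_SIGNED tensors.
// Shapes, scales, offsets and padding are assumptions, not values from the patch.
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor input, weights, biases, output;

    // NHWC tensors: TensorShape is ordered (C, W, H, N) for this layout.
    input.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
    input.info()->set_data_layout(DataLayout::NHWC);
    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, 0)));
    weights.info()->set_data_layout(DataLayout::NHWC);
    biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32)); // quantized biases are S32
    output.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 1U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
    output.info()->set_data_layout(DataLayout::NHWC);

    // 3x3 kernel, stride 1, padding 1, depth_multiplier = 1.
    NEDepthwiseConvolutionLayer dwc;
    dwc.configure(&input, &weights, &biases, &output, PadStrideInfo(1, 1, 1, 1), 1);

    input.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    output.allocator()->allocate();

    // ... fill input/weights/biases with quantized data here ...
    dwc.run();
    return 0;
}

With QSYMM8_PER_CHANNEL weights the same entry point applies; as in the configure() hunk above, the kernel then picks the unsigned or signed input specialization based on the input tensor's data type.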