diff options
author | Pablo Tello <pablo.tello@arm.com> | 2019-02-05 16:16:19 +0000 |
---|---|---|
committer | Pablo Marquez <pablo.tello@arm.com> | 2019-02-08 11:15:30 +0000 |
commit | 54e98d98fbe082b265b2c4a384eabe0144866bcc (patch) | |
tree | f0367171d8dfcad282523efceb709ec523ef9edd /src/core | |
parent | 03bb550b72ebd107a6cbd994008cdcc00597d822 (diff) | |
download | ComputeLibrary-54e98d98fbe082b265b2c4a384eabe0144866bcc.tar.gz |
COMPMID-1918: Different qinfos support in NEConcatLayer.
Added support in NEDepthConcatenateLayerKernel and NEWidthConcatenateLayerKernel for
different quantization infos on the inputs and the output.
If the inputs' quantization infos are not homogeneous, the input values are requantized using
the output's quantization info.
Change-Id: I2daa638361947eb3ec848d5425d0a5bbfea1936d
Reviewed-on: https://review.mlplatform.org/627
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 33 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp | 36 |
2 files changed, 50 insertions, 19 deletions
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index 8c875cdb2d..8352c94586 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEAsymm.h" #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" @@ -57,14 +58,30 @@ void depth_concat(const ITensor *in, ITensor *out, std::pair<int, int> start_xy, Iterator input(in, window); Iterator output(out, window); - execute_window_loop(window, [&](const Coordinates & id) + const DataType dt = in->info()->data_type(); + const QuantizationInfo &input_qinfo = in->info()->quantization_info(); + const QuantizationInfo &output_qinfo = out->info()->quantization_info(); + if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { - const auto in_ptr = reinterpret_cast<const T *>(input_ptr + input.offset()); - const auto out_ptr = reinterpret_cast<T *>(output_ptr + output.offset()); - - wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); - }, - input, output); + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast<const uint8_t *>(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast<uint8_t *>(output_ptr + output.offset()); + vst1q_u8(out_ptr, vquantize(vdequantize(vld1q_u8(in_ptr), input_qinfo), output_qinfo)); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = reinterpret_cast<const T *>(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast<T 
*>(output_ptr + output.offset()); + + wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); + }, + input, output); + } } std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output) diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index a84a6d9028..ca27a26493 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,6 +27,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEAsymm.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" @@ -110,15 +111,28 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo & uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - - execute_window_loop(window, [&](const Coordinates & id) + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); + const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { - const auto in_ptr = input.ptr(); - const auto out_ptr = output_ptr + output.offset(); - - wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); - }, - input, output); + execute_window_loop(window, [&](const Coordinates &) + { + vst1q_u8(output_ptr + output.offset(), 
vquantize(vdequantize(vld1q_u8(input.ptr()), input_qinfo), output_qinfo)); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const auto in_ptr = input.ptr(); + const auto out_ptr = output_ptr + output.offset(); + + wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr)); + }, + input, output); + } } |