about summary refs log tree commit diff
path: root/src/core/NEON
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON')
-rw-r--r-- src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp 33
-rw-r--r-- src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp 36
2 files changed, 50 insertions, 19 deletions
diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
index 8c875cdb2d..8352c94586 100644
--- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/TensorInfo.h"
@@ -57,14 +58,30 @@ void depth_concat(const ITensor *in, ITensor *out, std::pair<int, int> start_xy,
Iterator input(in, window);
Iterator output(out, window);
- execute_window_loop(window, [&](const Coordinates & id)
+ const DataType dt = in->info()->data_type();
+ const QuantizationInfo &input_qinfo = in->info()->quantization_info();
+ const QuantizationInfo &output_qinfo = out->info()->quantization_info();
+ if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
{
- const auto in_ptr = reinterpret_cast<const T *>(input_ptr + input.offset());
- const auto out_ptr = reinterpret_cast<T *>(output_ptr + output.offset());
-
- wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr));
- },
- input, output);
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const auto in_ptr = reinterpret_cast<const uint8_t *>(input_ptr + input.offset());
+ const auto out_ptr = reinterpret_cast<uint8_t *>(output_ptr + output.offset());
+ vst1q_u8(out_ptr, vquantize(vdequantize(vld1q_u8(in_ptr), input_qinfo), output_qinfo));
+ },
+ input, output);
+ }
+ else
+ {
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const auto in_ptr = reinterpret_cast<const T *>(input_ptr + input.offset());
+ const auto out_ptr = reinterpret_cast<T *>(output_ptr + output.offset());
+
+ wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr));
+ },
+ input, output);
+ }
}
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output)
diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
index a84a6d9028..ca27a26493 100644
--- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
@@ -110,15 +111,28 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &
uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0];
// Create iterators
- Iterator input(_input, window);
- Iterator output(_output, window);
-
- execute_window_loop(window, [&](const Coordinates & id)
+ Iterator input(_input, window);
+ Iterator output(_output, window);
+ const DataType dt = _input->info()->data_type();
+ const QuantizationInfo &input_qinfo = _input->info()->quantization_info();
+ const QuantizationInfo &output_qinfo = _output->info()->quantization_info();
+ if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo)
{
- const auto in_ptr = input.ptr();
- const auto out_ptr = output_ptr + output.offset();
-
- wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr));
- },
- input, output);
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ vst1q_u8(output_ptr + output.offset(), vquantize(vdequantize(vld1q_u8(input.ptr()), input_qinfo), output_qinfo));
+ },
+ input, output);
+ }
+ else
+ {
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const auto in_ptr = input.ptr();
+ const auto out_ptr = output_ptr + output.offset();
+
+ wrapper::vstore(out_ptr, wrapper::vloadq(in_ptr));
+ },
+ input, output);
+ }
}