about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sheri Zhang <sheri.zhang@arm.com> 2020-04-30 22:53:39 +0100
committer Sheri Zhang <sheri.zhang@arm.com> 2020-05-01 14:22:55 +0000
commit 359c48eaf251cbb84b523e5a67a099c739482e6c (patch)
tree a0c19341aee267246c4d8877a23b96985f6a99d5
parent 81870c05533cba03373d5e51fed95cd5e74f741d (diff)
download ComputeLibrary-359c48eaf251cbb84b523e5a67a099c739482e6c.tar.gz
COMPMID-3379: Add support for QASYMM8_SIGNED in NEScale
Update QASYMM8_SIGNED support document

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: Ia0cb0d2eed13e7e38b165dc317af06e7f2df4285
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3132
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- arm_compute/core/CPP/kernels/CPPUpsampleKernel.h | 4
-rw-r--r-- arm_compute/core/NEON/kernels/NEScaleKernel.h | 4
-rw-r--r-- arm_compute/runtime/CPP/functions/CPPUpsample.h | 4
-rw-r--r-- docs/00_introduction.dox | 4
-rw-r--r-- src/core/NEON/kernels/NEScaleKernel.cpp | 214
-rw-r--r-- tests/validation/NEON/Scale.cpp | 19
6 files changed, 190 insertions, 59 deletions
diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
index 375ff55037..9fbc9b697c 100644
--- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -55,7 +55,7 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+ * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] info Padding info.
*/
diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h
index e6790371a6..0d0d457d51 100644
--- a/arm_compute/core/NEON/kernels/NEScaleKernel.h
+++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h
@@ -57,7 +57,7 @@ public:
* @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
* @note Using @p policy Area only supports data layout NCHW and input data type U8.
*
- * @param[in] input Source tensor. Data types supported: U8/S16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
* @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
* @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
* @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
@@ -77,7 +77,7 @@ public:
* @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
* @note Using @p policy Area only supports data layout NCHW and input data type U8.
*
- * @param[in] input Source tensor. Data types supported: U8/S16/F16/F32.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
* @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
* @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
* @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
diff --git a/arm_compute/runtime/CPP/functions/CPPUpsample.h b/arm_compute/runtime/CPP/functions/CPPUpsample.h
index 4f74e51d22..3e1852bc30 100644
--- a/arm_compute/runtime/CPP/functions/CPPUpsample.h
+++ b/arm_compute/runtime/CPP/functions/CPPUpsample.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ class CPPUpsample : public ICPPSimpleFunction
public:
/** Configure the upsample CPP kernel
*
- * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+ * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED
* @param[out] output The output tensor. Data types supported: Same as @p input
* @param[in] info Padding information
*/
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 8155952b1c..1c8b136f16 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -264,6 +264,8 @@ v20.05 Public major release
- @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
- @ref CLReductionOperation
- @ref CLReduceMean
+ - @ref NEScale
+ - @ref NEScaleKernel
- @ref NEUpsampleLayer
- @ref NECast
- @ref NEReductionOperation
@@ -277,6 +279,8 @@ v20.05 Public major release
- @ref CPPPermute
- @ref CPPTopKVKernel
- @ref CPPTopKV
+ - @ref CPPUpsample
+ - @ref CPPUpsampleKernel
- New OpenCL kernels / functions:
- @ref CLQLSTMLayer
- @ref CLQLSTMLayerNormalizationKernel
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 66408ab94c..47a02d77c5 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -49,7 +49,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding, bool align_corners)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(output == input);
@@ -251,9 +251,8 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
- const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8);
- const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
execute_window_loop(window, [&](const Coordinates & id)
{
@@ -309,7 +308,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
T res = 0;
//dequantize quantized input
- if(is_quantized)
+ if(input->info()->data_type() == DataType::QASYMM8)
{
float inp00 = dequantize_qasymm8(a00, iq_info);
float inp01 = dequantize_qasymm8(a01, iq_info);
@@ -317,6 +316,14 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
float inp11 = dequantize_qasymm8(a11, iq_info);
res = static_cast<T>(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info));
}
+ else if(input->info()->data_type() == DataType::QASYMM8_SIGNED)
+ {
+ float inp00 = dequantize_qasymm8_signed(a00, iq_info);
+ float inp01 = dequantize_qasymm8_signed(a01, iq_info);
+ float inp10 = dequantize_qasymm8_signed(a10, iq_info);
+ float inp11 = dequantize_qasymm8_signed(a11, iq_info);
+ res = static_cast<T>(quantize_qasymm8_signed((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info));
+ }
else
{
res = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
@@ -469,6 +476,42 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8_SIGNED:
+ {
+ int8x16_t tmp = vdupq_n_s8(0);
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
+ const uint8_t *const in_ptr = in.ptr();
+
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
+ const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
+ ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1)));
+ const int offset_row = in_yi_clamped * input_stride;
+
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[0] + offset_row], tmp, 0);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[1] + offset_row], tmp, 1);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[2] + offset_row], tmp, 2);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[3] + offset_row], tmp, 3);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[4] + offset_row], tmp, 4);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[5] + offset_row], tmp, 5);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[6] + offset_row], tmp, 6);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[7] + offset_row], tmp, 7);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[8] + offset_row], tmp, 8);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[9] + offset_row], tmp, 9);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[10] + offset_row], tmp, 10);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[11] + offset_row], tmp, 11);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[12] + offset_row], tmp, 12);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[13] + offset_row], tmp, 13);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[14] + offset_row], tmp, 14);
+ tmp = vsetq_lane_s8(in_ptr[offsets_ptr[15] + offset_row], tmp, 15);
+
+ vst1q_s8(reinterpret_cast<int8_t *>(out.ptr()), tmp);
+ },
+ in, offsets, out);
+ break;
+ }
case DataType::QASYMM8:
case DataType::U8:
{
@@ -639,7 +682,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
void NEScaleKernel::scale_bilinear_nchw(const Window &window)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32);
// Compute the ratio between source height and destination height
const auto hr = arm_compute::calculate_resize_ratio(_input->info()->dimension(1), _output->info()->dimension(1), _align_corners);
@@ -669,13 +712,89 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1];
const size_t in_stride = in_stide_in_bytes / _input->info()->element_size();
- const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8);
- const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8_SIGNED:
+ {
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
+ const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
+ const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
+ const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
+
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
+ const int offset_row = in_yi * in_stide_in_bytes;
+
+ int8x8_t tmp0 = vdup_n_s8(0);
+
+ tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
+ tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
+ tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
+ tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
+ tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
+ tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
+ tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
+ tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
+
+ int8x8_t tmp1 = vdup_n_s8(0);
+
+ tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
+ tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
+ tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
+ tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
+ tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
+ tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
+ tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
+ tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
+
+ vst1q_s8(reinterpret_cast<int8_t *>(out.ptr()), vcombine_s8(tmp0, tmp1));
+ },
+ in, offsets, dx, dy, out);
+ break;
+ }
case DataType::QASYMM8:
+ {
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
+ const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
+ const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
+ const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
+
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
+ const int offset_row = in_yi * in_stide_in_bytes;
+
+ uint8x8_t tmp0 = vdup_n_u8(0);
+
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
+
+ uint8x8_t tmp1 = vdup_n_u8(0);
+
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
+
+ vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
+ },
+ in, offsets, dx, dy, out);
+ break;
+ }
case DataType::U8:
{
execute_window_loop(window, [&](const Coordinates & id)
@@ -689,51 +808,27 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const int offset_row = in_yi * in_stide_in_bytes;
uint8x8_t tmp0 = vdup_n_u8(0);
- if(is_quantized)
- {
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
- }
- else
- {
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
- }
+
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
+
uint8x8_t tmp1 = vdup_n_u8(0);
- if(is_quantized)
- {
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
- }
- else
- {
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
- }
+
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
+
vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
},
in, offsets, dx, dy, out);
@@ -950,6 +1045,19 @@ void NEScaleKernel::scale_nhwc(const Window &window)
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8_SIGNED:
+ {
+ if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
+ {
+ scale_nearest_nhwc_core<int8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
+ }
+ else
+ {
+ scale_bilinear_nhwc_core<int8_t, int8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
+ }
+ break;
+ }
case DataType::QASYMM8:
case DataType::U8:
{
diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp
index ce1b249f88..b7d7b00cb6 100644
--- a/tests/validation/NEON/Scale.cpp
+++ b/tests/validation/NEON/Scale.cpp
@@ -360,6 +360,25 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<uint8_t>, framework::Da
validate(Accessor(_target), _reference, valid_region, tolerance_u8);
}
TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+ datasets::BorderModes()),
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })),
+ AlignCorners))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_u8);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // Scale