aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>2019-01-17 15:16:19 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-01-18 11:23:51 +0000
commit3ac2f3a1d9297220d1b0ce920dd13fdd4edcc187 (patch)
treebf04769d89eb4586fd2023bc182ec928172c578c
parent90313eb8f72cf9ba17cbf7b84382a609e927a1fe (diff)
downloadComputeLibrary-3ac2f3a1d9297220d1b0ce920dd13fdd4edcc187.tar.gz
COMPMID-1814 : NEScale add support for TOP_LEFT and QASYMM8
Added support for TOP_LEFT sampling policy and QASYMM8 data type. Change-Id: Id9135bb4b6ebd93f1d6fb70b06e83684a167eb94 Reviewed-on: https://review.mlplatform.org/533 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r--arm_compute/core/Helpers.h36
-rw-r--r--arm_compute/core/NEON/kernels/NEScaleKernel.h3
-rw-r--r--src/core/NEON/kernels/NEScaleKernel.cpp125
-rw-r--r--src/runtime/NEON/functions/NEScale.cpp11
-rw-r--r--tests/validation/NEON/Scale.cpp46
5 files changed, 167 insertions, 54 deletions
diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index 8f4220fb80..91d85be086 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -144,6 +144,40 @@ inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy
return static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
}
+/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between
+ * the real coordinates and the smallest following integer coordinates. Input must be quantized and in single channel format.
+ *
+ * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input.
+ * @param[in] stride Stride to access the bottom-left and bottom-right pixel values
+ * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer
+ * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer
+ * @param[in] iq_info Input QuantizationInfo
+ * @param[in] oq_info Output QuantizationInfo
+ *
+ * @note dx and dy must be in the range [0, 1.0]
+ *
+ * @return The bilinear interpolated pixel value
+ */
+inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, QuantizationInfo iq_info, QuantizationInfo oq_info)
+{
+ ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr);
+
+ const float dx1 = 1.0f - dx;
+ const float dy1 = 1.0f - dy;
+
+ const float a00 = iq_info.dequantize(*pixel_ptr);
+ const float a01 = iq_info.dequantize(*(pixel_ptr + 1));
+ const float a10 = iq_info.dequantize(*(pixel_ptr + stride));
+ const float a11 = iq_info.dequantize(*(pixel_ptr + stride + 1));
+
+ const float w1 = dx1 * dy1;
+ const float w2 = dx * dy1;
+ const float w3 = dx1 * dy;
+ const float w4 = dx * dy;
+ float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
+ return static_cast<uint8_t>(oq_info.quantize(res, RoundingPolicy::TO_NEAREST_UP));
+}
+
/** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between
* the real coordinates and the smallest following integer coordinates. Input must be in single channel format.
*
diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h
index c851b3d335..83d99643dc 100644
--- a/arm_compute/core/NEON/kernels/NEScaleKernel.h
+++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -110,6 +110,7 @@ private:
InterpolationPolicy _policy;
BorderSize _border_size;
BorderMode _border_mode;
+ float _sampling_offset;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NESCALEKERNEL_H__ */
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 5fef4f9744..3d300ef26b 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,11 +48,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
BorderMode border_mode, SamplingPolicy sampling_policy)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(output == input);
- ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER);
+ ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT);
ARM_COMPUTE_UNUSED(border_mode);
const DataLayout data_layout = input->data_layout();
@@ -74,6 +74,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
if(policy == InterpolationPolicy::AREA)
{
ARM_COMPUTE_RETURN_ERROR_ON(data_layout != DataLayout::NCHW);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
}
return Status{};
@@ -184,7 +185,7 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets
template <typename T>
inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offsets, const ITensor *dx, const ITensor *dy, ITensor *output,
- float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode)
+ float hr, float sampling_offset, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode)
{
Iterator in(input, win_in);
Iterator out(output, window);
@@ -204,12 +205,16 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
+ const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8);
+ const QuantizationInfo iq_info = input->info()->quantization_info();
+ const QuantizationInfo oq_info = output->info()->quantization_info();
+
execute_window_loop(window, [&](const Coordinates & id)
{
const auto offset = (*reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())))) / static_cast<int>(sizeof(T));
const auto dx_scale = *reinterpret_cast<const float *>(dx->ptr_to_element(Coordinates(id.y(), id.z())));
const auto dy_scale = *reinterpret_cast<const float *>(dy->ptr_to_element(Coordinates(id.y(), id.z())));
- const int in_yi = std::floor((id.z() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.z() + sampling_offset) * hr - sampling_offset);
const int offset_row = in_yi * stride_h + id.x() * stride_c;
const T *in_ptr = reinterpret_cast<T *>(in.ptr() + offset * stride_w + offset_row);
@@ -253,8 +258,22 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
const float w3 = dx1 * dy_scale;
const float w4 = dx_scale * dy_scale;
+ T res = 0;
+ //dequantize quantized input
+ if(is_quantized)
+ {
+ float inp00 = iq_info.dequantize(a00);
+ float inp01 = iq_info.dequantize(a01);
+ float inp10 = iq_info.dequantize(a10);
+ float inp11 = iq_info.dequantize(a11);
+ res = static_cast<T>(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP));
+ }
+ else
+ {
+ res = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
+ }
// Store result
- *reinterpret_cast<T *>(out.ptr()) = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
+ *reinterpret_cast<T *>(out.ptr()) = res;
}
else
{
@@ -275,7 +294,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
} // namespace
NEScaleKernel::NEScaleKernel()
- : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode()
+ : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode(), _sampling_offset(0)
{
}
@@ -311,6 +330,11 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe
_border_size = BorderSize(1);
_border_mode = border_mode;
+ if(sampling_policy == SamplingPolicy::CENTER)
+ {
+ _sampling_offset = 0.5f;
+ }
+
// Compute the ratio between source width/height and destination width/height
const auto wr = static_cast<float>(input->info()->dimension(idx_width)) / static_cast<float>(output->info()->dimension(idx_width));
const auto hr = static_cast<float>(input->info()->dimension(idx_height)) / static_cast<float>(output->info()->dimension(idx_height));
@@ -389,6 +413,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8:
case DataType::U8:
{
uint8x16_t tmp = vdupq_n_u8(0);
@@ -559,7 +584,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
void NEScaleKernel::scale_bilinear_nchw(const Window &window)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
// Compute the ratio between source height and destination height
const auto hr = static_cast<float>(_input->info()->dimension(1)) / static_cast<float>(_output->info()->dimension(1));
@@ -589,8 +614,13 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1];
const size_t in_stride = in_stide_in_bytes / _input->info()->element_size();
+ const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8);
+ const QuantizationInfo iq_info = _input->info()->quantization_info();
+ const QuantizationInfo oq_info = _output->info()->quantization_info();
+
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8:
case DataType::U8:
{
execute_window_loop(window, [&](const Coordinates & id)
@@ -600,29 +630,55 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
- const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
const int offset_row = in_yi * in_stide_in_bytes;
uint8x8_t tmp0 = vdup_n_u8(0);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
-
+ if(is_quantized)
+ {
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
+ }
+ else
+ {
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
+ }
uint8x8_t tmp1 = vdup_n_u8(0);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
-
+ if(is_quantized)
+ {
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
+ }
+ else
+ {
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
+ }
vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
},
in, offsets, dx, dy, out);
@@ -636,7 +692,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
const int offset_row = in_yi * in_stide_in_bytes;
int16x8x2_t tmp =
@@ -679,7 +735,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
const int offset_row = in_yi * in_stide_in_bytes;
float16x8x2_t tmp =
@@ -722,7 +778,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
const int offset_row = in_yi * in_stide_in_bytes;
float32x4x4_t tmp =
@@ -839,6 +895,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8:
case DataType::U8:
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
@@ -847,7 +904,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<uint8_t>(_input, _offsets, _dx, _dy, _output, hr,
+ scale_bilinear_nhwc_core<uint8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
}
break;
@@ -860,7 +917,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<int16_t>(_input, _offsets, _dx, _dy, _output, hr,
+ scale_bilinear_nhwc_core<int16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
}
break;
@@ -875,7 +932,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<float16_t>(_input, _offsets, _dx, _dy, _output, hr,
+ scale_bilinear_nhwc_core<float16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
}
break;
@@ -889,7 +946,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<float>(_input, _offsets, _dx, _dy, _output, hr,
+ scale_bilinear_nhwc_core<float>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
}
break;
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index 169b9bbf6a..483aa4c0b5 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -46,6 +46,11 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float
{
ARM_COMPUTE_ERROR_ON(nullptr == offsets);
ARM_COMPUTE_UNUSED(sampling_policy);
+ float sampling_offset = 0.0f;
+ if(sampling_policy == SamplingPolicy::CENTER)
+ {
+ sampling_offset = 0.5f;
+ }
Window win;
win.set(Window::DimX, Window::Dimension(0, offsets->info()->dimension(0), 1));
@@ -60,8 +65,8 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float
execute_window_loop(win, [&](const Coordinates & id)
{
- const float in_x = (id.x() + 0.5f) * wr - 0.5f;
- const float in_y = (id.y() + 0.5f) * hr - 0.5f;
+ const float in_x = (id.x() + sampling_offset) * wr - sampling_offset;
+ const float in_y = (id.y() + sampling_offset) * hr - sampling_offset;
const int in_xi = std::floor(in_x);
const int in_yi = std::floor(in_y);
@@ -174,7 +179,7 @@ Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, In
BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER);
+ ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT);
ARM_COMPUTE_UNUSED(border_mode, constant_border_value);
ITensorInfo *offsets = nullptr;
diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp
index 127a552367..c05b8ac03b 100644
--- a/tests/validation/NEON/Scale.cpp
+++ b/tests/validation/NEON/Scale.cpp
@@ -81,19 +81,16 @@ TEST_SUITE(Scale)
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Mismatching data type
- TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported sampling point
TensorInfo(TensorShape(4U, 27U, 13U), 1, DataType::F32), // Invalid policy
TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Insufficient padding
TensorInfo(TensorShape(4U, 27U, 13U), 1, DataType::F32),
}),
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32),
- TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(4U, 132U, 25U), 1, DataType::F32),
TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(4U, 132U, 25U), 1, DataType::F32),
})),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR,
- InterpolationPolicy::NEAREST_NEIGHBOR,
InterpolationPolicy::AREA,
InterpolationPolicy::AREA,
InterpolationPolicy::NEAREST_NEIGHBOR,
@@ -101,22 +98,19 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
framework::dataset::make("BorderMode", { BorderMode::UNDEFINED,
BorderMode::UNDEFINED,
BorderMode::UNDEFINED,
- BorderMode::UNDEFINED,
BorderMode::REPLICATE,
})),
framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER,
- SamplingPolicy::TOP_LEFT,
SamplingPolicy::CENTER,
SamplingPolicy::CENTER,
SamplingPolicy::CENTER,
})),
framework::dataset::make("DataLayout", { DataLayout::NCHW,
- DataLayout::NCHW,
DataLayout::NHWC,
DataLayout::NCHW,
DataLayout::NHWC,
})),
- framework::dataset::make("Expected", { false, false, false, false ,true })),
+ framework::dataset::make("Expected", { false, false, false ,true })),
input_info, output_info, policy,border_mode, sampling_policy, data_layout, expected)
{
const PixelValue constant_border(5);
@@ -201,6 +195,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi
template <typename T>
using NEScaleFixture = ScaleValidationFixture<Tensor, Accessor, NEScale, T>;
+template <typename T>
+using NEScaleQuantizedFixture = ScaleValidationQuantizedFixture<Tensor, Accessor, NEScale, T>;
TEST_SUITE(Float)
TEST_SUITE(FP32)
@@ -209,7 +205,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<float>, framework::DatasetMode::
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
datasets::BorderModes()),
- framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -223,7 +219,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<float>, framework::DatasetMode::
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
datasets::BorderModes()),
- framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -240,7 +236,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<half>, framework::DatasetMode::A
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
datasets::BorderModes()),
- framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -254,7 +250,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<half>, framework::DatasetMode::N
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
datasets::BorderModes()),
- framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -274,7 +270,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<uint8_t>, framework::DatasetMode
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
datasets::BorderModes()),
- framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -288,7 +284,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<uint8_t>, framework::DatasetMode
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
datasets::BorderModes()),
- framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -304,7 +300,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int16_t>, framework::DatasetMode
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
datasets::BorderModes()),
- framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -318,7 +314,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<int16_t>, framework::DatasetMode
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
datasets::BorderModes()),
- framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })))
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
{
//Create valid region
TensorInfo src_info(_shape, 1, _data_type);
@@ -330,6 +326,26 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture<int16_t>, framework::DatasetMode
TEST_SUITE_END() // S16
TEST_SUITE_END() // Integer
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+ datasets::BorderModes()),
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })))
+{
+ //Create valid region
+ TensorInfo src_info(_shape, 1, _data_type);
+ ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+ // Validate output
+ validate(Accessor(_target), _reference, valid_region, tolerance_u8);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
TEST_SUITE_END() // Scale
TEST_SUITE_END() // NEON
} // namespace validation