From 3ac2f3a1d9297220d1b0ce920dd13fdd4edcc187 Mon Sep 17 00:00:00 2001 From: Vidhya Sudhan Loganathan Date: Thu, 17 Jan 2019 15:16:19 +0000 Subject: COMPMID-1814 : NEScale add support for TOP_LEFT and QASYMM8 Added support for TOP_LEFT sampling policy and QASYMM8 data type. Change-Id: Id9135bb4b6ebd93f1d6fb70b06e83684a167eb94 Reviewed-on: https://review.mlplatform.org/533 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Reviewed-by: Georgios Pinitas --- arm_compute/core/Helpers.h | 36 +++++++- arm_compute/core/NEON/kernels/NEScaleKernel.h | 3 +- src/core/NEON/kernels/NEScaleKernel.cpp | 125 +++++++++++++++++++------- src/runtime/NEON/functions/NEScale.cpp | 11 ++- tests/validation/NEON/Scale.cpp | 46 ++++++---- 5 files changed, 167 insertions(+), 54 deletions(-) diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h index 8f4220fb80..91d85be086 100644 --- a/arm_compute/core/Helpers.h +++ b/arm_compute/core/Helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -144,6 +144,40 @@ inline T delta_bilinear_c1(const T *pixel_ptr, size_t stride, float dx, float dy return static_cast(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); } +/** Computes bilinear interpolation for quantized input and output, using the pointer to the top-left pixel and the pixel's distance between + * the real coordinates and the smallest following integer coordinates. Input must be quantized and in single channel format. + * + * @param[in] pixel_ptr Pointer to the top-left pixel value of a single channel input. + * @param[in] stride Stride to access the bottom-left and bottom-right pixel values + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer + * @param[in] iq_info Input QuantizationInfo + * @param[in] oq_info Output QuantizationInfo + * + * @note dx and dy must be in the range [0, 1.0] + * + * @return The bilinear interpolated pixel value + */ +inline uint8_t delta_bilinear_c1_quantized(const uint8_t *pixel_ptr, size_t stride, float dx, float dy, QuantizationInfo iq_info, QuantizationInfo oq_info) +{ + ARM_COMPUTE_ERROR_ON(pixel_ptr == nullptr); + + const float dx1 = 1.0f - dx; + const float dy1 = 1.0f - dy; + + const float a00 = iq_info.dequantize(*pixel_ptr); + const float a01 = iq_info.dequantize(*(pixel_ptr + 1)); + const float a10 = iq_info.dequantize(*(pixel_ptr + stride)); + const float a11 = iq_info.dequantize(*(pixel_ptr + stride + 1)); + + const float w1 = dx1 * dy1; + const float w2 = dx * dy1; + const float w3 = dx1 * dy; + const float w4 = dx * dy; + float res = a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4; + return static_cast(oq_info.quantize(res, RoundingPolicy::TO_NEAREST_UP)); +} + /** Computes linear interpolation using the pointer to the top pixel and the pixel's distance between * the real coordinates and the smallest following integer coordinates. Input must be in single channel format. * diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h index c851b3d335..83d99643dc 100644 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -110,6 +110,7 @@ private: InterpolationPolicy _policy; BorderSize _border_size; BorderMode _border_mode; + float _sampling_offset; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_NESCALEKERNEL_H__ */ diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 5fef4f9744..3d300ef26b 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -48,11 +48,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const BorderMode border_mode, SamplingPolicy sampling_policy) { ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON(output == input); - ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER); + ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT); ARM_COMPUTE_UNUSED(border_mode); const DataLayout data_layout = input->data_layout(); @@ -74,6 +74,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const if(policy == InterpolationPolicy::AREA) { ARM_COMPUTE_RETURN_ERROR_ON(data_layout != DataLayout::NCHW); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); } return Status{}; @@ -184,7 +185,7 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets template inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offsets, const ITensor *dx, const ITensor *dy, ITensor *output, - float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode) + float hr, float sampling_offset, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode) { Iterator in(input, win_in); Iterator out(output, window); @@ -204,12 +205,16 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1; + const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); + const QuantizationInfo iq_info = input->info()->quantization_info(); + const QuantizationInfo oq_info = output->info()->quantization_info(); + execute_window_loop(window, [&](const Coordinates & id) { const auto offset = (*reinterpret_cast(offsets->ptr_to_element(Coordinates(id.y(), id.z())))) / static_cast(sizeof(T)); const auto dx_scale = *reinterpret_cast(dx->ptr_to_element(Coordinates(id.y(), id.z()))); const auto dy_scale = *reinterpret_cast(dy->ptr_to_element(Coordinates(id.y(), id.z()))); - const int in_yi = std::floor((id.z() + 0.5f) * hr - 0.5f); + const int in_yi = std::floor((id.z() + sampling_offset) * hr - sampling_offset); const int offset_row = in_yi * stride_h + id.x() * stride_c; const T *in_ptr = reinterpret_cast(in.ptr() + offset * stride_w + offset_row); @@ -253,8 +258,22 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset const float w3 = dx1 * dy_scale; const float w4 = dx_scale * dy_scale; + T res = 0; + //dequantize quantized input + if(is_quantized) + { + float inp00 = iq_info.dequantize(a00); + float inp01 = iq_info.dequantize(a01); + float inp10 = iq_info.dequantize(a10); + float inp11 = iq_info.dequantize(a11); + res = static_cast(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP)); + } + else + { + res = static_cast(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); + } // Store result - *reinterpret_cast(out.ptr()) = static_cast(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); + *reinterpret_cast(out.ptr()) = res; } else { @@ -275,7 +294,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset } // namespace NEScaleKernel::NEScaleKernel() - : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode() + : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode(), _sampling_offset(0) { } @@ -311,6 +330,11 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe _border_size = BorderSize(1); _border_mode = border_mode; + if(sampling_policy == SamplingPolicy::CENTER) + { + _sampling_offset = 0.5f; + } + // Compute the ratio between source width/height and destination width/height const auto wr = static_cast(input->info()->dimension(idx_width)) / static_cast(output->info()->dimension(idx_width)); const auto hr = static_cast(input->info()->dimension(idx_height)) / static_cast(output->info()->dimension(idx_height)); @@ -389,6 +413,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) switch(_input->info()->data_type()) { + case DataType::QASYMM8: case DataType::U8: { uint8x16_t tmp = vdupq_n_u8(0); @@ -559,7 +584,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) void NEScaleKernel::scale_bilinear_nchw(const Window &window) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32); // Compute the ratio between source height and destination height const auto hr = static_cast(_input->info()->dimension(1)) / static_cast(_output->info()->dimension(1)); @@ -589,8 +614,13 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1]; const size_t in_stride = in_stide_in_bytes / _input->info()->element_size(); + const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8); + const QuantizationInfo iq_info = _input->info()->quantization_info(); + const QuantizationInfo oq_info = _output->info()->quantization_info(); + switch(_input->info()->data_type()) { + case DataType::QASYMM8: case DataType::U8: { execute_window_loop(window, [&](const Coordinates & id) @@ -600,29 +630,55 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const auto dy_ptr = reinterpret_cast(dy.ptr()); const auto in_ptr = reinterpret_cast(in.ptr()); - const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f); + const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset); const int offset_row = in_yi * in_stide_in_bytes; uint8x8_t tmp0 = vdup_n_u8(0); - tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0); - tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1); - tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2); - tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3); - tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4); - tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5); - tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6); - tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7); - + if(is_quantized) + { + tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0); + tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1); + tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2); + tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3); + tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4); + tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5); + tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6); + tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7); + } + else + { + tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0); + tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1); + tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2); + tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3); + tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4); + tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5); + tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6); + tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7); + } uint8x8_t tmp1 = vdup_n_u8(0); - tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0); - tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1); - tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2); - tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3); - tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4); - tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5); - tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6); - tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7); - + if(is_quantized) + { + tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0); + tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1); + tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2); + tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3); + tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4); + tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5); + tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6); + tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7); + } + else + { + tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0); + tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1); + tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2); + tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3); + tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4); + tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5); + tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6); + tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7); + } vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1)); }, in, offsets, dx, dy, out); @@ -636,7 +692,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const auto dx_ptr = reinterpret_cast(dx.ptr()); const auto dy_ptr = reinterpret_cast(dy.ptr()); - const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f); + const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset); const int offset_row = in_yi * in_stide_in_bytes; int16x8x2_t tmp = @@ -679,7 +735,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const auto dx_ptr = reinterpret_cast(dx.ptr()); const auto dy_ptr = reinterpret_cast(dy.ptr()); - const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f); + const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset); const int offset_row = in_yi * in_stide_in_bytes; float16x8x2_t tmp = @@ -722,7 +778,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const auto dx_ptr = reinterpret_cast(dx.ptr()); const auto dy_ptr = reinterpret_cast(dy.ptr()); - const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f); + const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset); const int offset_row = in_yi * in_stide_in_bytes; float32x4x4_t tmp = @@ -839,6 +895,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) switch(_input->info()->data_type()) { + case DataType::QASYMM8: case DataType::U8: { if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) @@ -847,7 +904,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) } else { - scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, + scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset, window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode); } break; @@ -860,7 +917,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) } else { - scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, + scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset, window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode); } break; @@ -875,7 +932,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) } else { - scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, + scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset, window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode); } break; @@ -889,7 +946,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) } else { - scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, + scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset, window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode); } break; diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp index 169b9bbf6a..483aa4c0b5 100644 --- a/src/runtime/NEON/functions/NEScale.cpp +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -46,6 +46,11 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float { ARM_COMPUTE_ERROR_ON(nullptr == offsets); ARM_COMPUTE_UNUSED(sampling_policy); + float sampling_offset = 0.0f; + if(sampling_policy == SamplingPolicy::CENTER) + { + sampling_offset = 0.5f; + } Window win; win.set(Window::DimX, Window::Dimension(0, offsets->info()->dimension(0), 1)); @@ -60,8 +65,8 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float execute_window_loop(win, [&](const Coordinates & id) { - const float in_x = (id.x() + 0.5f) * wr - 0.5f; - const float in_y = (id.y() + 0.5f) * hr - 0.5f; + const float in_x = (id.x() + sampling_offset) * wr - sampling_offset; + const float in_y = (id.y() + sampling_offset) * hr - sampling_offset; const int in_xi = std::floor(in_x); const int in_yi = std::floor(in_y); @@ -174,7 +179,7 @@ Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, In BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER); + ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT); ARM_COMPUTE_UNUSED(border_mode, constant_border_value); ITensorInfo *offsets = nullptr; diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp index 127a552367..c05b8ac03b 100644 --- a/tests/validation/NEON/Scale.cpp +++ b/tests/validation/NEON/Scale.cpp @@ -81,42 +81,36 @@ TEST_SUITE(Scale) // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Mismatching data type - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported sampling point TensorInfo(TensorShape(4U, 27U, 13U), 1, DataType::F32), // Invalid policy TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Insufficient padding TensorInfo(TensorShape(4U, 27U, 13U), 1, DataType::F32), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32), TensorInfo(TensorShape(4U, 132U, 25U), 1, DataType::F32), TensorInfo(TensorShape(132U, 25U, 2U), 1, DataType::F32), TensorInfo(TensorShape(4U, 132U, 25U), 1, DataType::F32), })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, - InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::AREA, InterpolationPolicy::AREA, InterpolationPolicy::NEAREST_NEIGHBOR, })), framework::dataset::make("BorderMode", { BorderMode::UNDEFINED, - BorderMode::UNDEFINED, BorderMode::UNDEFINED, BorderMode::UNDEFINED, BorderMode::REPLICATE, })), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER, - SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER, SamplingPolicy::CENTER, SamplingPolicy::CENTER, })), framework::dataset::make("DataLayout", { DataLayout::NCHW, - DataLayout::NCHW, DataLayout::NHWC, DataLayout::NCHW, DataLayout::NHWC, })), - framework::dataset::make("Expected", { false, false, false, false ,true })), + framework::dataset::make("Expected", { false, false, false ,true })), input_info, output_info, policy,border_mode, sampling_policy, data_layout, expected) { const PixelValue constant_border(5); @@ -201,6 +195,8 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi template using NEScaleFixture = ScaleValidationFixture; +template +using NEScaleQuantizedFixture = ScaleValidationQuantizedFixture; TEST_SUITE(Float) TEST_SUITE(FP32) @@ -209,7 +205,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode:: framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), - framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) { //Create valid region TensorInfo src_info(_shape, 1, _data_type); @@ -223,7 +219,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode:: framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), - framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) { //Create valid region TensorInfo src_info(_shape, 1, _data_type); @@ -240,7 +236,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::A framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), - framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) { //Create valid region TensorInfo src_info(_shape, 1, _data_type); @@ -254,7 +250,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::N framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), - framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) { //Create valid region TensorInfo src_info(_shape, 1, _data_type); @@ -274,7 +270,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), - framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) { //Create valid region TensorInfo src_info(_shape, 1, _data_type); @@ -288,7 +284,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), - framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) { //Create valid region TensorInfo src_info(_shape, 1, _data_type); @@ -304,7 +300,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), - framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) { //Create valid region TensorInfo src_info(_shape, 1, _data_type); @@ -318,7 +314,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), - framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) { //Create valid region TensorInfo src_info(_shape, 1, _data_type); @@ -330,6 +326,26 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode TEST_SUITE_END() // S16 TEST_SUITE_END() // Integer +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), + datasets::BorderModes()), + framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER }))) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_u8); +} +TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // Scale TEST_SUITE_END() // NEON } // namespace validation -- cgit v1.2.1