aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEScaleKernel.cpp
diff options
context:
space:
mode:
authorVidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>2019-01-17 15:16:19 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-01-18 11:23:51 +0000
commit3ac2f3a1d9297220d1b0ce920dd13fdd4edcc187 (patch)
treebf04769d89eb4586fd2023bc182ec928172c578c /src/core/NEON/kernels/NEScaleKernel.cpp
parent90313eb8f72cf9ba17cbf7b84382a609e927a1fe (diff)
downloadComputeLibrary-3ac2f3a1d9297220d1b0ce920dd13fdd4edcc187.tar.gz
COMPMID-1814 : NEScale add support for TOP_LEFT and QASYMM8
Added support for TOP_LEFT sampling policy and QASYMM8 data type. Change-Id: Id9135bb4b6ebd93f1d6fb70b06e83684a167eb94 Reviewed-on: https://review.mlplatform.org/533 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEScaleKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEScaleKernel.cpp125
1 files changed, 91 insertions, 34 deletions
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 5fef4f9744..3d300ef26b 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -48,11 +48,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
BorderMode border_mode, SamplingPolicy sampling_policy)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(output == input);
- ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER);
+ ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT);
ARM_COMPUTE_UNUSED(border_mode);
const DataLayout data_layout = input->data_layout();
@@ -74,6 +74,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
if(policy == InterpolationPolicy::AREA)
{
ARM_COMPUTE_RETURN_ERROR_ON(data_layout != DataLayout::NCHW);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
}
return Status{};
@@ -184,7 +185,7 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets
template <typename T>
inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offsets, const ITensor *dx, const ITensor *dy, ITensor *output,
- float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode)
+ float hr, float sampling_offset, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode)
{
Iterator in(input, win_in);
Iterator out(output, window);
@@ -204,12 +205,16 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
+ const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8);
+ const QuantizationInfo iq_info = input->info()->quantization_info();
+ const QuantizationInfo oq_info = output->info()->quantization_info();
+
execute_window_loop(window, [&](const Coordinates & id)
{
const auto offset = (*reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())))) / static_cast<int>(sizeof(T));
const auto dx_scale = *reinterpret_cast<const float *>(dx->ptr_to_element(Coordinates(id.y(), id.z())));
const auto dy_scale = *reinterpret_cast<const float *>(dy->ptr_to_element(Coordinates(id.y(), id.z())));
- const int in_yi = std::floor((id.z() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.z() + sampling_offset) * hr - sampling_offset);
const int offset_row = in_yi * stride_h + id.x() * stride_c;
const T *in_ptr = reinterpret_cast<T *>(in.ptr() + offset * stride_w + offset_row);
@@ -253,8 +258,22 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
const float w3 = dx1 * dy_scale;
const float w4 = dx_scale * dy_scale;
+ T res = 0;
+ //dequantize quantized input
+ if(is_quantized)
+ {
+ float inp00 = iq_info.dequantize(a00);
+ float inp01 = iq_info.dequantize(a01);
+ float inp10 = iq_info.dequantize(a10);
+ float inp11 = iq_info.dequantize(a11);
+ res = static_cast<T>(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP));
+ }
+ else
+ {
+ res = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
+ }
// Store result
- *reinterpret_cast<T *>(out.ptr()) = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
+ *reinterpret_cast<T *>(out.ptr()) = res;
}
else
{
@@ -275,7 +294,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
} // namespace
NEScaleKernel::NEScaleKernel()
- : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode()
+ : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode(), _sampling_offset(0)
{
}
@@ -311,6 +330,11 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe
_border_size = BorderSize(1);
_border_mode = border_mode;
+ if(sampling_policy == SamplingPolicy::CENTER)
+ {
+ _sampling_offset = 0.5f;
+ }
+
// Compute the ratio between source width/height and destination width/height
const auto wr = static_cast<float>(input->info()->dimension(idx_width)) / static_cast<float>(output->info()->dimension(idx_width));
const auto hr = static_cast<float>(input->info()->dimension(idx_height)) / static_cast<float>(output->info()->dimension(idx_height));
@@ -389,6 +413,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8:
case DataType::U8:
{
uint8x16_t tmp = vdupq_n_u8(0);
@@ -559,7 +584,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
void NEScaleKernel::scale_bilinear_nchw(const Window &window)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
// Compute the ratio between source height and destination height
const auto hr = static_cast<float>(_input->info()->dimension(1)) / static_cast<float>(_output->info()->dimension(1));
@@ -589,8 +614,13 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1];
const size_t in_stride = in_stide_in_bytes / _input->info()->element_size();
+ const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8);
+ const QuantizationInfo iq_info = _input->info()->quantization_info();
+ const QuantizationInfo oq_info = _output->info()->quantization_info();
+
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8:
case DataType::U8:
{
execute_window_loop(window, [&](const Coordinates & id)
@@ -600,29 +630,55 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
- const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
const int offset_row = in_yi * in_stide_in_bytes;
uint8x8_t tmp0 = vdup_n_u8(0);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
-
+ if(is_quantized)
+ {
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
+ tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
+ }
+ else
+ {
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
+ tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
+ }
uint8x8_t tmp1 = vdup_n_u8(0);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
-
+ if(is_quantized)
+ {
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
+ tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
+ }
+ else
+ {
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
+ tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
+ }
vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
},
in, offsets, dx, dy, out);
@@ -636,7 +692,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
const int offset_row = in_yi * in_stide_in_bytes;
int16x8x2_t tmp =
@@ -679,7 +735,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
const int offset_row = in_yi * in_stide_in_bytes;
float16x8x2_t tmp =
@@ -722,7 +778,7 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const int in_yi = std::floor((id.y() + 0.5f) * hr - 0.5f);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
const int offset_row = in_yi * in_stide_in_bytes;
float32x4x4_t tmp =
@@ -839,6 +895,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8:
case DataType::U8:
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
@@ -847,7 +904,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<uint8_t>(_input, _offsets, _dx, _dy, _output, hr,
+ scale_bilinear_nhwc_core<uint8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
}
break;
@@ -860,7 +917,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<int16_t>(_input, _offsets, _dx, _dy, _output, hr,
+ scale_bilinear_nhwc_core<int16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
}
break;
@@ -875,7 +932,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<float16_t>(_input, _offsets, _dx, _dy, _output, hr,
+ scale_bilinear_nhwc_core<float16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
}
break;
@@ -889,7 +946,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<float>(_input, _offsets, _dx, _dy, _output, hr,
+ scale_bilinear_nhwc_core<float>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
}
break;