aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/CL/cl_kernels/scale.cl26
-rw-r--r--src/core/CL/kernels/CLScaleKernel.cpp1
-rw-r--r--src/core/NEON/kernels/NEScaleKernel.cpp38
-rw-r--r--src/runtime/NEON/functions/NEScale.cpp10
-rw-r--r--tests/validation/GLES_COMPUTE/Scale.cpp2
-rw-r--r--tests/validation/reference/Scale.cpp5
6 files changed, 46 insertions, 36 deletions
diff --git a/src/core/CL/cl_kernels/scale.cl b/src/core/CL/cl_kernels/scale.cl
index 499f9ea53f..971087021f 100644
--- a/src/core/CL/cl_kernels/scale.cl
+++ b/src/core/CL/cl_kernels/scale.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -100,10 +100,14 @@ __kernel void scale_nearest_neighbour_nchw(
const float scale_x,
const float scale_y)
{
- Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
- Image out = CONVERT_TO_IMAGE_STRUCT(out);
- const float2 r = (float2)(scale_x, scale_y);
- const float8 tc = clamp_to_border_with_size(transform_nearest(get_current_coords(), r), input_width, input_height, BORDER_SIZE);
+ Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in);
+ Image out = CONVERT_TO_IMAGE_STRUCT(out);
+ const float2 r = (float2)(scale_x, scale_y);
+ float8 transformed = transform_nearest(get_current_coords(), r);
+#ifdef ALIGN_CORNERS
+ transformed = round(transformed);
+#endif // ALIGN_CORNERS
+ const float8 tc = clamp_to_border_with_size(transformed, input_width, input_height, BORDER_SIZE);
vstore4(read_texels4(&in, convert_int8(tc)), 0, (__global DATA_TYPE *)out.ptr);
}
@@ -182,14 +186,18 @@ __kernel void scale_nearest_neighbour_nhwc(
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT);
#ifdef SAMPLING_POLICY_TOP_LEFT
- const float new_x = get_global_id(1) * scale_x;
- const float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y;
+ float new_x = get_global_id(1) * scale_x;
+ float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y;
#elif SAMPLING_POLICY_CENTER
- const float new_x = (get_global_id(1) + 0.5f) * scale_x;
- const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y;
+ float new_x = (get_global_id(1) + 0.5f) * scale_x;
+ float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y;
#else /* SAMPLING_POLICY */
#error("Unsupported sampling policy");
#endif /* SAMPLING_POLICY */
+#ifdef ALIGN_CORNERS
+ new_x = round(new_x);
+ new_y = round(new_y);
+#endif /* ALIGN_CORNERS */
const float clamped_x = clamp(new_x, 0.0f, input_width - 1);
const float clamped_y = clamp(new_y, 0.0f, input_height - 1);
diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp
index f3acc3b31c..be8b6fc1c1 100644
--- a/src/core/CL/kernels/CLScaleKernel.cpp
+++ b/src/core/CL/kernels/CLScaleKernel.cpp
@@ -211,6 +211,7 @@ void CLScaleKernel::configure(const CLCompileContext &compile_context, const ICL
build_opts.add_option_if(info.border_mode == BorderMode::REPLICATE, "-DBORDER_MODE_REPLICATE");
build_opts.add_option_if(is_nhwc, "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
build_opts.add_option_if_else(info.sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT");
+ build_opts.add_option_if(_align_corners, "-DALIGN_CORNERS");
if(call_quantized_kernel)
{
const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 0f329a1c2c..38a0706c12 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -28,6 +28,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/NEON/wrapper/wrapper.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/Rounding.h"
#include "arm_compute/core/utils/misc/Utility.h"
#include "src/core/utils/ScaleUtils.h"
@@ -167,7 +168,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
template <typename T>
inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets, ITensor *output,
- float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, float sampling_offset)
+ float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, float sampling_offset, bool align_corners)
{
const int window_step_x = 16 / sizeof(T);
const auto window_start_x = static_cast<int32_t>(window.x().start());
@@ -183,7 +184,7 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets
execute_window_loop(window, [&](const Coordinates & id)
{
const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
- const int in_yi = std::floor((id.z() + sampling_offset) * hr);
+ const auto in_yi = static_cast<int>(align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr));
const int offset_row = in_yi * stride_h;
int32_t x = window_start_x;
for(; x < window_end_x - window_step_x; x += window_step_x)
@@ -460,8 +461,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
const uint8_t *const in_ptr = in.ptr();
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
- const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
+ const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
+ const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1)));
const int offset_row = in_yi_clamped * input_stride;
@@ -497,8 +498,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
const uint8_t *const in_ptr = in.ptr();
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
- const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
+ const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
+ const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1)));
const int offset_row = in_yi_clamped * input_stride;
@@ -537,9 +538,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
execute_window_loop(window, [&](const Coordinates & id)
{
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
- const int offset_row = in_yi * input_stride;
+ const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
+ const int offset_row = in_yi * input_stride;
tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[0], 1);
@@ -578,9 +578,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
execute_window_loop(window, [&](const Coordinates & id)
{
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
- const int offset_row = in_yi * input_stride;
+ const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
+ const int offset_row = in_yi * input_stride;
tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[0], 1);
@@ -621,9 +620,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
execute_window_loop(window, [&](const Coordinates & id)
{
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
- const int offset_row = in_yi * input_stride;
+ const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
+ const int offset_row = in_yi * input_stride;
tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[4] + offset_row), tmp.val[0], 1);
@@ -1024,7 +1022,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- scale_nearest_nhwc_core<int8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
+ scale_nearest_nhwc_core<int8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
}
else
{
@@ -1038,7 +1036,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- scale_nearest_nhwc_core<uint8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
+ scale_nearest_nhwc_core<uint8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
}
else
{
@@ -1051,7 +1049,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- scale_nearest_nhwc_core<int16_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
+ scale_nearest_nhwc_core<int16_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
}
else
{
@@ -1066,7 +1064,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
scale_nearest_nhwc_core<float16_t>(_input, _offsets, _output, hr,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
}
else
{
@@ -1080,7 +1078,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- scale_nearest_nhwc_core<float>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
+ scale_nearest_nhwc_core<float>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
}
else
{
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index 28fbab4303..eefdfdbaa7 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Window.h"
+#include "arm_compute/core/utils/misc/Rounding.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "arm_compute/runtime/TensorAllocator.h"
@@ -43,7 +44,7 @@ namespace arm_compute
{
namespace
{
-void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size, SamplingPolicy sampling_policy)
+void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size, SamplingPolicy sampling_policy, bool align_corners)
{
ARM_COMPUTE_ERROR_ON(nullptr == offsets);
ARM_COMPUTE_UNUSED(sampling_policy);
@@ -84,7 +85,8 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float
execute_window_loop(win, [&](const Coordinates & id)
{
- const size_t in_xi = std::floor((id.x() + sampling_offset) * wr);
+ const float float_in_xi = (id.x() + sampling_offset) * wr;
+ const auto in_xi = static_cast<size_t>(align_corners ? arm_compute::utils::rounding::round_half_away_from_zero(float_in_xi) : std::floor(float_in_xi));
*reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi * input_element_size;
},
@@ -143,7 +145,7 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo &
_offsets.allocator()->allocate();
// Pre-compute offsets for nearest interpolation
- precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, input_element_size, info.sampling_policy);
+ precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, input_element_size, info.sampling_policy, is_align_corners_used);
break;
}
case InterpolationPolicy::BILINEAR:
@@ -163,7 +165,7 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo &
_dy.allocator()->allocate();
// Pre-compute dx, dy and offsets for bilinear interpolation
- precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, input_element_size, info.sampling_policy);
+ precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, input_element_size, info.sampling_policy, is_align_corners_used);
break;
}
case InterpolationPolicy::AREA:
diff --git a/tests/validation/GLES_COMPUTE/Scale.cpp b/tests/validation/GLES_COMPUTE/Scale.cpp
index 7a88159481..62b30da7a2 100644
--- a/tests/validation/GLES_COMPUTE/Scale.cpp
+++ b/tests/validation/GLES_COMPUTE/Scale.cpp
@@ -52,7 +52,7 @@ const auto ScaleDataTypes = framework::dataset::make("DataType",
DataType::F16,
});
-/** Align corners, this functionality is supported only by NEON */
+/** Aligned corners, this functionality is supported only by NEON and OpenCL backends */
const auto AlignCorners = framework::dataset::make("AlignCorners",
{
false,
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index 44beabb2d4..72f96d446d 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -25,6 +25,7 @@
#include "Scale.h"
#include "Utils.h"
+#include "arm_compute/core/utils/misc/Rounding.h"
#include "arm_compute/core/utils/misc/Utility.h"
#include "src/core/utils/ScaleUtils.h"
@@ -81,8 +82,8 @@ SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale
switch(sampling_policy)
{
case SamplingPolicy::TOP_LEFT:
- x_src = std::floor(idx * wr);
- y_src = std::floor(idy * hr);
+ x_src = align_corners ? arm_compute::utils::rounding::round_half_away_from_zero(idx * wr) : std::floor(idx * wr);
+ y_src = align_corners ? arm_compute::utils::rounding::round_half_away_from_zero(idy * hr) : std::floor(idy * hr);
break;
case SamplingPolicy::CENTER:
//Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords