aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2019-07-09 14:21:06 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-07-11 13:31:25 +0000
commitd473386e4d5e0edcf55e13a2bf3c422a23fac0de (patch)
tree64c4132e8a46c809639d719e0426a7e6b9dd0371
parent9c9b70b9d30482d34f4f9c9dbc6479df163f96a1 (diff)
downloadComputeLibrary-d473386e4d5e0edcf55e13a2bf3c422a23fac0de.tar.gz
COMPMID-2447: Align TFlite nearest neighbor NE/CL functions with ACL
Change-Id: Idd7b23247491d6e2e31d19b2a8aa522470ca174c Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/1500 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r--src/core/CL/cl_kernels/scale.cl22
-rw-r--r--src/core/GLES_COMPUTE/cs_shaders/scale.cs9
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEScaleKernel.cpp21
-rw-r--r--src/runtime/NEON/functions/NEScale.cpp2
-rw-r--r--tests/validation/reference/CropResize.cpp4
-rw-r--r--tests/validation/reference/Scale.cpp47
7 files changed, 78 insertions, 37 deletions
diff --git a/src/core/CL/cl_kernels/scale.cl b/src/core/CL/cl_kernels/scale.cl
index 5ac6443c98..499f9ea53f 100644
--- a/src/core/CL/cl_kernels/scale.cl
+++ b/src/core/CL/cl_kernels/scale.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -33,10 +33,19 @@
*/
inline const float8 transform_nearest(const float2 coord, const float2 scale)
{
+#ifdef SAMPLING_POLICY_TOP_LEFT
+ const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
+ const float4 new_x = in_x_coords * (float4)(scale.s0);
+ const float4 new_y = (float4)(coord.s1 * scale.s1);
+ return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
+#elif SAMPLING_POLICY_CENTER
const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0);
const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0);
const float4 new_y = (float4)((coord.s1 + 0.5f) * scale.s1);
return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3);
+#else /* SAMPLING_POLICY */
+#error("Unsupported sampling policy");
+#endif /* SAMPLING_POLICY */
}
/** Transforms four 2D coordinates. This is used to map the output coordinates to the input coordinates.
@@ -172,8 +181,15 @@ __kernel void scale_nearest_neighbour_nhwc(
Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0);
Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT);
- const float new_x = (get_global_id(1) + 0.5f) * scale_x;
- const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y;
+#ifdef SAMPLING_POLICY_TOP_LEFT
+ const float new_x = get_global_id(1) * scale_x;
+ const float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y;
+#elif SAMPLING_POLICY_CENTER
+ const float new_x = (get_global_id(1) + 0.5f) * scale_x;
+ const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y;
+#else /* SAMPLING_POLICY */
+#error("Unsupported sampling policy");
+#endif /* SAMPLING_POLICY */
const float clamped_x = clamp(new_x, 0.0f, input_width - 1);
const float clamped_y = clamp(new_y, 0.0f, input_height - 1);
diff --git a/src/core/GLES_COMPUTE/cs_shaders/scale.cs b/src/core/GLES_COMPUTE/cs_shaders/scale.cs
index b72c3392aa..8a1d3e4c2d 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/scale.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/scale.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -58,8 +58,15 @@ vec4[2] transform_nearest(vec2 coord, vec2 scale)
vec4 in_x_coords = vec4(coord.x, 1.f + coord.x, 2.f + coord.x, 3.f + coord.x);
vec4[2] t;
+#if defined(SAMPLING_POLICY_CENTER) /* SAMPLING_POLICY_CENTER */
t[0] = (in_x_coords + (vec4(0.5f))) * scale.x;
t[1] = vec4((coord.y + 0.5f) * scale.y);
+#elif defined(SAMPLING_POLICY_TOP_LEFT) /* SAMPLING_POLICY_TOP_LEFT */
+ t[0] = in_x_coords * scale.x;
+ t[1] = vec4(coord.y) * scale.y;
+#else /* Unsupported sampling policy */
+#error Unsupported sampling policy
+#endif
return t;
}
diff --git a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
index f87615a27c..1de0852da8 100644
--- a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -80,6 +80,14 @@ void GCScaleKernel::configure(const IGCTensor *input, IGCTensor *output, Interpo
build_opts.emplace("#define DATA_TYPE_FP16");
build_opts.emplace("#define BORDER_SIZE " + support::cpp11::to_string(border.right));
+ if(sampling_policy == SamplingPolicy::TOP_LEFT)
+ {
+ build_opts.emplace("#define SAMPLING_POLICY_TOP_LEFT");
+ }
+ else
+ {
+ build_opts.emplace("#define SAMPLING_POLICY_CENTER");
+ }
// Configure kernel window
unsigned int num_elems_processed_per_iteration = 4;
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 4a51627a20..8e9a34637b 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -169,7 +169,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
template <typename T>
inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets, ITensor *output,
- float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c)
+ float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, float sampling_offset)
{
const int window_step_x = 16 / sizeof(T);
const auto window_start_x = static_cast<int32_t>(window.x().start());
@@ -185,7 +185,7 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets
execute_window_loop(window, [&](const Coordinates & id)
{
const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
- const int in_yi = (id.z() + 0.5f) * hr;
+ const int in_yi = std::floor((id.z() + sampling_offset) * hr);
const int offset_row = in_yi * stride_h;
int32_t x = window_start_x;
for(; x < window_end_x - window_step_x; x += window_step_x)
@@ -459,7 +459,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
const uint8_t *const in_ptr = in.ptr();
- const int in_yi = std::floor((id.y() + 0.5f) * hr);
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1)));
const int offset_row = in_yi_clamped * input_stride;
@@ -500,7 +500,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
{
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const int in_yi = (id.y() + 0.5f) * hr;
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
const int offset_row = in_yi * input_stride;
tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
@@ -541,7 +541,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
{
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const int in_yi = (id.y() + 0.5f) * hr;
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
const int offset_row = in_yi * input_stride;
tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
@@ -584,7 +584,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
{
const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const int in_yi = (id.y() + 0.5f) * hr;
+ const int in_yi = std::floor((id.y() + _sampling_offset) * hr);
const int offset_row = in_yi * input_stride;
tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
@@ -614,7 +614,6 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
}
default:
ARM_COMPUTE_ERROR("Not supported");
- break;
}
}
@@ -936,7 +935,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- scale_nearest_nhwc_core<uint8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c);
+ scale_nearest_nhwc_core<uint8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
}
else
{
@@ -949,7 +948,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- scale_nearest_nhwc_core<int16_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c);
+ scale_nearest_nhwc_core<int16_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
}
else
{
@@ -964,7 +963,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
scale_nearest_nhwc_core<float16_t>(_input, _offsets, _output, hr,
- window, win_in, input_stride_w, input_stride_h, input_stride_c);
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
}
else
{
@@ -978,7 +977,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
{
if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- scale_nearest_nhwc_core<float>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c);
+ scale_nearest_nhwc_core<float>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
}
else
{
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index 425ee6c4db..be643b3757 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -83,7 +83,7 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float
execute_window_loop(win, [&](const Coordinates & id)
{
- const size_t in_xi = (id.x() + 0.5f) * wr;
+ const size_t in_xi = std::floor((id.x() + sampling_offset) * wr);
*reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi * input_element_size;
},
diff --git a/tests/validation/reference/CropResize.cpp b/tests/validation/reference/CropResize.cpp
index 8cfce97eec..f25a0317be 100644
--- a/tests/validation/reference/CropResize.cpp
+++ b/tests/validation/reference/CropResize.cpp
@@ -59,8 +59,8 @@ SimpleTensor<float> scale_image(const SimpleTensor<float> &in, const TensorShape
case InterpolationPolicy::NEAREST_NEIGHBOR:
{
//Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords
- float x_src = (idw + 0.5f) * wr;
- float y_src = (idh + 0.5f) * hr;
+ float x_src = std::floor(idw * wr);
+ float y_src = std::floor(idh * hr);
in_id.set(1, x_src);
in_id.set(2, y_src);
diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp
index 84f4fb83c1..63a2853c66 100644
--- a/tests/validation/reference/Scale.cpp
+++ b/tests/validation/reference/Scale.cpp
@@ -71,28 +71,25 @@ SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale
float x_src = 0;
float y_src = 0;
- switch(sampling_policy)
- {
- case SamplingPolicy::TOP_LEFT:
- x_src = idx * wr;
- y_src = idy * hr;
- break;
- case SamplingPolicy::CENTER:
- x_src = (idx + 0.5f) * wr - 0.5f;
- y_src = (idy + 0.5f) * hr - 0.5f;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported sampling policy.");
- break;
- }
-
switch(policy)
{
case InterpolationPolicy::NEAREST_NEIGHBOR:
{
- //Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords
- x_src = (idx + 0.5f) * wr;
- y_src = (idy + 0.5f) * hr;
+ switch(sampling_policy)
+ {
+ case SamplingPolicy::TOP_LEFT:
+ x_src = std::floor(idx * wr);
+ y_src = std::floor(idy * hr);
+ break;
+ case SamplingPolicy::CENTER:
+ //Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords
+ x_src = (idx + 0.5f) * wr;
+ y_src = (idy + 0.5f) * hr;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported sampling policy.");
+ }
+
id.set(0, x_src);
id.set(1, y_src);
@@ -105,6 +102,20 @@ SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale
}
case InterpolationPolicy::BILINEAR:
{
+ switch(sampling_policy)
+ {
+ case SamplingPolicy::TOP_LEFT:
+ x_src = idx * wr;
+ y_src = idy * hr;
+ break;
+ case SamplingPolicy::CENTER:
+ x_src = (idx + 0.5f) * wr - 0.5f;
+ y_src = (idy + 0.5f) * hr - 0.5f;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported sampling policy.");
+ }
+
id.set(0, std::floor(x_src));
id.set(1, std::floor(y_src));
if(is_valid_pixel_index(x_src, y_src, width, height, border_size))