From d473386e4d5e0edcf55e13a2bf3c422a23fac0de Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 9 Jul 2019 14:21:06 +0100 Subject: COMPMID-2447: Align TFlite nearest neighbor NE/CL functions with ACL Change-Id: Idd7b23247491d6e2e31d19b2a8aa522470ca174c Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/1500 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/core/CL/cl_kernels/scale.cl | 22 ++++++++++-- src/core/GLES_COMPUTE/cs_shaders/scale.cs | 9 ++++- src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp | 10 +++++- src/core/NEON/kernels/NEScaleKernel.cpp | 21 ++++++----- src/runtime/NEON/functions/NEScale.cpp | 2 +- tests/validation/reference/CropResize.cpp | 4 +-- tests/validation/reference/Scale.cpp | 47 +++++++++++++++---------- 7 files changed, 78 insertions(+), 37 deletions(-) diff --git a/src/core/CL/cl_kernels/scale.cl b/src/core/CL/cl_kernels/scale.cl index 5ac6443c98..499f9ea53f 100644 --- a/src/core/CL/cl_kernels/scale.cl +++ b/src/core/CL/cl_kernels/scale.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -33,10 +33,19 @@ */ inline const float8 transform_nearest(const float2 coord, const float2 scale) { +#ifdef SAMPLING_POLICY_TOP_LEFT + const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0); + const float4 new_x = in_x_coords * (float4)(scale.s0); + const float4 new_y = (float4)(coord.s1 * scale.s1); + return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3); +#elif SAMPLING_POLICY_CENTER const float4 in_x_coords = (float4)(coord.s0, 1 + coord.s0, 2 + coord.s0, 3 + coord.s0); const float4 new_x = (in_x_coords + ((float4)(0.5f))) * (float4)(scale.s0); const float4 new_y = (float4)((coord.s1 + 0.5f) * scale.s1); return (float8)(new_x.s0, new_y.s0, new_x.s1, new_y.s1, new_x.s2, new_y.s2, new_x.s3, new_y.s3); +#else /* SAMPLING_POLICY */ +#error("Unsupported sampling policy"); +#endif /* SAMPLING_POLICY */ } /** Transforms four 2D coordinates. This is used to map the output coordinates to the input coordinates. @@ -172,8 +181,15 @@ __kernel void scale_nearest_neighbour_nhwc( Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0); Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT); - const float new_x = (get_global_id(1) + 0.5f) * scale_x; - const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y; +#ifdef SAMPLING_POLICY_TOP_LEFT + const float new_x = get_global_id(1) * scale_x; + const float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y; +#elif SAMPLING_POLICY_CENTER + const float new_x = (get_global_id(1) + 0.5f) * scale_x; + const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y; +#else /* SAMPLING_POLICY */ +#error("Unsupported sampling policy"); +#endif /* SAMPLING_POLICY */ const float clamped_x = clamp(new_x, 0.0f, input_width - 1); const float clamped_y = clamp(new_y, 0.0f, input_height - 1); diff --git a/src/core/GLES_COMPUTE/cs_shaders/scale.cs b/src/core/GLES_COMPUTE/cs_shaders/scale.cs index b72c3392aa..8a1d3e4c2d 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/scale.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/scale.cs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,8 +58,15 @@ vec4[2] transform_nearest(vec2 coord, vec2 scale) vec4 in_x_coords = vec4(coord.x, 1.f + coord.x, 2.f + coord.x, 3.f + coord.x); vec4[2] t; +#if defined(SAMPLING_POLICY_CENTER) /* SAMPLING_POLICY_CENTER */ t[0] = (in_x_coords + (vec4(0.5f))) * scale.x; t[1] = vec4((coord.y + 0.5f) * scale.y); +#elif defined(SAMPLING_POLICY_TOP_LEFT) /* SAMPLING_POLICY_TOP_LEFT */ + t[0] = in_x_coords * scale.x; + t[1] = vec4(coord.y) * scale.y; +#else /* Unsupported sampling policy */ +#error Unsupported sampling policy +#endif return t; } diff --git a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp index f87615a27c..1de0852da8 100644 --- a/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -80,6 +80,14 @@ void GCScaleKernel::configure(const IGCTensor *input, IGCTensor *output, Interpo build_opts.emplace("#define DATA_TYPE_FP16"); build_opts.emplace("#define BORDER_SIZE " + support::cpp11::to_string(border.right)); + if(sampling_policy == SamplingPolicy::TOP_LEFT) + { + build_opts.emplace("#define SAMPLING_POLICY_TOP_LEFT"); + } + else + { + build_opts.emplace("#define SAMPLING_POLICY_CENTER"); + } // Configure kernel window unsigned int num_elems_processed_per_iteration = 4; diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 4a51627a20..8e9a34637b 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -169,7 +169,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen template inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets, ITensor *output, - float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c) + float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, float sampling_offset) { const int window_step_x = 16 / sizeof(T); const auto window_start_x = static_cast(window.x().start()); @@ -185,7 +185,7 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets execute_window_loop(window, [&](const Coordinates & id) { const int32_t offset = *reinterpret_cast(offsets->ptr_to_element(Coordinates(id.y(), id.z()))); - const int in_yi = (id.z() + 0.5f) * hr; + const int in_yi = std::floor((id.z() + sampling_offset) * hr); const int offset_row = in_yi * stride_h; int32_t x = window_start_x; for(; x < window_end_x - window_step_x; x += window_step_x) @@ -459,7 +459,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) const auto offsets_ptr = reinterpret_cast(offsets.ptr()); const uint8_t *const in_ptr = in.ptr(); - const int in_yi = std::floor((id.y() + 0.5f) * hr); + const int in_yi = std::floor((id.y() + _sampling_offset) * hr); const int in_yi_clamped = std::min(static_cast(_input->info()->dimension(1)), std::max(in_yi, -1)); ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast(_input->info()->dimension(1))); const int offset_row = in_yi_clamped * input_stride; @@ -500,7 +500,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) { const auto offsets_ptr = reinterpret_cast(offsets.ptr()); - const int in_yi = (id.y() + 0.5f) * hr; + const int in_yi = std::floor((id.y() + _sampling_offset) * hr); const int offset_row = in_yi * input_stride; tmp.val[0] = vsetq_lane_s16(*reinterpret_cast(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0); @@ -541,7 +541,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) { const auto offsets_ptr = reinterpret_cast(offsets.ptr()); - const int in_yi = (id.y() + 0.5f) * hr; + const int in_yi = std::floor((id.y() + _sampling_offset) * hr); const int offset_row = in_yi * input_stride; tmp.val[0] = vsetq_lane_f16(*reinterpret_cast(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0); @@ -584,7 +584,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) { const auto offsets_ptr = reinterpret_cast(offsets.ptr()); - const int in_yi = (id.y() + 0.5f) * hr; + const int in_yi = std::floor((id.y() + _sampling_offset) * hr); const int offset_row = in_yi * input_stride; tmp.val[0] = vsetq_lane_f32(*reinterpret_cast(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0); @@ -614,7 +614,6 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) } default: ARM_COMPUTE_ERROR("Not supported"); - break; } } @@ -936,7 +935,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) { if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { - scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c); + scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset); } else { @@ -949,7 +948,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) { if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { - scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c); + scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset); } else { @@ -964,7 +963,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { scale_nearest_nhwc_core(_input, _offsets, _output, hr, - window, win_in, input_stride_w, input_stride_h, input_stride_c); + window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset); } else { @@ -978,7 +977,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) { if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { - scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c); + scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset); } else { diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp index 425ee6c4db..be643b3757 100644 --- a/src/runtime/NEON/functions/NEScale.cpp +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -83,7 +83,7 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float execute_window_loop(win, [&](const Coordinates & id) { - const size_t in_xi = (id.x() + 0.5f) * wr; + const size_t in_xi = std::floor((id.x() + sampling_offset) * wr); *reinterpret_cast(offsets_it.ptr()) = in_xi * input_element_size; }, diff --git a/tests/validation/reference/CropResize.cpp b/tests/validation/reference/CropResize.cpp index 8cfce97eec..f25a0317be 100644 --- a/tests/validation/reference/CropResize.cpp +++ b/tests/validation/reference/CropResize.cpp @@ -59,8 +59,8 @@ SimpleTensor scale_image(const SimpleTensor &in, const TensorShape case InterpolationPolicy::NEAREST_NEIGHBOR: { //Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords - float x_src = (idw + 0.5f) * wr; - float y_src = (idh + 0.5f) * hr; + float x_src = std::floor(idw * wr); + float y_src = std::floor(idh * hr); in_id.set(1, x_src); in_id.set(2, y_src); diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp index 84f4fb83c1..63a2853c66 100644 --- a/tests/validation/reference/Scale.cpp +++ b/tests/validation/reference/Scale.cpp @@ -71,28 +71,25 @@ SimpleTensor scale_core(const SimpleTensor &in, float scale_x, float scale float x_src = 0; float y_src = 0; - switch(sampling_policy) - { - case SamplingPolicy::TOP_LEFT: - x_src = idx * wr; - y_src = idy * hr; - break; - case SamplingPolicy::CENTER: - x_src = (idx + 0.5f) * wr - 0.5f; - y_src = (idy + 0.5f) * hr - 0.5f; - break; - default: - ARM_COMPUTE_ERROR("Unsupported sampling policy."); - break; - } - switch(policy) { case InterpolationPolicy::NEAREST_NEIGHBOR: { - //Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords - x_src = (idx + 0.5f) * wr; - y_src = (idy + 0.5f) * hr; + switch(sampling_policy) + { + case SamplingPolicy::TOP_LEFT: + x_src = std::floor(idx * wr); + y_src = std::floor(idy * hr); + break; + case SamplingPolicy::CENTER: + //Calculate the source coords without -0.5f is equivalent to round the x_scr/y_src coords + x_src = (idx + 0.5f) * wr; + y_src = (idy + 0.5f) * hr; + break; + default: + ARM_COMPUTE_ERROR("Unsupported sampling policy."); + } + id.set(0, x_src); id.set(1, y_src); @@ -105,6 +102,20 @@ SimpleTensor scale_core(const SimpleTensor &in, float scale_x, float scale } case InterpolationPolicy::BILINEAR: { + switch(sampling_policy) + { + case SamplingPolicy::TOP_LEFT: + x_src = idx * wr; + y_src = idy * hr; + break; + case SamplingPolicy::CENTER: + x_src = (idx + 0.5f) * wr - 0.5f; + y_src = (idy + 0.5f) * hr - 0.5f; + break; + default: + ARM_COMPUTE_ERROR("Unsupported sampling policy."); + } + id.set(0, std::floor(x_src)); id.set(1, std::floor(y_src)); if(is_valid_pixel_index(x_src, y_src, width, height, border_size)) -- cgit v1.2.1