diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/cl_kernels/scale.cl | 26
-rw-r--r-- | src/core/CL/kernels/CLScaleKernel.cpp | 1
-rw-r--r-- | src/core/NEON/kernels/NEScaleKernel.cpp | 38
-rw-r--r-- | src/runtime/NEON/functions/NEScale.cpp | 10
4 files changed, 42 insertions(+), 33 deletions(-)
diff --git a/src/core/CL/cl_kernels/scale.cl b/src/core/CL/cl_kernels/scale.cl index 499f9ea53f..971087021f 100644 --- a/src/core/CL/cl_kernels/scale.cl +++ b/src/core/CL/cl_kernels/scale.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -100,10 +100,14 @@ __kernel void scale_nearest_neighbour_nchw( const float scale_x, const float scale_y) { - Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in); - Image out = CONVERT_TO_IMAGE_STRUCT(out); - const float2 r = (float2)(scale_x, scale_y); - const float8 tc = clamp_to_border_with_size(transform_nearest(get_current_coords(), r), input_width, input_height, BORDER_SIZE); + Image in = CONVERT_TO_IMAGE_STRUCT_NO_STEP(in); + Image out = CONVERT_TO_IMAGE_STRUCT(out); + const float2 r = (float2)(scale_x, scale_y); + float8 transformed = transform_nearest(get_current_coords(), r); +#ifdef ALIGN_CORNERS + transformed = round(transformed); +#endif // ALIGN_CORNERS + const float8 tc = clamp_to_border_with_size(transformed, input_width, input_height, BORDER_SIZE); vstore4(read_texels4(&in, convert_int8(tc)), 0, (__global DATA_TYPE *)out.ptr); } @@ -182,14 +186,18 @@ __kernel void scale_nearest_neighbour_nhwc( Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT); #ifdef SAMPLING_POLICY_TOP_LEFT - const float new_x = get_global_id(1) * scale_x; - const float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y; + float new_x = get_global_id(1) * scale_x; + float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y; #elif SAMPLING_POLICY_CENTER - const float new_x = (get_global_id(1) + 0.5f) * scale_x; - const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y; + float new_x = (get_global_id(1) + 0.5f) * scale_x; + float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y; #else /* SAMPLING_POLICY */ #error("Unsupported sampling policy"); #endif /* SAMPLING_POLICY */ +#ifdef ALIGN_CORNERS + new_x = round(new_x); + new_y = 
round(new_y); +#endif /* ALIGN_CORNERS */ const float clamped_x = clamp(new_x, 0.0f, input_width - 1); const float clamped_y = clamp(new_y, 0.0f, input_height - 1); diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index f3acc3b31c..be8b6fc1c1 100644 --- a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -211,6 +211,7 @@ void CLScaleKernel::configure(const CLCompileContext &compile_context, const ICL build_opts.add_option_if(info.border_mode == BorderMode::REPLICATE, "-DBORDER_MODE_REPLICATE"); build_opts.add_option_if(is_nhwc, "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); build_opts.add_option_if_else(info.sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT"); + build_opts.add_option_if(_align_corners, "-DALIGN_CORNERS"); if(call_quantized_kernel) { const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 0f329a1c2c..38a0706c12 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/Rounding.h" #include "arm_compute/core/utils/misc/Utility.h" #include "src/core/utils/ScaleUtils.h" @@ -167,7 +168,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen template <typename T> inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets, ITensor *output, - float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, float sampling_offset) + float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, float sampling_offset, bool align_corners) { const 
int window_step_x = 16 / sizeof(T); const auto window_start_x = static_cast<int32_t>(window.x().start()); @@ -183,7 +184,7 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets execute_window_loop(window, [&](const Coordinates & id) { const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z()))); - const int in_yi = std::floor((id.z() + sampling_offset) * hr); + const auto in_yi = static_cast<int>(align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr)); const int offset_row = in_yi * stride_h; int32_t x = window_start_x; for(; x < window_end_x - window_step_x; x += window_step_x) @@ -460,8 +461,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr()); const uint8_t *const in_ptr = in.ptr(); - const int in_yi = std::floor((id.y() + _sampling_offset) * hr); - const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1)); + const auto in_yi = static_cast<int>(_align_corners ? 
arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr)); + const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1)); ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1))); const int offset_row = in_yi_clamped * input_stride; @@ -497,8 +498,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr()); const uint8_t *const in_ptr = in.ptr(); - const int in_yi = std::floor((id.y() + _sampling_offset) * hr); - const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1)); + const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr)); + const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1)); ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1))); const int offset_row = in_yi_clamped * input_stride; @@ -537,9 +538,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) execute_window_loop(window, [&](const Coordinates & id) { const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr()); - - const int in_yi = std::floor((id.y() + _sampling_offset) * hr); - const int offset_row = in_yi * input_stride; + const auto in_yi = static_cast<int>(_align_corners ? 
arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr)); + const int offset_row = in_yi * input_stride; tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0); tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[0], 1); @@ -578,9 +578,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) execute_window_loop(window, [&](const Coordinates & id) { const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr()); - - const int in_yi = std::floor((id.y() + _sampling_offset) * hr); - const int offset_row = in_yi * input_stride; + const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr)); + const int offset_row = in_yi * input_stride; tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0); tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[0], 1); @@ -621,9 +620,8 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window) execute_window_loop(window, [&](const Coordinates & id) { const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr()); - - const int in_yi = std::floor((id.y() + _sampling_offset) * hr); - const int offset_row = in_yi * input_stride; + const auto in_yi = static_cast<int>(_align_corners ? 
arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr)); + const int offset_row = in_yi * input_stride; tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0); tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[4] + offset_row), tmp.val[0], 1); @@ -1024,7 +1022,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) { if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { - scale_nearest_nhwc_core<int8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset); + scale_nearest_nhwc_core<int8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners); } else { @@ -1038,7 +1036,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) { if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { - scale_nearest_nhwc_core<uint8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset); + scale_nearest_nhwc_core<uint8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners); } else { @@ -1051,7 +1049,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) { if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { - scale_nearest_nhwc_core<int16_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset); + scale_nearest_nhwc_core<int16_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners); } else { @@ -1066,7 +1064,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { scale_nearest_nhwc_core<float16_t>(_input, _offsets, _output, hr, - window, win_in, 
input_stride_w, input_stride_h, input_stride_c, _sampling_offset); + window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners); } else { @@ -1080,7 +1078,7 @@ void NEScaleKernel::scale_nhwc(const Window &window) { if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) { - scale_nearest_nhwc_core<float>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset); + scale_nearest_nhwc_core<float>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners); } else { diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp index 28fbab4303..eefdfdbaa7 100644 --- a/src/runtime/NEON/functions/NEScale.cpp +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/Rounding.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" @@ -43,7 +44,7 @@ namespace arm_compute { namespace { -void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size, SamplingPolicy sampling_policy) +void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size, SamplingPolicy sampling_policy, bool align_corners) { ARM_COMPUTE_ERROR_ON(nullptr == offsets); ARM_COMPUTE_UNUSED(sampling_policy); @@ -84,7 +85,8 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float execute_window_loop(win, [&](const Coordinates & id) { - const size_t in_xi = std::floor((id.x() + sampling_offset) * wr); + const float float_in_xi = (id.x() + sampling_offset) * wr; + const auto in_xi = static_cast<size_t>(align_corners ? 
arm_compute::utils::rounding::round_half_away_from_zero(float_in_xi) : std::floor(float_in_xi)); *reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi * input_element_size; }, @@ -143,7 +145,7 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo & _offsets.allocator()->allocate(); // Pre-compute offsets for nearest interpolation - precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, input_element_size, info.sampling_policy); + precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, input_element_size, info.sampling_policy, is_align_corners_used); break; } case InterpolationPolicy::BILINEAR: @@ -163,7 +165,7 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo & _dy.allocator()->allocate(); // Pre-compute dx, dy and offsets for bilinear interpolation - precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, input_element_size, info.sampling_policy); + precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, input_element_size, info.sampling_policy, is_align_corners_used); break; } case InterpolationPolicy::AREA: |