author     George Wort <george.wort@arm.com>    2019-01-25 15:38:33 +0000
committer  Pablo Marquez <pablo.tello@arm.com>  2019-03-05 11:21:01 +0000
commit     05398a948a2b43584b16d91f6efdda9eb361ec74 (patch)
tree       01963cd67610dd69915076be8577b28e025eb848 /src
parent     f112ede50530374b48ea2f87c1f0e02262cffc78 (diff)
download   ComputeLibrary-05398a948a2b43584b16d91f6efdda9eb361ec74.tar.gz
COMPMID-1843: Implement NECrop
Change-Id: I27e8b1a00c2315c72106e8e596f84ad48fb770e3
Signed-off-by: George Wort <george.wort@arm.com>
Reviewed-on: https://review.mlplatform.org/c/648
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src')
-rw-r--r--  src/core/NEON/kernels/NECropKernel.cpp       400
-rw-r--r--  src/core/NEON/kernels/NEScaleKernel.cpp      137
-rw-r--r--  src/runtime/NEON/functions/NECropResize.cpp  113
-rw-r--r--  src/runtime/NEON/functions/NEScale.cpp        30
4 files changed, 617 insertions, 63 deletions
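
Not part of the commit itself, but for orientation: a minimal, hypothetical usage sketch of the new NECropResize function added below. Tensor names and sizes are illustrative and follow the checks in NECropResize::validate (boxes holds normalized [y0, x0, y1, x1] coordinates per box, box_ind the batch index of each box, and the F32 NHWC output has shape [channels, crop_width, crop_height, num_boxes]):

    // Hypothetical example: crop two boxes from a batch of NHWC images and
    // resize each crop to 64x64 (assumed sizes; adjust to the actual tensors).
    Tensor input, boxes, box_ind, output; // input: NHWC; boxes: [4, 2]; box_ind: [2]
    // ... allocate and fill input, boxes and box_ind here ...
    NECropResize crop_resize;
    crop_resize.configure(&input, &boxes, &box_ind, &output,
                          Coordinates2D{ 64, 64 },       // crop_size
                          InterpolationPolicy::BILINEAR, // AREA is rejected by validate()
                          0.f);                          // extrapolation_value
    crop_resize.run();
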
diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp
new file mode 100644
index 0000000000..b6fe5819e4
--- /dev/null
+++ b/src/core/NEON/kernels/NECropKernel.cpp
@@ -0,0 +1,400 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NECropKernel.h"
+
+#include "arm_compute/core/CPP/Validate.h"
+#include "arm_compute/core/IAccessWindow.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Window.h"
+
+#include "arm_compute/core/NEON/wrapper/wrapper.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/helpers/bit_ops.h"
+#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include <map>
+
+namespace arm_compute
+{
+namespace
+{
+template <typename T>
+inline float32x4_t load_as_f32(T *ptr)
+{
+ ARM_COMPUTE_UNUSED(ptr);
+ ARM_COMPUTE_ERROR("Type not supported.");
+}
+
+template <>
+inline float32x4_t load_as_f32(float *ptr)
+{
+ return wrapper::vloadq(ptr);
+}
+
+template <>
+inline float32x4_t load_as_f32(int32_t *ptr)
+{
+ return vcvtq_f32_s32(wrapper::vloadq(ptr));
+}
+
+template <>
+inline float32x4_t load_as_f32(uint32_t *ptr)
+{
+ return vcvtq_f32_u32(wrapper::vloadq(ptr));
+}
+
+template <>
+inline float32x4_t load_as_f32(int16_t *ptr)
+{
+ return vcvtq_f32_s32(vmovl_s16(wrapper::vload(ptr)));
+}
+
+template <>
+inline float32x4_t load_as_f32(uint16_t *ptr)
+{
+ return vcvtq_f32_u32(vmovl_u16(wrapper::vload(ptr)));
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+template <>
+inline float32x4_t load_as_f32(float16_t *ptr)
+{
+ return vcvt_f32_f16(wrapper::vload(ptr));
+}
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+template <typename T, bool input_has_single_channel, bool is_width_flipped>
+inline void in_bounds_crop_window(const ITensor *input, const ITensor *output, float *output_ptr, Coordinates input_offset,
+ int32_t window_step_x, int32_t output_width_start, int32_t output_width_limit)
+{
+ // Reverse elements if width flipped.
+ if(is_width_flipped)
+ {
+ // Collapse first dimension if possible.
+ if(input_has_single_channel)
+ {
+ int32_t x = output_width_start;
+ Coordinates negative_offset(input_offset);
+ negative_offset.set(1, negative_offset[1] - window_step_x + 1);
+ for(; x <= output_width_limit - window_step_x; x += window_step_x, negative_offset[1] -= window_step_x)
+ {
+ auto in = load_as_f32(reinterpret_cast<T *>(input->ptr_to_element(negative_offset)));
+
+ in = wrapper::vrev64(in);
+ in = wrapper::vcombine(wrapper::vgethigh(in), wrapper::vgetlow(in));
+
+ wrapper::vstore(output_ptr + x, in);
+ }
+ input_offset[1] = negative_offset[1] + window_step_x - 1;
+ for(; x < output_width_limit; ++x, --input_offset[1])
+ {
+ *(output_ptr + x) = static_cast<float>(*reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ }
+ }
+ else
+ {
+ for(int32_t x = output_width_start; x < output_width_limit; ++x, --input_offset[1])
+ {
+ input_offset.set(0, 0);
+ int32_t c = 0;
+ for(; c <= static_cast<int32_t>(input->info()->dimension(0)) - window_step_x; c += window_step_x, input_offset[0] += window_step_x)
+ {
+ auto in = load_as_f32(reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ wrapper::vstore(output_ptr + x * output->info()->dimension(0) + c, in);
+ }
+ for(; c < static_cast<int32_t>(input->info()->dimension(0)); ++c, ++input_offset[0])
+ {
+ *(output_ptr + x * output->info()->dimension(0) + c) = static_cast<float>(*reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ }
+ }
+ }
+ }
+ else
+ {
+ // Use memcpy if the elements don't need converting to float.
+ if(std::is_same<T, float>::value)
+ {
+ memcpy(static_cast<void *>(output_ptr + output_width_start * output->info()->dimension(0)),
+ reinterpret_cast<const void *>(input->ptr_to_element(input_offset)),
+ (output_width_limit - output_width_start) * output->info()->dimension(0) * output->info()->element_size());
+ }
+ else
+ {
+ int32_t x = 0;
+ int32_t limit = (output_width_limit - output_width_start) * static_cast<int32_t>(output->info()->dimension(0));
+ float *output_start_ptr = output_ptr + output_width_start * output->info()->dimension(0);
+ for(; x <= limit - window_step_x; x += window_step_x, input_offset[0] += window_step_x)
+ {
+ auto in = load_as_f32(reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ wrapper::vstore(output_start_ptr + x, in);
+ }
+ for(; x < limit; ++x, ++input_offset[0])
+ {
+ *(output_start_ptr + x) = static_cast<float>(*reinterpret_cast<T *>(input->ptr_to_element(input_offset)));
+ }
+ }
+ }
+}
+
+inline void out_of_bounds_crop_window(const ITensor *output, float *output_ptr, float extrapolation_value,
+ int32_t window_step_x, int32_t output_width_start, int32_t output_width_limit)
+{
+ auto in = wrapper::vdup_n(extrapolation_value, wrapper::traits::vector_128_tag());
+ int32_t x = 0;
+ int32_t limit = (output_width_limit - output_width_start) * static_cast<int32_t>(output->info()->dimension(0));
+ float *output_start_ptr = output_ptr + output_width_start * output->info()->dimension(0);
+ for(; x <= limit - window_step_x; x += window_step_x)
+ {
+ wrapper::vstore(output_start_ptr + x, in);
+ }
+ for(; x < limit; ++x)
+ {
+ *(output_start_ptr + x) = extrapolation_value;
+ }
+}
+
+template <bool is_height_flipped, bool has_cols_in_bounds, bool has_cols_out_of_bounds_before, bool has_cols_out_of_bounds_after>
+inline void execute_window(const ITensor *input, const ITensor *output, Coordinates input_offset, float extrapolation_value,
+ const uint32_t rows_out_of_bounds[], const uint32_t cols_out_of_bounds[], NECropKernel::InBoundsCropFunction *in_bounds_crop_function)
+{
+ // Output is always float.
+ const int window_step_x = 16 / sizeof(float);
+ auto *output_ptr = reinterpret_cast<float *>(output->buffer());
+ // Output window:
+ // --------------------------------
+ // | Out of bounds |
+ // | rows before |
+ // |------------------------------|
+ // | Out of | In | Out of |
+ // | bounds | bounds | bounds |
+ // | cols | elements | cols |
+ // | before | copied | after |
+ // | | from input | |
+ // --------------------------------
+ // | Out of bounds |
+ // | rows after |
+ // |------------------------------|
+ // Fill all output rows that have no elements that are within the input bounds with the extrapolation value.
+ // First for the rows before the in bounds rows.
+ out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, rows_out_of_bounds[0] * output->info()->dimension(1));
+ output_ptr += rows_out_of_bounds[0] * output->info()->dimension(1) * output->info()->dimension(0);
+ // Iterate through each row that has any elements within the input bounds.
+ for(uint32_t row = rows_out_of_bounds[0]; static_cast<int32_t>(row) < static_cast<int32_t>(output->info()->dimension(2) - rows_out_of_bounds[1]);
+ ++row, is_height_flipped ? --input_offset[2] : ++input_offset[2])
+ {
+ // Fill all elements in the row that are out of bounds with the extrapolation value.
+ // First for the elements before the in bounds elements.
+ if(has_cols_out_of_bounds_before)
+ {
+ out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, cols_out_of_bounds[0]);
+ }
+ // Copy all elements within the input bounds from the input tensor.
+ if(has_cols_in_bounds)
+ {
+ (*in_bounds_crop_function)(input, output, output_ptr, input_offset, window_step_x, cols_out_of_bounds[0], output->info()->dimension(1) - cols_out_of_bounds[1]);
+ }
+ // Fill all elements after the in bounds elements with the extrapolation value.
+ if(has_cols_out_of_bounds_after)
+ {
+ out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, output->info()->dimension(1) - cols_out_of_bounds[1], output->info()->dimension(1));
+ }
+ output_ptr += output->info()->dimension(1) * output->info()->dimension(0);
+ }
+ // Fill all rows after the in bounds elements with the extrapolation value.
+ out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, rows_out_of_bounds[1] * output->info()->dimension(1));
+}
+} // namespace
+
+NECropKernel::NECropKernel()
+ : _input(nullptr), _crop_boxes(nullptr), _box_ind(nullptr), _output(nullptr), _start(), _end(), _crop_box_ind(0), _extrapolation_value(0), _rows_out_of_bounds(), _cols_out_of_bounds(),
+ _in_bounds_crop_functions(), _in_bounds_crop_function(nullptr), _crop_function(nullptr)
+{
+}
+
+void NECropKernel::configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind, float extrapolation_value)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), crop_boxes->info(), box_ind->info(), output->info(), crop_box_ind, extrapolation_value));
+
+ _input = input;
+ _crop_boxes = crop_boxes;
+ _box_ind = box_ind;
+ _output = output;
+ _crop_box_ind = crop_box_ind;
+ _extrapolation_value = extrapolation_value;
+
+ const static std::map<std::pair<DataType, bool>, std::pair<NECropKernel::InBoundsCropFunction *, NECropKernel::InBoundsCropFunction *>> in_map_function =
+ {
+ { { DataType::F32, false }, { &in_bounds_crop_window<float, false, false>, &in_bounds_crop_window<float, false, true> } },
+ { { DataType::F32, true }, { &in_bounds_crop_window<float, true, false>, &in_bounds_crop_window<float, true, true> } },
+ { { DataType::U16, false }, { &in_bounds_crop_window<uint16_t, false, false>, &in_bounds_crop_window<uint16_t, false, true> } },
+ { { DataType::U16, true }, { &in_bounds_crop_window<uint16_t, true, false>, &in_bounds_crop_window<uint16_t, true, true> } },
+ { { DataType::S16, false }, { &in_bounds_crop_window<int16_t, false, false>, &in_bounds_crop_window<int16_t, false, true> } },
+ { { DataType::S16, true }, { &in_bounds_crop_window<int16_t, true, false>, &in_bounds_crop_window<int16_t, true, true> } },
+ { { DataType::U32, false }, { &in_bounds_crop_window<uint32_t, false, false>, &in_bounds_crop_window<uint32_t, false, true> } },
+ { { DataType::U32, true }, { &in_bounds_crop_window<uint32_t, true, false>, &in_bounds_crop_window<uint32_t, true, true> } },
+ { { DataType::S32, false }, { &in_bounds_crop_window<int32_t, false, false>, &in_bounds_crop_window<int32_t, false, true> } },
+ { { DataType::S32, true }, { &in_bounds_crop_window<int32_t, true, false>, &in_bounds_crop_window<int32_t, true, true> } },
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ { { DataType::F16, false }, { &in_bounds_crop_window<float16_t, false, false>, &in_bounds_crop_window<float16_t, false, true> } },
+ { { DataType::F16, true }, { &in_bounds_crop_window<float16_t, true, false>, &in_bounds_crop_window<float16_t, true, true> } }
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ };
+
+ auto in_it = in_map_function.find({ input->info()->data_type(), input->info()->dimension(0) == 1 });
+
+ if(in_it != in_map_function.end())
+ {
+ _in_bounds_crop_functions = in_it->second;
+ }
+}
+
+Status NECropKernel::validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind, float extrapolation_value)
+{
+ ARM_COMPUTE_UNUSED(extrapolation_value);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::F16, DataType::U32, DataType::S32, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().num_dimensions() > 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[0] != 4);
+ ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[1] != box_ind->tensor_shape()[0]);
+ ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[1] <= crop_box_ind);
+ ARM_COMPUTE_RETURN_ERROR_ON(box_ind->tensor_shape()[0] <= crop_box_ind);
+ if(output->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() != 3);
+ ARM_COMPUTE_RETURN_ERROR_ON(output->has_padding());
+ }
+ return Status{};
+}
+
+void NECropKernel::configure_output_shape()
+{
+ // _crop_box_ind is used to index _crop_boxes and retrieve the appropriate crop box.
+ // The crop box is specified by normalized coordinates [y0, x0, y1, x1].
+ const float x0 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(1, _crop_box_ind)));
+ const float y0 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(0, _crop_box_ind)));
+ const float x1 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(3, _crop_box_ind)));
+ const float y1 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(2, _crop_box_ind)));
+ // The normalized coordinates are scaled to floating point image coordinates, which are then rounded to integers.
+ _start = Coordinates(std::floor(x0 * (_input->info()->tensor_shape()[1] - 1) + 0.5f),
+ std::floor(y0 * (_input->info()->tensor_shape()[2] - 1) + 0.5f));
+ _end = Coordinates(std::floor(x1 * (_input->info()->tensor_shape()[1] - 1) + 0.5f),
+ std::floor(y1 * (_input->info()->tensor_shape()[2] - 1) + 0.5f));
+ const TensorShape out_shape(_input->info()->tensor_shape()[0], abs(_end[0] - _start[0]) + 1, abs(_end[1] - _start[1]) + 1);
+ _output->info()->set_tensor_shape(out_shape);
+
+ _in_bounds_crop_function = _start[0] <= _end[0] ? _in_bounds_crop_functions.first : _in_bounds_crop_functions.second;
+
+ bool is_width_flipped = _end[0] < _start[0];
+ bool is_height_flipped = _end[1] < _start[1];
+ if(is_height_flipped)
+ {
+ _rows_out_of_bounds[0] = _start[1] >= static_cast<int32_t>(_input->info()->dimension(2)) ? std::min(static_cast<uint32_t>(_start[1] - _input->info()->dimension(2) + 1),
+ static_cast<uint32_t>(_output->info()->dimension(2))) :
+ 0;
+ _rows_out_of_bounds[1] = _end[1] < 0 ? std::min(static_cast<uint32_t>(-_end[1]),
+ static_cast<uint32_t>(_output->info()->dimension(2))) :
+ 0;
+ }
+ else
+ {
+ _rows_out_of_bounds[0] = _start[1] < 0 ? std::min(static_cast<uint32_t>(-_start[1]),
+ static_cast<uint32_t>(_output->info()->dimension(2))) :
+ 0;
+ _rows_out_of_bounds[1] = _end[1] >= static_cast<int32_t>(_input->info()->dimension(2)) ? std::min(static_cast<uint32_t>(_end[1] - _input->info()->dimension(2) + 1),
+ static_cast<uint32_t>(_output->info()->dimension(2))) :
+ 0;
+ }
+ if(is_width_flipped)
+ {
+ _cols_out_of_bounds[0] = _start[0] >= static_cast<int32_t>(_input->info()->dimension(1)) ? std::min(static_cast<uint32_t>(_start[0] - _input->info()->dimension(1) + 1),
+ static_cast<uint32_t>(_output->info()->dimension(1))) :
+ 0;
+ _cols_out_of_bounds[1] = _end[0] < 0 ? std::min(static_cast<uint32_t>(-_end[0]),
+ static_cast<uint32_t>(_output->info()->dimension(1))) :
+ 0;
+ }
+ else
+ {
+ _cols_out_of_bounds[0] = _start[0] < 0 ? std::min(static_cast<uint32_t>(-_start[0]),
+ static_cast<uint32_t>(_output->info()->dimension(1))) :
+ 0;
+ _cols_out_of_bounds[1] = _end[0] >= static_cast<int32_t>(_input->info()->dimension(1)) ? std::min(static_cast<uint32_t>(_end[0] - _input->info()->dimension(1) + 1),
+ static_cast<uint32_t>(_output->info()->dimension(1))) :
+ 0;
+ }
+
+ const static std::map<std::tuple<bool, bool, bool, bool>, NECropKernel::CropFunction *> map_function =
+ {
+ { std::make_tuple(false, false, false, false), &execute_window<false, false, false, false> },
+ { std::make_tuple(false, false, false, true), &execute_window<false, false, false, true> },
+ { std::make_tuple(false, false, true, false), &execute_window<false, false, true, false> },
+ { std::make_tuple(false, false, true, true), &execute_window<false, false, true, true> },
+ { std::make_tuple(false, true, false, false), &execute_window<false, true, false, false> },
+ { std::make_tuple(false, true, false, true), &execute_window<false, true, false, true> },
+ { std::make_tuple(false, true, true, false), &execute_window<false, true, true, false> },
+ { std::make_tuple(false, true, true, true), &execute_window<false, true, true, true> },
+ { std::make_tuple(true, false, false, false), &execute_window<true, false, false, false> },
+ { std::make_tuple(true, false, false, true), &execute_window<true, false, false, true> },
+ { std::make_tuple(true, false, true, false), &execute_window<true, false, true, false> },
+ { std::make_tuple(true, false, true, true), &execute_window<true, false, true, true> },
+ { std::make_tuple(true, true, false, false), &execute_window<true, true, false, false> },
+ { std::make_tuple(true, true, false, true), &execute_window<true, true, false, true> },
+ { std::make_tuple(true, true, true, false), &execute_window<true, true, true, false> },
+ { std::make_tuple(true, true, true, true), &execute_window<true, true, true, true> },
+ };
+
+ auto it = map_function.find(std::make_tuple(is_height_flipped,
+ _cols_out_of_bounds[0] + _cols_out_of_bounds[1] < _output->info()->dimension(1),
+ _cols_out_of_bounds[0] > 0,
+ _cols_out_of_bounds[1] > 0));
+
+ if(it != map_function.end())
+ {
+ _crop_function = it->second;
+ }
+
+ INEKernel::configure(calculate_max_window(*_output->info()));
+}
+
+void NECropKernel::run(const Window &window, const ThreadInfo &info)
+{
+ ARM_COMPUTE_UNUSED(window, info);
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+
+ ARM_COMPUTE_ERROR_ON(_input->info()->has_padding());
+ ARM_COMPUTE_ERROR_ON(_output->info()->has_padding());
+
+ uint32_t batch_index = *(reinterpret_cast<int32_t *>(_box_ind->ptr_to_element(Coordinates(_crop_box_ind))));
+ Coordinates input_offset(0, _end[0] < _start[0] ? _start[0] - _cols_out_of_bounds[0] : _start[0] + _cols_out_of_bounds[0],
+ _end[1] < _start[1] ? _start[1] - _rows_out_of_bounds[0] : _start[1] + _rows_out_of_bounds[0], batch_index);
+ (*_crop_function)(_input, _output, input_offset, _extrapolation_value, _rows_out_of_bounds, _cols_out_of_bounds, _in_bounds_crop_function);
+}
+} // namespace arm_compute
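
A worked example of the bounds bookkeeping in configure_output_shape() above (numbers are illustrative, not from the commit):

    // Assume input height 10 (rows 0..9) and a height-flipped crop from
    // _start[1] = 12 down to _end[1] = -3, so output height = |(-3) - 12| + 1 = 16.
    // _rows_out_of_bounds[0] = 12 - 10 + 1 = 3   (rows 12, 11, 10 lie above the input)
    // _rows_out_of_bounds[1] = -(-3)        = 3   (rows -1, -2, -3 lie below the input)
    // leaving 16 - 3 - 3 = 10 in-bounds rows (9 down to 0) copied from the input;
    // the out-of-bounds rows are filled with the extrapolation value.
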
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 3d300ef26b..64f35290ba 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -45,7 +45,7 @@ namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *output, InterpolationPolicy policy,
- BorderMode border_mode, SamplingPolicy sampling_policy)
+ BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8);
@@ -53,7 +53,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(output == input);
ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT);
- ARM_COMPUTE_UNUSED(border_mode);
+ ARM_COMPUTE_RETURN_ERROR_ON(!use_padding && border_mode != BorderMode::CONSTANT);
+ ARM_COMPUTE_UNUSED(constant_border_value);
const DataLayout data_layout = input->data_layout();
ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH)) == 0);
@@ -121,40 +122,44 @@ std::pair<Status, Window> validate_and_configure_window_nchw(ITensorInfo *input,
std::pair<Status, Window> validate_and_configure_window_nhwc(ITensorInfo *input, ITensorInfo *output,
InterpolationPolicy policy, bool border_undefined,
- SamplingPolicy sampling_policy, BorderSize border_size)
+ SamplingPolicy sampling_policy, BorderSize border_size, bool use_padding)
{
bool window_changed{ false };
Window win{};
- const unsigned int num_elems_processed_per_iteration = (policy == InterpolationPolicy::NEAREST_NEIGHBOR) ? 16 / input->element_size() : 1;
+ const unsigned int num_elems_processed_per_iteration = (use_padding && policy == InterpolationPolicy::NEAREST_NEIGHBOR) ? 16 / input->element_size() : 1;
// Configure kernel window
win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowStatic input_access(input, 0, -border_size.top,
- ceil_to_multiple(input->tensor_shape()[0], num_elems_processed_per_iteration),
- input->tensor_shape()[1]);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- window_changed = update_window_and_padding(win, input_access, output_access);
- output->set_valid_region(calculate_valid_region_scale(*input, output->tensor_shape(),
- policy, sampling_policy, border_undefined));
+ if(use_padding)
+ {
+ AccessWindowStatic input_access(input, 0, -border_size.top, use_padding ? ceil_to_multiple(input->tensor_shape()[0], num_elems_processed_per_iteration) : num_elems_processed_per_iteration,
+ input->tensor_shape()[1]);
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+ window_changed = update_window_and_padding(win, input_access, output_access);
+ output->set_valid_region(calculate_valid_region_scale(*input, output->tensor_shape(), policy, sampling_policy, border_undefined));
+ }
Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
return std::make_pair(err, win);
}
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *dx, ITensorInfo *dy, ITensorInfo *offsets, ITensorInfo *output,
- InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy, BorderSize border_size)
+ InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy, BorderSize border_size, bool use_padding)
{
std::pair<Status, Window> win_config;
switch(input->data_layout())
{
case DataLayout::NCHW:
+ if(!use_padding)
+ {
+ return std::make_pair(ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Padding required for NCHW"), Window{});
+ }
win_config = validate_and_configure_window_nchw(input, dx, dy, offsets, output, policy, border_undefined, sampling_policy, border_size);
break;
case DataLayout::NHWC:
- win_config = validate_and_configure_window_nhwc(input, output, policy, border_undefined, sampling_policy, border_size);
+ win_config = validate_and_configure_window_nhwc(input, output, policy, border_undefined, sampling_policy, border_size, use_padding);
break;
default:
win_config = std::make_pair(ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported data layout!"), Window{});
@@ -167,6 +172,12 @@ template <typename T>
inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets, ITensor *output,
float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c)
{
+ const int window_step_x = 16 / sizeof(T);
+ const auto window_start_x = static_cast<int32_t>(window.x().start());
+ const auto window_end_x = static_cast<int32_t>(window.x().end());
+
+ window.set(Window::DimX, Window::Dimension(0, 1, 1));
+
Iterator in(input, win_in);
Iterator out(output, window);
@@ -174,18 +185,28 @@ inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets
execute_window_loop(window, [&](const Coordinates & id)
{
- const auto offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
- const int in_yi = (id.z() + 0.5f) * hr;
- const int offset_row = in_yi * stride_h + id.x() * stride_c;
- wrapper::vstore(reinterpret_cast<T *>(out.ptr()),
- wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + offset * offsets_stride + offset_row)));
+ const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
+ const int in_yi = (id.z() + 0.5f) * hr;
+ const int offset_row = in_yi * stride_h;
+ int32_t x = window_start_x;
+ for(; x < window_end_x - window_step_x; x += window_step_x)
+ {
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x,
+ wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + offset * offsets_stride + offset_row + x * stride_c)));
+ }
+ for(; x < window_end_x; ++x)
+ {
+ *(reinterpret_cast<T *>(out.ptr()) + x) =
+ *(reinterpret_cast<const T *>(in.ptr() + offset * offsets_stride + offset_row + x * stride_c));
+ }
},
in, out);
}
-template <typename T>
+template <typename T, typename ConstType>
inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offsets, const ITensor *dx, const ITensor *dy, ITensor *output,
- float hr, float sampling_offset, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode)
+ float hr, float sampling_offset, Window window, const Window &win_in, size_t stride_w, size_t stride_h,
+ size_t stride_c, BorderMode border_mode, PixelValue constant_border_value, bool use_padding)
{
Iterator in(input, win_in);
Iterator out(output, window);
@@ -196,7 +217,15 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
const int input_width = input->info()->dimension(1);
const int input_height = input->info()->dimension(2);
- const T *border_area = reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w);
+ T border_value;
+ if(use_padding)
+ {
+ border_value = *reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w);
+ }
+ else
+ {
+ border_value = static_cast<T>(constant_border_value.get<ConstType>());
+ }
auto is_valid = [](int x, int low_x, int high_x, int y, int low_y, int high_y)
{
@@ -224,10 +253,10 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
if(border_mode == BorderMode::CONSTANT)
{
- a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : *border_area;
- a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : *border_area;
- a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : *border_area;
- a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : *border_area;
+ a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : border_value;
+ a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : border_value;
+ a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : border_value;
+ a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : border_value;
}
else if(border_mode == BorderMode::REPLICATE)
{
@@ -279,7 +308,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
{
if(border_mode == BorderMode::CONSTANT)
{
- *reinterpret_cast<T *>(out.ptr()) = *border_area;
+ *reinterpret_cast<T *>(out.ptr()) = border_value;
}
else if(border_mode == BorderMode::REPLICATE)
{
@@ -294,7 +323,8 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
} // namespace
NEScaleKernel::NEScaleKernel()
- : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode(), _sampling_offset(0)
+ : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode(), _constant_border_value(0), _sampling_offset(0),
+ _use_padding(true)
{
}
@@ -304,31 +334,33 @@ BorderSize NEScaleKernel::border_size() const
}
void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets,
- ITensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy)
+ ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy,
+ bool use_padding)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(),
dx != nullptr ? dx->info() : nullptr,
dy != nullptr ? dy->info() : nullptr,
offsets != nullptr ? offsets->info() : nullptr,
output->info(),
- policy, border_mode, sampling_policy));
+ policy, border_mode, constant_border_value, sampling_policy, use_padding));
// Get data layout and width/height indices
const DataLayout data_layout = input->info()->data_layout();
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- _input = input;
- _output = output;
- _offsets = offsets;
- _dx = dx;
- _dy = dy;
- _policy = policy;
- _border_size = BorderSize(1);
- _border_mode = border_mode;
+ _input = input;
+ _output = output;
+ _offsets = offsets;
+ _dx = dx;
+ _dy = dy;
+ _policy = policy;
+ _border_size = BorderSize(1);
+ _border_mode = border_mode;
+ _constant_border_value = constant_border_value;
+ _use_padding = use_padding;
if(sampling_policy == SamplingPolicy::CENTER)
{
@@ -342,7 +374,7 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe
// Add constant border only on top in case of NHWC layout
if(data_layout == DataLayout::NHWC)
{
- _border_size = (border_mode == BorderMode::CONSTANT && policy == InterpolationPolicy::BILINEAR) ? BorderSize(1, 0, 0, 0) : BorderSize(0);
+ _border_size = (border_mode == BorderMode::CONSTANT && policy == InterpolationPolicy::BILINEAR && use_padding) ? BorderSize(1, 0, 0, 0) : BorderSize(0);
}
// Area interpolation behaves as Nearest Neighbour in case of up-sampling
@@ -379,7 +411,8 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe
dy != nullptr ? dy->info() : nullptr,
offsets != nullptr ? offsets->info() : nullptr,
output->info(),
- policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size());
+ policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size(), use_padding);
+
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
INEKernel::configure(win_config.second);
}
@@ -904,8 +937,8 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<uint8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
+ scale_bilinear_nhwc_core<uint8_t, uint8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
}
break;
}
@@ -917,8 +950,8 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<int16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
+ scale_bilinear_nhwc_core<int16_t, int16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
}
break;
}
@@ -932,8 +965,8 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<float16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
+ scale_bilinear_nhwc_core<float16_t, half>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
}
break;
}
@@ -946,8 +979,8 @@ void NEScaleKernel::scale_nhwc(const Window &window)
}
else
{
- scale_bilinear_nhwc_core<float>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode);
+ scale_bilinear_nhwc_core<float, float>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+ window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
}
break;
}
@@ -959,7 +992,7 @@ void NEScaleKernel::scale_nhwc(const Window &window)
Status NEScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *output, InterpolationPolicy policy,
- BorderMode border_mode, SamplingPolicy sampling_policy)
+ BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding)
{
BorderSize border_size(1);
if(input->data_layout() == DataLayout::NHWC)
@@ -967,13 +1000,13 @@ Status NEScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *dx,
border_size = (border_mode == BorderMode::CONSTANT && policy == InterpolationPolicy::BILINEAR) ? BorderSize(1, 0, 0, 0) : BorderSize(0);
}
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, dx, dy, offsets, output, policy, border_mode, sampling_policy));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, dx, dy, offsets, output, policy, border_mode, constant_border_value, sampling_policy, use_padding));
ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
dx != nullptr ? dx->clone().get() : nullptr,
dy != nullptr ? dy->clone().get() : nullptr,
offsets != nullptr ? offsets->clone().get() : nullptr,
output->clone().get(),
- policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size)
+ policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size, use_padding)
.first);
return Status{};
diff --git a/src/runtime/NEON/functions/NECropResize.cpp b/src/runtime/NEON/functions/NECropResize.cpp
new file mode 100644
index 0000000000..4360b50dfb
--- /dev/null
+++ b/src/runtime/NEON/functions/NECropResize.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+#include "arm_compute/runtime/NEON/functions/NECropResize.h"
+
+#include <cstddef>
+
+namespace arm_compute
+{
+NECropResize::NECropResize()
+ : _output(nullptr), _num_boxes(0), _method(), _extrapolation_value(0), _crop(), _scale()
+{
+}
+
+Status NECropResize::validate(const ITensorInfo *input, const ITensorInfo *boxes, const ITensorInfo *box_ind, const ITensorInfo *output,
+ Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(crop_size.x <= 0 || crop_size.y <= 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(method == InterpolationPolicy::AREA);
+ TensorInfo temp_info;
+ ARM_COMPUTE_RETURN_ON_ERROR(NECropKernel::validate(input->clone().get(), boxes->clone().get(), box_ind->clone().get(), &temp_info, boxes->tensor_shape()[1] - 1, extrapolation_value));
+ if(output->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
+ TensorShape out_shape(input->tensor_shape()[0], crop_size.x, crop_size.y, boxes->tensor_shape()[1]);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), out_shape);
+ }
+ return Status{};
+}
+
+void NECropResize::configure(const ITensor *input, const ITensor *boxes, const ITensor *box_ind, ITensor *output, Coordinates2D crop_size,
+ InterpolationPolicy method, float extrapolation_value)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_ERROR_THROW_ON(NECropResize::validate(input->info(), boxes->info(), box_ind->info(), output->info(), crop_size, method, extrapolation_value));
+
+ _num_boxes = boxes->info()->tensor_shape()[1];
+ TensorShape out_shape(input->info()->tensor_shape()[0], crop_size.x, crop_size.y);
+
+ _output = output;
+ _method = method;
+ _extrapolation_value = extrapolation_value;
+
+ // For each crop box:
+ // - A crop kernel is used to extract the initial cropped image as specified by boxes[i] from the 3D image input[box_ind[i]].
+ // - A tensor is required to hold this initial cropped image.
+ // - A scale function is used to resize the cropped image to the size specified by crop_size.
+ // - A tensor is required to hold the final scaled image before it is copied into the 4D output
+ // that will hold all final cropped and scaled 3D images.
+ _crop = arm_compute::support::cpp14::make_unique<NECropKernel[]>(_num_boxes);
+ _crop_results = arm_compute::support::cpp14::make_unique<Tensor[]>(_num_boxes);
+ _scale = arm_compute::support::cpp14::make_unique<NEScale[]>(_num_boxes);
+ _scaled_results = arm_compute::support::cpp14::make_unique<Tensor[]>(_num_boxes);
+
+ for(unsigned int i = 0; i < _num_boxes; ++i)
+ {
+ TensorInfo crop_result_info(1, DataType::F32);
+ crop_result_info.set_data_layout(DataLayout::NHWC);
+ _crop_results[i].allocator()->init(crop_result_info);
+
+ TensorInfo scaled_result_info(out_shape, 1, DataType::F32);
+ scaled_result_info.set_data_layout(DataLayout::NHWC);
+ _scaled_results[i].allocator()->init(scaled_result_info);
+
+ _crop[i].configure(input, boxes, box_ind, &_crop_results[i], i, _extrapolation_value);
+ }
+}
+
+void NECropResize::run()
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_output == nullptr, "Unconfigured function");
+
+ for(unsigned int i = 0; i < _num_boxes; ++i)
+ {
+ // Size of the crop box in _boxes and thus the shape of _crop_results[i]
+ // may not be known until run-time and so the kernels cannot be configured until then.
+ _crop[i].configure_output_shape();
+ _crop_results[i].allocator()->allocate();
+ NEScheduler::get().schedule(&_crop[i], Window::DimZ);
+
+ // Scale the cropped image.
+ _scale[i].configure(&_crop_results[i], &_scaled_results[i], _method, BorderMode::CONSTANT, PixelValue(_extrapolation_value), SamplingPolicy::TOP_LEFT, false);
+ _scaled_results[i].allocator()->allocate();
+ _scale[i].run();
+
+ // Copy scaled image into output.
+ std::copy_n(_scaled_results[i].buffer(), _scaled_results[i].info()->total_size(), _output->ptr_to_element(Coordinates(0, 0, 0, i)));
+ }
+}
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index 483aa4c0b5..425ee6c4db 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -97,14 +97,17 @@ NEScale::NEScale() // NOLINT
_dx(),
_dy(),
_scale_kernel(),
- _border_handler()
+ _border_handler(),
+ _use_padding(true)
{
}
-void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy)
+void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(NEScale::validate(input->info(), output->info(), policy, border_mode, constant_border_value, sampling_policy));
+ ARM_COMPUTE_ERROR_THROW_ON(NEScale::validate(input->info(), output->info(), policy, border_mode, constant_border_value, sampling_policy, use_padding));
+
+ _use_padding = use_padding;
// Get data layout and width/height indices
const DataLayout data_layout = input->info()->data_layout();
@@ -134,7 +137,7 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
TensorInfo tensor_info_offsets(shape, Format::S32);
_offsets.allocator()->init(tensor_info_offsets);
- _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_mode, sampling_policy);
+ _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_mode, constant_border_value, sampling_policy, use_padding);
// Allocate once the configure methods have been called
_offsets.allocator()->allocate();
@@ -152,7 +155,7 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
_dx.allocator()->init(tensor_info_dxdy);
_dy.allocator()->init(tensor_info_dxdy);
- _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_mode, sampling_policy);
+ _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_mode, constant_border_value, sampling_policy, use_padding);
// Allocate once the configure methods have been called
_offsets.allocator()->allocate();
@@ -165,18 +168,20 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol
}
case InterpolationPolicy::AREA:
{
- _scale_kernel.configure(input, nullptr, nullptr, nullptr, output, policy, border_mode);
+ _scale_kernel.configure(input, nullptr, nullptr, nullptr, output, policy, border_mode, constant_border_value);
break;
}
default:
ARM_COMPUTE_ERROR("Unsupported interpolation mode");
}
-
- _border_handler.configure(input, _scale_kernel.border_size(), border_mode, constant_border_value);
+ if(use_padding)
+ {
+ _border_handler.configure(input, _scale_kernel.border_size(), border_mode, constant_border_value);
+ }
}
Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy,
- BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy)
+ BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(sampling_policy != SamplingPolicy::CENTER && sampling_policy != SamplingPolicy::TOP_LEFT);
@@ -213,12 +218,15 @@ Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, In
}
ARM_COMPUTE_RETURN_ON_ERROR(NEScaleKernel::validate(input->clone().get(), dx, dy, offsets, output->clone().get(),
- policy, border_mode, sampling_policy));
+ policy, border_mode, constant_border_value, sampling_policy, use_padding));
return Status{};
}
void NEScale::run()
{
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ if(_use_padding)
+ {
+ NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+ }
NEScheduler::get().schedule(&_scale_kernel, Window::DimY);
}