author     Manuel Bottini <manuel.bottini@arm.com>  2020-08-26 16:28:38 +0100
committer  Manuel Bottini <manuel.bottini@arm.com>  2020-10-02 13:55:52 +0000
commit     fc2f6d0427e1d886fcccc68867d1af1ccd96608b (patch)
tree       7b8d65c9354f65fa92a6c15106ffc55f3c79403c
parent     9aa05a67b58b65603fd52d76690a17a155e89030 (diff)
download   ComputeLibrary-fc2f6d0427e1d886fcccc68867d1af1ccd96608b.tar.gz
COMPMID-3145: Remove padding from NEScaleKernel
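NEScale and NEScaleKernel now execute on unpadded tensors: the border handler, border_size() and the deprecated parameter-list overloads are removed, and all options travel through ScaleKernelInfo, with use_padding required to be false (validate() now rejects padded execution). A minimal usage sketch of the retained configure() overload follows; the ScaleKernelInfo constructor order is an assumption here, since this patch only references its fields:

    #include "arm_compute/runtime/NEON/functions/NEScale.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // src and dst must already be initialised and allocated by the caller.
    void resize_bilinear(Tensor &src, Tensor &dst)
    {
        NEScale scale;
        scale.configure(&src, &dst,
                        ScaleKernelInfo{ InterpolationPolicy::BILINEAR,
                                         BorderMode::REPLICATE,
                                         PixelValue(),
                                         SamplingPolicy::CENTER,
                                         false /* use_padding */,
                                         false /* align_corners */ });
        scale.run();
    }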
Change-Id: I530b12c6270d7dbeb3ef7af62484842ebcb65925
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4000
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
-rw-r--r--  arm_compute/core/NEON/kernels/NEScaleKernel.h             34
-rw-r--r--  arm_compute/runtime/NEON/functions/NEScale.h              49
-rw-r--r--  src/core/NEON/kernels/NEScaleKernel.cpp                 1208
-rw-r--r--  src/runtime/NEON/functions/NEGaussianPyramid.cpp           2
-rw-r--r--  src/runtime/NEON/functions/NELaplacianReconstruct.cpp      2
-rw-r--r--  src/runtime/NEON/functions/NEScale.cpp                    65
-rw-r--r--  tests/datasets/BorderModeDataset.h                         4
-rw-r--r--  tests/validation/NEON/Scale.cpp                           92
-rw-r--r--  tests/validation/fixtures/ScaleFixture.h                   2
9 files changed, 481 insertions, 977 deletions
diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h
index a2328b13bc..b35bb72741 100644
--- a/arm_compute/core/NEON/kernels/NEScaleKernel.h
+++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h
@@ -83,34 +83,44 @@ public:
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
private:
- /** function to perform scale using nearest interpolation on the given window */
- void scale_nearest_nchw(const Window &window);
- /** function to perform scale using bilinear interpolation on the given window */
- void scale_bilinear_nchw(const Window &window);
/** function to perform scale using area interpolation on the given window
*
* @note Used only in case of down-sampling.
*/
- void scale_area_nchw(const Window &window);
- /** function to perform scale on the given window */
- void scale_nhwc(const Window &window);
- /** Scale function to use for the particular interpolation type passed to configure() */
- void (NEScaleKernel::*_func)(const Window &window);
+ void scale_area_nchw_u8(const Window &window);
+
+ /** function to perform scale using bilinear interpolation on the given window for the NCHW layout */
+ template <typename T>
+ void scale_bilinear_nchw(const Window &window);
+ /** function to perform scale using bilinear interpolation on the given window for the NHWC layout */
+ template <typename T>
+ void scale_bilinear_nhwc(const Window &window);
+ /** function to perform scale using bilinear interpolation on the given window for quantized data types */
+ template <typename T>
+ void scale_bilinear_qasymm(const Window &window);
+
+ /** function to perform scale using nearest neighbour on the given window for the NCHW layout */
+ template <typename T>
+ void scale_nearest_nchw(const Window &window);
+ /** function to perform scale using nearest neighbour on the given window for the NHWC layout */
+ template <typename T>
+ void scale_nearest_nhwc(const Window &window);
+
+ /** Scale function to use for the particular data type, layout and interpolation policy selected in configure() */
+ using ScaleFunctionPtr = void (NEScaleKernel::*)(const Window &window);
+ ScaleFunctionPtr _func;
const ITensor *_offsets;
const ITensor *_dx;
const ITensor *_dy;
const ITensor *_input;
ITensor *_output;
InterpolationPolicy _policy;
- BorderSize _border_size;
BorderMode _border_mode;
PixelValue _constant_border_value;
float _sampling_offset;
- bool _use_padding;
bool _align_corners;
};
} // namespace arm_compute
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
index f149e3bbb9..4063e558a6 100644
--- a/arm_compute/runtime/NEON/functions/NEScale.h
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -24,20 +24,17 @@
#ifndef ARM_COMPUTE_NESCALEIMAGE_H
#define ARM_COMPUTE_NESCALEIMAGE_H
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
#include "arm_compute/runtime/Tensor.h"
-#include <cstdint>
-
namespace arm_compute
{
class ITensor;
/** Basic function to run @ref NEScaleKernel */
-class NEScale : public IFunction
+class NEScale : public INESimpleFunctionNoBorder
{
public:
/** Constructor
@@ -47,20 +44,6 @@ public:
NEScale();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
- * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- */
- ARM_COMPUTE_DEPRECATED_REL(20.08)
- void configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
- SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
- /** Initialize the function's source, destination, interpolation type and border_mode.
- *
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[out] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for configuration
@@ -68,22 +51,6 @@ public:
void configure(ITensor *input, ITensor *output, const ScaleKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEScale
*
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
- * @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
- * @param[in] use_padding (Optional) Is padding in use or not. Defaults to true.
- * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
- *
- * @return a status
- */
- ARM_COMPUTE_DEPRECATED_REL(20.08)
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy, BorderMode border_mode,
- PixelValue constant_border_value = PixelValue(), SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
- /** Static function to check if given info will lead to a valid configuration of @ref NEScale
- *
* @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
* @param[in] output Destination tensor. Data type supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
* @param[in] info @ref ScaleKernelInfo to be used for validation
@@ -92,16 +59,10 @@ public:
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info);
- // Inherited methods overridden:
- void run() override;
-
private:
- Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */
- Tensor _dx; /**< Element's distance between the X real coordinate and the smallest X following integer */
- Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */
- NEScaleKernel _scale_kernel; /**< Kernel to perform the scaling */
- NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */
- bool _use_padding; /**< Is padding used on the tensors */
+ Tensor _offsets; /**< Offset to access the element with NEAREST interpolation or the top-left element with BILINEAR interpolation in the input tensor */
+ Tensor _dx; /**< Element's distance between the X real coordinate and the smallest X following integer */
+ Tensor _dy; /**< Element's distance between the Y real coordinate and the smallest Y following integer */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_NESCALEIMAGE_H */
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 94fcfe2ff8..1a853527b9 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -34,11 +34,24 @@
#include "src/core/utils/ScaleUtils.h"
#include <arm_neon.h>
+#include <map>
namespace arm_compute
{
namespace
{
+inline float compute_bilinear(float a00, float a01, float a10, float a11, float dx_val, float dy_val)
+{
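+ // Standard bilinear weighting: dx_val and dy_val are the fractional distances
+ // of the sample point from the top-left neighbour a00.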
+ const float dx1_val = 1.0f - dx_val;
+ const float dy1_val = 1.0f - dy_val;
+
+ const float w1 = dx1_val * dy1_val;
+ const float w2 = dx_val * dy1_val;
+ const float w3 = dx1_val * dy_val;
+ const float w4 = dx_val * dy_val;
+ return a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4;
+}
+
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy,
const ITensorInfo *offsets, ITensorInfo *output, const ScaleKernelInfo &info)
{
@@ -48,8 +61,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
ARM_COMPUTE_RETURN_ERROR_ON(output == input);
ARM_COMPUTE_RETURN_ERROR_ON(info.sampling_policy != SamplingPolicy::CENTER && info.sampling_policy != SamplingPolicy::TOP_LEFT);
- ARM_COMPUTE_RETURN_ERROR_ON(!info.use_padding && info.border_mode != BorderMode::CONSTANT);
ARM_COMPUTE_UNUSED(info.constant_border_value);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.use_padding, "Padding is not supported");
const DataLayout data_layout = input->data_layout();
const auto width_index = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
@@ -71,7 +84,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dy, 1, DataType::F32);
}
- ARM_COMPUTE_RETURN_ERROR_ON(info.align_corners && !arm_compute::scale_utils::is_align_corners_allowed_sampling_policy(info.sampling_policy));
+ ARM_COMPUTE_RETURN_ERROR_ON(info.align_corners && !scale_utils::is_align_corners_allowed_sampling_policy(info.sampling_policy));
if(info.interpolation_policy == InterpolationPolicy::AREA)
{
@@ -81,265 +94,12 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
return Status{};
}
-
-std::pair<Status, Window> validate_and_configure_window_nchw(ITensorInfo *input, ITensorInfo *dx, ITensorInfo *dy, ITensorInfo *offsets, ITensorInfo *output,
- const ScaleKernelInfo &info, BorderSize border_size)
-{
- bool window_changed{ false };
- Window win{};
-
- constexpr unsigned int num_elems_processed_per_iteration = 16;
-
- // Configure kernel window
- win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
-
- const ValidRegion &input_valid_region = input->valid_region();
-
- if(offsets != nullptr)
- {
- AccessWindowHorizontal offsets_access(offsets, 0, num_elems_processed_per_iteration);
- window_changed = window_changed || update_window_and_padding(win, offsets_access);
- }
- if(dx != nullptr && dy != nullptr)
- {
- AccessWindowHorizontal dx_access(dx, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal dy_access(dy, 0, num_elems_processed_per_iteration);
- window_changed = window_changed || update_window_and_padding(win, dx_access, dy_access);
- }
-
- // Reads can occur within the valid region of the input
- AccessWindowStatic input_access(input, input_valid_region.anchor[0] - border_size.left,
- input_valid_region.anchor[1] - border_size.top,
- input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size.right,
- input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size.bottom);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed = window_changed || update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, calculate_valid_region_scale(*input, output->tensor_shape(),
- info.interpolation_policy, info.sampling_policy, info.border_mode == BorderMode::UNDEFINED));
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
-}
-
-std::pair<Status, Window> validate_and_configure_window_nhwc(ITensorInfo *input, ITensorInfo *output, const ScaleKernelInfo &info, BorderSize border_size)
-{
- bool window_changed{ false };
- Window win{};
-
- const unsigned int num_elems_processed_per_iteration = (info.use_padding && info.interpolation_policy == InterpolationPolicy::NEAREST_NEIGHBOR) ? 16 / input->element_size() : 1;
-
- // Configure kernel window
- win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
-
- if(info.use_padding)
- {
- AccessWindowStatic input_access(input, 0, -border_size.top, ceil_to_multiple(input->tensor_shape()[0], num_elems_processed_per_iteration), input->tensor_shape()[1]);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed = update_window_and_padding(win, input_access, output_access);
- output->set_valid_region(calculate_valid_region_scale(*input, output->tensor_shape(), info.interpolation_policy, info.sampling_policy, info.border_mode == BorderMode::UNDEFINED));
- }
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_pair(err, win);
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *dx, ITensorInfo *dy, ITensorInfo *offsets, ITensorInfo *output,
- const ScaleKernelInfo &info, BorderSize border_size)
-{
- std::pair<Status, Window> win_config;
- switch(input->data_layout())
- {
- case DataLayout::NCHW:
- if(!info.use_padding)
- {
- return std::make_pair(ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Padding required for NCHW"), Window{});
- }
- win_config = validate_and_configure_window_nchw(input, dx, dy, offsets, output, info, border_size);
- break;
- case DataLayout::NHWC:
- win_config = validate_and_configure_window_nhwc(input, output, info, border_size);
- break;
- default:
- win_config = std::make_pair(ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported data layout!"), Window{});
- }
-
- return win_config;
-}
-
-template <typename T>
-inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets, ITensor *output,
- float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, float sampling_offset, bool align_corners)
-{
- const int window_step_x = 16 / sizeof(T);
- const auto window_start_x = static_cast<int32_t>(window.x().start());
- const auto window_end_x = static_cast<int32_t>(window.x().end());
-
- window.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator in(input, win_in);
- Iterator out(output, window);
-
- const size_t offsets_stride = stride_w / sizeof(T);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const int32_t offset = *reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
- const auto in_yi = static_cast<int>(align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr));
- const int offset_row = in_yi * stride_h;
- int32_t x = window_start_x;
- for(; x < window_end_x - window_step_x; x += window_step_x)
- {
- wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x,
- wrapper::vloadq(reinterpret_cast<const T *>(in.ptr() + offset * offsets_stride + offset_row + x * stride_c)));
- }
- for(; x < window_end_x; ++x)
- {
- *(reinterpret_cast<T *>(out.ptr()) + x) =
- *(reinterpret_cast<const T *>(in.ptr() + offset * offsets_stride + offset_row + x * stride_c));
- }
- },
- in, out);
-}
-
-template <typename T, typename ConstType>
-inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offsets, const ITensor *dx, const ITensor *dy, ITensor *output,
- float hr, float sampling_offset, Window window, const Window &win_in, size_t stride_w, size_t stride_h,
- size_t stride_c, BorderMode border_mode, PixelValue constant_border_value, bool use_padding)
-{
- Iterator in(input, win_in);
- Iterator out(output, window);
-
- const size_t stride_w_elems = stride_w / sizeof(T);
- const size_t stride_h_elems = stride_h / sizeof(T);
-
- const int input_width = input->info()->dimension(1);
- const int input_height = input->info()->dimension(2);
-
- T border_value;
- if(use_padding && border_mode != BorderMode::REPLICATE)
- {
- // configure() sets top border to 0 for BorderMode::REPLICATE and border_value is not needed in execute_window_loop() for REPLICATE
- border_value = *reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w);
- }
- else
- {
- border_value = static_cast<T>(constant_border_value.get<ConstType>());
- }
-
- auto is_valid = [](int64_t x, int64_t low_x, int64_t high_x, int64_t y, int64_t low_y, int64_t high_y)
- {
- return !(x < low_x || x > high_x || y < low_y || y > high_y);
- };
-
- int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
-
- const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offset = (*reinterpret_cast<const int32_t *>(offsets->ptr_to_element(Coordinates(id.y(), id.z())))) / static_cast<int>(sizeof(T));
- const auto dx_scale = *reinterpret_cast<const float *>(dx->ptr_to_element(Coordinates(id.y(), id.z())));
- const auto dy_scale = *reinterpret_cast<const float *>(dy->ptr_to_element(Coordinates(id.y(), id.z())));
- const int in_yi = std::floor((id.z() + sampling_offset) * hr - sampling_offset);
- const int offset_row = in_yi * stride_h + id.x() * stride_c;
- const T *in_ptr = reinterpret_cast<T *>(in.ptr() + offset * stride_w + offset_row);
-
- if(is_valid(offset, -border_size, input_width - 1 + border_size, in_yi, -border_size, input_height - 1 + border_size))
- {
- T a00 = 0;
- T a01 = 0;
- T a10 = 0;
- T a11 = 0;
-
- if(border_mode == BorderMode::CONSTANT)
- {
- a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : border_value;
- a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : border_value;
- a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : border_value;
- a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : border_value;
- }
- else if(border_mode == BorderMode::REPLICATE)
- {
- auto clamped_x = utility::clamp<int>(offset, 0, input_width - 1);
- auto clamped_x1 = utility::clamp<int>(offset + 1, 0, input_width - 1);
- auto clamped_y = utility::clamp<int>(in_yi, 0, input_height - 1);
- auto clamped_y1 = utility::clamp<int>(in_yi + 1, 0, input_height - 1);
-
- a00 = *reinterpret_cast<T *>(in.ptr() + clamped_x * stride_w + clamped_y * stride_h + id.x() * stride_c);
- a01 = *reinterpret_cast<T *>(in.ptr() + clamped_x1 * stride_w + clamped_y * stride_h + id.x() * stride_c);
- a10 = *reinterpret_cast<T *>(in.ptr() + clamped_x * stride_w + clamped_y1 * stride_h + id.x() * stride_c);
- a11 = *reinterpret_cast<T *>(in.ptr() + clamped_x1 * stride_w + clamped_y1 * stride_h + id.x() * stride_c);
- }
- else
- {
- a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : 0;
- a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : 0;
- a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : 0;
- a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : 0;
- }
-
- // Perform interpolation
- const float dx1 = 1.0f - dx_scale;
- const float dy1 = 1.0f - dy_scale;
-
- const float w1 = dx1 * dy1;
- const float w2 = dx_scale * dy1;
- const float w3 = dx1 * dy_scale;
- const float w4 = dx_scale * dy_scale;
-
- T res = 0;
- //dequantize quantized input
- if(input->info()->data_type() == DataType::QASYMM8)
- {
- float inp00 = dequantize_qasymm8(a00, iq_info);
- float inp01 = dequantize_qasymm8(a01, iq_info);
- float inp10 = dequantize_qasymm8(a10, iq_info);
- float inp11 = dequantize_qasymm8(a11, iq_info);
- res = static_cast<T>(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info));
- }
- else if(input->info()->data_type() == DataType::QASYMM8_SIGNED)
- {
- float inp00 = dequantize_qasymm8_signed(a00, iq_info);
- float inp01 = dequantize_qasymm8_signed(a01, iq_info);
- float inp10 = dequantize_qasymm8_signed(a10, iq_info);
- float inp11 = dequantize_qasymm8_signed(a11, iq_info);
- res = static_cast<T>(quantize_qasymm8_signed((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info));
- }
- else
- {
- res = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
- }
- // Store result
- *reinterpret_cast<T *>(out.ptr()) = res;
- }
- else
- {
- if(border_mode == BorderMode::CONSTANT)
- {
- *reinterpret_cast<T *>(out.ptr()) = border_value;
- }
- else if(border_mode == BorderMode::REPLICATE)
- {
- auto clamped_x = utility::clamp<int>(offset, 0, input_width - 1);
- auto clamped_y = utility::clamp<int>(in_yi, 0, input_height - 1);
- *reinterpret_cast<T *>(out.ptr()) = *reinterpret_cast<T *>(in.ptr() + clamped_x * stride_w + clamped_y * stride_h + id.x() * stride_c);
- }
- }
- },
- in, out);
-}
} // namespace
NEScaleKernel::NEScaleKernel()
- : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode(), _constant_border_value(PixelValue()),
- _sampling_offset(0), _use_padding(true), _align_corners(false)
-{
-}
-
-BorderSize NEScaleKernel::border_size() const
+ : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_mode(), _constant_border_value(PixelValue()), _sampling_offset(0),
+ _align_corners(false)
{
- return _border_size;
}
void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets,
@@ -365,10 +125,8 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe
_dx = dx;
_dy = dy;
_policy = info.interpolation_policy;
- _border_size = BorderSize(1);
_border_mode = info.border_mode;
_constant_border_value = info.constant_border_value;
- _use_padding = info.use_padding;
_align_corners = info.align_corners;
if(info.sampling_policy == SamplingPolicy::CENTER)
@@ -377,58 +135,85 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe
}
// Compute the ratio between source width/height and destination width/height
- const auto wr = arm_compute::scale_utils::calculate_resize_ratio(input->info()->dimension(idx_width), output->info()->dimension(idx_width), _align_corners);
- const auto hr = arm_compute::scale_utils::calculate_resize_ratio(input->info()->dimension(idx_height), output->info()->dimension(idx_height), _align_corners);
+ const auto wr = scale_utils::calculate_resize_ratio(input->info()->dimension(idx_width), output->info()->dimension(idx_width), _align_corners);
+ const auto hr = scale_utils::calculate_resize_ratio(input->info()->dimension(idx_height), output->info()->dimension(idx_height), _align_corners);
- // Add constant border only on top in case of NHWC layout
- if(data_layout == DataLayout::NHWC)
+ // Area interpolation behaves as Nearest Neighbour in case of up-sampling
+ const auto policy_to_use = (info.interpolation_policy == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f) ? InterpolationPolicy::NEAREST_NEIGHBOR : _policy;
+
+ if(_border_mode == BorderMode::UNDEFINED)
{
- _border_size = (info.border_mode != BorderMode::REPLICATE && info.interpolation_policy == InterpolationPolicy::BILINEAR && info.use_padding) ? BorderSize(1, 0, 0, 0) : BorderSize(0);
+ _border_mode = BorderMode::CONSTANT;
+ _constant_border_value = PixelValue();
}
+ std::string function_to_call("scale_");
+ function_to_call += string_from_data_type(_input->info()->data_type()) + "_";
+ function_to_call += string_from_data_layout(_input->info()->data_layout()) + "_";
+ function_to_call += string_from_interpolation_policy(policy_to_use);
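+ // e.g. "scale_F32_NHWC_BILINEAR" selects &NEScaleKernel::scale_bilinear_nhwc<float>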
- // Area interpolation behaves as Nearest Neighbour in case of up-sampling
- const auto policy_to_use = (info.interpolation_policy == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f) ? InterpolationPolicy::NEAREST_NEIGHBOR : _policy;
+ static std::map<std::string, ScaleFunctionPtr> map_function =
+ {
+ { "scale_U8_NCHW_AREA_CONSTANT", &NEScaleKernel::scale_area_nchw_u8 },
+
+ { "scale_U8_NCHW_BILINEAR", &NEScaleKernel::scale_bilinear_nchw<uint8_t> },
+ { "scale_U8_NCHW_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nchw<uint8_t> },
- // Select interpolation function
- switch(policy_to_use)
+ { "scale_U8_NHWC_BILINEAR", &NEScaleKernel::scale_bilinear_nhwc<uint8_t> },
+ { "scale_U8_NHWC_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nhwc<uint8_t> },
+
+ { "scale_QASYMM8_NCHW_BILINEAR", &NEScaleKernel::scale_bilinear_qasymm<uint8_t> },
+ { "scale_QASYMM8_NCHW_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nchw<uint8_t> },
+
+ { "scale_QASYMM8_NHWC_BILINEAR", &NEScaleKernel::scale_bilinear_qasymm<uint8_t> },
+ { "scale_QASYMM8_NHWC_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nhwc<uint8_t> },
+
+ { "scale_QASYMM8_SIGNED_NCHW_BILINEAR", &NEScaleKernel::scale_bilinear_qasymm<int8_t> },
+ { "scale_QASYMM8_SIGNED_NCHW_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nchw<uint8_t> },
+
+ { "scale_QASYMM8_SIGNED_NHWC_BILINEAR", &NEScaleKernel::scale_bilinear_qasymm<int8_t> },
+ { "scale_QASYMM8_SIGNED_NHWC_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nhwc<uint8_t> },
+
+ { "scale_S16_NCHW_BILINEAR", &NEScaleKernel::scale_bilinear_nchw<int16_t> },
+ { "scale_S16_NCHW_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nchw<uint16_t> },
+
+ { "scale_S16_NHWC_BILINEAR", &NEScaleKernel::scale_bilinear_nhwc<int16_t> },
+ { "scale_S16_NHWC_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nhwc<uint16_t> },
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ { "scale_F16_NCHW_BILINEAR", &NEScaleKernel::scale_bilinear_nchw<float16_t> },
+ { "scale_F16_NCHW_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nchw<uint16_t> },
+
+ { "scale_F16_NHWC_BILINEAR", &NEScaleKernel::scale_bilinear_nhwc<float16_t> },
+ { "scale_F16_NHWC_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nhwc<uint16_t> },
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
+ { "scale_F32_NCHW_BILINEAR", &NEScaleKernel::scale_bilinear_nchw<float> },
+ { "scale_F32_NCHW_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nchw<float> },
+
+ { "scale_F32_NHWC_BILINEAR", &NEScaleKernel::scale_bilinear_nhwc<float> },
+ { "scale_F32_NHWC_NEAREST_NEIGHBOUR", &NEScaleKernel::scale_nearest_nhwc<float> },
+ };
+ auto it = map_function.find(function_to_call);
+ if(it != map_function.end())
{
- case InterpolationPolicy::NEAREST_NEIGHBOR:
- {
- _func = (data_layout == DataLayout::NCHW) ? &NEScaleKernel::scale_nearest_nchw : &NEScaleKernel::scale_nhwc;
- break;
- }
- case InterpolationPolicy::BILINEAR:
- {
- _func = (data_layout == DataLayout::NCHW) ? &NEScaleKernel::scale_bilinear_nchw : &NEScaleKernel::scale_nhwc;
- break;
- }
- case InterpolationPolicy::AREA:
- {
- _func = &NEScaleKernel::scale_area_nchw;
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Unsupported interpolation mode");
+ _func = it->second;
}
// Configure window
- std::pair<Status, Window> win_config = validate_and_configure_window(input->info(),
- dx != nullptr ? dx->info() : nullptr,
- dy != nullptr ? dy->info() : nullptr,
- offsets != nullptr ? offsets->info() : nullptr,
- output->info(),
- info, border_size());
-
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- INEKernel::configure(win_config.second);
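+ // A default-constructed Steps yields an element-wise window, so the kernel
+ // no longer imposes any padding requirement on input or output.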
+ Window win = calculate_max_window(*output->info(), Steps());
+ Coordinates coord;
+ coord.set_num_dimensions(output->info()->num_dimensions());
+ output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
+ INEKernel::configure(win);
}
+template <typename T>
void NEScaleKernel::scale_nearest_nchw(const Window &window)
{
- const size_t input_stride = _input->info()->strides_in_bytes()[1];
+ const size_t in_dim_x = _input->info()->dimension(0);
// Compute the ratio between source height and destination height
- const auto hr = arm_compute::scale_utils::calculate_resize_ratio(_input->info()->dimension(1), _output->info()->dimension(1), _align_corners);
+ const auto hr = scale_utils::calculate_resize_ratio(_input->info()->dimension(1), _output->info()->dimension(1), _align_corners);
// Don't increment in X and Y direction for the input tensor
// A pointer to the start of this plane is needed as base for the precomputed offsets
@@ -449,216 +234,24 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
Iterator in(_input, win_in);
Iterator out(_output, window);
Iterator offsets(_offsets, win_off);
-
- switch(_input->info()->data_type())
+ execute_window_loop(window, [&](const Coordinates & id)
{
- case DataType::QASYMM8_SIGNED:
- {
- int8x16_t tmp = vdupq_n_s8(0);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const uint8_t *const in_ptr = in.ptr();
-
- const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
- const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
- ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1)));
- const int offset_row = in_yi_clamped * input_stride;
-
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[0] + offset_row], tmp, 0);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[1] + offset_row], tmp, 1);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[2] + offset_row], tmp, 2);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[3] + offset_row], tmp, 3);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[4] + offset_row], tmp, 4);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[5] + offset_row], tmp, 5);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[6] + offset_row], tmp, 6);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[7] + offset_row], tmp, 7);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[8] + offset_row], tmp, 8);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[9] + offset_row], tmp, 9);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[10] + offset_row], tmp, 10);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[11] + offset_row], tmp, 11);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[12] + offset_row], tmp, 12);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[13] + offset_row], tmp, 13);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[14] + offset_row], tmp, 14);
- tmp = vsetq_lane_s8(in_ptr[offsets_ptr[15] + offset_row], tmp, 15);
-
- vst1q_s8(reinterpret_cast<int8_t *>(out.ptr()), tmp);
- },
- in, offsets, out);
- break;
- }
- case DataType::QASYMM8:
- case DataType::U8:
- {
- uint8x16_t tmp = vdupq_n_u8(0);
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const uint8_t *const in_ptr = in.ptr();
-
- const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
- const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
- ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1)));
- const int offset_row = in_yi_clamped * input_stride;
-
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[0] + offset_row], tmp, 0);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[1] + offset_row], tmp, 1);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[2] + offset_row], tmp, 2);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[3] + offset_row], tmp, 3);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[4] + offset_row], tmp, 4);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[5] + offset_row], tmp, 5);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[6] + offset_row], tmp, 6);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[7] + offset_row], tmp, 7);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[8] + offset_row], tmp, 8);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[9] + offset_row], tmp, 9);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[10] + offset_row], tmp, 10);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[11] + offset_row], tmp, 11);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[12] + offset_row], tmp, 12);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[13] + offset_row], tmp, 13);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[14] + offset_row], tmp, 14);
- tmp = vsetq_lane_u8(in_ptr[offsets_ptr[15] + offset_row], tmp, 15);
-
- vst1q_u8(out.ptr(), tmp);
- },
- in, offsets, out);
- break;
- }
- case DataType::S16:
- {
- int16x8x2_t tmp =
- {
- {
- vdupq_n_s16(0),
- vdupq_n_s16(0)
- }
- };
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
- const int offset_row = in_yi * input_stride;
-
- tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
- tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[0], 1);
- tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[4] + offset_row), tmp.val[0], 2);
- tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[6] + offset_row), tmp.val[0], 3);
- tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[8] + offset_row), tmp.val[0], 4);
- tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[10] + offset_row), tmp.val[0], 5);
- tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[12] + offset_row), tmp.val[0], 6);
- tmp.val[0] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[14] + offset_row), tmp.val[0], 7);
-
- tmp.val[1] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[1] + offset_row), tmp.val[1], 0);
- tmp.val[1] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[3] + offset_row), tmp.val[1], 1);
- tmp.val[1] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[5] + offset_row), tmp.val[1], 2);
- tmp.val[1] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[7] + offset_row), tmp.val[1], 3);
- tmp.val[1] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[9] + offset_row), tmp.val[1], 4);
- tmp.val[1] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[11] + offset_row), tmp.val[1], 5);
- tmp.val[1] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[13] + offset_row), tmp.val[1], 6);
- tmp.val[1] = vsetq_lane_s16(*reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[15] + offset_row), tmp.val[1], 7);
-
- vst2q_s16(reinterpret_cast<int16_t *>(out.ptr()), tmp);
- },
- in, offsets, out);
- break;
- }
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- {
- float16x8x2_t tmp =
- {
- {
- vdupq_n_f16(0),
- vdupq_n_f16(0)
- }
- };
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
- const int offset_row = in_yi * input_stride;
-
- tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
- tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[0], 1);
- tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[4] + offset_row), tmp.val[0], 2);
- tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[6] + offset_row), tmp.val[0], 3);
- tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[8] + offset_row), tmp.val[0], 4);
- tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[10] + offset_row), tmp.val[0], 5);
- tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[12] + offset_row), tmp.val[0], 6);
- tmp.val[0] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[14] + offset_row), tmp.val[0], 7);
-
- tmp.val[1] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[1] + offset_row), tmp.val[1], 0);
- tmp.val[1] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[3] + offset_row), tmp.val[1], 1);
- tmp.val[1] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[5] + offset_row), tmp.val[1], 2);
- tmp.val[1] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[7] + offset_row), tmp.val[1], 3);
- tmp.val[1] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[9] + offset_row), tmp.val[1], 4);
- tmp.val[1] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[11] + offset_row), tmp.val[1], 5);
- tmp.val[1] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[13] + offset_row), tmp.val[1], 6);
- tmp.val[1] = vsetq_lane_f16(*reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[15] + offset_row), tmp.val[1], 7);
-
- vst2q_f16(reinterpret_cast<__fp16 *>(out.ptr()), tmp);
- },
- in, offsets, out);
- break;
- }
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::F32:
- {
- float32x4x4_t tmp =
- {
- {
- vdupq_n_f32(0),
- vdupq_n_f32(0),
- vdupq_n_f32(0),
- vdupq_n_f32(0)
- }
- };
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto in_yi = static_cast<int>(_align_corners ? arm_compute::utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
- const int offset_row = in_yi * input_stride;
-
- tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[0] + offset_row), tmp.val[0], 0);
- tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[4] + offset_row), tmp.val[0], 1);
- tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[8] + offset_row), tmp.val[0], 2);
- tmp.val[0] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[12] + offset_row), tmp.val[0], 3);
-
- tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[1] + offset_row), tmp.val[1], 0);
- tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[5] + offset_row), tmp.val[1], 1);
- tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[9] + offset_row), tmp.val[1], 2);
- tmp.val[1] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[13] + offset_row), tmp.val[1], 3);
-
- tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[2] + offset_row), tmp.val[2], 0);
- tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[6] + offset_row), tmp.val[2], 1);
- tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[10] + offset_row), tmp.val[2], 2);
- tmp.val[2] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[14] + offset_row), tmp.val[2], 3);
-
- tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[3] + offset_row), tmp.val[3], 0);
- tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[7] + offset_row), tmp.val[3], 1);
- tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[11] + offset_row), tmp.val[3], 2);
- tmp.val[3] = vsetq_lane_f32(*reinterpret_cast<const float *>(in.ptr() + offsets_ptr[15] + offset_row), tmp.val[3], 3);
-
- vst4q_f32(reinterpret_cast<float *>(out.ptr()), tmp);
- },
- in, offsets, out);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
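+ // offsets holds precomputed x indices in elements; in_yi selects the source
+ // row from the resize ratio, so each output element gathers one input element.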
+ const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
+ const auto in_yi = static_cast<int32_t>(_align_corners ? utils::rounding::round_half_away_from_zero((id.y() + _sampling_offset) * hr) : std::floor((id.y() + _sampling_offset) * hr));
+ const int32_t offset_row = in_yi * in_dim_x;
+ *reinterpret_cast<T *>(out.ptr()) = *(reinterpret_cast<const T *>(in.ptr()) + offsets_ptr[0] + offset_row);
+ },
+ in, offsets, out);
}
+template <typename T>
void NEScaleKernel::scale_bilinear_nchw(const Window &window)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32);
-
// Compute the ratio between source height and destination height
- const auto hr = arm_compute::scale_utils::calculate_resize_ratio(_input->info()->dimension(1), _output->info()->dimension(1), _align_corners);
+ const auto hr = scale_utils::calculate_resize_ratio(_input->info()->dimension(1), _output->info()->dimension(1), _align_corners);
+ Window win_off;
+ win_off.set(Window::DimX, window.x());
+ win_off.set(Window::DimY, window.y());
// Don't increment in X and Y direction for the input tensor
// A pointer to the start of this plane is needed as base for the precomputed offsets
@@ -666,10 +259,6 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- Window win_off;
- win_off.set(Window::DimX, window.x());
- win_off.set(Window::DimY, window.y());
-
for(size_t d = Window::DimZ; d < _offsets->info()->num_dimensions(); ++d)
{
win_off.set(d, Window::Dimension(0, 0, 0));
@@ -681,271 +270,71 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
Iterator dx(_dx, win_off);
Iterator dy(_dy, win_off);
- /* Input image stride */
- const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1];
- const size_t in_stride = in_stide_in_bytes / _input->info()->element_size();
-
- const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
+ const int32_t in_dim_w = _input->info()->dimension(0);
+ const int32_t in_dim_h = _input->info()->dimension(1);
- switch(_input->info()->data_type())
+ if(_border_mode == BorderMode::CONSTANT)
{
- case DataType::QASYMM8_SIGNED:
- {
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
- const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const auto in_ptr = reinterpret_cast<const int8_t *>(in.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
- const int offset_row = in_yi * in_stide_in_bytes;
-
- int8x8_t tmp0 = vdup_n_s8(0);
-
- tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
- tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
- tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
- tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
- tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
- tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
- tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
- tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
-
- int8x8_t tmp1 = vdup_n_s8(0);
-
- tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
- tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
- tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
- tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
- tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
- tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
- tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
- tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
-
- vst1q_s8(reinterpret_cast<int8_t *>(out.ptr()), vcombine_s8(tmp0, tmp1));
- },
- in, offsets, dx, dy, out);
- break;
- }
- case DataType::QASYMM8:
- {
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
- const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
- const int offset_row = in_yi * in_stide_in_bytes;
-
- uint8x8_t tmp0 = vdup_n_u8(0);
-
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
- tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
-
- uint8x8_t tmp1 = vdup_n_u8(0);
-
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
- tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
-
- vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
- },
- in, offsets, dx, dy, out);
- break;
- }
- case DataType::U8:
- {
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
- const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
- const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
- const int offset_row = in_yi * in_stide_in_bytes;
-
- uint8x8_t tmp0 = vdup_n_u8(0);
-
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
- tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
-
- uint8x8_t tmp1 = vdup_n_u8(0);
-
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
- tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
-
- vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
- },
- in, offsets, dx, dy, out);
- break;
- }
- case DataType::S16:
- {
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
- const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
- const int offset_row = in_yi * in_stide_in_bytes;
-
- int16x8x2_t tmp =
- {
- {
- vdupq_n_s16(0),
- vdupq_n_s16(0)
- }
- };
-
- tmp.val[0] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[0] + offset_row), in_stride, dx_ptr[0], dy_ptr[0]), tmp.val[0], 0);
- tmp.val[0] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[2] + offset_row), in_stride, dx_ptr[2], dy_ptr[2]), tmp.val[0], 1);
- tmp.val[0] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[4] + offset_row), in_stride, dx_ptr[4], dy_ptr[4]), tmp.val[0], 2);
- tmp.val[0] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[6] + offset_row), in_stride, dx_ptr[6], dy_ptr[6]), tmp.val[0], 3);
- tmp.val[0] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[8] + offset_row), in_stride, dx_ptr[8], dy_ptr[8]), tmp.val[0], 4);
- tmp.val[0] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[10] + offset_row), in_stride, dx_ptr[10], dy_ptr[10]), tmp.val[0], 5);
- tmp.val[0] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[12] + offset_row), in_stride, dx_ptr[12], dy_ptr[12]), tmp.val[0], 6);
- tmp.val[0] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[14] + offset_row), in_stride, dx_ptr[14], dy_ptr[14]), tmp.val[0], 7);
-
- tmp.val[1] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[1] + offset_row), in_stride, dx_ptr[1], dy_ptr[1]), tmp.val[1], 0);
- tmp.val[1] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[3] + offset_row), in_stride, dx_ptr[3], dy_ptr[3]), tmp.val[1], 1);
- tmp.val[1] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[5] + offset_row), in_stride, dx_ptr[5], dy_ptr[5]), tmp.val[1], 2);
- tmp.val[1] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[7] + offset_row), in_stride, dx_ptr[7], dy_ptr[7]), tmp.val[1], 3);
- tmp.val[1] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[9] + offset_row), in_stride, dx_ptr[9], dy_ptr[9]), tmp.val[1], 4);
- tmp.val[1] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[11] + offset_row), in_stride, dx_ptr[11], dy_ptr[11]), tmp.val[1], 5);
- tmp.val[1] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[13] + offset_row), in_stride, dx_ptr[13], dy_ptr[13]), tmp.val[1], 6);
- tmp.val[1] = vsetq_lane_s16(delta_bilinear_c1(reinterpret_cast<const int16_t *>(in.ptr() + offsets_ptr[15] + offset_row), in_stride, dx_ptr[15], dy_ptr[15]), tmp.val[1], 7);
-
- vst2q_s16(reinterpret_cast<int16_t *>(out.ptr()), tmp);
- },
- in, offsets, dx, dy, out);
- break;
- }
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- {
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
- const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
- const int offset_row = in_yi * in_stide_in_bytes;
-
- float16x8x2_t tmp =
- {
- {
- vdupq_n_f16(0),
- vdupq_n_f16(0)
- }
- };
-
- tmp.val[0] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[0] + offset_row), in_stride, dx_ptr[0], dy_ptr[0]), tmp.val[0], 0);
- tmp.val[0] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[2] + offset_row), in_stride, dx_ptr[2], dy_ptr[2]), tmp.val[0], 1);
- tmp.val[0] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[4] + offset_row), in_stride, dx_ptr[4], dy_ptr[4]), tmp.val[0], 2);
- tmp.val[0] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[6] + offset_row), in_stride, dx_ptr[6], dy_ptr[6]), tmp.val[0], 3);
- tmp.val[0] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[8] + offset_row), in_stride, dx_ptr[8], dy_ptr[8]), tmp.val[0], 4);
- tmp.val[0] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[10] + offset_row), in_stride, dx_ptr[10], dy_ptr[10]), tmp.val[0], 5);
- tmp.val[0] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[12] + offset_row), in_stride, dx_ptr[12], dy_ptr[12]), tmp.val[0], 6);
- tmp.val[0] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[14] + offset_row), in_stride, dx_ptr[14], dy_ptr[14]), tmp.val[0], 7);
-
- tmp.val[1] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[1] + offset_row), in_stride, dx_ptr[1], dy_ptr[1]), tmp.val[1], 0);
- tmp.val[1] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[3] + offset_row), in_stride, dx_ptr[3], dy_ptr[3]), tmp.val[1], 1);
- tmp.val[1] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[5] + offset_row), in_stride, dx_ptr[5], dy_ptr[5]), tmp.val[1], 2);
- tmp.val[1] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[7] + offset_row), in_stride, dx_ptr[7], dy_ptr[7]), tmp.val[1], 3);
- tmp.val[1] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[9] + offset_row), in_stride, dx_ptr[9], dy_ptr[9]), tmp.val[1], 4);
- tmp.val[1] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[11] + offset_row), in_stride, dx_ptr[11], dy_ptr[11]), tmp.val[1], 5);
- tmp.val[1] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[13] + offset_row), in_stride, dx_ptr[13], dy_ptr[13]), tmp.val[1], 6);
- tmp.val[1] = vsetq_lane_f16(delta_bilinear_c1(reinterpret_cast<const __fp16 *>(in.ptr() + offsets_ptr[15] + offset_row), in_stride, dx_ptr[15], dy_ptr[15]), tmp.val[1], 7);
-
- vst2q_f16(reinterpret_cast<__fp16 *>(out.ptr()), tmp);
- },
- in, offsets, dx, dy, out);
- break;
- }
+ using ConstType = typename std::conditional<std::is_same<T, float16_t>::value, half, T>::type;
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ using ConstType = T;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::F32:
+ const T const_border_value = static_cast<T>(_constant_border_value.get<ConstType>());
+ execute_window_loop(window, [&](const Coordinates & id)
{
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
- const auto dx_ptr = reinterpret_cast<const float *>(dx.ptr());
- const auto dy_ptr = reinterpret_cast<const float *>(dy.ptr());
-
- const int in_yi = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
- const int offset_row = in_yi * in_stide_in_bytes;
-
- float32x4x4_t tmp =
- {
- {
- vdupq_n_f32(0),
- vdupq_n_f32(0),
- vdupq_n_f32(0),
- vdupq_n_f32(0)
- }
- };
-
- tmp.val[0] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[0] + offset_row), in_stride, dx_ptr[0], dy_ptr[0]), tmp.val[0], 0);
- tmp.val[0] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[4] + offset_row), in_stride, dx_ptr[4], dy_ptr[4]), tmp.val[0], 1);
- tmp.val[0] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[8] + offset_row), in_stride, dx_ptr[8], dy_ptr[8]), tmp.val[0], 2);
- tmp.val[0] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[12] + offset_row), in_stride, dx_ptr[12], dy_ptr[12]), tmp.val[0], 3);
-
- tmp.val[1] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[1] + offset_row), in_stride, dx_ptr[1], dy_ptr[1]), tmp.val[1], 0);
- tmp.val[1] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[5] + offset_row), in_stride, dx_ptr[5], dy_ptr[5]), tmp.val[1], 1);
- tmp.val[1] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[9] + offset_row), in_stride, dx_ptr[9], dy_ptr[9]), tmp.val[1], 2);
- tmp.val[1] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[13] + offset_row), in_stride, dx_ptr[13], dy_ptr[13]), tmp.val[1], 3);
-
- tmp.val[2] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[2] + offset_row), in_stride, dx_ptr[2], dy_ptr[2]), tmp.val[2], 0);
- tmp.val[2] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[6] + offset_row), in_stride, dx_ptr[6], dy_ptr[6]), tmp.val[2], 1);
- tmp.val[2] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[10] + offset_row), in_stride, dx_ptr[10], dy_ptr[10]), tmp.val[2], 2);
- tmp.val[2] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[14] + offset_row), in_stride, dx_ptr[14], dy_ptr[14]), tmp.val[2], 3);
-
- tmp.val[3] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[3] + offset_row), in_stride, dx_ptr[3], dy_ptr[3]), tmp.val[3], 0);
- tmp.val[3] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[7] + offset_row), in_stride, dx_ptr[7], dy_ptr[7]), tmp.val[3], 1);
- tmp.val[3] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[11] + offset_row), in_stride, dx_ptr[11], dy_ptr[11]), tmp.val[3], 2);
- tmp.val[3] = vsetq_lane_f32(delta_bilinear_c1(reinterpret_cast<const float *>(in.ptr() + offsets_ptr[15] + offset_row), in_stride, dx_ptr[15], dy_ptr[15]), tmp.val[3], 3);
-
- vst4q_f32(reinterpret_cast<float *>(out.ptr()), tmp);
- },
- in, offsets, dx, dy, out);
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
+ const int32_t index_h = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
+ const auto index_w = *(reinterpret_cast<const int32_t *>(offsets.ptr()));
+ const auto dx_val = *(reinterpret_cast<const float *>(dx.ptr()));
+ const auto dy_val = *(reinterpret_cast<const float *>(dy.ptr()));
+ const auto pixel_row_ptr = reinterpret_cast<const T *>(in.ptr());
+
+ const auto a00 = (0 <= index_w && index_w < in_dim_w && 0 <= index_h && index_h < in_dim_h) ? (*(pixel_row_ptr + index_w + index_h * in_dim_w)) : const_border_value;
+ const auto a01 = (-1 <= index_w && index_w < in_dim_w - 1 && 0 <= index_h && index_h < in_dim_h) ? (*(pixel_row_ptr + index_w + 1 + index_h * in_dim_w)) : const_border_value;
+            const auto a10 = (0 <= index_w && index_w < in_dim_w && -1 <= index_h && index_h < in_dim_h - 1) ? (*(pixel_row_ptr + index_w + index_h * in_dim_w + in_dim_w)) : const_border_value;
+            const auto a11 = (-1 <= index_w && index_w < in_dim_w - 1 && -1 <= index_h && index_h < in_dim_h - 1) ? (*(pixel_row_ptr + index_w + 1 + index_h * in_dim_w + in_dim_w)) : const_border_value;
+
+ *reinterpret_cast<T *>(out.ptr()) = static_cast<T>(compute_bilinear(a00, a01, a10, a11, dx_val, dy_val));
+ },
+ in, offsets, dx, dy, out);
+ }
+ else if(_border_mode == BorderMode::REPLICATE)
+ {
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const int index_h = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
+ const auto index_w = *(reinterpret_cast<const int32_t *>(offsets.ptr()));
+ const auto dx_val = *(reinterpret_cast<const float *>(dx.ptr()));
+ const auto dy_val = *(reinterpret_cast<const float *>(dy.ptr()));
+ const auto pixel_row_ptr = reinterpret_cast<const T *>(in.ptr());
+
+ auto clamped_x = utility::clamp<int>(index_w, 0, in_dim_w - 1);
+ auto clamped_x1 = utility::clamp<int>(index_w + 1, 0, in_dim_w - 1);
+ auto clamped_y = utility::clamp<int>(index_h, 0, in_dim_h - 1);
+ auto clamped_y1 = utility::clamp<int>(index_h + 1, 0, in_dim_h - 1);
+
+ const auto a00 = *(pixel_row_ptr + clamped_x + clamped_y * in_dim_w);
+ const auto a01 = *(pixel_row_ptr + clamped_x1 + clamped_y * in_dim_w);
+ const auto a10 = *(pixel_row_ptr + clamped_x + clamped_y1 * in_dim_w);
+ const auto a11 = *(pixel_row_ptr + clamped_x1 + clamped_y1 * in_dim_w);
+
+ *reinterpret_cast<T *>(out.ptr()) = static_cast<T>(compute_bilinear(a00, a01, a10, a11, dx_val, dy_val));
+ },
+ in, offsets, dx, dy, out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not implemented");
}
}
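
The four neighbours a00, a01, a10 and a11 are blended by compute_bilinear using the precomputed fractions dx_val and dy_val. A minimal sketch of the blend this helper is assumed to perform, restating only the arithmetic (the sketch name is illustrative, not the library's definition):

    template <typename T>
    float compute_bilinear_sketch(T a00, T a01, T a10, T a11, float dx, float dy)
    {
        const float dx1 = 1.0f - dx;
        const float dy1 = 1.0f - dy;
        // Blend top and bottom rows horizontally, then blend the two rows vertically.
        return (a00 * dx1 + a01 * dx) * dy1 + (a10 * dx1 + a11 * dx) * dy;
    }
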
-void NEScaleKernel::scale_area_nchw(const Window &window)
+void NEScaleKernel::scale_area_nchw_u8(const Window &window)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8);
@@ -959,8 +348,8 @@ void NEScaleKernel::scale_area_nchw(const Window &window)
Iterator in(_input, win_in);
Iterator out(_output, window);
- const auto wr = arm_compute::scale_utils::calculate_resize_ratio(_input->info()->dimension(0), _output->info()->dimension(0), _align_corners);
- const auto hr = arm_compute::scale_utils::calculate_resize_ratio(_input->info()->dimension(1), _output->info()->dimension(1), _align_corners);
+ const auto wr = scale_utils::calculate_resize_ratio(_input->info()->dimension(0), _output->info()->dimension(0), _align_corners);
+ const auto hr = scale_utils::calculate_resize_ratio(_input->info()->dimension(1), _output->info()->dimension(1), _align_corners);
const auto w = _input->info()->dimension(0);
const auto h = _input->info()->dimension(1);
const size_t in_stride = _input->info()->strides_in_bytes()[1];
@@ -994,123 +383,234 @@ void NEScaleKernel::scale_area_nchw(const Window &window)
in, out);
}
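
A minimal sketch of what scale_utils::calculate_resize_ratio is assumed to compute, under the convention that align_corners maps the corner samples of input and output exactly onto each other (the helper name is the library's; the body here is an assumption):

    #include <cstddef>

    float calculate_resize_ratio_sketch(size_t input_size, size_t output_size, bool align_corners)
    {
        // With align_corners the first and last samples coincide, so the ratio
        // is taken over the remaining (size - 1) sample intervals.
        const size_t offset = align_corners ? 1 : 0;
        return static_cast<float>(input_size - offset) / static_cast<float>(output_size - offset);
    }
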
-void NEScaleKernel::scale_nhwc(const Window &window)
+template <typename T>
+void NEScaleKernel::scale_nearest_nhwc(const Window &window)
{
- // Get data layout and width/height indices
- const DataLayout data_layout = DataLayout::NHWC;
- const int idx_channels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-
- const size_t input_stride_w = _input->info()->strides_in_bytes()[idx_width];
- const size_t input_stride_h = _input->info()->strides_in_bytes()[idx_height];
- const size_t input_stride_c = _input->info()->strides_in_bytes()[idx_channels];
+ const size_t in_dim_w = _input->info()->dimension(1);
+ const size_t in_dim_h = _input->info()->dimension(2);
+ const size_t in_dim_c = _input->info()->dimension(0);
+ const size_t in_dim_wc = in_dim_w * in_dim_c;
// Compute the ratio between source height and destination height
- const auto hr = arm_compute::scale_utils::calculate_resize_ratio(_input->info()->dimension(idx_height), _output->info()->dimension(idx_height), _align_corners);
+ const auto hr = scale_utils::calculate_resize_ratio(in_dim_h, _output->info()->dimension(2), _align_corners);
+ const auto window_start_x = static_cast<int32_t>(window.x().start());
+ const auto window_end_x = static_cast<int32_t>(window.x().end());
+ const int window_step_x = 16 / sizeof(T);
- // Don't increment in width/height/channels for the input tensor
+ Window win(window);
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
+    // Don't increment in X, Y and Z direction for the input tensor
// A pointer to the start of this plane is needed as base for the precomputed offsets
Window win_in(window);
win_in.set(Window::DimX, Window::Dimension(0, 0, 0));
win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
win_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
+ Iterator in(_input, win_in);
+ Iterator out(_output, win);
- switch(_input->info()->data_type())
+ execute_window_loop(win, [&](const Coordinates & id)
{
- case DataType::QASYMM8_SIGNED:
- {
- if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
- {
- scale_nearest_nhwc_core<int8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
- }
- else
- {
- scale_bilinear_nhwc_core<int8_t, int8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
- }
- break;
- }
- case DataType::QASYMM8:
- case DataType::U8:
+ const int32_t offset = *reinterpret_cast<const int32_t *>(_offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_dim_c;
+ const auto in_hi = static_cast<int>(_align_corners ? utils::rounding::round_half_away_from_zero((id.z() + _sampling_offset) * hr) : std::floor((id.z() + _sampling_offset) * hr));
+ const int offset_row = in_hi * in_dim_wc;
+ int32_t x = window_start_x;
+ for(; x <= window_end_x - window_step_x; x += window_step_x)
{
- if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
- {
- scale_nearest_nhwc_core<uint8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
- }
- else
- {
- scale_bilinear_nhwc_core<uint8_t, uint8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
- }
- break;
+ wrapper::vstore(reinterpret_cast<T *>(out.ptr()) + x,
+ wrapper::vloadq(reinterpret_cast<const T *>(in.ptr()) + offset + offset_row + x));
}
- case DataType::S16:
+ for(; x < window_end_x; ++x)
{
- if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
- {
- scale_nearest_nhwc_core<int16_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
- }
- else
- {
- scale_bilinear_nhwc_core<int16_t, int16_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
- }
- break;
+ *(reinterpret_cast<T *>(out.ptr()) + x) = *(reinterpret_cast<const T *>(in.ptr()) + offset + offset_row + x);
}
+ },
+ in, out);
+}
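
The loop structure above is what makes the padding removal possible: a vector body consumes one 128-bit register's worth of elements per iteration (window_step_x = 16 / sizeof(T)) and a scalar tail finishes the leftover columns, so no load or store ever strays past the row. A reduced, self-contained sketch of the same pattern for float data, with hypothetical buffer names:

    #include <arm_neon.h>
    #include <cstdint>

    void copy_row_no_padding(const float *in, float *out, int32_t start_x, int32_t end_x)
    {
        const int32_t step = 16 / sizeof(float); // 4 lanes per 128-bit register
        int32_t x = start_x;
        for(; x <= end_x - step; x += step)
        {
            vst1q_f32(out + x, vld1q_f32(in + x)); // full-register body
        }
        for(; x < end_x; ++x)
        {
            out[x] = in[x]; // scalar tail: stays inside the buffer
        }
    }
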
+
+template <typename T>
+void NEScaleKernel::scale_bilinear_nhwc(const Window &window)
+{
+ // Compute the ratio between source height and destination height
+ const auto hr = scale_utils::calculate_resize_ratio(_input->info()->dimension(2), _output->info()->dimension(2), _align_corners);
+
+ Iterator out(_output, window);
+ const int in_dim_c = _input->info()->dimension(0);
+ const int in_dim_w = _input->info()->dimension(1);
+ const int in_dim_h = _input->info()->dimension(2);
+ const int input_wc = in_dim_c * in_dim_w;
+
+ // Don't increment in Y and Z direction for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
+ win_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
+ Iterator in(_input, win_in);
+
+ if(_border_mode == BorderMode::CONSTANT)
+ {
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
- case DataType::F16:
- {
- if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
- {
- scale_nearest_nhwc_core<float16_t>(_input, _offsets, _output, hr,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
- }
- else
- {
- scale_bilinear_nhwc_core<float16_t, half>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
- }
- break;
- }
+ using ConstType = typename std::conditional<std::is_same<T, float16_t>::value, half, T>::type;
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ using ConstType = T;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
- case DataType::F32:
+ const T const_border_value = static_cast<T>(_constant_border_value.get<ConstType>());
+ execute_window_loop(window, [&](const Coordinates & id)
{
- if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
- {
- scale_nearest_nhwc_core<float>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset, _align_corners);
- }
- else
- {
- scale_bilinear_nhwc_core<float, float>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
- window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Not supported");
- break;
+ const auto offset = *reinterpret_cast<const int32_t *>(_offsets->ptr_to_element(Coordinates(id.y(), id.z())));
+ const auto dx_val = *reinterpret_cast<const float *>(_dx->ptr_to_element(Coordinates(id.y(), id.z())));
+ const auto dy_val = *reinterpret_cast<const float *>(_dy->ptr_to_element(Coordinates(id.y(), id.z())));
+ const int32_t in_hi = std::floor((id.z() + _sampling_offset) * hr - _sampling_offset);
+ const T *in_ptr = reinterpret_cast<const T *>(in.ptr()) + offset * in_dim_c + in_hi * input_wc;
+
+ const auto a00 = (0 <= offset && offset < in_dim_w && 0 <= in_hi && in_hi < in_dim_h) ? *in_ptr : const_border_value;
+ const auto a01 = (-1 <= offset && offset < in_dim_w - 1 && 0 <= in_hi && in_hi < in_dim_h) ? *(in_ptr + in_dim_c) : const_border_value;
+ const auto a10 = (0 <= offset && offset < in_dim_w && -1 <= in_hi && in_hi < in_dim_h - 1) ? *(in_ptr + input_wc) : const_border_value;
+ const auto a11 = (-1 <= offset && offset < in_dim_w - 1 && -1 <= in_hi && in_hi < in_dim_h - 1) ? *(in_ptr + in_dim_c + input_wc) : const_border_value;
+
+ *reinterpret_cast<T *>(out.ptr()) = static_cast<T>(compute_bilinear(a00, a01, a10, a11, dx_val, dy_val));
+ },
+ in, out);
+ }
+ else if(_border_mode == BorderMode::REPLICATE)
+ {
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const auto offset = *reinterpret_cast<const int32_t *>(_offsets->ptr_to_element(Coordinates(id.y(), id.z())));
+ const auto dx_val = *reinterpret_cast<const float *>(_dx->ptr_to_element(Coordinates(id.y(), id.z())));
+ const auto dy_val = *reinterpret_cast<const float *>(_dy->ptr_to_element(Coordinates(id.y(), id.z())));
+ const int in_hi = std::floor((id.z() + _sampling_offset) * hr - _sampling_offset);
+
+ auto clamped_w = utility::clamp<int>(offset, 0, in_dim_w - 1);
+ auto clamped_w1 = utility::clamp<int>(offset + 1, 0, in_dim_w - 1);
+ auto clamped_h = utility::clamp<int>(in_hi, 0, in_dim_h - 1);
+ auto clamped_h1 = utility::clamp<int>(in_hi + 1, 0, in_dim_h - 1);
+
+ const auto a00 = *(reinterpret_cast<const T *>(in.ptr()) + clamped_w * in_dim_c + clamped_h * input_wc);
+ const auto a01 = *(reinterpret_cast<const T *>(in.ptr()) + clamped_w1 * in_dim_c + clamped_h * input_wc);
+ const auto a10 = *(reinterpret_cast<const T *>(in.ptr()) + clamped_w * in_dim_c + clamped_h1 * input_wc);
+ const auto a11 = *(reinterpret_cast<const T *>(in.ptr()) + clamped_w1 * in_dim_c + clamped_h1 * input_wc);
+
+ *reinterpret_cast<T *>(out.ptr()) = static_cast<T>(compute_bilinear(a00, a01, a10, a11, dx_val, dy_val));
+ },
+ in, out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not implemented");
}
}
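
Both branches above rely on the NHWC layout identity that channel c of spatial position (x, y) lives at element index c + x * C + y * W * C, which is why one pixel to the right is a step of in_dim_c and one row down a step of input_wc. The same addressing in isolation, assuming a typed base pointer:

    template <typename T>
    const T *nhwc_element(const T *base, int c, int x, int y, int channels, int width)
    {
        return base + c + x * channels + y * width * channels;
    }
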
-Status NEScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy,
- const ITensorInfo *offsets, ITensorInfo *output, const ScaleKernelInfo &info)
+template <typename T>
+void NEScaleKernel::scale_bilinear_qasymm(const Window &window)
{
- BorderSize border_size(1);
- if(input->data_layout() == DataLayout::NHWC)
+ // Get data layout and width/height indices
+ const DataLayout data_layout = _input->info()->data_layout();
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+ // Compute the ratio between source height and destination height
+ const auto hr = scale_utils::calculate_resize_ratio(_input->info()->dimension(idx_height), _output->info()->dimension(idx_height), _align_corners);
+ Window win_off;
+ win_off.set(Window::DimX, Window::Dimension(0, 0, 0));
+ win_off.set(Window::DimY, Window::Dimension(0, 0, 0));
+
+    // Don't increment in the width and height dimensions for the input tensor
+ // A pointer to the start of this plane is needed as base for the precomputed offsets
+ Window win_in(window);
+ win_in.set(idx_width, Window::Dimension(0, 0, 0));
+ win_in.set(idx_height, Window::Dimension(0, 0, 0));
+
+ for(size_t d = Window::DimZ; d < _offsets->info()->num_dimensions(); ++d)
{
- border_size = (info.border_mode == BorderMode::CONSTANT && info.interpolation_policy == InterpolationPolicy::BILINEAR) ? BorderSize(1, 0, 0, 0) : BorderSize(0);
+ win_off.set(d, Window::Dimension(0, 0, 0));
}
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, dx, dy, offsets, output, info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
- dx != nullptr ? dx->clone().get() : nullptr,
- dy != nullptr ? dy->clone().get() : nullptr,
- offsets != nullptr ? offsets->clone().get() : nullptr,
- output->clone().get(),
- info, border_size)
- .first);
+ Iterator in(_input, win_in);
+ Iterator out(_output, window);
+
+ const int32_t in_dim_w = _input->info()->dimension(idx_width);
+ const int32_t in_dim_h = _input->info()->dimension(idx_height);
+ const int32_t stride_w = _input->info()->strides_in_bytes()[idx_width];
+ const int32_t stride_h = _input->info()->strides_in_bytes()[idx_height];
+
+ const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
+
+ if(_border_mode == BorderMode::CONSTANT)
+ {
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ using ConstType = typename std::conditional<std::is_same<T, float16_t>::value, half, T>::type;
+#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ using ConstType = T;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+ const T const_border_value = static_cast<T>(_constant_border_value.get<ConstType>());
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const int32_t index_h = std::floor((id[idx_height] + _sampling_offset) * hr - _sampling_offset);
+ const int32_t index_w = *(reinterpret_cast<const int32_t *>(_offsets->ptr_to_element(Coordinates(id[idx_width], id[idx_height]))));
+ const auto dx_val = *(reinterpret_cast<const float *>(_dx->ptr_to_element(Coordinates(id[idx_width], id[idx_height]))));
+ const auto dy_val = *(reinterpret_cast<const float *>(_dy->ptr_to_element(Coordinates(id[idx_width], id[idx_height]))));
+ const auto pixel_row_ptr = reinterpret_cast<const T *>(in.ptr());
+
+ const auto a00 = (0 <= index_w && index_w < in_dim_w && 0 <= index_h && index_h < in_dim_h) ?
+ (*(pixel_row_ptr + index_w * stride_w + index_h * stride_h)) :
+ const_border_value;
+ const auto a01 = (-1 <= index_w && index_w < in_dim_w - 1 && 0 <= index_h && index_h < in_dim_h) ?
+ (*(pixel_row_ptr + (index_w + 1) * stride_w + index_h * stride_h)) :
+ const_border_value;
+ const auto a10 = (0 <= index_w && index_w < in_dim_w && -1 <= index_h && index_h < in_dim_h - 1) ?
+ (*(pixel_row_ptr + index_w * stride_w + (index_h + 1) * stride_h)) :
+ const_border_value;
+ const auto a11 = (-1 <= index_w && index_w < in_dim_w - 1 && -1 <= index_h && index_h < in_dim_h - 1) ?
+ (*(pixel_row_ptr + (index_w + 1) * stride_w + (index_h + 1) * stride_h)) :
+ const_border_value;
+
+ const float inp00 = Qasymm8QuantizationHelper<T>::dequantize(a00, iq_info);
+ const float inp01 = Qasymm8QuantizationHelper<T>::dequantize(a01, iq_info);
+ const float inp10 = Qasymm8QuantizationHelper<T>::dequantize(a10, iq_info);
+ const float inp11 = Qasymm8QuantizationHelper<T>::dequantize(a11, iq_info);
+ *reinterpret_cast<T *>(out.ptr()) = Qasymm8QuantizationHelper<T>::quantize(compute_bilinear(inp00, inp01, inp10, inp11, dx_val, dy_val), oq_info);
+ },
+ in, out);
+ }
+ else if(_border_mode == BorderMode::REPLICATE)
+ {
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const int index_h = std::floor((id[idx_height] + _sampling_offset) * hr - _sampling_offset);
+ const int32_t index_w = *(reinterpret_cast<const int32_t *>(_offsets->ptr_to_element(Coordinates(id[idx_width], id[idx_height]))));
+ const auto dx_val = *(reinterpret_cast<const float *>(_dx->ptr_to_element(Coordinates(id[idx_width], id[idx_height]))));
+ const auto dy_val = *(reinterpret_cast<const float *>(_dy->ptr_to_element(Coordinates(id[idx_width], id[idx_height]))));
+ const auto pixel_row_ptr = reinterpret_cast<const T *>(in.ptr());
+
+ auto clamped_w = utility::clamp<int>(index_w, 0, in_dim_w - 1);
+ auto clamped_w1 = utility::clamp<int>(index_w + 1, 0, in_dim_w - 1);
+ auto clamped_h = utility::clamp<int>(index_h, 0, in_dim_h - 1);
+ auto clamped_h1 = utility::clamp<int>(index_h + 1, 0, in_dim_h - 1);
+
+ const auto a00 = *(pixel_row_ptr + clamped_w * stride_w + clamped_h * stride_h);
+ const auto a01 = *(pixel_row_ptr + clamped_w1 * stride_w + clamped_h * stride_h);
+ const auto a10 = *(pixel_row_ptr + clamped_w * stride_w + clamped_h1 * stride_h);
+ const auto a11 = *(pixel_row_ptr + clamped_w1 * stride_w + clamped_h1 * stride_h);
+
+ const float inp00 = Qasymm8QuantizationHelper<T>::dequantize(a00, iq_info);
+ const float inp01 = Qasymm8QuantizationHelper<T>::dequantize(a01, iq_info);
+ const float inp10 = Qasymm8QuantizationHelper<T>::dequantize(a10, iq_info);
+ const float inp11 = Qasymm8QuantizationHelper<T>::dequantize(a11, iq_info);
+ *reinterpret_cast<T *>(out.ptr()) = Qasymm8QuantizationHelper<T>::quantize(compute_bilinear(inp00, inp01, inp10, inp11, dx_val, dy_val), oq_info);
+ },
+ in, out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Not implemented");
+ }
+}
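
The quantized path blends in float: each neighbour is dequantized with the input's uniform (scale, zero point), interpolated, and the result requantized with the output's parameters. A minimal sketch of the affine round trip for QASYMM8 (illustrative helper names; Qasymm8QuantizationHelper's exact rounding and saturation may differ):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    float dequantize_sketch(uint8_t q, float scale, int32_t zero_point)
    {
        return (static_cast<int32_t>(q) - zero_point) * scale;
    }

    uint8_t quantize_sketch(float v, float scale, int32_t zero_point)
    {
        const int32_t q = static_cast<int32_t>(std::lround(v / scale)) + zero_point;
        return static_cast<uint8_t>(std::min(255, std::max(0, q)));
    }
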
+Status NEScaleKernel::validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy,
+ const ITensorInfo *offsets, ITensorInfo *output, const ScaleKernelInfo &info)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, dx, dy, offsets, output, info));
return Status{};
}
diff --git a/src/runtime/NEON/functions/NEGaussianPyramid.cpp b/src/runtime/NEON/functions/NEGaussianPyramid.cpp
index ae883bcb20..e4e20e041b 100644
--- a/src/runtime/NEON/functions/NEGaussianPyramid.cpp
+++ b/src/runtime/NEON/functions/NEGaussianPyramid.cpp
@@ -168,7 +168,7 @@ void NEGaussianPyramidOrb::configure(const ITensor *input, IPyramid *pyramid, Bo
_gaus5x5[i].configure(_pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value);
/* Configure scale */
- _scale_nearest[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED });
+ _scale_nearest[i].configure(_tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, PixelValue(), SamplingPolicy::CENTER, false });
}
_tmp.allocate();
diff --git a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
index 24755fc99b..aa5f8a21ca 100644
--- a/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
+++ b/src/runtime/NEON/functions/NELaplacianReconstruct.cpp
@@ -73,7 +73,7 @@ void NELaplacianReconstruct::configure(const IPyramid *pyramid, ITensor *input,
// Scale levels n-1 to 1, and add levels n-2 to 0
for(size_t l = 0; l < last_level; ++l)
{
- _scalef[l].configure(_tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), ScaleKernelInfo{ arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value });
+ _scalef[l].configure(_tmp_pyr.get_pyramid_level(l + 1), _tmp_pyr.get_pyramid_level(l), ScaleKernelInfo{ arm_compute::InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, constant_border_value, SamplingPolicy::CENTER, false });
_addf[l].configure(_tmp_pyr.get_pyramid_level(l), pyramid->get_pyramid_level(l), _tmp_pyr.get_pyramid_level(l), ConvertPolicy::SATURATE);
}
diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp
index 424049f24a..2278f07a1c 100644
--- a/src/runtime/NEON/functions/NEScale.cpp
+++ b/src/runtime/NEON/functions/NEScale.cpp
@@ -44,7 +44,7 @@ namespace arm_compute
{
namespace
{
-void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, size_t input_element_size, SamplingPolicy sampling_policy, bool align_corners)
+void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float wr, float hr, SamplingPolicy sampling_policy, bool align_corners)
{
ARM_COMPUTE_ERROR_ON(nullptr == offsets);
ARM_COMPUTE_UNUSED(sampling_policy);
@@ -72,7 +72,7 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float
const int in_xi = std::floor(in_x);
const int in_yi = std::floor(in_y);
- *reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi * static_cast<int>(input_element_size);
+ *reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi;
*reinterpret_cast<float *>(dx_it.ptr()) = in_x - in_xi;
*reinterpret_cast<float *>(dy_it.ptr()) = in_y - in_yi;
},
@@ -85,23 +85,17 @@ void precompute_dx_dy_offsets(ITensor *dx, ITensor *dy, ITensor *offsets, float
execute_window_loop(win, [&](const Coordinates & id)
{
- const float float_in_xi = (id.x() + sampling_offset) * wr;
- const auto in_xi = static_cast<size_t>(align_corners ? arm_compute::utils::rounding::round_half_away_from_zero(float_in_xi) : std::floor(float_in_xi));
-
- *reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi * input_element_size;
+ const float float_in_xi = (id.x() + sampling_offset) * wr;
+ const auto in_xi = static_cast<size_t>(align_corners ? arm_compute::utils::rounding::round_half_away_from_zero(float_in_xi) : std::floor(float_in_xi));
+ *reinterpret_cast<int32_t *>(offsets_it.ptr()) = in_xi;
},
offsets_it);
}
}
} // namespace
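
With the element size dropped, the offsets tensor now stores plain element indices and the templated kernels perform the type-aware arithmetic themselves. Per output column x, the bilinear precomputation above reduces to the following (a standalone restatement, not new behaviour):

    #include <cmath>

    void precompute_column(int x, float wr, float sampling_offset, int &in_xi, float &dx)
    {
        const float in_x = (x + sampling_offset) * wr - sampling_offset; // source x coordinate
        in_xi = static_cast<int>(std::floor(in_x));                      // stored in offsets
        dx    = in_x - in_xi;                                            // fraction in [0, 1)
    }
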
-NEScale::NEScale() // NOLINT
- : _offsets(),
- _dx(),
- _dy(),
- _scale_kernel(),
- _border_handler(),
- _use_padding(true)
+NEScale::NEScale()
+ : _offsets(), _dx(), _dy()
{
}
@@ -110,7 +104,6 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo &
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(NEScale::validate(input->info(), output->info(), info));
- _use_padding = info.use_padding;
const bool is_align_corners_used = info.align_corners && arm_compute::scale_utils::is_align_corners_allowed_sampling_policy(info.sampling_policy);
// Get data layout and width/height indices
@@ -119,18 +112,17 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo &
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
// Get the tensor shape
- const TensorShape shape(output->info()->dimension(idx_width), output->info()->dimension(idx_height));
+ TensorShape shape(output->info()->dimension(idx_width));
+ shape.set(1, output->info()->dimension(idx_height), false);
// Compute the ratio between source width/height and destination width/height
const auto wr = arm_compute::scale_utils::calculate_resize_ratio(input->info()->dimension(idx_width), output->info()->dimension(idx_width), is_align_corners_used);
const auto hr = arm_compute::scale_utils::calculate_resize_ratio(input->info()->dimension(idx_height), output->info()->dimension(idx_height), is_align_corners_used);
- // Get the element size of the input image
- const size_t input_element_size = input->info()->element_size();
-
// Area interpolation behaves as Nearest Neighbour in case of up-sampling
const auto policy_to_use = (info.interpolation_policy == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f) ? InterpolationPolicy::NEAREST_NEIGHBOR : info.interpolation_policy;
+ auto scale_kernel = arm_compute::support::cpp14::make_unique<NEScaleKernel>();
switch(policy_to_use)
{
case InterpolationPolicy::NEAREST_NEIGHBOR:
@@ -138,13 +130,13 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo &
TensorInfo tensor_info_offsets(shape, Format::S32);
_offsets.allocator()->init(tensor_info_offsets);
- _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, info);
+ scale_kernel->configure(input, nullptr, nullptr, &_offsets, output, info);
// Allocate once the configure methods have been called
_offsets.allocator()->allocate();
// Pre-compute offsets for nearest interpolation
- precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, input_element_size, info.sampling_policy, is_align_corners_used);
+ precompute_dx_dy_offsets(nullptr, nullptr, &_offsets, wr, hr, info.sampling_policy, is_align_corners_used);
break;
}
case InterpolationPolicy::BILINEAR:
@@ -156,7 +148,7 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo &
_dx.allocator()->init(tensor_info_dxdy);
_dy.allocator()->init(tensor_info_dxdy);
- _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, info);
+ scale_kernel->configure(input, &_dx, &_dy, &_offsets, output, info);
// Allocate once the configure methods have been called
_offsets.allocator()->allocate();
@@ -164,27 +156,18 @@ void NEScale::configure(ITensor *input, ITensor *output, const ScaleKernelInfo &
_dy.allocator()->allocate();
// Pre-compute dx, dy and offsets for bilinear interpolation
- precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, input_element_size, info.sampling_policy, is_align_corners_used);
+ precompute_dx_dy_offsets(&_dx, &_dy, &_offsets, wr, hr, info.sampling_policy, is_align_corners_used);
break;
}
case InterpolationPolicy::AREA:
{
- _scale_kernel.configure(input, nullptr, nullptr, nullptr, output, info);
+ scale_kernel->configure(input, nullptr, nullptr, nullptr, output, info);
break;
}
default:
ARM_COMPUTE_ERROR("Unsupported interpolation mode");
}
- if(info.use_padding)
- {
- _border_handler.configure(input, _scale_kernel.border_size(), info.border_mode, info.constant_border_value);
- }
-}
-
-void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding,
- bool align_corners)
-{
- configure(input, output, ScaleKernelInfo{ policy, border_mode, constant_border_value, sampling_policy, use_padding, align_corners });
+ _kernel = std::move(scale_kernel);
}
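
Handing the kernel to _kernel (presumably the member supplied by the simple-function base class) means run() no longer needs the border-handler scheduling that is deleted below; callers are unaffected. A sketch of post-patch use, with src and dst as hypothetical, already-allocated tensors:

    NEScale scale;
    scale.configure(&src, &dst,
                    ScaleKernelInfo{ InterpolationPolicy::BILINEAR,
                                     BorderMode::REPLICATE,
                                     PixelValue(),
                                     SamplingPolicy::CENTER,
                                     /* use_padding */ false });
    scale.run(); // single kernel dispatch, no separate border pass
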
Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info)
@@ -225,20 +208,4 @@ Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, co
ARM_COMPUTE_RETURN_ON_ERROR(NEScaleKernel::validate(input->clone().get(), dx, dy, offsets, output->clone().get(), info));
return Status{};
}
-
-Status NEScale::validate(const ITensorInfo *input, const ITensorInfo *output, InterpolationPolicy policy,
- BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding, bool align_corners)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(NEScale::validate(input, output, ScaleKernelInfo{ policy, border_mode, constant_border_value, sampling_policy, use_padding, align_corners }));
- return Status{};
-}
-
-void NEScale::run()
-{
- if(_use_padding)
- {
- NEScheduler::get().schedule(&_border_handler, Window::DimZ);
- }
- NEScheduler::get().schedule(&_scale_kernel, Window::DimY);
-}
} // namespace arm_compute
diff --git a/tests/datasets/BorderModeDataset.h b/tests/datasets/BorderModeDataset.h
index 84a7a4cfb1..bb90ad2214 100644
--- a/tests/datasets/BorderModeDataset.h
+++ b/tests/datasets/BorderModeDataset.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,8 +27,6 @@
#include "arm_compute/core/Types.h"
#include "tests/framework/datasets/ContainerDataset.h"
-#include "utils/TypePrinter.h"
-
namespace arm_compute
{
namespace test
diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp
index 1b59faaf07..9a1e9b01b3 100644
--- a/tests/validation/NEON/Scale.cpp
+++ b/tests/validation/NEON/Scale.cpp
@@ -117,9 +117,8 @@ const auto output_shape = TensorShape{ 4, 6, 3, 2 };
constexpr auto default_data_type = DataType::U8;
constexpr auto default_data_layout = DataLayout::NHWC;
constexpr auto default_interpolation_policy = InterpolationPolicy::NEAREST_NEIGHBOR;
-constexpr auto default_border_mode = BorderMode::UNDEFINED;
+constexpr auto default_border_mode = BorderMode::CONSTANT;
constexpr auto default_sampling_policy = SamplingPolicy::CENTER;
-constexpr bool default_use_padding = false;
TEST_CASE(NullPtr, framework::DatasetMode::ALL)
{
@@ -128,11 +127,11 @@ TEST_CASE(NullPtr, framework::DatasetMode::ALL)
Status result{};
// nullptr is given as input
- result = NEScale::validate(nullptr, &output, ScaleKernelInfo{ default_interpolation_policy, default_border_mode });
+ result = NEScale::validate(nullptr, &output, ScaleKernelInfo{ default_interpolation_policy, default_border_mode, PixelValue(), SamplingPolicy::CENTER, false });
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
// nullptr is given as output
- result = NEScale::validate(&input, nullptr, ScaleKernelInfo{ default_interpolation_policy, default_border_mode });
+ result = NEScale::validate(&input, nullptr, ScaleKernelInfo{ default_interpolation_policy, default_border_mode, PixelValue(), SamplingPolicy::CENTER, false });
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
}
@@ -170,7 +169,7 @@ TEST_CASE(SupportDataType, framework::DatasetMode::ALL)
const auto input = TensorInfo{ input_shape, 1, kv.first, default_data_layout };
const auto output = TensorInfo{ output_shape, 1, kv.first, default_data_layout };
- result = NEScale::validate(&input, &output, ScaleKernelInfo{ default_interpolation_policy, default_border_mode });
+ result = NEScale::validate(&input, &output, ScaleKernelInfo{ default_interpolation_policy, default_border_mode, PixelValue(), SamplingPolicy::CENTER, false });
ARM_COMPUTE_EXPECT(bool(result) == kv.second, framework::LogLevel::ERRORS);
}
}
@@ -183,7 +182,7 @@ TEST_CASE(MissmatchingDataType, framework::DatasetMode::ALL)
const auto output = TensorInfo{ output_shape, 1, non_default_data_type, default_data_layout };
Status result{};
- result = NEScale::validate(&input, &output, ScaleKernelInfo{ default_interpolation_policy, default_border_mode });
+ result = NEScale::validate(&input, &output, ScaleKernelInfo{ default_interpolation_policy, default_border_mode, PixelValue(), SamplingPolicy::CENTER, false });
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
}
@@ -193,9 +192,9 @@ TEST_CASE(UsePadding, framework::DatasetMode::ALL)
const auto output = TensorInfo{ output_shape, 1, default_data_type, default_data_layout };
Status result{};
- // When use padding is false, border mode should be constant
- constexpr auto border_mode = BorderMode::UNDEFINED;
- constexpr bool use_padding = false;
+ // Padding is not supported anymore
+ constexpr auto border_mode = BorderMode::CONSTANT;
+ constexpr bool use_padding = true;
result = NEScale::validate(&input, &output, ScaleKernelInfo{ default_interpolation_policy, border_mode, PixelValue(), default_sampling_policy, use_padding });
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
@@ -211,7 +210,7 @@ TEST_CASE(AreaWithNHWC, framework::DatasetMode::ALL)
const auto output = TensorInfo{ output_shape, 1, default_data_type, data_layout };
Status result{};
- result = NEScale::validate(&input, &output, ScaleKernelInfo{ interpolation_policy, default_border_mode });
+ result = NEScale::validate(&input, &output, ScaleKernelInfo{ interpolation_policy, default_border_mode, PixelValue(), SamplingPolicy::CENTER, false });
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
}
@@ -226,7 +225,7 @@ TEST_CASE(AreaWithNonU8, framework::DatasetMode::ALL)
const auto output = TensorInfo{ output_shape, 1, data_type, data_layout };
Status result{};
- result = NEScale::validate(&input, &output, ScaleKernelInfo{ interpolation_policy, default_border_mode });
+ result = NEScale::validate(&input, &output, ScaleKernelInfo{ interpolation_policy, default_border_mode, PixelValue(), SamplingPolicy::CENTER, false });
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
}
@@ -241,11 +240,80 @@ TEST_CASE(AlignedCornerNotSupported, framework::DatasetMode::ALL)
const auto output = TensorInfo{ output_shape, 1, default_data_type, default_data_layout };
Status result{};
- result = NEScale::validate(&input, &output, ScaleKernelInfo{ interpolation_policy, default_border_mode, PixelValue(), sampling_policy, default_use_padding, align_corners });
+ result = NEScale::validate(&input, &output, ScaleKernelInfo{ interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false, align_corners });
ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
}
TEST_SUITE_END() // Validate
+DATA_TEST_CASE(CheckNoPadding, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::Medium4DShapes(),
+ framework::dataset::make("DataType", { DataType::F32, DataType::QASYMM8 })),
+ framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::BILINEAR, InterpolationPolicy::NEAREST_NEIGHBOR })),
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER, SamplingPolicy::TOP_LEFT })),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })),
+ shape, data_type, interpolation_policy, sampling_policy, data_layout)
+{
+ constexpr auto default_border_mode = BorderMode::CONSTANT;
+ ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false);
+
+ // Create tensors
+ Tensor src = create_tensor<Tensor>(shape, data_type);
+ src.info()->set_data_layout(data_layout);
+
+ const float scale_x = 0.5f;
+ const float scale_y = 0.5f;
+ TensorShape shape_scaled(shape);
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ shape_scaled.set(idx_width, shape[idx_width] * scale_x, /* apply_dim_correction = */ false);
+ shape_scaled.set(idx_height, shape[idx_height] * scale_y, /* apply_dim_correction = */ false);
+ Tensor dst = create_tensor<Tensor>(shape_scaled, data_type);
+
+ ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Create and configure function
+ NEScale scale;
+ scale.configure(&src, &dst, info);
+
+ validate(src.info()->padding(), PaddingSize(0, 0, 0, 0));
+ validate(dst.info()->padding(), PaddingSize(0, 0, 0, 0));
+}
+
+DATA_TEST_CASE(CheckNoPaddingInterpAREA, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::Medium4DShapes(),
+ framework::dataset::make("DataType", { DataType::U8 })),
+ framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::AREA })),
+ framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER, SamplingPolicy::TOP_LEFT })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+ shape, data_type, interpolation_policy, sampling_policy, data_layout)
+{
+ constexpr auto default_border_mode = BorderMode::CONSTANT;
+ ScaleKernelInfo info(interpolation_policy, default_border_mode, PixelValue(), sampling_policy, false);
+
+ // Create tensors
+ Tensor src = create_tensor<Tensor>(shape, data_type);
+ src.info()->set_data_layout(data_layout);
+
+ const float scale_x = 0.5f;
+ const float scale_y = 0.5f;
+ TensorShape shape_scaled(shape);
+ const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ shape_scaled.set(idx_width, shape[idx_width] * scale_x, /* apply_dim_correction = */ false);
+ shape_scaled.set(idx_height, shape[idx_height] * scale_y, /* apply_dim_correction = */ false);
+
+    Tensor dst = create_tensor<Tensor>(shape_scaled, data_type);
+
+ ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+ // Create and configure function
+ NEScale scale;
+ scale.configure(&src, &dst, info);
+
+ validate(src.info()->padding(), PaddingSize(0, 0, 0, 0));
+ validate(dst.info()->padding(), PaddingSize(0, 0, 0, 0));
+}
+
template <typename T>
using NEScaleFixture = ScaleValidationFixture<Tensor, Accessor, NEScale, T>;
template <typename T>
diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h
index e2ed3ab6f9..1e66306715 100644
--- a/tests/validation/fixtures/ScaleFixture.h
+++ b/tests/validation/fixtures/ScaleFixture.h
@@ -137,7 +137,7 @@ protected:
// Create and configure function
FunctionType scale;
- scale.configure(&src, &dst, ScaleKernelInfo{ _policy, _border_mode, _constant_border_value, _sampling_policy, /* use_padding */ true, _align_corners });
+ scale.configure(&src, &dst, ScaleKernelInfo{ _policy, _border_mode, _constant_border_value, _sampling_policy, /* use_padding */ false, _align_corners });
ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);