From 393fa4c87c84356132303170d1b9ce9a45b3c3bf Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 8 May 2018 15:54:53 +0100 Subject: COMPMID-814: NEScale NHWC support Change-Id: Ibf5c624a5c5482faa42eb02bc8abe9ae0d65b0d1 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130608 Tested-by: Jenkins Reviewed-by: Anthony Barbier --- arm_compute/core/NEON/kernels/NEScaleKernel.h | 41 ++-- src/core/Helpers.cpp | 28 ++- src/core/NEON/kernels/NEScaleKernel.cpp | 332 +++++++++++++++++++++----- src/runtime/NEON/functions/NEScale.cpp | 25 +- tests/benchmark/CL/Scale.cpp | 8 +- tests/benchmark/NEON/Scale.cpp | 8 +- tests/benchmark/fixtures/ScaleFixture.h | 15 +- tests/validation/CL/Scale.cpp | 30 ++- tests/validation/GLES_COMPUTE/Scale.cpp | 7 +- tests/validation/NEON/Scale.cpp | 70 ++++-- tests/validation/fixtures/ScaleFixture.h | 33 ++- tests/validation/reference/Scale.cpp | 1 + 12 files changed, 451 insertions(+), 147 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h index eb47409692..0a3a952537 100644 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h @@ -56,17 +56,17 @@ public: * * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor * - * @param[in] input Source tensor. Data types supported: U8/S16/F32. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 - * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] policy Interpolation type to use - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER + * @param[in] input Source tensor. Data types supported: U8/S16/F32. + * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 + * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 + * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. + * @param[in] policy Interpolation type to use + * @param[in] border_mode Border mode policy + * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER */ - void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, InterpolationPolicy policy, bool border_undefined, - SamplingPolicy sampling_policy = SamplingPolicy::CENTER); + void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, + InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -74,22 +74,27 @@ public: private: /** function to perform scale using nearest interpolation on the given window */ - void scale_nearest(const Window &window); + void scale_nearest_nchw(const Window &window); /** function to perform scale using bilinear interpolation on the given window */ - void scale_bilinear(const Window &window); + void scale_bilinear_nchw(const Window &window); /** function to perform scale using area interpolation on the given window * * @note Used only in case down-sampling. */ - void scale_area(const Window &window); + void scale_area_nchw(const Window &window); + /** function to perform scale on the given window */ + void scale_nhwc(const Window &window); /** Scale function to use for the particular interpolation type passed to configure() */ void (NEScaleKernel::*_func)(const Window &window); - const ITensor *_offsets; - const ITensor *_dx; - const ITensor *_dy; - const ITensor *_input; - ITensor *_output; + const ITensor *_offsets; + const ITensor *_dx; + const ITensor *_dy; + const ITensor *_input; + ITensor *_output; + InterpolationPolicy _policy; + BorderSize _border_size; + BorderMode _border_mode; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_NESCALEKERNEL_H__ */ diff --git a/src/core/Helpers.cpp b/src/core/Helpers.cpp index c39922bf03..e336331663 100644 --- a/src/core/Helpers.cpp +++ b/src/core/Helpers.cpp @@ -177,21 +177,25 @@ Window arm_compute::calculate_max_window_horizontal(const ValidRegion &valid_reg ValidRegion arm_compute::calculate_valid_region_scale(const ITensorInfo &src_info, const TensorShape &dst_shape, InterpolationPolicy interpolate_policy, SamplingPolicy sampling_policy, bool border_undefined) { - const float scale_x = static_cast(dst_shape[0]) / src_info.tensor_shape()[0]; - const float scale_y = static_cast(dst_shape[1]) / src_info.tensor_shape()[1]; + const DataLayout data_layout = src_info.data_layout(); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + const float scale_x = static_cast(dst_shape[idx_width]) / src_info.tensor_shape()[idx_width]; + const float scale_y = static_cast(dst_shape[idx_height]) / src_info.tensor_shape()[idx_height]; const float sampling_point = (sampling_policy == SamplingPolicy::CENTER) ? 0.5f : 0.0f; // Get input's valid region start and end points - const int valid_start_in_x = src_info.valid_region().anchor[0]; - const int valid_start_in_y = src_info.valid_region().anchor[1]; - const int valid_end_in_x = src_info.valid_region().anchor[0] + src_info.valid_region().shape[0]; - const int valid_end_in_y = src_info.valid_region().anchor[1] + src_info.valid_region().shape[1]; + const int valid_start_in_x = src_info.valid_region().anchor[idx_width]; + const int valid_start_in_y = src_info.valid_region().anchor[idx_height]; + const int valid_end_in_x = src_info.valid_region().anchor[idx_width] + src_info.valid_region().shape[idx_width]; + const int valid_end_in_y = src_info.valid_region().anchor[idx_height] + src_info.valid_region().shape[idx_height]; // Initialize output's valid region start and end points auto valid_start_out_x = static_cast(valid_start_in_x * scale_x); auto valid_start_out_y = static_cast(valid_start_in_y * scale_y); - auto valid_end_out_x = std::min(std::ceil(valid_end_in_x * scale_x), dst_shape[0]); - auto valid_end_out_y = std::min(std::ceil(valid_end_in_y * scale_y), dst_shape[1]); + auto valid_end_out_x = std::min(std::ceil(valid_end_in_x * scale_x), dst_shape[idx_width]); + auto valid_end_out_y = std::min(std::ceil(valid_end_in_y * scale_y), dst_shape[idx_height]); // Handle valid points in case of the bi-linear interpolation if(border_undefined) @@ -237,11 +241,11 @@ ValidRegion arm_compute::calculate_valid_region_scale(const ITensorInfo &src_inf // Setup output valid region ValidRegion valid_region{ Coordinates(), dst_shape, src_info.tensor_shape().num_dimensions() }; - valid_region.anchor.set(0, std::max(0, valid_start_out_x)); - valid_region.anchor.set(1, std::max(0, valid_start_out_y)); + valid_region.anchor.set(idx_width, std::max(0, valid_start_out_x)); + valid_region.anchor.set(idx_height, std::max(0, valid_start_out_y)); - valid_region.shape.set(0, std::min(valid_end_out_x - valid_start_out_x, dst_shape[0])); - valid_region.shape.set(1, std::min(valid_end_out_y - valid_start_out_y, dst_shape[1])); + valid_region.shape.set(idx_width, std::min(valid_end_out_x - valid_start_out_x, dst_shape[idx_width])); + valid_region.shape.set(idx_height, std::min(valid_end_out_y - valid_start_out_y, dst_shape[idx_height])); return valid_region; } \ No newline at end of file diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 852ec3e023..311c807009 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -28,28 +28,174 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/wrapper/wrapper.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/Utility.h" #include #include #include -using namespace arm_compute; +namespace arm_compute +{ +namespace +{ +Window configure_nchw(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, + InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy, BorderSize border_size) +{ + constexpr unsigned int num_elems_processed_per_iteration = 16; + + // Configure kernel window + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + + const ValidRegion &input_valid_region = input->info()->valid_region(); + + // Reads can occur within the valid region of the input + AccessWindowStatic input_access(input->info(), input_valid_region.anchor[0] - border_size.left, + input_valid_region.anchor[1] - border_size.top, + input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size.right, + input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size.bottom); + AccessWindowHorizontal offsets_access(offsets == nullptr ? nullptr : offsets->info(), 0, + num_elems_processed_per_iteration); + AccessWindowHorizontal dx_access(dx == nullptr ? nullptr : dx->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal dy_access(dy == nullptr ? nullptr : dy->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, offsets_access, dx_access, dy_access, output_access); + + output_access.set_valid_region(win, calculate_valid_region_scale(*(input->info()), output->info()->tensor_shape(), + policy, sampling_policy, border_undefined)); + + return win; +} +Window configure_nhwc(const ITensor *input, ITensor *output, + InterpolationPolicy policy, bool border_undefined, SamplingPolicy sampling_policy, BorderSize border_size) +{ + unsigned int num_elems_processed_per_iteration = (policy == InterpolationPolicy::NEAREST_NEIGHBOR) ? 16 / input->info()->element_size() : 1; + + // Configure kernel window + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowStatic input_access(input->info(), 0, -border_size.top, + ceil_to_multiple(input->info()->tensor_shape()[0], num_elems_processed_per_iteration), + input->info()->tensor_shape()[1]); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, output_access); + output->info()->set_valid_region(calculate_valid_region_scale(*(input->info()), output->info()->tensor_shape(), + policy, sampling_policy, border_undefined)); + + return win; +} + +template +inline void scale_nearest_nhwc_core(const ITensor *input, const ITensor *offsets, ITensor *output, + float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c) +{ + Iterator in(input, win_in); + Iterator out(output, window); + + const size_t offsets_stride = stride_w / sizeof(T); + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto offset = *reinterpret_cast(offsets->ptr_to_element(Coordinates(id.y(), id.z()))); + const int in_yi = (id.z() + 0.5f) * hr; + const int offset_row = in_yi * stride_h + id.x() * stride_c; + wrapper::vstore(reinterpret_cast(out.ptr()), + wrapper::vloadq(reinterpret_cast(in.ptr() + offset * offsets_stride + offset_row))); + }, + in, out); +} + +template +inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offsets, const ITensor *dx, const ITensor *dy, ITensor *output, + float hr, Window window, const Window &win_in, size_t stride_w, size_t stride_h, size_t stride_c, BorderMode border_mode) +{ + Iterator in(input, win_in); + Iterator out(output, window); + + const size_t stride_w_elems = stride_w / sizeof(T); + const size_t stride_h_elems = stride_h / sizeof(T); + + const size_t input_width = input->info()->dimension(1); + const size_t input_height = input->info()->dimension(2); + + const T *border_area = reinterpret_cast(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w); + + auto is_valid = [](int x, int low_x, int high_x, int y, int low_y, int high_y) + { + return !(x < low_x || x > high_x || y < low_y || y > high_y); + }; + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto offset = (*reinterpret_cast(offsets->ptr_to_element(Coordinates(id.y(), id.z())))) / sizeof(T); + const auto dx_scale = *reinterpret_cast(dx->ptr_to_element(Coordinates(id.y(), id.z()))); + const auto dy_scale = *reinterpret_cast(dy->ptr_to_element(Coordinates(id.y(), id.z()))); + const int in_yi = std::floor((id.z() + 0.5f) * hr - 0.5f); + const int offset_row = in_yi * stride_h + id.x() * stride_c; + const T *in_ptr = reinterpret_cast(in.ptr() + offset * stride_w + offset_row); + + T a00 = 0, a01 = 0, a10 = 0, a11 = 0; + + if(border_mode == BorderMode::CONSTANT) + { + a00 = is_valid(offset, 0, input_width - 1, in_yi, 0, input_height - 1) ? *in_ptr : *border_area; + a01 = is_valid(offset + 1, 0, input_width - 1, in_yi, 0, input_height - 1) ? *(in_ptr + stride_w_elems) : *border_area; + a10 = is_valid(offset, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems) : *border_area; + a11 = is_valid(offset + 1, 0, input_width - 1, in_yi + 1, 0, input_height - 1) ? *(in_ptr + stride_h_elems + stride_w_elems) : *border_area; + } + else if(border_mode == BorderMode::REPLICATE) + { + auto clamped_x = utility::clamp(offset, 0, input_width - 1); + auto clamped_x1 = utility::clamp(offset + 1, 0, input_width - 1); + auto clamped_y = utility::clamp(in_yi, 0, input_height - 1); + auto clamped_y1 = utility::clamp(in_yi + 1, 0, input_height - 1); + + a00 = *reinterpret_cast(in.ptr() + clamped_x * stride_w + clamped_y * stride_h + id.x() * stride_c); + a01 = *reinterpret_cast(in.ptr() + clamped_x1 * stride_w + clamped_y * stride_h + id.x() * stride_c); + a10 = *reinterpret_cast(in.ptr() + clamped_x * stride_w + clamped_y1 * stride_h + id.x() * stride_c); + a11 = *reinterpret_cast(in.ptr() + clamped_x1 * stride_w + clamped_y1 * stride_h + id.x() * stride_c); + } + else + { + a00 = *in_ptr; + a01 = *(in_ptr + stride_w_elems); + a10 = *(in_ptr + stride_h_elems); + a11 = *(in_ptr + stride_h_elems + stride_w_elems); + } + + // Perform interpolation + const float dx1 = 1.0f - dx_scale; + const float dy1 = 1.0f - dy_scale; + + const float w1 = dx1 * dy1; + const float w2 = dx_scale * dy1; + const float w3 = dx1 * dy_scale; + const float w4 = dx_scale * dy_scale; + + // Store result + *reinterpret_cast(out.ptr()) = static_cast(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4); + }, + in, out); +} +} // namespace NEScaleKernel::NEScaleKernel() - : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr) + : _func(nullptr), _offsets(nullptr), _dx(nullptr), _dy(nullptr), _input(nullptr), _output(nullptr), _policy(), _border_size(1), _border_mode() { } BorderSize NEScaleKernel::border_size() const { - return BorderSize(1); + return _border_size; } -void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, InterpolationPolicy policy, bool border_undefined, - SamplingPolicy sampling_policy) +void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, + ITensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32); ARM_COMPUTE_ERROR_ON_NULLPTR(output); @@ -70,35 +216,45 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dy, 1, DataType::F32); } - ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) == 0); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(1) == 0); - - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) + // Get data layout and width/height indices + const DataLayout data_layout = input->info()->data_layout(); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + ARM_COMPUTE_ERROR_ON(output->info()->dimension(idx_width) == 0); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(idx_height) == 0); + + _input = input; + _output = output; + _offsets = offsets; + _dx = dx; + _dy = dy; + _policy = policy; + _border_size = BorderSize(1); + _border_mode = border_mode; + + // Compute the ratio between source width/height and destination width/height + const auto wr = static_cast(input->info()->dimension(idx_width)) / static_cast(output->info()->dimension(idx_width)); + const auto hr = static_cast(input->info()->dimension(idx_height)) / static_cast(output->info()->dimension(idx_height)); + + // Add constant border only on top in case of NHWC layout + if(data_layout == DataLayout::NHWC) { - ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i)); + _border_size = (border_mode == BorderMode::CONSTANT && policy == InterpolationPolicy::BILINEAR) ? BorderSize(1, 0, 0, 0) : BorderSize(0); } - _input = input; - _output = output; - _offsets = offsets; - _dx = dx; - _dy = dy; - - /* Compute the ratio between source width/height and destination width/height */ - const auto wr = static_cast(input->info()->dimension(0)) / static_cast(output->info()->dimension(0)); - const auto hr = static_cast(input->info()->dimension(1)) / static_cast(output->info()->dimension(1)); - - /* Area interpolation behaves as Nearest Neighbour in case of up-sampling */ + // Area interpolation behaves as Nearest Neighbour in case of up-sampling if(policy == InterpolationPolicy::AREA && wr <= 1.f && hr <= 1.f) { policy = InterpolationPolicy::NEAREST_NEIGHBOR; } + // Select interpolation function switch(policy) { case InterpolationPolicy::NEAREST_NEIGHBOR: { - _func = &NEScaleKernel::scale_nearest; + _func = (data_layout == DataLayout::NCHW) ? &NEScaleKernel::scale_nearest_nchw : &NEScaleKernel::scale_nhwc; break; } case InterpolationPolicy::BILINEAR: @@ -106,51 +262,37 @@ void NEScaleKernel::configure(const ITensor *input, const ITensor *dx, const ITe ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_dx, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_dy, 1, DataType::F32); - _func = &NEScaleKernel::scale_bilinear; + _func = (data_layout == DataLayout::NCHW) ? &NEScaleKernel::scale_bilinear_nchw : &NEScaleKernel::scale_nhwc; break; } case InterpolationPolicy::AREA: { - _func = &NEScaleKernel::scale_area; + ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NCHW); + + _func = &NEScaleKernel::scale_area_nchw; break; } default: ARM_COMPUTE_ERROR("Unsupported interpolation mode"); } - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - const ValidRegion &input_valid_region = input->info()->valid_region(); - - // Reads can occur within the valid region of the input - AccessWindowStatic input_access(input->info(), - input_valid_region.anchor[0] - border_size().left, input_valid_region.anchor[1] - border_size().top, - input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right, - input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom); - AccessWindowHorizontal offsets_access(offsets == nullptr ? nullptr : offsets->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal dx_access(dx == nullptr ? nullptr : dx->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal dy_access(dy == nullptr ? nullptr : dy->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, - input_access, - offsets_access, - dx_access, - dy_access, - output_access); - - output_access.set_valid_region(win, calculate_valid_region_scale(*(input->info()), - output->info()->tensor_shape(), - policy, - sampling_policy, - border_undefined)); + // Configure window + Window win{}; + switch(data_layout) + { + case DataLayout::NCHW: + win = configure_nchw(input, dx, dy, offsets, output, policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size()); + break; + case DataLayout::NHWC: + win = configure_nhwc(input, output, policy, border_mode == BorderMode::UNDEFINED, sampling_policy, border_size()); + break; + default: + ARM_COMPUTE_ERROR("Unsupported data layout"); + } INEKernel::configure(win); } -void NEScaleKernel::scale_nearest(const Window &window) +void NEScaleKernel::scale_nearest_nchw(const Window &window) { const size_t input_stride = _input->info()->strides_in_bytes()[1]; @@ -163,15 +305,16 @@ void NEScaleKernel::scale_nearest(const Window &window) win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); + // Set offsets window Window win_off; win_off.set(Window::DimX, window[Window::DimX]); win_off.set(Window::DimY, window[Window::DimY]); - for(size_t d = Window::DimZ; d < _offsets->info()->num_dimensions(); ++d) { win_off.set(d, Window::Dimension(0, 0, 0)); } + // Create iterators Iterator in(_input, win_in); Iterator out(_output, window); Iterator offsets(_offsets, win_off); @@ -304,7 +447,7 @@ void NEScaleKernel::scale_nearest(const Window &window) } } -void NEScaleKernel::scale_bilinear(const Window &window) +void NEScaleKernel::scale_bilinear_nchw(const Window &window) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::S16, DataType::F32); @@ -469,15 +612,16 @@ void NEScaleKernel::scale_bilinear(const Window &window) } } -void NEScaleKernel::scale_area(const Window &window) +void NEScaleKernel::scale_area_nchw(const Window &window) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8); - // Don't increment in X and Y direction for the input tensor + // Don't increment in width/height/channels for the input tensor // A pointer to the start of this plane is needed as base for the precomputed offsets Window win_in(window); win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); + win_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); Iterator in(_input, win_in); Iterator out(_output, window); @@ -517,6 +661,77 @@ void NEScaleKernel::scale_area(const Window &window) in, out); } +void NEScaleKernel::scale_nhwc(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::S16, DataType::F32); + + // Get data layout and width/height indices + const DataLayout data_layout = _input->info()->data_layout(); + const int idx_channels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + const size_t input_stride_w = _input->info()->strides_in_bytes()[idx_width]; + const size_t input_stride_h = _input->info()->strides_in_bytes()[idx_height]; + const size_t input_stride_c = _input->info()->strides_in_bytes()[idx_channels]; + + // Compute the ratio between source height and destination height + const auto hr = static_cast(_input->info()->dimension(idx_height)) / static_cast(_output->info()->dimension(idx_height)); + + // Don't increment in width/height/channels for the input tensor + // A pointer to the start of this plane is needed as base for the precomputed offsets + Window win_in(window); + win_in.set(Window::DimX, Window::Dimension(0, 0, 0)); + win_in.set(Window::DimY, Window::Dimension(0, 0, 0)); + win_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); + + switch(_input->info()->data_type()) + { + case DataType::U8: + { + if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) + { + scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c); + } + else + { + scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, + window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode); + } + break; + } + case DataType::S16: + { + if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) + { + scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c); + } + else + { + scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, + window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode); + } + break; + } + case DataType::F32: + { + if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR) + { + scale_nearest_nhwc_core(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c); + } + else + { + scale_bilinear_nhwc_core(_input, _offsets, _dx, _dy, _output, hr, + window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode); + } + break; + } + default: + ARM_COMPUTE_ERROR("Not supported"); + break; + } +} + void NEScaleKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); @@ -526,3 +741,4 @@ void NEScaleKernel::run(const Window &window, const ThreadInfo &info) (this->*_func)(window); } +} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEScale.cpp b/src/runtime/NEON/functions/NEScale.cpp index bd565c92ff..43ef6199ba 100644 --- a/src/runtime/NEON/functions/NEScale.cpp +++ b/src/runtime/NEON/functions/NEScale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -102,17 +102,17 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol ARM_COMPUTE_ERROR_ON(nullptr == input); ARM_COMPUTE_ERROR_ON(nullptr == output); - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) - { - ARM_COMPUTE_ERROR_ON(input->info()->dimension(i) != output->info()->dimension(i)); - } + // Get data layout and width/height indices + const DataLayout data_layout = input->info()->data_layout(); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); // Get the tensor shape - const TensorShape shape(output->info()->dimension(0), output->info()->dimension(1)); + const TensorShape shape(output->info()->dimension(idx_width), output->info()->dimension(idx_height)); // Compute the ratio between source width/height and destination width/height - const auto wr = static_cast(input->info()->dimension(0)) / static_cast(output->info()->dimension(0)); - const auto hr = static_cast(input->info()->dimension(1)) / static_cast(output->info()->dimension(1)); + const auto wr = static_cast(input->info()->dimension(idx_width)) / static_cast(output->info()->dimension(idx_width)); + const auto hr = static_cast(input->info()->dimension(idx_height)) / static_cast(output->info()->dimension(idx_height)); // Get the element size of the input image const size_t input_element_size = input->info()->element_size(); @@ -123,9 +123,6 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol policy = InterpolationPolicy::NEAREST_NEIGHBOR; } - // Check if the border mode is UNDEFINED - const bool border_undefined = border_mode == BorderMode::UNDEFINED; - switch(policy) { case InterpolationPolicy::NEAREST_NEIGHBOR: @@ -133,7 +130,7 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol TensorInfo tensor_info_offsets(shape, Format::S32); _offsets.allocator()->init(tensor_info_offsets); - _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_undefined, sampling_policy); + _scale_kernel.configure(input, nullptr, nullptr, &_offsets, output, policy, border_mode, sampling_policy); // Allocate once the configure methods have been called _offsets.allocator()->allocate(); @@ -151,7 +148,7 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol _dx.allocator()->init(tensor_info_dxdy); _dy.allocator()->init(tensor_info_dxdy); - _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_undefined, sampling_policy); + _scale_kernel.configure(input, &_dx, &_dy, &_offsets, output, policy, border_mode, sampling_policy); // Allocate once the configure methods have been called _offsets.allocator()->allocate(); @@ -164,7 +161,7 @@ void NEScale::configure(ITensor *input, ITensor *output, InterpolationPolicy pol } case InterpolationPolicy::AREA: { - _scale_kernel.configure(input, nullptr, nullptr, nullptr, output, policy, border_undefined); + _scale_kernel.configure(input, nullptr, nullptr, nullptr, output, policy, border_mode); break; } default: diff --git a/tests/benchmark/CL/Scale.cpp b/tests/benchmark/CL/Scale.cpp index a1cc0a5640..98f64f164b 100644 --- a/tests/benchmark/CL/Scale.cpp +++ b/tests/benchmark/CL/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,11 +50,13 @@ using CLScaleFixture = ScaleFixture; TEST_SUITE(CL) TEST_SUITE(Scale) -REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })), +REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), interpolation_types), datasets::BorderModes()), datasets::SamplingPolicies())); -REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })), +REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("DataType", { DataType::F16, DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), interpolation_types), datasets::BorderModes()), datasets::SamplingPolicies())); diff --git a/tests/benchmark/NEON/Scale.cpp b/tests/benchmark/NEON/Scale.cpp index 9b2f0bc867..b75d1a6464 100644 --- a/tests/benchmark/NEON/Scale.cpp +++ b/tests/benchmark/NEON/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -49,11 +49,13 @@ using NEScaleFixture = ScaleFixture; TEST_SUITE(NEON) TEST_SUITE(Scale) -REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16, DataType::F32 })), +REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallImageShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16, DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), interpolation_types), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))); -REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16, DataType::F32 })), +REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeImageShapes(), framework::dataset::make("DataType", { DataType::U8, DataType::S16, DataType::F32 })), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), interpolation_types), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))); diff --git a/tests/benchmark/fixtures/ScaleFixture.h b/tests/benchmark/fixtures/ScaleFixture.h index cd51f5778f..b2fbd9c3b6 100644 --- a/tests/benchmark/fixtures/ScaleFixture.h +++ b/tests/benchmark/fixtures/ScaleFixture.h @@ -41,11 +41,17 @@ class ScaleFixture : public framework::Fixture { public: template - void setup(TensorShape shape, DataType data_type, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy) + void setup(TensorShape shape, DataType data_type, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy) { constexpr float max_width = 8192.0f; constexpr float max_height = 6384.0f; + // Change shape in case of NHWC. + if(data_layout == DataLayout::NHWC) + { + permute(shape, PermutationVector(2U, 0U, 1U)); + } + std::mt19937 generator(library->seed()); std::uniform_real_distribution distribution_float(0.25f, 3.0f); float scale_x = distribution_float(generator); @@ -57,9 +63,12 @@ public: std::uniform_int_distribution distribution_u8(0, 255); uint8_t constant_border_value = static_cast(distribution_u8(generator)); + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + TensorShape shape_scaled(shape); - shape_scaled.set(0, shape[0] * scale_x); - shape_scaled.set(1, shape[1] * scale_y); + shape_scaled.set(idx_width, shape[idx_width] * scale_x); + shape_scaled.set(idx_height, shape[idx_height] * scale_y); // Create tensors src = create_tensor(shape, data_type); diff --git a/tests/validation/CL/Scale.cpp b/tests/validation/CL/Scale.cpp index cc4fdb0564..3d8750ad28 100644 --- a/tests/validation/CL/Scale.cpp +++ b/tests/validation/CL/Scale.cpp @@ -118,7 +118,9 @@ using CLScaleFixture = ScaleValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), +FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), datasets::SamplingPolicies())) @@ -130,7 +132,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode:: // Validate output validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), +FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), datasets::SamplingPolicies())) @@ -144,7 +148,9 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode:: } TEST_SUITE_END() TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), +FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), datasets::SamplingPolicies())) @@ -156,8 +162,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::A // Validate output validate(CLAccessor(_target), _reference, valid_region, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), datasets::SamplingPolicies())) @@ -174,7 +181,9 @@ TEST_SUITE_END() TEST_SUITE(Integer) TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)), +FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::U8)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), datasets::SamplingPolicies())) @@ -186,7 +195,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode // Validate output validate(CLAccessor(_target), _reference, valid_region, tolerance_u8); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)), +FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", + DataType::U8)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), datasets::SamplingPolicies())) @@ -200,7 +211,9 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode } TEST_SUITE_END() TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::S16)), +FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::S16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), datasets::SamplingPolicies())) @@ -212,8 +225,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLScaleFixture, framework::DatasetMode // Validate output validate(CLAccessor(_target), _reference, valid_region, tolerance_s16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, CLScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::S16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), datasets::SamplingPolicies())) diff --git a/tests/validation/GLES_COMPUTE/Scale.cpp b/tests/validation/GLES_COMPUTE/Scale.cpp index 9f670e4d4d..4bfa08f060 100644 --- a/tests/validation/GLES_COMPUTE/Scale.cpp +++ b/tests/validation/GLES_COMPUTE/Scale.cpp @@ -108,7 +108,9 @@ using GCScaleFixture = ScaleValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, GCScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), +FIXTURE_DATA_TEST_CASE(RunSmall, GCScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })), datasets::BorderModes()), datasets::SamplingPolicies())) @@ -120,8 +122,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, GCScaleFixture, framework::DatasetMode::A // Validate output validate(GCAccessor(_target), _reference, valid_region, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, GCScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, GCScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })), datasets::BorderModes()), datasets::SamplingPolicies())) diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp index 5f76a0ca06..b21affd9d3 100644 --- a/tests/validation/NEON/Scale.cpp +++ b/tests/validation/NEON/Scale.cpp @@ -55,6 +55,13 @@ const auto ScaleDataTypes = framework::dataset::make("DataType", DataType::F32, }); +/** Scale data types */ +const auto ScaleDataLayouts = framework::dataset::make("DataLayout", +{ + DataLayout::NCHW, + DataLayout::NHWC, +}); + /** Tolerance */ constexpr AbsoluteTolerance tolerance_u8(1); constexpr AbsoluteTolerance tolerance_s16(1); @@ -67,29 +74,42 @@ constexpr float tolerance_num_f32 = 0.01f; TEST_SUITE(NEON) TEST_SUITE(Scale) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), ScaleDataTypes), +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), ScaleDataTypes), ScaleDataLayouts), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER })), - shape, data_type, policy, border_mode, sampling_policy) + shape, data_type, data_layout, policy, border_mode, sampling_policy) { std::mt19937 generator(library->seed()); std::uniform_real_distribution distribution_float(0.25, 2); const float scale_x = distribution_float(generator); const float scale_y = distribution_float(generator); uint8_t constant_border_value = 0; + TensorShape src_shape = shape; if(border_mode == BorderMode::CONSTANT) { std::uniform_int_distribution distribution_u8(0, 255); constant_border_value = distribution_u8(generator); } + // Get width/height indices depending on layout + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + // Change shape in case of NHWC. + if(data_layout == DataLayout::NHWC) + { + permute(src_shape, PermutationVector(2U, 0U, 1U)); + } + + // Calculate scaled shape + TensorShape shape_scaled(src_shape); + shape_scaled.set(idx_width, src_shape[idx_width] * scale_x); + shape_scaled.set(idx_height, src_shape[idx_height] * scale_y); + // Create tensors - Tensor src = create_tensor(shape, data_type); - TensorShape shape_scaled(shape); - shape_scaled.set(0, shape[0] * scale_x); - shape_scaled.set(1, shape[1] * scale_y); - Tensor dst = create_tensor(shape_scaled, data_type); + Tensor src = create_tensor(src_shape, data_type, 1, 0, QuantizationInfo(), data_layout); + Tensor dst = create_tensor(shape_scaled, data_type, 1, 0, QuantizationInfo(), data_layout); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -100,14 +120,26 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combi // Validate valid region const ValidRegion dst_valid_region = calculate_valid_region_scale(*(src.info()), shape_scaled, policy, sampling_policy, (border_mode == BorderMode::UNDEFINED)); - validate(dst.info()->valid_region(), dst_valid_region); // Validate padding - PaddingCalculator calculator(shape_scaled.x(), 16); + int num_elements_processed_x = 16; + if(data_layout == DataLayout::NHWC) + { + num_elements_processed_x = (policy == InterpolationPolicy::BILINEAR) ? 1 : 16 / src.info()->element_size(); + } + PaddingCalculator calculator(shape_scaled.x(), num_elements_processed_x); calculator.set_border_mode(border_mode); - const PaddingSize read_padding(1); + PaddingSize read_padding(1); + if(data_layout == DataLayout::NHWC) + { + read_padding = calculator.required_padding(PaddingCalculator::Option::EXCLUDE_BORDER); + if(border_mode == BorderMode::CONSTANT && policy == InterpolationPolicy::BILINEAR) + { + read_padding.top = 1; + } + } const PaddingSize write_padding = calculator.required_padding(PaddingCalculator::Option::EXCLUDE_BORDER); validate(src.info()->padding(), read_padding); validate(dst.info()->padding(), write_padding); @@ -118,8 +150,9 @@ using NEScaleFixture = ScaleValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) @@ -131,8 +164,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode:: // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) @@ -149,8 +183,9 @@ TEST_SUITE_END() TEST_SUITE(Integer) TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) @@ -162,8 +197,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_u8); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) @@ -177,8 +213,9 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode } TEST_SUITE_END() TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::S16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) @@ -190,8 +227,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture, framework::DatasetMode // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_s16, tolerance_num_s16); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, NEScaleFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::S16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), datasets::BorderModes()), framework::dataset::make("SamplingPolicy", { SamplingPolicy::CENTER }))) diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h index 604bfb2622..ec102313c5 100644 --- a/tests/validation/fixtures/ScaleFixture.h +++ b/tests/validation/fixtures/ScaleFixture.h @@ -44,7 +44,7 @@ class ScaleValidationFixture : public framework::Fixture { public: template - void setup(TensorShape shape, DataType data_type, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy) + void setup(TensorShape shape, DataType data_type, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy) { constexpr float max_width = 8192.0f; constexpr float max_height = 6384.0f; @@ -60,13 +60,16 @@ public: float scale_x = distribution_float(generator); float scale_y = distribution_float(generator); - scale_x = ((shape.x() * scale_x) > max_width) ? (max_width / shape.x()) : scale_x; - scale_y = ((shape.y() * scale_y) > max_height) ? (max_height / shape.y()) : scale_y; + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + + scale_x = ((shape[idx_width] * scale_x) > max_width) ? (max_width / shape[idx_width]) : scale_x; + scale_y = ((shape[idx_height] * scale_y) > max_height) ? (max_height / shape[idx_height]) : scale_y; std::uniform_int_distribution distribution_u8(0, 255); T constant_border_value = static_cast(distribution_u8(generator)); - _target = compute_target(shape, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy); + _target = compute_target(shape, data_layout, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy); _reference = compute_reference(shape, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy); } @@ -86,15 +89,25 @@ protected: } } - TensorType compute_target(const TensorShape &shape, const float scale_x, const float scale_y, + TensorType compute_target(TensorShape shape, DataLayout data_layout, const float scale_x, const float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy) { + // Change shape in case of NHWC. + if(data_layout == DataLayout::NHWC) + { + permute(shape, PermutationVector(2U, 0U, 1U)); + } + // Create tensors - TensorType src = create_tensor(shape, _data_type); + TensorType src = create_tensor(shape, _data_type, 1, 0, QuantizationInfo(), data_layout); + + const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + TensorShape shape_scaled(shape); - shape_scaled.set(0, shape[0] * scale_x); - shape_scaled.set(1, shape[1] * scale_y); - TensorType dst = create_tensor(shape_scaled, _data_type); + shape_scaled.set(idx_width, shape[idx_width] * scale_x); + shape_scaled.set(idx_height, shape[idx_height] * scale_y); + TensorType dst = create_tensor(shape_scaled, _data_type, 1, 0, QuantizationInfo(), data_layout); // Create and configure function FunctionType scale; @@ -123,7 +136,7 @@ protected: InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, SamplingPolicy sampling_policy) { // Create reference - SimpleTensor src{ shape, _data_type }; + SimpleTensor src{ shape, _data_type, 1, 0, QuantizationInfo() }; // Fill reference fill(src); diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp index 5c9e95633c..f8a8b88cf9 100644 --- a/tests/validation/reference/Scale.cpp +++ b/tests/validation/reference/Scale.cpp @@ -23,6 +23,7 @@ */ #include "Scale.h" + #include "Utils.h" #include "arm_compute/core/utils/misc/Utility.h" #include "support/ToolchainSupport.h" -- cgit v1.2.1