From 36dff9f81e3a95aea19fcc7246a4896930a14bc6 Mon Sep 17 00:00:00 2001 From: Frederick Liardet Date: Thu, 22 Apr 2021 21:13:21 +0100 Subject: Add NHWC support to CLRemap Add NHWC support to CLRemap, also add relevant tests. Partially resolves COMPMID-4335. Change-Id: I119bea99be497fb85d5cd83a10f8d4e8e1f97f17 Signed-off-by: Freddie Liardet Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5773 Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- arm_compute/core/KernelDescriptors.h | 12 +++ arm_compute/core/Types.h | 3 - arm_compute/runtime/CL/functions/CLRemap.h | 4 +- src/core/CL/cl_kernels/remap.cl | 162 ++++++++++++++++++++++++++++- src/core/CL/kernels/CLRemapKernel.cpp | 142 +++++++++++++++++-------- src/core/CL/kernels/CLRemapKernel.h | 47 ++++++--- src/core/gpu/cl/ClKernelLibrary.cpp | 6 +- src/runtime/CL/functions/CLRemap.cpp | 10 +- tests/validation/CL/Remap.cpp | 20 ++-- tests/validation/fixtures/RemapFixture.h | 62 ++++++++--- 10 files changed, 362 insertions(+), 106 deletions(-) diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h index 1a7ead4700..6c1fc74b1e 100644 --- a/arm_compute/core/KernelDescriptors.h +++ b/arm_compute/core/KernelDescriptors.h @@ -210,5 +210,17 @@ struct ScaleKernelInfo bool align_corners; /**< Align corners of input and output */ DataLayout data_layout; /**< Data layout to use */ }; + +struct RemapInfo +{ + RemapInfo() = default; + RemapInfo(InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value) + : policy(policy), border_mode(border_mode), constant_border_value(constant_border_value) + { + } + InterpolationPolicy policy; + BorderMode border_mode; + PixelValue constant_border_value; +}; } // namespace arm_compute #endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 2dc9a77c39..48c87cd8ac 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -105,9 +105,6 @@ enum class SamplingPolicy TOP_LEFT /**< Samples are taken at pixel top left corner */ }; -/** Constant value of the border pixels when using BorderMode::CONSTANT */ -constexpr uint8_t CONSTANT_BORDER_VALUE = 199; - /** [DataLayout enum definition] **/ /** Supported tensor data layouts */ diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h index 8466a79bb9..f69b045c9b 100644 --- a/arm_compute/runtime/CL/functions/CLRemap.h +++ b/arm_compute/runtime/CL/functions/CLRemap.h @@ -58,7 +58,7 @@ public: * @param[in] map_y Map for Y coords. Data types supported: F32. * @param[out] output Output tensor. Data types supported: U8. * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. - * @param[in] border_mode Border mode to use on the input tensor. + * @param[in] border_mode Border mode to use on the input tensor. Only CONSTANT and UNDEFINED are supported. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. * */ @@ -72,7 +72,7 @@ public: * @param[in] map_y Map for Y coords. Data types supported: F32. * @param[out] output Output tensor. Data types supported: U8. * @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. - * @param[in] border_mode Border mode to use on the input tensor. + * @param[in] border_mode Border mode to use on the input tensor. Only CONSTANT and UNDEFINED are supported. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. * */ diff --git a/src/core/CL/cl_kernels/remap.cl b/src/core/CL/cl_kernels/remap.cl index 0f013c5127..8ea4e84e96 100644 --- a/src/core/CL/cl_kernels/remap.cl +++ b/src/core/CL/cl_kernels/remap.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Arm Limited. + * Copyright (c) 2017, 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,7 +56,7 @@ * @param[in] width Width of the input image * @param[in] height Height of the input image */ -__kernel void remap_nearest_neighbour( +__kernel void remap_nearest_neighbour_nchw( IMAGE_DECLARATION(in), IMAGE_DECLARATION(out), IMAGE_DECLARATION(mapx), @@ -73,7 +73,6 @@ __kernel void remap_nearest_neighbour( float4 mapy_coords = vload4(0, (__global float *)mapy.ptr); float8 map_coords = (float8)(mapx_coords.s0, mapy_coords.s0, mapx_coords.s1, mapy_coords.s1, mapx_coords.s2, mapy_coords.s2, mapx_coords.s3, mapy_coords.s3); - map_coords += (float8)(0.5f); vstore4(read_texels4(&in, convert_int8(clamp_to_border(map_coords, width, height))), 0, out.ptr); } @@ -110,7 +109,7 @@ __kernel void remap_nearest_neighbour( * @param[in] width Width of the input image * @param[in] height Height of the input image */ -__kernel void remap_bilinear( +__kernel void remap_bilinear_nchw( IMAGE_DECLARATION(in), IMAGE_DECLARATION(out), IMAGE_DECLARATION(mapx), @@ -130,3 +129,158 @@ __kernel void remap_bilinear( vstore4(bilinear_interpolate(&in, clamp_to_border(map_coords, width, height), width, height), 0, out.ptr); } + +/** Performs a remapping of an input image to an output given two remapping image using nearest neighbor as interpolation. + * Also applies constant border value, "border_val", if "CONSTANT_BORDER" is set. + * + * This kernel performs remapping with this method of pixel coordinate translation: + * out(x,y) = in(mapx(x,y), mapy(x,y)); + * + * @param[in] in_ptr Pointer to the source image. Supported data types: U8. + * @param[in] in_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] in_step_y in_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image + * @param[out] out_ptr Pointer to the destination image. Supported data types: U8. + * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image + * @param[in] mapx_ptr Pointer to the x remapping image. Supported data types: F32. + * @param[in] mapx_stride_x Stride of the remapping image in X dimension (in bytes) + * @param[in] mapx_step_x mapx_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] mapx_stride_y Stride of the remapping image in Y dimension (in bytes) + * @param[in] mapx_step_y mapy_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] mapx_offset_first_element_in_bytes Offset of the first element in the remapping image + * @param[in] mapy_ptr Pointer to the x remapping image. Supported data types: F32. + * @param[in] mapy_stride_x Stride of the remapping image in X dimension (in bytes) + * @param[in] mapy_step_x mapy_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] mapy_stride_y Stride of the remapping image in Y dimension (in bytes) + * @param[in] mapy_step_y mapy_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] mapy_offset_first_element_in_bytes Offset of the first element in the remapping image + * @param[in] width Width of the input image + * @param[in] height Height of the input image + */ + +#if defined(DEPTH_OUT) + +__kernel void remap_nearest_neighbour_nhwc( + TENSOR4D_DECLARATION(in), + TENSOR4D_DECLARATION(out), + TENSOR4D_DECLARATION(mapx), + TENSOR4D_DECLARATION(mapy), + const float width, + const float height +#ifdef CONSTANT_BORDER + , + const DATA_TYPE border_val +#endif // CONSTANT_BORDER +) +{ + Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0); + Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT); + Tensor4D mapx = CONVERT_TO_TENSOR4D_STRUCT(mapx, DEPTH_OUT); + Tensor4D mapy = CONVERT_TO_TENSOR4D_STRUCT(mapy, DEPTH_OUT); + + float mapx_coord = (float) * (__global float *)mapx.ptr; + float mapy_coord = (float) * (__global float *)mapy.ptr; + +#ifdef CONSTANT_BORDER + if(mapx_coord < 0 || mapx_coord > width - 1 || mapy_coord < 0 || mapy_coord > height - 1) + { + *((__global DATA_TYPE *)out.ptr) = border_val; + return; + } +#else // CONSTANT_BORDER + mapx_coord = clamp(mapx_coord, 0.0f, width - 1); + mapy_coord = clamp(mapy_coord, 0.0f, height - 1); +#endif // CONSTANT_BORDER + *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(mapx_coord), convert_int(mapy_coord), (get_global_id(2) / DEPTH_OUT))); +} + +/** Performs a remapping of an input image to an output given two remapping image using bilinear as interpolation. + * Also applies constant border value, "border_val", if "CONSTANT_BORDER" is set. + * + * This kernel performs remapping with this method of pixel coordinate translation: + * out(x,y) = in(mapx(x,y), mapy(x,y)); + * + * @param[in] in_ptr Pointer to the source image. Supported data types: U8. + * @param[in] in_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] in_step_y in_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image + * @param[out] out_ptr Pointer to the destination image. Supported data types: U8. + * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image + * @param[in] mapx_ptr Pointer to the x remapping image. Supported data types: F32. + * @param[in] mapx_stride_x Stride of the remapping image in X dimension (in bytes) + * @param[in] mapx_step_x mapx_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] mapx_stride_y Stride of the remapping image in Y dimension (in bytes) + * @param[in] mapx_step_y mapy_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] mapx_offset_first_element_in_bytes Offset of the first element in the remapping image + * @param[in] mapy_ptr Pointer to the x remapping image. Supported data types: F32. + * @param[in] mapy_stride_x Stride of the remapping image in X dimension (in bytes) + * @param[in] mapy_step_x mapy_stride_x * number of elements along X processed per work item (in bytes) + * @param[in] mapy_stride_y Stride of the remapping image in Y dimension (in bytes) + * @param[in] mapy_step_y mapy_stride_y * number of elements along Y processed per work item (in bytes) + * @param[in] mapy_offset_first_element_in_bytes Offset of the first element in the remapping image + * @param[in] width Width of the input image + * @param[in] height Height of the input image + */ +__kernel void remap_bilinear_nhwc( + TENSOR4D_DECLARATION(in), + TENSOR4D_DECLARATION(out), + TENSOR4D_DECLARATION(mapx), + TENSOR4D_DECLARATION(mapy), + const float width, + const float height +#ifdef CONSTANT_BORDER + , + const DATA_TYPE border_val +#endif // CONSTANT_BORDER +) +{ + Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0); + Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT); + Tensor4D mapx = CONVERT_TO_TENSOR4D_STRUCT(mapx, DEPTH_OUT); + Tensor4D mapy = CONVERT_TO_TENSOR4D_STRUCT(mapy, DEPTH_OUT); + + float mapx_coord = (float) * (__global float *)mapx.ptr; + float mapy_coord = (float) * (__global float *)mapy.ptr; + +#ifdef CONSTANT_BORDER + if(mapx_coord < 0 || mapx_coord > width - 1 || mapy_coord < 0 || mapy_coord > height - 1) + { + *((__global DATA_TYPE *)out.ptr) = border_val; + return; + } +#endif // CONSTANT_BORDER + + const float new_xf = floor(mapx_coord); + const float new_yf = floor(mapy_coord); + const float clamped_x = clamp(new_xf, 0.0f, width - 1); + const float clamped_x1 = clamp(new_xf + 1, 0.0f, width - 1); + const float clamped_y = clamp(new_yf, 0.0f, height - 1); + const float clamped_y1 = clamp(new_yf + 1, 0.0f, height - 1); + + float4 ins = (float4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))), + *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))), + *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))), + *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT)))); + + const float a = mapx_coord - new_xf; + const float b = 1.f - a; + const float a1 = mapy_coord - new_yf; + const float b1 = 1.f - a1; + const float fr = ((ins.s0 * b * b1) + (ins.s1 * a * b1) + (ins.s2 * b * a1) + (ins.s3 * a * a1)); + + *((__global DATA_TYPE *)out.ptr) = CONVERT(fr, DATA_TYPE); +} + +#endif // DEPTH_OUT diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index 335be9b3e7..6edd744db7 100644 --- a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -34,81 +34,131 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ CLRemapKernel::CLRemapKernel() - : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr) + : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr), _data_layout(DataLayout::NCHW) { } BorderSize CLRemapKernel::border_size() const { - return BorderSize(1); + return _data_layout == DataLayout::NCHW ? BorderSize(1) : BorderSize(0); +} + +template +void CLRemapKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value) +{ + T value; + constant_border_value.get(value); + ICLKernel::add_argument(idx, static_cast(value)); } -void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined) +Status CLRemapKernel::validate(const ITensorInfo *input, const ITensorInfo *map_x, const ITensorInfo *map_y, ITensorInfo *output, RemapInfo info) { - configure(CLKernelLibrary::get().get_compile_context(), input, map_x, map_y, output, policy, border_undefined); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, map_x, map_y, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.policy == InterpolationPolicy::AREA, "Area interpolation is not supported!"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.border_mode != BorderMode::CONSTANT && info.border_mode != BorderMode::UNDEFINED, "Border mode not supported"); + return Status{}; } -void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, - bool border_undefined) +void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, RemapInfo info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported!"); - ARM_COMPUTE_UNUSED(border_undefined); - - _input = input; - _output = output; - _map_x = map_x; - _map_y = map_y; + CLRemapKernel::validate(input->info(), map_x->info(), map_y->info(), output->info(), info); + + _input = input; + _output = output; + _map_x = map_x; + _map_y = map_y; + _data_layout = input->info()->data_layout(); + + const bool is_nhwc = _data_layout == DataLayout::NHWC; + const bool is_constant_border = info.border_mode == BorderMode::CONSTANT; // Create kernel - std::set build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())) }; - std::string interpolation_name = string_from_interpolation_policy(policy); - std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower); - std::string kernel_name = "remap_" + interpolation_name; - _kernel = create_kernel(compile_context, kernel_name, build_opts); + CLBuildOptions build_opts; + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option_if(is_nhwc, "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); + build_opts.add_option_if(is_constant_border, "-DCONSTANT_BORDER"); - // Configure window - constexpr unsigned int num_elems_processed_per_iteration = 4; + const std::string interpolation_name = lower_string(string_from_interpolation_policy(info.policy)); + const std::string kernel_name = "remap_" + interpolation_name + "_" + lower_string(string_from_data_layout(_data_layout)); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); - const int total_right = ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration); - const int access_right = total_right + (((total_right - input->info()->dimension(0)) == 0) ? border_size().right : 0); + const unsigned int num_elems_processed_per_iteration = is_nhwc ? 1 : 4; + const int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + const int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const int input_height = input->info()->dimension(idx_height); + const int input_width = input->info()->dimension(idx_width); - Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom); + // Configure window + Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration)); + + // Update padding in NCHW case + if(_data_layout == DataLayout::NCHW) + { + const int total_right = ceil_to_multiple(input_width, num_elems_processed_per_iteration); + const int access_right = total_right + (((total_right - input_width) == 0) ? border_size().right : 0); + AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input_height + border_size().bottom); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); + update_window_and_padding(win, input_access, output_access); + } ICLKernel::configure_internal(win); // Set static arguments - unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, input->info()->dimension(0)); - _kernel.setArg(idx++, input->info()->dimension(1)); + unsigned int idx = 4 * (is_nhwc ? num_arguments_per_4D_tensor() : num_arguments_per_2D_tensor()); + _kernel.setArg(idx++, input_width); + _kernel.setArg(idx++, input_height); + if(is_nhwc && is_constant_border) + { + set_constant_border(idx, info.constant_border_value); + } } void CLRemapKernel::run(const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - do + switch(_data_layout) { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument(idx, _output, slice); - add_2D_tensor_argument(idx, _map_x, slice); - add_2D_tensor_argument(idx, _map_y, slice); - enqueue(queue, *this, slice, lws_hint()); + case DataLayout::NCHW: + { + Window slice = window.first_slice_window_2D(); + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, slice); + add_2D_tensor_argument(idx, _output, slice); + add_2D_tensor_argument(idx, _map_x, slice); + add_2D_tensor_argument(idx, _map_y, slice); + enqueue(queue, *this, slice, lws_hint()); + + } + while(window.slide_window_slice_2D(slice)); + break; + } + case DataLayout::NHWC: + { + Window collapsed = window.collapse(ICLKernel::window(), Window::DimZ); + Window slice = collapsed.first_slice_window_4D(); + + unsigned int idx = 0; + add_4D_tensor_argument(idx, _input, slice); + add_4D_tensor_argument(idx, _output, slice); + add_4D_tensor_argument(idx, _map_x, slice); + add_4D_tensor_argument(idx, _map_y, slice); + enqueue(queue, *this, slice, lws_hint()); + break; + } + default: + ARM_COMPUTE_ERROR("Invalid Data layout"); } - while(window.slide_window_slice_2D(slice)); } +} // namespace arm_compute diff --git a/src/core/CL/kernels/CLRemapKernel.h b/src/core/CL/kernels/CLRemapKernel.h index 8efcf091ed..1e3a4ad13f 100644 --- a/src/core/CL/kernels/CLRemapKernel.h +++ b/src/core/CL/kernels/CLRemapKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_CLREMAPKERNEL_H #define ARM_COMPUTE_CLREMAPKERNEL_H +#include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "src/core/CL/ICLKernel.h" @@ -47,25 +48,36 @@ public: CLRemapKernel &operator=(CLRemapKernel &&) = default; /** Initialize the kernel's input, output and border mode. * - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] map_x Map for X coordinates. Data types supported: F32. - * @param[in] map_y Map for Y coordinates. Data types supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] compile_context The compile context to be used. + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] info RemapInfo struct: + * - policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. + * - border_mode Border mode to use on the input tensor. Only CONSTANT and UNDEFINED are supported. + * - constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. */ - void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); - /** Initialize the kernel's input, output and border mode. + void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, RemapInfo info); + /** Validate the kernel's input, output and border mode. + * + * @param[in] input Source tensor. Data types supported: U8. + * @param[in] map_x Map for X coordinates. Data types supported: F32. + * @param[in] map_y Map for Y coordinates. Data types supported: F32. + * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. + * @param[in] info RemapInfo struct: + * - policy Interpolation policy to use. Only NEAREST and BILINEAR are supported. + * - border_mode Border mode to use on the input tensor. Only CONSTANT and UNDEFINED are supported. + * - constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *map_x, const ITensorInfo *map_y, ITensorInfo *output, RemapInfo info); + /** Function to set the constant value on fill border kernel depending on type. * - * @param[in] compile_context The compile context to be used. - * @param[in] input Source tensor. Data types supported: U8. - * @param[in] map_x Map for X coordinates. Data types supported: F32. - * @param[in] map_y Map for Y coordinates. Data types supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. + * @param[in] idx Index of the kernel argument to set. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined); + template + void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -76,6 +88,7 @@ private: ICLTensor *_output; const ICLTensor *_map_x; const ICLTensor *_map_y; + DataLayout _data_layout; }; } // namespace arm_compute #endif /*ARM_COMPUTE_CLREMAPKERNEL_H */ diff --git a/src/core/gpu/cl/ClKernelLibrary.cpp b/src/core/gpu/cl/ClKernelLibrary.cpp index b0458d7c3a..9d516e54a7 100644 --- a/src/core/gpu/cl/ClKernelLibrary.cpp +++ b/src/core/gpu/cl/ClKernelLibrary.cpp @@ -397,8 +397,10 @@ const std::map ClKernelLibrary::_kernel_program_map = { "reduction_operation_y", "reduction_operation.cl" }, { "reduction_operation_z", "reduction_operation.cl" }, { "reduction_operation_w", "reduction_operation.cl" }, - { "remap_nearest_neighbour", "remap.cl" }, - { "remap_bilinear", "remap.cl" }, + { "remap_nearest_neighbour_nchw", "remap.cl" }, + { "remap_bilinear_nchw", "remap.cl" }, + { "remap_nearest_neighbour_nhwc", "remap.cl" }, + { "remap_bilinear_nhwc", "remap.cl" }, { "reorg_layer_nchw", "reorg_layer.cl" }, { "reorg_layer_nhwc", "reorg_layer.cl" }, { "reshape_layer", "reshape_layer.cl" }, diff --git a/src/runtime/CL/functions/CLRemap.cpp b/src/runtime/CL/functions/CLRemap.cpp index a4cfc60368..0a1f864543 100644 --- a/src/runtime/CL/functions/CLRemap.cpp +++ b/src/runtime/CL/functions/CLRemap.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,14 +44,8 @@ void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *inpu BorderMode border_mode, uint8_t constant_border_value) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported"); - auto k = std::make_unique(); - k->configure(compile_context, input, map_x, map_y, output, policy, border_mode == BorderMode::UNDEFINED); + k->configure(compile_context, input, map_x, map_y, output, RemapInfo{ policy, border_mode, PixelValue(constant_border_value) }); _kernel = std::move(k); _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } diff --git a/tests/validation/CL/Remap.cpp b/tests/validation/CL/Remap.cpp index f73073105b..bbb3cecea9 100644 --- a/tests/validation/CL/Remap.cpp +++ b/tests/validation/CL/Remap.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,30 +44,30 @@ namespace validation namespace { constexpr AbsoluteTolerance tolerance_value(1); -constexpr float tolerance_number = 0.2f; } // namespace TEST_SUITE(CL) TEST_SUITE(Remap) template using CLRemapFixture = RemapValidationFixture; +template +using CLRemapLayoutFixture = RemapValidationMixedLayoutFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLRemapFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - framework::dataset::make("DataType", - DataType::U8)), - framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLRemapLayoutFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), + framework::dataset::make("DataType", DataType::U8)), + framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { // Validate output - validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); + validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value); } FIXTURE_DATA_TEST_CASE(RunLarge, CLRemapFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })), - framework::dataset::make("DataType", - DataType::U8)), + framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT }))) { // Validate output - validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number); + validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value); } TEST_SUITE_END() diff --git a/tests/validation/fixtures/RemapFixture.h b/tests/validation/fixtures/RemapFixture.h index 14ea23b247..2cb8e67f62 100644 --- a/tests/validation/fixtures/RemapFixture.h +++ b/tests/validation/fixtures/RemapFixture.h @@ -42,18 +42,19 @@ namespace test namespace validation { template -class RemapValidationFixture : public framework::Fixture +class RemapValidationGenericFixture : public framework::Fixture { public: template - void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode) + void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, DataLayout data_layout = DataLayout::NCHW) { std::mt19937 gen(library->seed()); std::uniform_int_distribution distribution(0, 255); const T constant_border_value = static_cast(distribution(gen)); - _target = compute_target(shape, policy, data_type, border_mode, constant_border_value); - _reference = compute_reference(shape, policy, data_type, border_mode, constant_border_value); + _data_layout = data_layout; + _target = compute_target(shape, policy, data_type, border_mode, constant_border_value); + _reference = compute_reference(shape, policy, data_type, border_mode, constant_border_value); } protected: @@ -64,13 +65,18 @@ protected: library->fill(tensor, distribution, i); } - TensorType compute_target(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value) + TensorType compute_target(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value) { + if(_data_layout == DataLayout::NHWC) + { + permute(shape, PermutationVector(2U, 0U, 1U)); + } + // Create tensors - TensorType src = create_tensor(shape, data_type); - TensorType map_x = create_tensor(shape, DataType::F32); - TensorType map_y = create_tensor(shape, DataType::F32); - TensorType dst = create_tensor(shape, data_type); + TensorType src = create_tensor(shape, data_type, 1, QuantizationInfo(), _data_layout); + TensorType map_x = create_tensor(shape, DataType::F32, 1, QuantizationInfo(), _data_layout); + TensorType map_y = create_tensor(shape, DataType::F32, 1, QuantizationInfo(), _data_layout); + TensorType dst = create_tensor(shape, data_type, 1, QuantizationInfo(), _data_layout); // Create and configure function FunctionType remap; @@ -93,9 +99,11 @@ protected: ARM_COMPUTE_ASSERT(!dst.info()->is_resizable()); // Fill tensors + int max_val = std::max({ shape.x(), shape.y(), shape.z() }); + fill(AccessorType(src), 0, 0, 255); - fill(AccessorType(map_x), 1, -5, shape.x() + 5); - fill(AccessorType(map_y), 2, -5, shape.y() + 5); + fill(AccessorType(map_x), 1, -5, max_val); + fill(AccessorType(map_y), 2, -5, max_val); // Compute function remap.run(); @@ -103,7 +111,7 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value) + SimpleTensor compute_reference(const TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value) { ARM_COMPUTE_ERROR_ON(data_type != DataType::U8); @@ -116,9 +124,11 @@ protected: _valid_mask = SimpleTensor { shape, data_type }; // Fill reference + int max_val = std::max({ shape.x(), shape.y(), shape.z() }); + fill(src, 0, 0, 255); - fill(map_x, 1, -5, shape.x() + 5); - fill(map_y, 2, -5, shape.y() + 5); + fill(map_x, 1, -5, max_val); + fill(map_y, 2, -5, max_val); // Compute reference return reference::remap(src, map_x, map_y, _valid_mask, policy, border_mode, constant_border_value); @@ -127,7 +137,31 @@ protected: TensorType _target{}; SimpleTensor _reference{}; SimpleTensor _valid_mask{}; + DataLayout _data_layout{}; +}; + +template +class RemapValidationFixture : public RemapValidationGenericFixture +{ +public: + template + void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode) + { + RemapValidationGenericFixture::setup(shape, policy, data_type, border_mode); + } }; + +template +class RemapValidationMixedLayoutFixture : public RemapValidationGenericFixture +{ +public: + template + void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, DataLayout data_layout) + { + RemapValidationGenericFixture::setup(shape, policy, data_type, border_mode, data_layout); + } +}; + } // namespace validation } // namespace test } // namespace arm_compute -- cgit v1.2.1