From 36dff9f81e3a95aea19fcc7246a4896930a14bc6 Mon Sep 17 00:00:00 2001 From: Frederick Liardet Date: Thu, 22 Apr 2021 21:13:21 +0100 Subject: Add NHWC support to CLRemap Add NHWC support to CLRemap, also add relevant tests. Partially resolves COMPMID-4335. Change-Id: I119bea99be497fb85d5cd83a10f8d4e8e1f97f17 Signed-off-by: Freddie Liardet Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5773 Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/CL/kernels/CLRemapKernel.cpp | 142 +++++++++++++++++++++++----------- 1 file changed, 96 insertions(+), 46 deletions(-) (limited to 'src/core/CL/kernels/CLRemapKernel.cpp') diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp index 335be9b3e7..6edd744db7 100644 --- a/src/core/CL/kernels/CLRemapKernel.cpp +++ b/src/core/CL/kernels/CLRemapKernel.cpp @@ -34,81 +34,131 @@ #include -using namespace arm_compute; - +namespace arm_compute +{ CLRemapKernel::CLRemapKernel() - : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr) + : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr), _data_layout(DataLayout::NCHW) { } BorderSize CLRemapKernel::border_size() const { - return BorderSize(1); + return _data_layout == DataLayout::NCHW ? 
BorderSize(1) : BorderSize(0); +} + +template <typename T> +void CLRemapKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value) +{ + T value; + constant_border_value.get(value); + ICLKernel::add_argument(idx, static_cast<T>(value)); } -void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined) +Status CLRemapKernel::validate(const ITensorInfo *input, const ITensorInfo *map_x, const ITensorInfo *map_y, ITensorInfo *output, RemapInfo info) { - configure(CLKernelLibrary::get().get_compile_context(), input, map_x, map_y, output, policy, border_undefined); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, map_x, map_y, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.policy == InterpolationPolicy::AREA, "Area interpolation is not supported!"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.border_mode != BorderMode::CONSTANT && info.border_mode != BorderMode::UNDEFINED, "Border mode not supported"); + return Status{}; } -void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, - bool border_undefined) +void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, RemapInfo info) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32); - 
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported!"); - ARM_COMPUTE_UNUSED(border_undefined); - - _input = input; - _output = output; - _map_x = map_x; - _map_y = map_y; + CLRemapKernel::validate(input->info(), map_x->info(), map_y->info(), output->info(), info); + + _input = input; + _output = output; + _map_x = map_x; + _map_y = map_y; + _data_layout = input->info()->data_layout(); + + const bool is_nhwc = _data_layout == DataLayout::NHWC; + const bool is_constant_border = info.border_mode == BorderMode::CONSTANT; // Create kernel - std::set build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())) }; - std::string interpolation_name = string_from_interpolation_policy(policy); - std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower); - std::string kernel_name = "remap_" + interpolation_name; - _kernel = create_kernel(compile_context, kernel_name, build_opts); + CLBuildOptions build_opts; + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option_if(is_nhwc, "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); + build_opts.add_option_if(is_constant_border, "-DCONSTANT_BORDER"); - // Configure window - constexpr unsigned int num_elems_processed_per_iteration = 4; + const std::string interpolation_name = lower_string(string_from_interpolation_policy(info.policy)); + const std::string kernel_name = "remap_" + interpolation_name + "_" + lower_string(string_from_data_layout(_data_layout)); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); - const int total_right = ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration); - const int access_right = total_right + (((total_right - input->info()->dimension(0)) == 0) ? 
border_size().right : 0); + const unsigned int num_elems_processed_per_iteration = is_nhwc ? 1 : 4; + const int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + const int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const int input_height = input->info()->dimension(idx_height); + const int input_width = input->info()->dimension(idx_width); - Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom); + // Configure window + Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration)); + + // Update padding in NCHW case + if(_data_layout == DataLayout::NCHW) + { + const int total_right = ceil_to_multiple(input_width, num_elems_processed_per_iteration); + const int access_right = total_right + (((total_right - input_width) == 0) ? border_size().right : 0); + AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input_height + border_size().bottom); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); + update_window_and_padding(win, input_access, output_access); + } ICLKernel::configure_internal(win); // Set static arguments - unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, input->info()->dimension(0)); - _kernel.setArg(idx++, input->info()->dimension(1)); + unsigned int idx = 4 * (is_nhwc ? 
num_arguments_per_4D_tensor() : num_arguments_per_2D_tensor()); + _kernel.setArg(idx++, input_width); + _kernel.setArg(idx++, input_height); + if(is_nhwc && is_constant_border) + { + set_constant_border<uint8_t>(idx, info.constant_border_value); + } } void CLRemapKernel::run(const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_2D(); - - do + switch(_data_layout) { - unsigned int idx = 0; - add_2D_tensor_argument(idx, _input, slice); - add_2D_tensor_argument(idx, _output, slice); - add_2D_tensor_argument(idx, _map_x, slice); - add_2D_tensor_argument(idx, _map_y, slice); - enqueue(queue, *this, slice, lws_hint()); + case DataLayout::NCHW: + { + Window slice = window.first_slice_window_2D(); + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, slice); + add_2D_tensor_argument(idx, _output, slice); + add_2D_tensor_argument(idx, _map_x, slice); + add_2D_tensor_argument(idx, _map_y, slice); + enqueue(queue, *this, slice, lws_hint()); + + } + while(window.slide_window_slice_2D(slice)); + break; + } + case DataLayout::NHWC: + { + Window collapsed = window.collapse(ICLKernel::window(), Window::DimZ); + Window slice = collapsed.first_slice_window_4D(); + + unsigned int idx = 0; + add_4D_tensor_argument(idx, _input, slice); + add_4D_tensor_argument(idx, _output, slice); + add_4D_tensor_argument(idx, _map_x, slice); + add_4D_tensor_argument(idx, _map_y, slice); + enqueue(queue, *this, slice, lws_hint()); + break; + } + default: + ARM_COMPUTE_ERROR("Invalid Data layout"); } - while(window.slide_window_slice_2D(slice)); } +} // namespace arm_compute -- cgit v1.2.1