about | summary | refs | log | tree | commit | diff
path: root/src/core/CL/kernels/CLRemapKernel.cpp
diff options
context:
space:
mode:
author: Frederick Liardet <frederick.liardet@arm.com> 2021-04-22 21:13:21 +0100
committer: frederick.liardet <frederick.liardet@arm.com> 2021-06-15 11:24:53 +0000
commit: 36dff9f81e3a95aea19fcc7246a4896930a14bc6 (patch)
tree: 64f3194e806bb4a8a5e6f2f30c202295c5e853c6 /src/core/CL/kernels/CLRemapKernel.cpp
parent: ee301b384f4aeb697a5c249b8bb848d784146582 (diff)
download: ComputeLibrary-36dff9f81e3a95aea19fcc7246a4896930a14bc6.tar.gz
Add NHWC support to CLRemap
Add NHWC support to CLRemap, also add relevant tests.

Partially resolves COMPMID-4335.

Change-Id: I119bea99be497fb85d5cd83a10f8d4e8e1f97f17
Signed-off-by: Freddie Liardet <frederick.liardet@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5773
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLRemapKernel.cpp')
-rw-r--r-- src/core/CL/kernels/CLRemapKernel.cpp 142
1 files changed, 96 insertions, 46 deletions
diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp
index 335be9b3e7..6edd744db7 100644
--- a/src/core/CL/kernels/CLRemapKernel.cpp
+++ b/src/core/CL/kernels/CLRemapKernel.cpp
@@ -34,81 +34,131 @@
#include <algorithm>
-using namespace arm_compute;
-
+namespace arm_compute
+{
CLRemapKernel::CLRemapKernel()
- : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr)
+ : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr), _data_layout(DataLayout::NCHW)
{
}
BorderSize CLRemapKernel::border_size() const
{
- return BorderSize(1);
+ return _data_layout == DataLayout::NCHW ? BorderSize(1) : BorderSize(0);
+}
+
+template <class T>
+void CLRemapKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value)
+{
+ T value;
+ constant_border_value.get(value);
+ ICLKernel::add_argument<T>(idx, static_cast<T>(value));
}
-void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined)
+Status CLRemapKernel::validate(const ITensorInfo *input, const ITensorInfo *map_x, const ITensorInfo *map_y, ITensorInfo *output, RemapInfo info)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, map_x, map_y, output, policy, border_undefined);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, map_x, map_y, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.policy == InterpolationPolicy::AREA, "Area interpolation is not supported!");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.border_mode != BorderMode::CONSTANT && info.border_mode != BorderMode::UNDEFINED, "Border mode not supported");
+ return Status{};
}
-void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy,
- bool border_undefined)
+void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, RemapInfo info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported!");
- ARM_COMPUTE_UNUSED(border_undefined);
-
- _input = input;
- _output = output;
- _map_x = map_x;
- _map_y = map_y;
+ CLRemapKernel::validate(input->info(), map_x->info(), map_y->info(), output->info(), info);
+
+ _input = input;
+ _output = output;
+ _map_x = map_x;
+ _map_y = map_y;
+ _data_layout = input->info()->data_layout();
+
+ const bool is_nhwc = _data_layout == DataLayout::NHWC;
+ const bool is_constant_border = info.border_mode == BorderMode::CONSTANT;
// Create kernel
- std::set<std::string> build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())) };
- std::string interpolation_name = string_from_interpolation_policy(policy);
- std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
- std::string kernel_name = "remap_" + interpolation_name;
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
+ CLBuildOptions build_opts;
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option_if(is_nhwc, "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
+ build_opts.add_option_if(is_constant_border, "-DCONSTANT_BORDER");
- // Configure window
- constexpr unsigned int num_elems_processed_per_iteration = 4;
+ const std::string interpolation_name = lower_string(string_from_interpolation_policy(info.policy));
+ const std::string kernel_name = "remap_" + interpolation_name + "_" + lower_string(string_from_data_layout(_data_layout));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
- const int total_right = ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration);
- const int access_right = total_right + (((total_right - input->info()->dimension(0)) == 0) ? border_size().right : 0);
+ const unsigned int num_elems_processed_per_iteration = is_nhwc ? 1 : 4;
+ const int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+ const int input_height = input->info()->dimension(idx_height);
+ const int input_width = input->info()->dimension(idx_width);
- Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom);
+ // Configure window
+ Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration));
+
+ // Update padding in NCHW case
+ if(_data_layout == DataLayout::NCHW)
+ {
+ const int total_right = ceil_to_multiple(input_width, num_elems_processed_per_iteration);
+ const int access_right = total_right + (((total_right - input_width) == 0) ? border_size().right : 0);
+ AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input_height + border_size().bottom);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
+ update_window_and_padding(win, input_access, output_access);
+ }
ICLKernel::configure_internal(win);
// Set static arguments
- unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg<cl_float>(idx++, input->info()->dimension(0));
- _kernel.setArg<cl_float>(idx++, input->info()->dimension(1));
+ unsigned int idx = 4 * (is_nhwc ? num_arguments_per_4D_tensor() : num_arguments_per_2D_tensor());
+ _kernel.setArg<cl_float>(idx++, input_width);
+ _kernel.setArg<cl_float>(idx++, input_height);
+ if(is_nhwc && is_constant_border)
+ {
+ set_constant_border<uint8_t>(idx, info.constant_border_value);
+ }
}
void CLRemapKernel::run(const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
-
- do
+ switch(_data_layout)
{
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument(idx, _output, slice);
- add_2D_tensor_argument(idx, _map_x, slice);
- add_2D_tensor_argument(idx, _map_y, slice);
- enqueue(queue, *this, slice, lws_hint());
+ case DataLayout::NCHW:
+ {
+ Window slice = window.first_slice_window_2D();
+ do
+ {
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, slice);
+ add_2D_tensor_argument(idx, _output, slice);
+ add_2D_tensor_argument(idx, _map_x, slice);
+ add_2D_tensor_argument(idx, _map_y, slice);
+ enqueue(queue, *this, slice, lws_hint());
+
+ }
+ while(window.slide_window_slice_2D(slice));
+ break;
+ }
+ case DataLayout::NHWC:
+ {
+ Window collapsed = window.collapse(ICLKernel::window(), Window::DimZ);
+ Window slice = collapsed.first_slice_window_4D();
+
+ unsigned int idx = 0;
+ add_4D_tensor_argument(idx, _input, slice);
+ add_4D_tensor_argument(idx, _output, slice);
+ add_4D_tensor_argument(idx, _map_x, slice);
+ add_4D_tensor_argument(idx, _map_y, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Invalid Data layout");
}
- while(window.slide_window_slice_2D(slice));
}
+} // namespace arm_compute