aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Liardet <frederick.liardet@arm.com>2021-04-22 21:13:21 +0100
committerfrederick.liardet <frederick.liardet@arm.com>2021-06-15 11:24:53 +0000
commit36dff9f81e3a95aea19fcc7246a4896930a14bc6 (patch)
tree64f3194e806bb4a8a5e6f2f30c202295c5e853c6
parentee301b384f4aeb697a5c249b8bb848d784146582 (diff)
downloadComputeLibrary-36dff9f81e3a95aea19fcc7246a4896930a14bc6.tar.gz
Add NHWC support to CLRemap
Add NHWC support to CLRemap, also add relevant tests. Partially resolves COMPMID-4335. Change-Id: I119bea99be497fb85d5cd83a10f8d4e8e1f97f17 Signed-off-by: Freddie Liardet <frederick.liardet@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5773 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/KernelDescriptors.h12
-rw-r--r--arm_compute/core/Types.h3
-rw-r--r--arm_compute/runtime/CL/functions/CLRemap.h4
-rw-r--r--src/core/CL/cl_kernels/remap.cl162
-rw-r--r--src/core/CL/kernels/CLRemapKernel.cpp142
-rw-r--r--src/core/CL/kernels/CLRemapKernel.h47
-rw-r--r--src/core/gpu/cl/ClKernelLibrary.cpp6
-rw-r--r--src/runtime/CL/functions/CLRemap.cpp10
-rw-r--r--tests/validation/CL/Remap.cpp20
-rw-r--r--tests/validation/fixtures/RemapFixture.h62
10 files changed, 362 insertions, 106 deletions
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 1a7ead4700..6c1fc74b1e 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -210,5 +210,17 @@ struct ScaleKernelInfo
bool align_corners; /**< Align corners of input and output */
DataLayout data_layout; /**< Data layout to use */
};
+
+struct RemapInfo
+{
+ RemapInfo() = default;
+ RemapInfo(InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value)
+ : policy(policy), border_mode(border_mode), constant_border_value(constant_border_value)
+ {
+ }
+ InterpolationPolicy policy;
+ BorderMode border_mode;
+ PixelValue constant_border_value;
+};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H */
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 2dc9a77c39..48c87cd8ac 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -105,9 +105,6 @@ enum class SamplingPolicy
TOP_LEFT /**< Samples are taken at pixel top left corner */
};
-/** Constant value of the border pixels when using BorderMode::CONSTANT */
-constexpr uint8_t CONSTANT_BORDER_VALUE = 199;
-
/** [DataLayout enum definition] **/
/** Supported tensor data layouts */
diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h
index 8466a79bb9..f69b045c9b 100644
--- a/arm_compute/runtime/CL/functions/CLRemap.h
+++ b/arm_compute/runtime/CL/functions/CLRemap.h
@@ -58,7 +58,7 @@ public:
* @param[in] map_y Map for Y coords. Data types supported: F32.
* @param[out] output Output tensor. Data types supported: U8.
* @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported.
- * @param[in] border_mode Border mode to use on the input tensor.
+ * @param[in] border_mode Border mode to use on the input tensor. Only CONSTANT and UNDEFINED are supported.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*
*/
@@ -72,7 +72,7 @@ public:
* @param[in] map_y Map for Y coords. Data types supported: F32.
* @param[out] output Output tensor. Data types supported: U8.
* @param[in] policy Interpolation policy to use. Only NEAREST and BILINEAR are supported.
- * @param[in] border_mode Border mode to use on the input tensor.
+ * @param[in] border_mode Border mode to use on the input tensor. Only CONSTANT and UNDEFINED are supported.
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*
*/
diff --git a/src/core/CL/cl_kernels/remap.cl b/src/core/CL/cl_kernels/remap.cl
index 0f013c5127..8ea4e84e96 100644
--- a/src/core/CL/cl_kernels/remap.cl
+++ b/src/core/CL/cl_kernels/remap.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017, 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -56,7 +56,7 @@
* @param[in] width Width of the input image
* @param[in] height Height of the input image
*/
-__kernel void remap_nearest_neighbour(
+__kernel void remap_nearest_neighbour_nchw(
IMAGE_DECLARATION(in),
IMAGE_DECLARATION(out),
IMAGE_DECLARATION(mapx),
@@ -73,7 +73,6 @@ __kernel void remap_nearest_neighbour(
float4 mapy_coords = vload4(0, (__global float *)mapy.ptr);
float8 map_coords = (float8)(mapx_coords.s0, mapy_coords.s0, mapx_coords.s1, mapy_coords.s1,
mapx_coords.s2, mapy_coords.s2, mapx_coords.s3, mapy_coords.s3);
- map_coords += (float8)(0.5f);
vstore4(read_texels4(&in, convert_int8(clamp_to_border(map_coords, width, height))), 0, out.ptr);
}
@@ -110,7 +109,7 @@ __kernel void remap_nearest_neighbour(
* @param[in] width Width of the input image
* @param[in] height Height of the input image
*/
-__kernel void remap_bilinear(
+__kernel void remap_bilinear_nchw(
IMAGE_DECLARATION(in),
IMAGE_DECLARATION(out),
IMAGE_DECLARATION(mapx),
@@ -130,3 +129,158 @@ __kernel void remap_bilinear(
vstore4(bilinear_interpolate(&in, clamp_to_border(map_coords, width, height), width, height), 0, out.ptr);
}
+
+/** Performs a remapping of an input image to an output given two remapping image using nearest neighbor as interpolation.
+ * Also applies constant border value, "border_val", if "CONSTANT_BORDER" is set.
+ *
+ * This kernel performs remapping with this method of pixel coordinate translation:
+ * out(x,y) = in(mapx(x,y), mapy(x,y));
+ *
+ * @param[in] in_ptr Pointer to the source image. Supported data types: U8.
+ * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes)
+ * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] in_step_y in_stride_y * number of elements along Y processed per work item (in bytes)
+ * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image
+ * @param[out] out_ptr Pointer to the destination image. Supported data types: U8.
+ * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes)
+ * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes)
+ * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image
+ * @param[in] mapx_ptr Pointer to the x remapping image. Supported data types: F32.
+ * @param[in] mapx_stride_x Stride of the remapping image in X dimension (in bytes)
+ * @param[in] mapx_step_x mapx_stride_x * number of elements along X processed per work item (in bytes)
+ * @param[in] mapx_stride_y Stride of the remapping image in Y dimension (in bytes)
+ * @param[in] mapx_step_y mapy_stride_y * number of elements along Y processed per work item (in bytes)
+ * @param[in] mapx_offset_first_element_in_bytes Offset of the first element in the remapping image
+ * @param[in] mapy_ptr Pointer to the x remapping image. Supported data types: F32.
+ * @param[in] mapy_stride_x Stride of the remapping image in X dimension (in bytes)
+ * @param[in] mapy_step_x mapy_stride_x * number of elements along X processed per work item (in bytes)
+ * @param[in] mapy_stride_y Stride of the remapping image in Y dimension (in bytes)
+ * @param[in] mapy_step_y mapy_stride_y * number of elements along Y processed per work item (in bytes)
+ * @param[in] mapy_offset_first_element_in_bytes Offset of the first element in the remapping image
+ * @param[in] width Width of the input image
+ * @param[in] height Height of the input image
+ */
+
+#if defined(DEPTH_OUT)
+
+__kernel void remap_nearest_neighbour_nhwc(
+ TENSOR4D_DECLARATION(in),
+ TENSOR4D_DECLARATION(out),
+ TENSOR4D_DECLARATION(mapx),
+ TENSOR4D_DECLARATION(mapy),
+ const float width,
+ const float height
+#ifdef CONSTANT_BORDER
+ ,
+ const DATA_TYPE border_val
+#endif // CONSTANT_BORDER
+)
+{
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0);
+ Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT);
+ Tensor4D mapx = CONVERT_TO_TENSOR4D_STRUCT(mapx, DEPTH_OUT);
+ Tensor4D mapy = CONVERT_TO_TENSOR4D_STRUCT(mapy, DEPTH_OUT);
+
+ float mapx_coord = (float) * (__global float *)mapx.ptr;
+ float mapy_coord = (float) * (__global float *)mapy.ptr;
+
+#ifdef CONSTANT_BORDER
+ if(mapx_coord < 0 || mapx_coord > width - 1 || mapy_coord < 0 || mapy_coord > height - 1)
+ {
+ *((__global DATA_TYPE *)out.ptr) = border_val;
+ return;
+ }
+#else // CONSTANT_BORDER
+ mapx_coord = clamp(mapx_coord, 0.0f, width - 1);
+ mapy_coord = clamp(mapy_coord, 0.0f, height - 1);
+#endif // CONSTANT_BORDER
+ *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(mapx_coord), convert_int(mapy_coord), (get_global_id(2) / DEPTH_OUT)));
+}
+
+/** Performs a remapping of an input image to an output given two remapping image using bilinear as interpolation.
+ * Also applies constant border value, "border_val", if "CONSTANT_BORDER" is set.
+ *
+ * This kernel performs remapping with this method of pixel coordinate translation:
+ * out(x,y) = in(mapx(x,y), mapy(x,y));
+ *
+ * @param[in] in_ptr Pointer to the source image. Supported data types: U8.
+ * @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] in_step_x in_stride_x * number of elements along X processed per work item (in bytes)
+ * @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] in_step_y in_stride_y * number of elements along Y processed per work item (in bytes)
+ * @param[in] in_offset_first_element_in_bytes Offset of the first element in the source image
+ * @param[out] out_ptr Pointer to the destination image. Supported data types: U8.
+ * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] out_step_x out_stride_x * number of elements along X processed per work item (in bytes)
+ * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] out_step_y out_stride_y * number of elements along Y processed per work item (in bytes)
+ * @param[in] out_offset_first_element_in_bytes Offset of the first element in the destination image
+ * @param[in] mapx_ptr Pointer to the x remapping image. Supported data types: F32.
+ * @param[in] mapx_stride_x Stride of the remapping image in X dimension (in bytes)
+ * @param[in] mapx_step_x mapx_stride_x * number of elements along X processed per work item (in bytes)
+ * @param[in] mapx_stride_y Stride of the remapping image in Y dimension (in bytes)
+ * @param[in] mapx_step_y mapy_stride_y * number of elements along Y processed per work item (in bytes)
+ * @param[in] mapx_offset_first_element_in_bytes Offset of the first element in the remapping image
+ * @param[in] mapy_ptr Pointer to the x remapping image. Supported data types: F32.
+ * @param[in] mapy_stride_x Stride of the remapping image in X dimension (in bytes)
+ * @param[in] mapy_step_x mapy_stride_x * number of elements along X processed per work item (in bytes)
+ * @param[in] mapy_stride_y Stride of the remapping image in Y dimension (in bytes)
+ * @param[in] mapy_step_y mapy_stride_y * number of elements along Y processed per work item (in bytes)
+ * @param[in] mapy_offset_first_element_in_bytes Offset of the first element in the remapping image
+ * @param[in] width Width of the input image
+ * @param[in] height Height of the input image
+ */
+__kernel void remap_bilinear_nhwc(
+ TENSOR4D_DECLARATION(in),
+ TENSOR4D_DECLARATION(out),
+ TENSOR4D_DECLARATION(mapx),
+ TENSOR4D_DECLARATION(mapy),
+ const float width,
+ const float height
+#ifdef CONSTANT_BORDER
+ ,
+ const DATA_TYPE border_val
+#endif // CONSTANT_BORDER
+)
+{
+ Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0);
+ Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT);
+ Tensor4D mapx = CONVERT_TO_TENSOR4D_STRUCT(mapx, DEPTH_OUT);
+ Tensor4D mapy = CONVERT_TO_TENSOR4D_STRUCT(mapy, DEPTH_OUT);
+
+ float mapx_coord = (float) * (__global float *)mapx.ptr;
+ float mapy_coord = (float) * (__global float *)mapy.ptr;
+
+#ifdef CONSTANT_BORDER
+ if(mapx_coord < 0 || mapx_coord > width - 1 || mapy_coord < 0 || mapy_coord > height - 1)
+ {
+ *((__global DATA_TYPE *)out.ptr) = border_val;
+ return;
+ }
+#endif // CONSTANT_BORDER
+
+ const float new_xf = floor(mapx_coord);
+ const float new_yf = floor(mapy_coord);
+ const float clamped_x = clamp(new_xf, 0.0f, width - 1);
+ const float clamped_x1 = clamp(new_xf + 1, 0.0f, width - 1);
+ const float clamped_y = clamp(new_yf, 0.0f, height - 1);
+ const float clamped_y1 = clamp(new_yf + 1, 0.0f, height - 1);
+
+ float4 ins = (float4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))));
+
+ const float a = mapx_coord - new_xf;
+ const float b = 1.f - a;
+ const float a1 = mapy_coord - new_yf;
+ const float b1 = 1.f - a1;
+ const float fr = ((ins.s0 * b * b1) + (ins.s1 * a * b1) + (ins.s2 * b * a1) + (ins.s3 * a * a1));
+
+ *((__global DATA_TYPE *)out.ptr) = CONVERT(fr, DATA_TYPE);
+}
+
+#endif // DEPTH_OUT
diff --git a/src/core/CL/kernels/CLRemapKernel.cpp b/src/core/CL/kernels/CLRemapKernel.cpp
index 335be9b3e7..6edd744db7 100644
--- a/src/core/CL/kernels/CLRemapKernel.cpp
+++ b/src/core/CL/kernels/CLRemapKernel.cpp
@@ -34,81 +34,131 @@
#include <algorithm>
-using namespace arm_compute;
-
+namespace arm_compute
+{
CLRemapKernel::CLRemapKernel()
- : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr)
+ : _input(nullptr), _output(nullptr), _map_x(nullptr), _map_y(nullptr), _data_layout(DataLayout::NCHW)
{
}
BorderSize CLRemapKernel::border_size() const
{
- return BorderSize(1);
+ return _data_layout == DataLayout::NCHW ? BorderSize(1) : BorderSize(0);
+}
+
+template <class T>
+void CLRemapKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value)
+{
+ T value;
+ constant_border_value.get(value);
+ ICLKernel::add_argument<T>(idx, static_cast<T>(value));
}
-void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined)
+Status CLRemapKernel::validate(const ITensorInfo *input, const ITensorInfo *map_x, const ITensorInfo *map_y, ITensorInfo *output, RemapInfo info)
{
- configure(CLKernelLibrary::get().get_compile_context(), input, map_x, map_y, output, policy, border_undefined);
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, map_x, map_y, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.policy == InterpolationPolicy::AREA, "Area interpolation is not supported!");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.border_mode != BorderMode::CONSTANT && info.border_mode != BorderMode::UNDEFINED, "Border mode not supported");
+ return Status{};
}
-void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy,
- bool border_undefined)
+void CLRemapKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, RemapInfo info)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported!");
- ARM_COMPUTE_UNUSED(border_undefined);
-
- _input = input;
- _output = output;
- _map_x = map_x;
- _map_y = map_y;
+ CLRemapKernel::validate(input->info(), map_x->info(), map_y->info(), output->info(), info);
+
+ _input = input;
+ _output = output;
+ _map_x = map_x;
+ _map_y = map_y;
+ _data_layout = input->info()->data_layout();
+
+ const bool is_nhwc = _data_layout == DataLayout::NHWC;
+ const bool is_constant_border = info.border_mode == BorderMode::CONSTANT;
// Create kernel
- std::set<std::string> build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())) };
- std::string interpolation_name = string_from_interpolation_policy(policy);
- std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
- std::string kernel_name = "remap_" + interpolation_name;
- _kernel = create_kernel(compile_context, kernel_name, build_opts);
+ CLBuildOptions build_opts;
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option_if(is_nhwc, "-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
+ build_opts.add_option_if(is_constant_border, "-DCONSTANT_BORDER");
- // Configure window
- constexpr unsigned int num_elems_processed_per_iteration = 4;
+ const std::string interpolation_name = lower_string(string_from_interpolation_policy(info.policy));
+ const std::string kernel_name = "remap_" + interpolation_name + "_" + lower_string(string_from_data_layout(_data_layout));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
- const int total_right = ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration);
- const int access_right = total_right + (((total_right - input->info()->dimension(0)) == 0) ? border_size().right : 0);
+ const unsigned int num_elems_processed_per_iteration = is_nhwc ? 1 : 4;
+ const int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
+ const int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+ const int input_height = input->info()->dimension(idx_height);
+ const int input_width = input->info()->dimension(idx_width);
- Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration));
- AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom);
+ // Configure window
+ Window win = calculate_max_window(*_output->info(), Steps(num_elems_processed_per_iteration));
+
+ // Update padding in NCHW case
+ if(_data_layout == DataLayout::NCHW)
+ {
+ const int total_right = ceil_to_multiple(input_width, num_elems_processed_per_iteration);
+ const int access_right = total_right + (((total_right - input_width) == 0) ? border_size().right : 0);
+ AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input_height + border_size().bottom);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
+ update_window_and_padding(win, input_access, output_access);
+ }
ICLKernel::configure_internal(win);
// Set static arguments
- unsigned int idx = 4 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
- _kernel.setArg<cl_float>(idx++, input->info()->dimension(0));
- _kernel.setArg<cl_float>(idx++, input->info()->dimension(1));
+ unsigned int idx = 4 * (is_nhwc ? num_arguments_per_4D_tensor() : num_arguments_per_2D_tensor());
+ _kernel.setArg<cl_float>(idx++, input_width);
+ _kernel.setArg<cl_float>(idx++, input_height);
+ if(is_nhwc && is_constant_border)
+ {
+ set_constant_border<uint8_t>(idx, info.constant_border_value);
+ }
}
void CLRemapKernel::run(const Window &window, cl::CommandQueue &queue)
{
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_2D();
-
- do
+ switch(_data_layout)
{
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice);
- add_2D_tensor_argument(idx, _output, slice);
- add_2D_tensor_argument(idx, _map_x, slice);
- add_2D_tensor_argument(idx, _map_y, slice);
- enqueue(queue, *this, slice, lws_hint());
+ case DataLayout::NCHW:
+ {
+ Window slice = window.first_slice_window_2D();
+ do
+ {
+ unsigned int idx = 0;
+ add_2D_tensor_argument(idx, _input, slice);
+ add_2D_tensor_argument(idx, _output, slice);
+ add_2D_tensor_argument(idx, _map_x, slice);
+ add_2D_tensor_argument(idx, _map_y, slice);
+ enqueue(queue, *this, slice, lws_hint());
+
+ }
+ while(window.slide_window_slice_2D(slice));
+ break;
+ }
+ case DataLayout::NHWC:
+ {
+ Window collapsed = window.collapse(ICLKernel::window(), Window::DimZ);
+ Window slice = collapsed.first_slice_window_4D();
+
+ unsigned int idx = 0;
+ add_4D_tensor_argument(idx, _input, slice);
+ add_4D_tensor_argument(idx, _output, slice);
+ add_4D_tensor_argument(idx, _map_x, slice);
+ add_4D_tensor_argument(idx, _map_y, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Invalid Data layout");
}
- while(window.slide_window_slice_2D(slice));
}
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLRemapKernel.h b/src/core/CL/kernels/CLRemapKernel.h
index 8efcf091ed..1e3a4ad13f 100644
--- a/src/core/CL/kernels/CLRemapKernel.h
+++ b/src/core/CL/kernels/CLRemapKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,6 +24,7 @@
#ifndef ARM_COMPUTE_CLREMAPKERNEL_H
#define ARM_COMPUTE_CLREMAPKERNEL_H
+#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "src/core/CL/ICLKernel.h"
@@ -47,25 +48,36 @@ public:
CLRemapKernel &operator=(CLRemapKernel &&) = default;
/** Initialize the kernel's input, output and border mode.
*
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] map_x Map for X coordinates. Data types supported: F32.
- * @param[in] map_y Map for Y coordinates. Data types supported: F32.
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] map_x Map for X coordinates. Data types supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data types supported: F32.
+ * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+ * @param[in] info RemapInfo struct:
+ * - policy Interpolation policy to use. Only NEAREST and BILINEAR are supported.
+ * - border_mode Border mode to use on the input tensor. Only CONSTANT and UNDEFINED are supported.
+ * - constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
*/
- void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
- /** Initialize the kernel's input, output and border mode.
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, RemapInfo info);
+ /** Validate the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] map_x Map for X coordinates. Data types supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data types supported: F32.
+ * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+ * @param[in] info RemapInfo struct:
+ * - policy Interpolation policy to use. Only NEAREST and BILINEAR are supported.
+ * - border_mode Border mode to use on the input tensor. Only CONSTANT and UNDEFINED are supported.
+ * - constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *map_x, const ITensorInfo *map_y, ITensorInfo *output, RemapInfo info);
+ /** Function to set the constant value on fill border kernel depending on type.
*
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] map_x Map for X coordinates. Data types supported: F32.
- * @param[in] map_y Map for Y coordinates. Data types supported: F32.
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] idx Index of the kernel argument to set.
+ * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+ template <class T>
+ void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
@@ -76,6 +88,7 @@ private:
ICLTensor *_output;
const ICLTensor *_map_x;
const ICLTensor *_map_y;
+ DataLayout _data_layout;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLREMAPKERNEL_H */
diff --git a/src/core/gpu/cl/ClKernelLibrary.cpp b/src/core/gpu/cl/ClKernelLibrary.cpp
index b0458d7c3a..9d516e54a7 100644
--- a/src/core/gpu/cl/ClKernelLibrary.cpp
+++ b/src/core/gpu/cl/ClKernelLibrary.cpp
@@ -397,8 +397,10 @@ const std::map<std::string, std::string> ClKernelLibrary::_kernel_program_map =
{ "reduction_operation_y", "reduction_operation.cl" },
{ "reduction_operation_z", "reduction_operation.cl" },
{ "reduction_operation_w", "reduction_operation.cl" },
- { "remap_nearest_neighbour", "remap.cl" },
- { "remap_bilinear", "remap.cl" },
+ { "remap_nearest_neighbour_nchw", "remap.cl" },
+ { "remap_bilinear_nchw", "remap.cl" },
+ { "remap_nearest_neighbour_nhwc", "remap.cl" },
+ { "remap_bilinear_nhwc", "remap.cl" },
{ "reorg_layer_nchw", "reorg_layer.cl" },
{ "reorg_layer_nhwc", "reorg_layer.cl" },
{ "reshape_layer", "reshape_layer.cl" },
diff --git a/src/runtime/CL/functions/CLRemap.cpp b/src/runtime/CL/functions/CLRemap.cpp
index a4cfc60368..0a1f864543 100644
--- a/src/runtime/CL/functions/CLRemap.cpp
+++ b/src/runtime/CL/functions/CLRemap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,14 +44,8 @@ void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *inpu
BorderMode border_mode,
uint8_t constant_border_value)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_x, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(map_y, 1, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MSG(policy == InterpolationPolicy::AREA, "Area interpolation is not supported");
-
auto k = std::make_unique<CLRemapKernel>();
- k->configure(compile_context, input, map_x, map_y, output, policy, border_mode == BorderMode::UNDEFINED);
+ k->configure(compile_context, input, map_x, map_y, output, RemapInfo{ policy, border_mode, PixelValue(constant_border_value) });
_kernel = std::move(k);
_border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
diff --git a/tests/validation/CL/Remap.cpp b/tests/validation/CL/Remap.cpp
index f73073105b..bbb3cecea9 100644
--- a/tests/validation/CL/Remap.cpp
+++ b/tests/validation/CL/Remap.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,30 +44,30 @@ namespace validation
namespace
{
constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
-constexpr float tolerance_number = 0.2f;
} // namespace
TEST_SUITE(CL)
TEST_SUITE(Remap)
template <typename T>
using CLRemapFixture = RemapValidationFixture<CLTensor, CLAccessor, CLRemap, T>;
+template <typename T>
+using CLRemapLayoutFixture = RemapValidationMixedLayoutFixture<CLTensor, CLAccessor, CLRemap, T>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLRemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- framework::dataset::make("DataType",
- DataType::U8)),
- framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLRemapLayoutFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+ framework::dataset::make("DataType", DataType::U8)),
+ framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
- validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+ validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLRemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
- framework::dataset::make("DataType",
- DataType::U8)),
+ framework::dataset::make("DataType", DataType::U8)),
framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
{
// Validate output
- validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+ validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value);
}
TEST_SUITE_END()
diff --git a/tests/validation/fixtures/RemapFixture.h b/tests/validation/fixtures/RemapFixture.h
index 14ea23b247..2cb8e67f62 100644
--- a/tests/validation/fixtures/RemapFixture.h
+++ b/tests/validation/fixtures/RemapFixture.h
@@ -42,18 +42,19 @@ namespace test
namespace validation
{
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class RemapValidationFixture : public framework::Fixture
+class RemapValidationGenericFixture : public framework::Fixture
{
public:
template <typename...>
- void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode)
+ void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, DataLayout data_layout = DataLayout::NCHW)
{
std::mt19937 gen(library->seed());
std::uniform_int_distribution<uint8_t> distribution(0, 255);
const T constant_border_value = static_cast<T>(distribution(gen));
- _target = compute_target(shape, policy, data_type, border_mode, constant_border_value);
- _reference = compute_reference(shape, policy, data_type, border_mode, constant_border_value);
+ _data_layout = data_layout;
+ _target = compute_target(shape, policy, data_type, border_mode, constant_border_value);
+ _reference = compute_reference(shape, policy, data_type, border_mode, constant_border_value);
}
protected:
@@ -64,13 +65,18 @@ protected:
library->fill(tensor, distribution, i);
}
- TensorType compute_target(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
+ TensorType compute_target(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
{
+ if(_data_layout == DataLayout::NHWC)
+ {
+ permute(shape, PermutationVector(2U, 0U, 1U));
+ }
+
// Create tensors
- TensorType src = create_tensor<TensorType>(shape, data_type);
- TensorType map_x = create_tensor<TensorType>(shape, DataType::F32);
- TensorType map_y = create_tensor<TensorType>(shape, DataType::F32);
- TensorType dst = create_tensor<TensorType>(shape, data_type);
+ TensorType src = create_tensor<TensorType>(shape, data_type, 1, QuantizationInfo(), _data_layout);
+ TensorType map_x = create_tensor<TensorType>(shape, DataType::F32, 1, QuantizationInfo(), _data_layout);
+ TensorType map_y = create_tensor<TensorType>(shape, DataType::F32, 1, QuantizationInfo(), _data_layout);
+ TensorType dst = create_tensor<TensorType>(shape, data_type, 1, QuantizationInfo(), _data_layout);
// Create and configure function
FunctionType remap;
@@ -93,9 +99,11 @@ protected:
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
+ int max_val = std::max({ shape.x(), shape.y(), shape.z() });
+
fill(AccessorType(src), 0, 0, 255);
- fill(AccessorType(map_x), 1, -5, shape.x() + 5);
- fill(AccessorType(map_y), 2, -5, shape.y() + 5);
+ fill(AccessorType(map_x), 1, -5, max_val);
+ fill(AccessorType(map_y), 2, -5, max_val);
// Compute function
remap.run();
@@ -103,7 +111,7 @@ protected:
return dst;
}
- SimpleTensor<T> compute_reference(const TensorShape &shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
+ SimpleTensor<T> compute_reference(const TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, T constant_border_value)
{
ARM_COMPUTE_ERROR_ON(data_type != DataType::U8);
@@ -116,9 +124,11 @@ protected:
_valid_mask = SimpleTensor<T> { shape, data_type };
// Fill reference
+ int max_val = std::max({ shape.x(), shape.y(), shape.z() });
+
fill(src, 0, 0, 255);
- fill(map_x, 1, -5, shape.x() + 5);
- fill(map_y, 2, -5, shape.y() + 5);
+ fill(map_x, 1, -5, max_val);
+ fill(map_y, 2, -5, max_val);
// Compute reference
return reference::remap<T>(src, map_x, map_y, _valid_mask, policy, border_mode, constant_border_value);
@@ -127,7 +137,31 @@ protected:
TensorType _target{};
SimpleTensor<T> _reference{};
SimpleTensor<T> _valid_mask{};
+ DataLayout _data_layout{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class RemapValidationFixture : public RemapValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode)
+ {
+ RemapValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, policy, data_type, border_mode);
+ }
};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class RemapValidationMixedLayoutFixture : public RemapValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, InterpolationPolicy policy, DataType data_type, BorderMode border_mode, DataLayout data_layout)
+ {
+ RemapValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, policy, data_type, border_mode, data_layout);
+ }
+};
+
} // namespace validation
} // namespace test
} // namespace arm_compute