aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2017-11-30 10:53:31 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:41:58 +0000
commit0162436565106e835e04c47d4a4e29a02ec1e351 (patch)
tree282a1127bb14c3691e2da0cea827b584fb88ce45
parent45bcc3a1c287a208098ae99288273a5129ddd5eb (diff)
downloadComputeLibrary-0162436565106e835e04c47d4a4e29a02ec1e351.tar.gz
COMPMID-684: 2D In-Map normalization support for CL
Change-Id: I73a11ef3ff7265abce196b128413f54623d33cae Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111294 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Reviewed-by: Pablo Tello <pablo.tello@arm.com> Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
-rw-r--r--arm_compute/runtime/CL/functions/CLNormalizationLayer.h4
-rw-r--r--src/core/CL/CLKernelLibrary.cpp2
-rw-r--r--src/core/CL/cl_kernels/normalization_layer.cl44
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.cpp39
-rw-r--r--tests/validation/CL/NormalizationLayer.cpp15
5 files changed, 64 insertions, 40 deletions
diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
index 7e18ce511c..51cd28e27a 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
@@ -51,7 +51,7 @@ public:
/** Set the input and output tensors.
*
* @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 (Written to by the border handler)
+ * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32 (Written to by the border handler)
* @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones.
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*/
@@ -59,7 +59,7 @@ public:
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayer
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32
+ * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32
* @param[in] output Destination tensor. Dimensions, data type and number of channels must match the input ones.
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 948fe441cf..4bc4a4827e 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -278,7 +278,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map =
{ "non_linear_filter_disk5x5", "non_linear_filter5x5.cl" },
{ "non_max_suppression", "nonmax.cl" },
{ "normalization_layer_cross_map", "normalization_layer.cl" },
- { "normalization_layer_in_map_1D", "normalization_layer.cl" },
+ { "normalization_layer_in_map", "normalization_layer.cl" },
{ "batchnormalization_layer", "batchnormalization_layer.cl" },
{ "NV12_to_IYUV_bt709", "color_convert.cl" },
{ "NV12_to_RGB888_bt709", "color_convert.cl" },
diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl
index f8705892f0..bc00252fbd 100644
--- a/src/core/CL/cl_kernels/normalization_layer.cl
+++ b/src/core/CL/cl_kernels/normalization_layer.cl
@@ -50,7 +50,7 @@
#endif // FIXED_POINT_POSITION
-/** Apply cross map normalization.
+/** Apply cross-map normalization.
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16
@@ -92,9 +92,8 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA);
const int current_slice = get_global_id(2);
-
- const int left_slice = max(-(int)RADIUS, -current_slice);
- const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice);
+ const int left_slice = max(-(int)RADIUS, -current_slice);
+ const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice);
for(int i = left_slice; i <= right_slice; i++)
{
@@ -112,7 +111,7 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
STORE_OP(normalized_pixel, 0, (__global DATA_TYPE *)out.ptr);
}
-/** Apply in map normalization.
+/** Apply in-map normalization.
*
* @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short
* @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16
@@ -137,8 +136,8 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
*/
-__kernel void normalization_layer_in_map_1D(TENSOR3D_DECLARATION(input),
- TENSOR3D_DECLARATION(output))
+__kernel void normalization_layer_in_map(TENSOR3D_DECLARATION(input),
+ TENSOR3D_DECLARATION(output))
{
Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
@@ -152,17 +151,34 @@ __kernel void normalization_layer_in_map_1D(TENSOR3D_DECLARATION(input),
const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA);
- const int current_pos = get_global_id(0) << 2;
+ const int current_col = get_global_id(0) << 2;
+ const int left_pos = max(-(int)RADIUS, -3 - current_col);
+ const int right_pos = min((int)RADIUS, (int)((get_global_size(0) << 2) + 3 - 1 - current_col));
- const int left_pos = max(current_pos - (int)RADIUS, -3);
- const int right_pos = min(current_pos + (int)RADIUS, (int)((get_global_size(0) << 2) + 3 - 1));
+#if defined(IN_MAP_2D)
+ const int current_row = get_global_id(1);
+ const int first_row = max(-(int)RADIUS, -current_row);
+ const int last_row = min((int)RADIUS, (int)get_global_size(1) - 1 - current_row);
+#endif /* defined(IN_MAP_2D) */
- for(int i = left_pos; i <= right_pos; i += 1)
+#if defined(IN_MAP_2D)
+ for(int j = first_row; j <= last_row; ++j)
{
- VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i - current_pos, 0, 0));
- acc = ADD_OP(acc, MUL_OP(values, values));
+#endif /* defined(IN_MAP_2D) */
+ for(int i = left_pos; i <= right_pos; ++i)
+ {
+#if defined(IN_MAP_2D)
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i, j, 0));
+#else /* defined(IN_MAP_2D) */
+ VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+ values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i, 0, 0));
+#endif /* defined(IN_MAP_2D) */
+ acc = ADD_OP(acc, MUL_OP(values, values));
+ }
+#if defined(IN_MAP_2D)
}
+#endif /* defined(IN_MAP_2D) */
acc = ADD_OP(MUL_OP(acc, coeff_v), kappa_v);
const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index d94f7b28d2..c46b5987b7 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -43,7 +43,6 @@ Error validate_arguments(const ITensorInfo *input, const ITensorInfo *output, No
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented");
if(is_data_type_fixed_point(input->data_type()))
{
@@ -63,8 +62,11 @@ Error validate_arguments(const ITensorInfo *input, const ITensorInfo *output, No
return Error{};
}
-std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, bool is_in_map, unsigned int norm_size)
+std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, NormalizationLayerInfo norm_info)
{
+ const unsigned int norm_size = norm_info.norm_size();
+ bool is_in_map = norm_info.is_in_map();
+
const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0;
const BorderSize border_size = BorderSize(0, border_width);
@@ -73,6 +75,7 @@ std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITens
Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
+ // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside the kernel, avoiding padding
AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration);
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
@@ -113,27 +116,27 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
_border_size = BorderSize(0, border_width);
const unsigned int num_elems_processed_per_iteration = (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4;
+ const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D);
// Set build options
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- if(is_data_type_fixed_point(input->info()->data_type()))
- {
- build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())));
- }
- build_opts.emplace(("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff())));
- build_opts.emplace(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta())));
- build_opts.emplace(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa())));
- build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- build_opts.emplace(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2)));
- build_opts.emplace(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2))));
+ CLBuildOptions build_opts;
+ build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
+ build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()),
+ "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
+ build_opts.add_option(("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff())));
+ build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta())));
+ build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa())));
+ build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2)));
+ build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2))));
+ build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D");
// Create kernel
- std::string kernel_name = (norm_info.type() == NormType::IN_MAP_1D) ? "normalization_layer_in_map_1D" : "normalization_layer_cross_map";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ std::string kernel_name = _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map";
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
// Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), _is_in_map, norm_info.norm_size());
+ auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure(win_config.second);
@@ -153,7 +156,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
Error CLNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info.is_in_map(), norm_info.norm_size()).first);
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first);
return Error{};
}
diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp
index 2e6ff8973c..0ed52840ae 100644
--- a/tests/validation/CL/NormalizationLayer.cpp
+++ b/tests/validation/CL/NormalizationLayer.cpp
@@ -49,14 +49,19 @@ RelativeTolerance<float> tolerance_f32(0.05f);
/** Tolerance for fixed point operations */
constexpr AbsoluteTolerance<int8_t> tolerance_qs8(2);
-constexpr AbsoluteTolerance<int16_t> tolerance_qs16(3);
+constexpr AbsoluteTolerance<int16_t> tolerance_qs16(4);
/** Input data set. */
-const auto NormalizationDataset = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })),
+const auto NormalizationDataset = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()),
framework::dataset::make("NormalizationSize", 3, 9, 2)),
framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
framework::dataset::make("IsScaled", { true }));
-const auto NormalizationDatasetFP32 = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })),
+const auto NormalizationDatasetFP16 = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })),
+ framework::dataset::make("NormalizationSize", 3, 9, 2)),
+ framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
+ framework::dataset::make("IsScaled", { true }));
+
+const auto NormalizationDatasetFP32 = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()),
framework::dataset::make("NormalizationSize", 3, 9, 2)),
framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })),
framework::dataset::make("IsScaled", { true, false }));
@@ -107,12 +112,12 @@ using CLNormalizationLayerFixture = NormalizationValidationFixture<CLTensor, CLA
TEST_SUITE(Float)
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(NormalizationDatasetFP16, framework::dataset::make("DataType", DataType::F16)))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(NormalizationDatasetFP16, framework::dataset::make("DataType", DataType::F16)))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16);