From 0162436565106e835e04c47d4a4e29a02ec1e351 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 30 Nov 2017 10:53:31 +0000 Subject: COMPMID-684: 2D In-Map normalization support for CL Change-Id: I73a11ef3ff7265abce196b128413f54623d33cae Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111294 Reviewed-by: Anthony Barbier Reviewed-by: Pablo Tello Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com --- .../runtime/CL/functions/CLNormalizationLayer.h | 4 +- src/core/CL/CLKernelLibrary.cpp | 2 +- src/core/CL/cl_kernels/normalization_layer.cl | 44 +++++++++++++++------- src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 39 ++++++++++--------- tests/validation/CL/NormalizationLayer.cpp | 15 +++++--- 5 files changed, 64 insertions(+), 40 deletions(-) diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h index 7e18ce511c..51cd28e27a 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h @@ -51,7 +51,7 @@ public: /** Set the input and output tensors. * * @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 (Written to by the border handler) + * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32 (Written to by the border handler) * @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones. * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. */ @@ -59,7 +59,7 @@ public: /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayer * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 + * and an optional 4th dimension for batch of inputs. Data types supported: QS8/QS16/F16/F32 * @param[in] output Destination tensor. Dimensions, data type and number of channels must match the input ones. * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. * diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 948fe441cf..4bc4a4827e 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -278,7 +278,7 @@ const std::map CLKernelLibrary::_kernel_program_map = { "non_linear_filter_disk5x5", "non_linear_filter5x5.cl" }, { "non_max_suppression", "nonmax.cl" }, { "normalization_layer_cross_map", "normalization_layer.cl" }, - { "normalization_layer_in_map_1D", "normalization_layer.cl" }, + { "normalization_layer_in_map", "normalization_layer.cl" }, { "batchnormalization_layer", "batchnormalization_layer.cl" }, { "NV12_to_IYUV_bt709", "color_convert.cl" }, { "NV12_to_RGB888_bt709", "color_convert.cl" }, diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl index f8705892f0..bc00252fbd 100644 --- a/src/core/CL/cl_kernels/normalization_layer.cl +++ b/src/core/CL/cl_kernels/normalization_layer.cl @@ -50,7 +50,7 @@ #endif // FIXED_POINT_POSITION -/** Apply cross map normalization. +/** Apply cross-map normalization. * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16 @@ -92,9 +92,8 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input), kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA); const int current_slice = get_global_id(2); - - const int left_slice = max(-(int)RADIUS, -current_slice); - const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice); + const int left_slice = max(-(int)RADIUS, -current_slice); + const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice); for(int i = left_slice; i <= right_slice; i++) { @@ -112,7 +111,7 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input), STORE_OP(normalized_pixel, 0, (__global DATA_TYPE *)out.ptr); } -/** Apply in map normalization. +/** Apply in-map normalization. * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16 @@ -137,8 +136,8 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input), * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor */ -__kernel void normalization_layer_in_map_1D(TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) +__kernel void normalization_layer_in_map(TENSOR3D_DECLARATION(input), + TENSOR3D_DECLARATION(output)) { Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input); Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output); @@ -152,17 +151,34 @@ __kernel void normalization_layer_in_map_1D(TENSOR3D_DECLARATION(input), const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA); - const int current_pos = get_global_id(0) << 2; + const int current_col = get_global_id(0) << 2; + const int left_pos = max(-(int)RADIUS, -3 - current_col); + const int right_pos = min((int)RADIUS, (int)((get_global_size(0) << 2) + 3 - 1 - current_col)); - const int left_pos = max(current_pos - (int)RADIUS, -3); - const int right_pos = min(current_pos + (int)RADIUS, (int)((get_global_size(0) << 2) + 3 - 1)); +#if defined(IN_MAP_2D) + const int current_row = get_global_id(1); + const int first_row = max(-(int)RADIUS, -current_row); + const int last_row = min((int)RADIUS, (int)get_global_size(1) - 1 - current_row); +#endif /* defined(IN_MAP_2D) */ - for(int i = left_pos; i <= right_pos; i += 1) +#if defined(IN_MAP_2D) + for(int j = first_row; j <= last_row; ++j) { - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i - current_pos, 0, 0)); - acc = ADD_OP(acc, MUL_OP(values, values)); +#endif /* defined(IN_MAP_2D) */ + for(int i = left_pos; i <= right_pos; ++i) + { +#if defined(IN_MAP_2D) + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i, j, 0)); +#else /* defined(IN_MAP_2D) */ + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i, 0, 0)); +#endif /* defined(IN_MAP_2D) */ + acc = ADD_OP(acc, MUL_OP(values, values)); + } +#if defined(IN_MAP_2D) } +#endif /* defined(IN_MAP_2D) */ acc = ADD_OP(MUL_OP(acc, coeff_v), kappa_v); const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index d94f7b28d2..c46b5987b7 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -43,7 +43,6 @@ Error validate_arguments(const ITensorInfo *input, const ITensorInfo *output, No ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented"); if(is_data_type_fixed_point(input->data_type())) { @@ -63,8 +62,11 @@ Error validate_arguments(const ITensorInfo *input, const ITensorInfo *output, No return Error{}; } -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, bool is_in_map, unsigned int norm_size) +std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, NormalizationLayerInfo norm_info) { + const unsigned int norm_size = norm_info.norm_size(); + bool is_in_map = norm_info.is_in_map(); + const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0; const BorderSize border_size = BorderSize(0, border_width); @@ -73,6 +75,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITens Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside the kernel, avoiding padding AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration); AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); @@ -113,27 +116,27 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou _border_size = BorderSize(0, border_width); const unsigned int num_elems_processed_per_iteration = (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4; + const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D); // Set build options - std::set build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - if(is_data_type_fixed_point(input->info()->data_type())) - { - build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()))); - } - build_opts.emplace(("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff()))); - build_opts.emplace(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta()))); - build_opts.emplace(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa()))); - build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - build_opts.emplace(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2))); - build_opts.emplace(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2)))); + CLBuildOptions build_opts; + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()), + "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + build_opts.add_option(("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff()))); + build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta()))); + build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa()))); + build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); + build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2))); + build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2)))); + build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D"); // Create kernel - std::string kernel_name = (norm_info.type() == NormType::IN_MAP_1D) ? "normalization_layer_in_map_1D" : "normalization_layer_cross_map"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + std::string kernel_name = _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map"; + _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), _is_in_map, norm_info.norm_size()); + auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure(win_config.second); @@ -153,7 +156,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou Error CLNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info.is_in_map(), norm_info.norm_size()).first); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first); return Error{}; } diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp index 2e6ff8973c..0ed52840ae 100644 --- a/tests/validation/CL/NormalizationLayer.cpp +++ b/tests/validation/CL/NormalizationLayer.cpp @@ -49,14 +49,19 @@ RelativeTolerance tolerance_f32(0.05f); /** Tolerance for fixed point operations */ constexpr AbsoluteTolerance tolerance_qs8(2); -constexpr AbsoluteTolerance tolerance_qs16(3); +constexpr AbsoluteTolerance tolerance_qs16(4); /** Input data set. */ -const auto NormalizationDataset = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })), +const auto NormalizationDataset = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()), framework::dataset::make("NormalizationSize", 3, 9, 2)), framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })), framework::dataset::make("IsScaled", { true })); -const auto NormalizationDatasetFP32 = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })), +const auto NormalizationDatasetFP16 = combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })), + framework::dataset::make("NormalizationSize", 3, 9, 2)), + framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })), + framework::dataset::make("IsScaled", { true })); + +const auto NormalizationDatasetFP32 = combine(combine(combine(combine(datasets::SmallShapes(), datasets::NormalizationTypes()), framework::dataset::make("NormalizationSize", 3, 9, 2)), framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })), framework::dataset::make("IsScaled", { true, false })); @@ -107,12 +112,12 @@ using CLNormalizationLayerFixture = NormalizationValidationFixture, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(NormalizationDatasetFP16, framework::dataset::make("DataType", DataType::F16))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(NormalizationDatasetFP16, framework::dataset::make("DataType", DataType::F16))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); -- cgit v1.2.1