diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2017-11-30 10:53:31 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:41:58 +0000 |
commit | 0162436565106e835e04c47d4a4e29a02ec1e351 (patch) | |
tree | 282a1127bb14c3691e2da0cea827b584fb88ce45 /src | |
parent | 45bcc3a1c287a208098ae99288273a5129ddd5eb (diff) | |
download | ComputeLibrary-0162436565106e835e04c47d4a4e29a02ec1e351.tar.gz |
COMPMID-684: 2D In-Map normalization support for CL
Change-Id: I73a11ef3ff7265abce196b128413f54623d33cae
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111294
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/CLKernelLibrary.cpp | 2 | ||||
-rw-r--r-- | src/core/CL/cl_kernels/normalization_layer.cl | 44 | ||||
-rw-r--r-- | src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 39 |
3 files changed, 52 insertions, 33 deletions
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 948fe441cf..4bc4a4827e 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -278,7 +278,7 @@ const std::map<std::string, std::string> CLKernelLibrary::_kernel_program_map = { "non_linear_filter_disk5x5", "non_linear_filter5x5.cl" }, { "non_max_suppression", "nonmax.cl" }, { "normalization_layer_cross_map", "normalization_layer.cl" }, - { "normalization_layer_in_map_1D", "normalization_layer.cl" }, + { "normalization_layer_in_map", "normalization_layer.cl" }, { "batchnormalization_layer", "batchnormalization_layer.cl" }, { "NV12_to_IYUV_bt709", "color_convert.cl" }, { "NV12_to_RGB888_bt709", "color_convert.cl" }, diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl index f8705892f0..bc00252fbd 100644 --- a/src/core/CL/cl_kernels/normalization_layer.cl +++ b/src/core/CL/cl_kernels/normalization_layer.cl @@ -50,7 +50,7 @@ #endif // FIXED_POINT_POSITION -/** Apply cross map normalization. +/** Apply cross-map normalization. * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16 @@ -92,9 +92,8 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input), kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA); const int current_slice = get_global_id(2); - - const int left_slice = max(-(int)RADIUS, -current_slice); - const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice); + const int left_slice = max(-(int)RADIUS, -current_slice); + const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice); for(int i = left_slice; i <= right_slice; i++) { @@ -112,7 +111,7 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input), STORE_OP(normalized_pixel, 0, (__global DATA_TYPE *)out.ptr); } -/** Apply in map normalization. +/** Apply in-map normalization. * * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16 @@ -137,8 +136,8 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input), * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor */ -__kernel void normalization_layer_in_map_1D(TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) +__kernel void normalization_layer_in_map(TENSOR3D_DECLARATION(input), + TENSOR3D_DECLARATION(output)) { Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input); Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output); @@ -152,17 +151,34 @@ __kernel void normalization_layer_in_map_1D(TENSOR3D_DECLARATION(input), const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA); - const int current_pos = get_global_id(0) << 2; + const int current_col = get_global_id(0) << 2; + const int left_pos = max(-(int)RADIUS, -3 - current_col); + const int right_pos = min((int)RADIUS, (int)((get_global_size(0) << 2) + 3 - 1 - current_col)); - const int left_pos = max(current_pos - (int)RADIUS, -3); - const int right_pos = min(current_pos + (int)RADIUS, (int)((get_global_size(0) << 2) + 3 - 1)); +#if defined(IN_MAP_2D) + const int current_row = get_global_id(1); + const int first_row = max(-(int)RADIUS, -current_row); + const int last_row = min((int)RADIUS, (int)get_global_size(1) - 1 - current_row); +#endif /* defined(IN_MAP_2D) */ - for(int i = left_pos; i <= right_pos; i += 1) +#if defined(IN_MAP_2D) + for(int j = first_row; j <= last_row; ++j) { - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i - current_pos, 0, 0)); - acc = ADD_OP(acc, MUL_OP(values, values)); +#endif /* defined(IN_MAP_2D) */ + for(int i = left_pos; i <= right_pos; ++i) + { +#if defined(IN_MAP_2D) + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i, j, 0)); +#else /* defined(IN_MAP_2D) */ + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, i, 0, 0)); +#endif /* defined(IN_MAP_2D) */ + acc = ADD_OP(acc, MUL_OP(values, values)); + } +#if defined(IN_MAP_2D) } +#endif /* defined(IN_MAP_2D) */ acc = ADD_OP(MUL_OP(acc, coeff_v), kappa_v); const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index d94f7b28d2..c46b5987b7 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -43,7 +43,6 @@ Error validate_arguments(const ITensorInfo *input, const ITensorInfo *output, No ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented"); if(is_data_type_fixed_point(input->data_type())) { @@ -63,8 +62,11 @@ Error validate_arguments(const ITensorInfo *input, const ITensorInfo *output, No return Error{}; } -std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, bool is_in_map, unsigned int norm_size) +std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, NormalizationLayerInfo norm_info) { + const unsigned int norm_size = norm_info.norm_size(); + bool is_in_map = norm_info.is_in_map(); + const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0; const BorderSize border_size = BorderSize(0, border_width); @@ -73,6 +75,7 @@ std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITens Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside the kernel, avoiding padding AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration); AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); @@ -113,27 +116,27 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou _border_size = BorderSize(0, border_width); const unsigned int num_elems_processed_per_iteration = (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4; + const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D); // Set build options - std::set<std::string> build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - if(is_data_type_fixed_point(input->info()->data_type())) - { - build_opts.emplace(("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()))); - } - build_opts.emplace(("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff()))); - build_opts.emplace(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta()))); - build_opts.emplace(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa()))); - build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - build_opts.emplace(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2))); - build_opts.emplace(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2)))); + CLBuildOptions build_opts; + build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.add_option_if(is_data_type_fixed_point(input->info()->data_type()), + "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + build_opts.add_option(("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff()))); + build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta()))); + build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa()))); + build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); + build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size() / 2))); + build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2)))); + build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D"); // Create kernel - std::string kernel_name = (norm_info.type() == NormType::IN_MAP_1D) ? "normalization_layer_in_map_1D" : "normalization_layer_cross_map"; - _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + std::string kernel_name = _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map"; + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), _is_in_map, norm_info.norm_size()); + auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure(win_config.second); @@ -153,7 +156,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou Error CLNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info.is_in_map(), norm_info.norm_size()).first); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first); return Error{}; } |