From aed61f2a14199fd54b8f7b0af9980689921fc0cc Mon Sep 17 00:00:00 2001 From: SiCongLi Date: Thu, 26 Aug 2021 17:44:08 +0100 Subject: Fix CLNormalizationLayer NCHW border calculation * Calculate border using both norm size and vec_size_x * Expose reference tensor printer Resolves: COMPMID-4793 Change-Id: I7bd8e49779baf7d6848271757bc7993aa1ed2960 Signed-off-by: SiCongLi Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6201 Reviewed-by: Michele Di Giorgio Reviewed-by: Giorgio Arena Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/nchw/normalization_layer.cl | 5 +-- src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 44 ++++++++++++++++------ tests/SimpleTensorPrinter.h | 5 +-- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/src/core/CL/cl_kernels/nchw/normalization_layer.cl b/src/core/CL/cl_kernels/nchw/normalization_layer.cl index 0fef98e295..deada49db5 100644 --- a/src/core/CL/cl_kernels/nchw/normalization_layer.cl +++ b/src/core/CL/cl_kernels/nchw/normalization_layer.cl @@ -134,9 +134,8 @@ __kernel void normalization_layer_in_map_nchw(TENSOR3D_DECLARATION(input), const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) kappa_v = SQCVT_SAT(KAPPA); - const int current_col = get_global_id(0) << 2; - const int left_pos = max(-(int)RADIUS, -3 - current_col); - const int right_pos = min((int)RADIUS, (int)WIDTH_SIZE - 1 - current_col); + const int left_pos = -(int)RADIUS; + const int right_pos = (int)RADIUS; #if defined(IN_MAP_2D) const int current_row = get_global_id(1); diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index a5dfafe338..2765300925 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -71,18 +71,29 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen const DataLayout data_layout = input->data_layout(); if(data_layout == DataLayout::NCHW) { - const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0)); - const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info); - const bool is_norm_accross_width = norm_idx == 0; - - const unsigned int border_width = is_norm_accross_width ? vec_size_x - 1 : 0; - const BorderSize border_size = BorderSize(0, border_width); + const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0)); + const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info); + const bool is_norm_across_width = norm_idx == 0; + + const unsigned int norm_radius = norm_info.norm_size() / 2; + // Border / padding calculation: + // For NCHW no border handling is impelmeneted in the kernel in the x axis. + // This means the x axis is fully-padded depending on vec_size_x and norm_size + // E.G. for input x dimension = 3, norm_size = 3 (radius = 1), vec_size_x = 2 ('#' is element 'p' is padding): + // In : |p|#|#|#|p|p| + // Out: |#|#|#|p| + // The output has 1 right padding because of the vec_size_x. + // The input has 1 left padding because radius = 1. + // The input has 2 right padding because of radius = 1 AND because of the extra output padding + const unsigned int border_width_left = is_norm_across_width ? norm_radius : 0; + const unsigned int border_width_right = is_norm_across_width ? norm_radius + (vec_size_x - input->dimension(0) % vec_size_x) : 0; + const BorderSize border_size = BorderSize(0, border_width_right, 0, border_width_left); win = calculate_max_window(*input, Steps(vec_size_x)); // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside the kernel, avoiding padding // Reads can occur within the valid region of the input - if(is_norm_accross_width) + if(is_norm_across_width) { AccessWindowStatic input_access(input, -border_size.left, 0, input->dimension(0) + border_size.right, 0); window_changed = window_changed || update_window_and_padding(win, input_access); @@ -150,10 +161,21 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte if(data_layout == DataLayout::NCHW) { - const unsigned int norm_idx = get_normalization_dimension_index(data_layout, norm_info); - _is_norm_across_width = norm_idx == 0; - const unsigned int border_width = _is_norm_across_width ? vec_size_x - 1 : 0; - _border_size = BorderSize(0, border_width); + const unsigned int norm_idx = get_normalization_dimension_index(data_layout, norm_info); + _is_norm_across_width = norm_idx == 0; + const unsigned int norm_radius = norm_info.norm_size() / 2; + // Border / padding calculation: + // For NCHW no border handling is impelmeneted in the kernel in the x axis. + // This means the x axis is fully-padded depending on vec_size_x and norm_size + // E.G. for input x dimension = 3, norm_size = 3 (radius = 1), vec_size_x = 2 ('#' is element 'p' is padding): + // In : |p|#|#|#|p|p| + // Out: |#|#|#|p| + // The output has 1 right padding because of the vec_size_x. + // The input has 1 left padding because radius = 1. + // The input has 2 right padding because of radius = 1 AND the extra output padding + const unsigned int border_width_left = _is_norm_across_width ? norm_radius : 0; + const unsigned int border_width_right = _is_norm_across_width ? norm_radius + (vec_size_x - input->info()->dimension(0) % vec_size_x) : 0; + _border_size = BorderSize(0, border_width_right, 0, border_width_left); } const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D); diff --git a/tests/SimpleTensorPrinter.h b/tests/SimpleTensorPrinter.h index 6c1506b40d..5d0299a696 100644 --- a/tests/SimpleTensorPrinter.h +++ b/tests/SimpleTensorPrinter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,8 +34,6 @@ namespace arm_compute { namespace test { -namespace -{ template inline std::string prettify_tensor(const SimpleTensor &input, const IOFormatInfo &io_fmt = IOFormatInfo{ IOFormatInfo::PrintRegion::NoPadding }) { @@ -152,6 +150,5 @@ void print_simpletensor(const SimpleTensor &tensor, const std::string &title, } } #endif // PRINT_TENSOR_LIMIT -} } // namespace test } // namespace arm_compute -- cgit v1.2.1