aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiCongLi <sicong.li@arm.com>2021-08-26 17:44:08 +0100
committerSiCong Li <sicong.li@arm.com>2021-09-03 09:45:37 +0000
commitaed61f2a14199fd54b8f7b0af9980689921fc0cc (patch)
tree0020bfc4a36add3a3547674af4abe1dc3fa730ba
parent50335fd3d0734157382741fcf1bfdaf630c60c4b (diff)
downloadComputeLibrary-aed61f2a14199fd54b8f7b0af9980689921fc0cc.tar.gz
Fix CLNormalizationLayer NCHW border calculation
* Calculate border using both norm size and vec_size_x * Expose reference tensor printer Resolves: COMPMID-4793 Change-Id: I7bd8e49779baf7d6848271757bc7993aa1ed2960 Signed-off-by: SiCongLi <sicong.li@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6201 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/nchw/normalization_layer.cl5
-rw-r--r--src/core/CL/kernels/CLNormalizationLayerKernel.cpp44
-rw-r--r--tests/SimpleTensorPrinter.h5
3 files changed, 36 insertions, 18 deletions
diff --git a/src/core/CL/cl_kernels/nchw/normalization_layer.cl b/src/core/CL/cl_kernels/nchw/normalization_layer.cl
index 0fef98e295..deada49db5 100644
--- a/src/core/CL/cl_kernels/nchw/normalization_layer.cl
+++ b/src/core/CL/cl_kernels/nchw/normalization_layer.cl
@@ -134,9 +134,8 @@ __kernel void normalization_layer_in_map_nchw(TENSOR3D_DECLARATION(input),
const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
kappa_v = SQCVT_SAT(KAPPA);
- const int current_col = get_global_id(0) << 2;
- const int left_pos = max(-(int)RADIUS, -3 - current_col);
- const int right_pos = min((int)RADIUS, (int)WIDTH_SIZE - 1 - current_col);
+ const int left_pos = -(int)RADIUS;
+ const int right_pos = (int)RADIUS;
#if defined(IN_MAP_2D)
const int current_row = get_global_id(1);
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index a5dfafe338..2765300925 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -71,18 +71,29 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const DataLayout data_layout = input->data_layout();
if(data_layout == DataLayout::NCHW)
{
- const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0));
- const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info);
- const bool is_norm_accross_width = norm_idx == 0;
-
- const unsigned int border_width = is_norm_accross_width ? vec_size_x - 1 : 0;
- const BorderSize border_size = BorderSize(0, border_width);
+ const unsigned int vec_size_x = adjust_vec_size(max_cl_vector_width / input->element_size(), input->dimension(0));
+ const unsigned int norm_idx = get_normalization_dimension_index(input->data_layout(), norm_info);
+ const bool is_norm_across_width = norm_idx == 0;
+
+ const unsigned int norm_radius = norm_info.norm_size() / 2;
+ // Border / padding calculation:
+ // For NCHW no border handling is impelmeneted in the kernel in the x axis.
+ // This means the x axis is fully-padded depending on vec_size_x and norm_size
+ // E.G. for input x dimension = 3, norm_size = 3 (radius = 1), vec_size_x = 2 ('#' is element 'p' is padding):
+ // In : |p|#|#|#|p|p|
+ // Out: |#|#|#|p|
+ // The output has 1 right padding because of the vec_size_x.
+ // The input has 1 left padding because radius = 1.
+ // The input has 2 right padding because of radius = 1 AND because of the extra output padding
+ const unsigned int border_width_left = is_norm_across_width ? norm_radius : 0;
+ const unsigned int border_width_right = is_norm_across_width ? norm_radius + (vec_size_x - input->dimension(0) % vec_size_x) : 0;
+ const BorderSize border_size = BorderSize(0, border_width_right, 0, border_width_left);
win = calculate_max_window(*input, Steps(vec_size_x));
// We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside the kernel, avoiding padding
// Reads can occur within the valid region of the input
- if(is_norm_accross_width)
+ if(is_norm_across_width)
{
AccessWindowStatic input_access(input, -border_size.left, 0, input->dimension(0) + border_size.right, 0);
window_changed = window_changed || update_window_and_padding(win, input_access);
@@ -150,10 +161,21 @@ void CLNormalizationLayerKernel::configure(const CLCompileContext &compile_conte
if(data_layout == DataLayout::NCHW)
{
- const unsigned int norm_idx = get_normalization_dimension_index(data_layout, norm_info);
- _is_norm_across_width = norm_idx == 0;
- const unsigned int border_width = _is_norm_across_width ? vec_size_x - 1 : 0;
- _border_size = BorderSize(0, border_width);
+ const unsigned int norm_idx = get_normalization_dimension_index(data_layout, norm_info);
+ _is_norm_across_width = norm_idx == 0;
+ const unsigned int norm_radius = norm_info.norm_size() / 2;
+ // Border / padding calculation:
+ // For NCHW no border handling is impelmeneted in the kernel in the x axis.
+ // This means the x axis is fully-padded depending on vec_size_x and norm_size
+ // E.G. for input x dimension = 3, norm_size = 3 (radius = 1), vec_size_x = 2 ('#' is element 'p' is padding):
+ // In : |p|#|#|#|p|p|
+ // Out: |#|#|#|p|
+ // The output has 1 right padding because of the vec_size_x.
+ // The input has 1 left padding because radius = 1.
+ // The input has 2 right padding because of radius = 1 AND the extra output padding
+ const unsigned int border_width_left = _is_norm_across_width ? norm_radius : 0;
+ const unsigned int border_width_right = _is_norm_across_width ? norm_radius + (vec_size_x - input->info()->dimension(0) % vec_size_x) : 0;
+ _border_size = BorderSize(0, border_width_right, 0, border_width_left);
}
const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D);
diff --git a/tests/SimpleTensorPrinter.h b/tests/SimpleTensorPrinter.h
index 6c1506b40d..5d0299a696 100644
--- a/tests/SimpleTensorPrinter.h
+++ b/tests/SimpleTensorPrinter.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2018, 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -34,8 +34,6 @@ namespace arm_compute
{
namespace test
{
-namespace
-{
template <typename T>
inline std::string prettify_tensor(const SimpleTensor<T> &input, const IOFormatInfo &io_fmt = IOFormatInfo{ IOFormatInfo::PrintRegion::NoPadding })
{
@@ -152,6 +150,5 @@ void print_simpletensor(const SimpleTensor<T> &tensor, const std::string &title,
}
}
#endif // PRINT_TENSOR_LIMIT
-}
} // namespace test
} // namespace arm_compute