about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2017-11-15 16:17:22 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: 624b77859dc9d0618056dad66833b9c37033337b (patch)
tree: 2ed3b814d077004605274f2897c93c8cca1ebd72
parent: d03006a6b97f3b1830acd19bcbc3720261ade64f (diff)
download: ComputeLibrary-624b77859dc9d0618056dad66833b9c37033337b.tar.gz
COMPMID-556: Fix CLNormalization issues.
- Extracts calculations from the CL kernel core loop.
- Changes the access elements for CROSS_MAP to reduce the applied redundant padding.

Change-Id: If41c3adddd977be9386fe34940d055c301ccbb91
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95917
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r--  src/core/CL/cl_kernels/normalization_layer.cl  6
-rw-r--r--  src/core/CL/kernels/CLNormalizationLayerKernel.cpp  2
2 files changed, 4 insertions, 4 deletions
diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl
index 4e65560b95..f8705892f0 100644
--- a/src/core/CL/cl_kernels/normalization_layer.cl
+++ b/src/core/CL/cl_kernels/normalization_layer.cl
@@ -93,13 +93,13 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input),
const int current_slice = get_global_id(2);
- const int left_slice = max(current_slice - (int)RADIUS, (int)0);
- const int right_slice = min(current_slice + (int)RADIUS, (int)(NUM_SLICES - 1));
+ const int left_slice = max(-(int)RADIUS, -current_slice);
+ const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice);
for(int i = left_slice; i <= right_slice; i++)
{
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i - current_slice));
+ values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i));
acc = ADD_OP(acc, MUL_OP(values, values));
}
diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
index a74473980b..6481ad0b2a 100644
--- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp
@@ -73,7 +73,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou
_border_size = BorderSize(0, border_width);
const unsigned int num_elems_processed_per_iteration = (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4;
- const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2);
+ const unsigned int num_elems_read_per_iteration = _is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2)) : num_elems_processed_per_iteration;
// Set build options
std::set<std::string> build_opts;