diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2017-11-15 16:17:22 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | 624b77859dc9d0618056dad66833b9c37033337b (patch) | |
tree | 2ed3b814d077004605274f2897c93c8cca1ebd72 /src/core/CL | |
parent | d03006a6b97f3b1830acd19bcbc3720261ade64f (diff) | |
download | ComputeLibrary-624b77859dc9d0618056dad66833b9c37033337b.tar.gz |
COMPMID-556: Fix CLNormalization issues.
- Extracts the slice-offset calculations from the CL kernel core loop.
- Changes the number of elements read per iteration for CROSS_MAP to
  reduce the redundant padding applied.
Change-Id: If41c3adddd977be9386fe34940d055c301ccbb91
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95917
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL')
-rw-r--r-- | src/core/CL/cl_kernels/normalization_layer.cl | 6 | ||||
-rw-r--r-- | src/core/CL/kernels/CLNormalizationLayerKernel.cpp | 2 |
2 files changed, 4 insertions, 4 deletions
diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl index 4e65560b95..f8705892f0 100644 --- a/src/core/CL/cl_kernels/normalization_layer.cl +++ b/src/core/CL/cl_kernels/normalization_layer.cl @@ -93,13 +93,13 @@ __kernel void normalization_layer_cross_map(TENSOR3D_DECLARATION(input), const int current_slice = get_global_id(2); - const int left_slice = max(current_slice - (int)RADIUS, (int)0); - const int right_slice = min(current_slice + (int)RADIUS, (int)(NUM_SLICES - 1)); + const int left_slice = max(-(int)RADIUS, -current_slice); + const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice); for(int i = left_slice; i <= right_slice; i++) { VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) - values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i - current_slice)); + values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i)); acc = ADD_OP(acc, MUL_OP(values, values)); } diff --git a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp index a74473980b..6481ad0b2a 100644 --- a/src/core/CL/kernels/CLNormalizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizationLayerKernel.cpp @@ -73,7 +73,7 @@ void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *ou _border_size = BorderSize(0, border_width); const unsigned int num_elems_processed_per_iteration = (is_data_type_fixed_point(input->info()->data_type())) ? 16 : 4; - const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2); + const unsigned int num_elems_read_per_iteration = _is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2)) : num_elems_processed_per_iteration; // Set build options std::set<std::string> build_opts; |