From 4646d2e026a0fa92085fcba2f4aec5ec148956aa Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Wed, 19 Jun 2019 12:28:47 +0100
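Subject: COMPMID-2072: Use a constexpr for num_elems_processed_per_iteration
 where possible

Constants that were declared separately in several functions of the same
file are hoisted into a single constexpr in an anonymous namespace, and
local `const` values known at compile time become `constexpr`. The value
in CLDirectConvolutionOutputStageKernel depends on the input data type,
so it is only made `const`.

Where the constant was 1 and only used to build the window (the strided
slice and histogram kernels), it is removed and a default-constructed
Steps() is passed instead.

Files that used `using namespace arm_compute;` now enclose their
definitions in `namespace arm_compute { ... }`.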

Change-Id: I26cb699ae3a77003ef7d05ac30d3ed518214e25f
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1375
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
---
 src/core/CL/kernels/CLAccumulateKernel.cpp             | 13 ++++++++-----
 src/core/CL/kernels/CLChannelCombineKernel.cpp         | 14 ++++++++------
 src/core/CL/kernels/CLCol2ImKernel.cpp                 |  8 +++++---
 src/core/CL/kernels/CLDepthConvertLayerKernel.cpp      |  4 ++--
 .../kernels/CLDirectConvolutionOutputStageKernel.cpp   |  9 +++++----
 src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp      |  6 ++----
 .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp    | 15 +++++++--------
 src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp       | 18 ++++++++----------
 src/core/CL/kernels/CLROIAlignLayerKernel.cpp          |  4 ++--
 src/core/CL/kernels/CLROIPoolingLayerKernel.cpp        |  6 +++---
 src/core/CL/kernels/CLStridedSliceKernel.cpp           |  4 +---
 src/core/CL/kernels/CLWarpAffineKernel.cpp             |  7 ++++---
 src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp  | 10 +++++-----
 src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp   |  4 ++--
 src/core/NEON/kernels/NEHistogramKernel.cpp            | 14 ++++----------
 .../kernels/NELocallyConnectedMatrixMultiplyKernel.cpp |  8 +++-----
 src/core/NEON/kernels/NEStridedSliceKernel.cpp         |  4 +---
 src/core/NEON/kernels/NEThresholdKernel.cpp            |  6 ++----
 18 files changed, 72 insertions(+), 82 deletions(-)

(limited to 'src')

diff --git a/src/core/CL/kernels/CLAccumulateKernel.cpp b/src/core/CL/kernels/CLAccumulateKernel.cpp
index 6333f04e71..12ee210243 100644
--- a/src/core/CL/kernels/CLAccumulateKernel.cpp
+++ b/src/core/CL/kernels/CLAccumulateKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,7 +31,12 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
 
-using namespace arm_compute;
+namespace arm_compute
+{
+namespace
+{
+constexpr unsigned int num_elems_processed_per_iteration = 16;
+} // namespace
 
 void CLAccumulateKernel::configure(const ICLTensor *input, ICLTensor *accum)
 {
@@ -42,7 +47,6 @@ void CLAccumulateKernel::configure(const ICLTensor *input, ICLTensor *accum)
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("accumulate"));
 
     // Make sure _kernel is initialized before calling the parent's configure
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
     ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration);
 }
 
@@ -60,7 +64,6 @@ void CLAccumulateWeightedKernel::configure(const ICLTensor *input, float alpha,
     _kernel.setArg(idx++, alpha);
 
     // Configure kernel window
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
     ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration);
 }
 
@@ -78,6 +81,6 @@ void CLAccumulateSquaredKernel::configure(const ICLTensor *input, uint32_t shift
     _kernel.setArg(idx++, shift);
 
     // Configure kernel window
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
     ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration);
 }
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLChannelCombineKernel.cpp b/src/core/CL/kernels/CLChannelCombineKernel.cpp
index c7b1da41dc..11cd8ec5d0 100644
--- a/src/core/CL/kernels/CLChannelCombineKernel.cpp
+++ b/src/core/CL/kernels/CLChannelCombineKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,7 +39,12 @@
 #include <set>
 #include <string>
 
-using namespace arm_compute;
+namespace arm_compute
+{
+namespace
+{
+constexpr unsigned int num_elems_processed_per_iteration = 16;
+} // namespace
 
 CLChannelCombineKernel::CLChannelCombineKernel()
     : _planes{ { nullptr } }, _output(nullptr), _output_multi(nullptr), _x_subsampling{ { 1, 1, 1 } }, _y_subsampling{ { 1, 1, 1 } }
@@ -107,8 +112,6 @@ void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor
     _kernel                 = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
 
     // Configure window
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
-
     Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
 
     AccessWindowHorizontal plane0_access(plane0->info(), 0, num_elems_processed_per_iteration);
@@ -211,8 +214,6 @@ void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *p
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
 
     // Configure window
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
-
     Window win = calculate_max_window(*plane0->info(), Steps(num_elems_processed_per_iteration));
 
     AccessWindowRectangle input_plane0_access(plane0->info(), 0, 0, num_elems_processed_per_iteration, 1.f);
@@ -292,3 +293,4 @@ void CLChannelCombineKernel::run(const Window &window, cl::CommandQueue &queue)
     }
     while(window.slide_window_slice_2D(slice));
 }
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp
index d748745999..8726f024d0 100644
--- a/src/core/CL/kernels/CLCol2ImKernel.cpp
+++ b/src/core/CL/kernels/CLCol2ImKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,9 +35,10 @@
 
 #include <cmath>
 
-using namespace arm_compute;
 using namespace arm_compute::misc::shape_calculator;
 
+namespace arm_compute
+{
 namespace
 {
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups)
@@ -64,7 +65,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     // Output auto inizialitation if not yet initialized
     auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_col2im_shape(*input, convolved_dims, true, num_groups)).set_data_layout(DataLayout::NCHW));
 
-    const unsigned int num_elems_read_per_iteration = 8;
+    constexpr unsigned int num_elems_read_per_iteration = 8;
 
     // Configure window
     Window win = calculate_max_window(*input, Steps(num_elems_read_per_iteration));
@@ -166,3 +167,4 @@ void CLCol2ImKernel::run(const Window &window, cl::CommandQueue &queue)
     }
     while(collapsed.slide_window_slice_3D(slice) && collapsed_out.slide_window_slice_4D(slice_out));
 }
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
index 14e5c98c0c..0b663e8498 100644
--- a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
@@ -84,7 +84,7 @@ void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *out
     const size_t output_size = data_size_from_type(output->info()->data_type());
 
     // Get number of elements to process per iterations
-    const unsigned int num_elems_processed_per_iteration = 16;
+    constexpr unsigned int num_elems_processed_per_iteration = 16;
 
     // Set build options
     CLBuildOptions build_opts;
@@ -131,4 +131,4 @@ Status CLDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITens
 
     return Status{};
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
index 22149b4ea4..515321cdfc 100644
--- a/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionOutputStageKernel.cpp
@@ -34,8 +34,8 @@
 #include <cstddef>
 #include <cstdint>
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output)
@@ -87,8 +87,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, con
 
 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *bias, ITensorInfo *output)
 {
-    bool         window_changed                    = false;
-    unsigned int num_elems_processed_per_iteration = 16 / element_size_from_data_type(input->data_type());
+    bool               window_changed                    = false;
+    const unsigned int num_elems_processed_per_iteration = 16 / element_size_from_data_type(input->data_type());
 
     // Configure kernel window
     Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
@@ -206,3 +206,4 @@ void CLDirectConvolutionLayerOutputStageKernel::run(const Window &window, cl::Co
     }
     while(window.slide_window_slice_3D(slice));
 }
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
index e878dbcdcd..0c0b0ec817 100644
--- a/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpReductionKernel.cpp
@@ -38,12 +38,9 @@
 #include <cstddef>
 #include <cstdint>
 
-using namespace arm_compute;
-
 namespace arm_compute
 {
 class Coordinates;
-} // namespace arm_compute
 
 namespace
 {
@@ -56,7 +53,7 @@ Status validate_arguments_matrix_a_reduction(const ITensorInfo *input, const ITe
 }
 std::pair<Status, Window> validate_and_configure_window_matrix_a_reduction(ITensorInfo *input, ITensorInfo *output)
 {
-    const unsigned int num_elems_processed_per_iteration = 1;
+    constexpr unsigned int num_elems_processed_per_iteration = 1;
 
     Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
 
@@ -221,3 +218,4 @@ void CLGEMMLowpMatrixBReductionKernel::run(const Window &window, cl::CommandQueu
     }
     while(collapsed.slide_window_slice_2D(slice_out));
 }
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
index 0ff2f1343a..9e5d677e89 100644
--- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp
@@ -33,10 +33,13 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Types.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
+constexpr unsigned int num_elems_read_per_iteration = 4;
+constexpr unsigned int num_rows_read_per_iteration  = 4;
+
 Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input0);
@@ -50,9 +53,6 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
 
 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
 {
-    constexpr unsigned int num_elems_read_per_iteration = 4;
-    constexpr unsigned int num_rows_read_per_iteration  = 4;
-
     const unsigned int border_x = ceil_to_multiple(input0->dimension(0), num_elems_read_per_iteration) - input0->dimension(0);
     const unsigned int border_y = ceil_to_multiple(input0->dimension(1), num_rows_read_per_iteration) - input0->dimension(1);
 
@@ -113,9 +113,7 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const
     }
 
     // Configure kernel window
-    const unsigned int num_elems_read_per_iteration = 4;
-
-    _num_rows_read_per_iteration = 4;
+    _num_rows_read_per_iteration = num_rows_read_per_iteration;
 
     const unsigned int border_x = ceil_to_multiple(input0->info()->dimension(0), num_elems_read_per_iteration) - input0->info()->dimension(0);
     const unsigned int border_y = ceil_to_multiple(input0->info()->dimension(1), _num_rows_read_per_iteration) - input0->info()->dimension(1);
@@ -172,3 +170,4 @@ void CLGEMMMatrixVectorMultiplyKernel::run(const Window &window, cl::CommandQueu
     }
     while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out));
 }
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
index 288e1e8e61..8816138e2e 100644
--- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
+++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
@@ -37,15 +37,12 @@
 
 namespace arm_compute
 {
-CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel()
-    : _input(nullptr), _sum(nullptr), _output(nullptr), _actual_axis(0), _epsilon(1e-12)
-{
-}
-
 namespace
 {
 constexpr int max_input_tensor_dim = 3;
 
+constexpr unsigned int num_elems_processed_per_iteration = 16;
+
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
 {
     ARM_COMPUTE_UNUSED(epsilon);
@@ -76,8 +73,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons
 
 std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
 {
-    const unsigned int num_elems_processed_per_iteration = 16;
-
     Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
 
     // Output tensor auto initialization if not yet initialized
@@ -95,6 +90,11 @@ std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITe
 }
 } // namespace
 
+CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel()
+    : _input(nullptr), _sum(nullptr), _output(nullptr), _actual_axis(0), _epsilon(1e-12)
+{
+}
+
 void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
@@ -106,8 +106,6 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor
     _actual_axis = wrap_around(axis, max_input_tensor_dim);
     _epsilon     = epsilon;
 
-    const unsigned int num_elems_processed_per_iteration = 16;
-
     // Set build options
     std::set<std::string> build_opts;
     build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
@@ -221,4 +219,4 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue
             ARM_COMPUTE_ERROR("Not supported");
     }
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
index 66d26231d7..47dc62c4f4 100644
--- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
@@ -72,8 +72,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     output->set_data_layout(input->data_layout());
 
     // Configure kernel window
-    const unsigned int num_elems_processed_per_iteration = 1;
-    Window             win                               = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+    constexpr unsigned int num_elems_processed_per_iteration = 1;
+    Window                 win                               = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
 
     AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
     AccessWindowHorizontal input_access(input, input->valid_region().start(0), num_elems_processed_per_iteration);
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index df7687edea..c32ec1bb2b 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -52,8 +52,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     auto_init_if_empty((*output), output_shape, 1, input->data_type());
 
     // Configure kernel window
-    const unsigned int num_elems_processed_per_iteration = 1;
-    Window             win                               = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+    constexpr unsigned int num_elems_processed_per_iteration = 1;
+    Window                 win                               = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
 
     AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
     AccessWindowHorizontal input_access(input, input->valid_region().start(0), num_elems_processed_per_iteration);
@@ -145,4 +145,4 @@ void CLROIPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
 
     enqueue(queue, *this, slice);
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLStridedSliceKernel.cpp b/src/core/CL/kernels/CLStridedSliceKernel.cpp
index 5a6b958719..c2bdf7f299 100644
--- a/src/core/CL/kernels/CLStridedSliceKernel.cpp
+++ b/src/core/CL/kernels/CLStridedSliceKernel.cpp
@@ -89,9 +89,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
 
     // Create window
-    const unsigned int num_elems_processed_per_iteration = 1;
-
-    Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+    Window win = calculate_max_window(*output, Steps());
     output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
 
     return std::make_pair(Status{}, win);
diff --git a/src/core/CL/kernels/CLWarpAffineKernel.cpp b/src/core/CL/kernels/CLWarpAffineKernel.cpp
index 684305ccd4..cd20d42016 100644
--- a/src/core/CL/kernels/CLWarpAffineKernel.cpp
+++ b/src/core/CL/kernels/CLWarpAffineKernel.cpp
@@ -38,8 +38,8 @@
 #include <sstream>
 #include <string>
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 void options_add_matrix(std::set<std::string> &options, const std::array<float, 9> &matrix)
@@ -84,7 +84,7 @@ void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, co
     _kernel.setArg<cl_int>(idx++, input->info()->dimension(1));
 
     // Configure kernel window
-    const unsigned int num_elems_processed_per_iteration = 4;
+    constexpr unsigned int num_elems_processed_per_iteration = 4;
 
     Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
 
@@ -123,3 +123,4 @@ void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, co
     _config_id += "_";
     _config_id += lower_string(string_from_interpolation_policy(policy));
 }
+} // namespace arm_compute
diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
index b577944a03..01fef481a8 100644
--- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
+++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp
@@ -40,13 +40,14 @@
 
 #include <map>
 
-using namespace arm_compute;
+namespace arm_compute
+{
 namespace
 {
+constexpr unsigned int num_elems_processed_per_iteration = 16;
+
 std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int width_offset, ITensorInfo *output)
 {
-    const unsigned int num_elems_processed_per_iteration = 16;
-
     // The window needs to be based on input as we copy all the widths of input
     Window                 win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
     AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
@@ -98,8 +99,6 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i
     _output       = output;
     _width_offset = width_offset;
 
-    const unsigned int num_elems_processed_per_iteration = 16;
-
     // Add build options
     CLBuildOptions build_opts;
     build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type()));
@@ -137,3 +136,4 @@ void CLWidthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &
     add_4D_tensor_argument(idx, _output, window);
     enqueue(queue, *this, window);
 }
+} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index 86bea849e4..8ee46eae42 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -36,6 +36,8 @@ namespace arm_compute
 {
 namespace
 {
+constexpr unsigned int num_elems_processed_per_iteration = 16;
+
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, float beta)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
@@ -134,7 +136,6 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
     }
 
     // Configure kernel window
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
     INESimpleKernel::configure(input, output, num_elems_processed_per_iteration);
 
     _beta = beta;
@@ -142,7 +143,6 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
 
 Status NEGEMMMatrixAdditionKernel::validate(const ITensorInfo *input, const ITensorInfo *output, float beta)
 {
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, beta));
     ARM_COMPUTE_RETURN_ON_ERROR(INESimpleKernel::validate(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration));
     return Status{};
diff --git a/src/core/NEON/kernels/NEHistogramKernel.cpp b/src/core/NEON/kernels/NEHistogramKernel.cpp
index 02de566f6a..b088a232a8 100644
--- a/src/core/NEON/kernels/NEHistogramKernel.cpp
+++ b/src/core/NEON/kernels/NEHistogramKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,12 +35,9 @@
 #include <arm_neon.h>
 #include <array>
 
-using namespace arm_compute;
-
 namespace arm_compute
 {
 class Coordinates;
-} // namespace arm_compute
 
 inline void NEHistogramKernel::merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins)
 {
@@ -217,9 +214,7 @@ void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output,
     // Set appropriate function
     _func = &NEHistogramKernel::histogram_U8;
 
-    constexpr unsigned int num_elems_processed_per_iteration = 1;
-
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+    Window win = calculate_max_window(*input->info(), Steps());
 
     INEKernel::configure(win);
 }
@@ -236,9 +231,7 @@ void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output)
     // Set appropriate function
     _func = &NEHistogramKernel::histogram_fixed_U8;
 
-    constexpr unsigned int num_elems_processed_per_iteration = 1;
-
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+    Window win = calculate_max_window(*input->info(), Steps());
 
     INEKernel::configure(win);
 }
@@ -251,3 +244,4 @@ void NEHistogramKernel::run(const Window &window, const ThreadInfo &info)
 
     (this->*_func)(window, info);
 }
+} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
index 46b7913223..467546a5d3 100644
--- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,12 +40,9 @@
 #include <cstdint>
 #include <tuple>
 
-using namespace arm_compute;
-
 namespace arm_compute
 {
 class Coordinates;
-} // namespace arm_compute
 
 namespace
 {
@@ -317,7 +314,7 @@ Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1,
 
 std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
 {
-    const unsigned int num_elems_processed_per_iteration_x = 16;
+    constexpr unsigned int num_elems_processed_per_iteration_x = 16;
 
     Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration_x));
 
@@ -389,3 +386,4 @@ void NELocallyConnectedMatrixMultiplyKernel::run(const Window &window, const Thr
         }
     }
 }
+} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEStridedSliceKernel.cpp b/src/core/NEON/kernels/NEStridedSliceKernel.cpp
index 2ae029b64c..ece291e0a3 100644
--- a/src/core/NEON/kernels/NEStridedSliceKernel.cpp
+++ b/src/core/NEON/kernels/NEStridedSliceKernel.cpp
@@ -86,9 +86,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
     auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
 
     // Create window
-    const unsigned int num_elems_processed_per_iteration = 1;
-
-    Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
+    Window win = calculate_max_window(*output, Steps());
     output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
 
     return std::make_pair(Status{}, win);
diff --git a/src/core/NEON/kernels/NEThresholdKernel.cpp b/src/core/NEON/kernels/NEThresholdKernel.cpp
index ae9c62bc92..5c3b2a7540 100644
--- a/src/core/NEON/kernels/NEThresholdKernel.cpp
+++ b/src/core/NEON/kernels/NEThresholdKernel.cpp
@@ -30,12 +30,9 @@
 
 #include <arm_neon.h>
 
-using namespace arm_compute;
-
 namespace arm_compute
 {
 class Coordinates;
-} // namespace arm_compute
 
 NEThresholdKernel::NEThresholdKernel()
     : _func(nullptr), _input(nullptr), _output(nullptr), _threshold(0), _false_value(0), _true_value(0), _upper(0)
@@ -67,7 +64,7 @@ void NEThresholdKernel::configure(const ITensor *input, ITensor *output, uint8_t
             break;
     }
 
-    const unsigned int num_elems_processed_per_iteration = 16;
+    constexpr unsigned int num_elems_processed_per_iteration = 16;
 
     Window                 win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
     AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
@@ -128,3 +125,4 @@ void NEThresholdKernel::run(const Window &window, const ThreadInfo &info)
 
     (this->*_func)(window);
 }
+} // namespace arm_compute
-- 
cgit v1.2.1