From 4b5c588ed5bbf635bfb4d20b662db417caa4558f Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Tue, 14 May 2019 10:38:30 +0100 Subject: COMPMID-2248 L2NormalizeLayer: negative axis Change-Id: Ic164d7a9ddf1615a2e3b0e10430c34194a70f221 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/1127 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- .../core/CL/kernels/CLL2NormalizeLayerKernel.h | 12 ++++----- .../core/NEON/kernels/NEL2NormalizeLayerKernel.h | 12 ++++----- .../runtime/CL/functions/CLL2NormalizeLayer.h | 10 ++++---- .../runtime/NEON/functions/NEL2NormalizeLayer.h | 10 ++++---- src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp | 30 ++++++++++++---------- src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp | 26 +++++++++++-------- src/runtime/CL/functions/CLL2NormalizeLayer.cpp | 17 ++++++++---- src/runtime/NEON/functions/NEL2NormalizeLayer.cpp | 17 ++++++++---- tests/validation/CL/L2NormalizeLayer.cpp | 24 ++++++++++++----- tests/validation/NEON/L2NormalizeLayer.cpp | 28 +++++++++++++------- .../validation/fixtures/L2NormalizeLayerFixture.h | 23 ++++++++++------- 11 files changed, 128 insertions(+), 81 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h index 8dd4609250..ec192bed42 100644 --- a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,10 +55,10 @@ public: * Sum will have the same number of dimensions as input. * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input. * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2 + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon Lower bound value for the normalization. */ - void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon); + void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon); /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel. * @@ -67,12 +67,12 @@ public: * Sum will have the same number of dimensions as input. * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2 + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon Lower bound value for the normalization. 
* * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon); + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -81,7 +81,7 @@ private: const ICLTensor *_input; const ICLTensor *_sum; ICLTensor *_output; - unsigned int _axis; + unsigned int _actual_axis; float _epsilon; }; } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h index f893c4ae6b..ab5e040885 100644 --- a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,10 +57,10 @@ public: * Sum will have the same number of dimensions as input. * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. * Output will have the same number of dimensions as input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0, 1, 2 + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon Lower bound value for the normalization. */ - void configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon); + void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon); /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel. * @@ -69,12 +69,12 @@ public: * Sum will have the same number of dimensions as input. * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. * Output will have the same number of dimensions as input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0, 1, 2 + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon Lower bound value for the normalization. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon); + static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -83,7 +83,7 @@ private: const ITensor *_input; const ITensor *_sum; ITensor *_output; - unsigned int _axis; + unsigned int _actual_axis; float _epsilon; }; } // namespace arm_compute diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h index 2cabaee5de..15dcc58310 100644 --- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h +++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -55,21 +55,21 @@ public: * * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. * @param[out] output Destination tensor. 
Data types and data layouts supported: Same as @p input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2 + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon (Optional) Lower bound value for the normalization. */ - void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon = 1e-12f); + void configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon = 1e-12f); /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayer. * * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC. * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input. - * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1, 2 + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon (Optional) Lower bound value for the normalization. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, float epsilon = 1e-12f); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int axis, float epsilon = 1e-12f); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h index ba506fa9ab..e778f96e22 100644 --- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,21 +52,21 @@ public: * * @param[in, out] input Source tensor. Data types supported: F16/F32. (Written to only for border_size != 0) * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0, 1, 2 + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon (Optional) Lower bound value for the normalization. */ - void configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon = 1e-12f); + void configure(ITensor *input, ITensor *output, int axis, float epsilon = 1e-12f); /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayer. * * @param[in] input Source tensor info. Data types supported: F16/F32. (Written to only for border_size != 0) * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0, 1, 2 + * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 * @param[in] epsilon (Optional) Lower bound value for the normalization. 
* * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, float epsilon = 1e-12f); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int axis, float epsilon = 1e-12f); // Inherited methods overridden: void run() override; diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp index cb2e29449c..00af590104 100644 --- a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -35,29 +35,32 @@ #include "support/ToolchainSupport.h" -using namespace arm_compute; - +namespace arm_compute +{ CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel() - : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12) + : _input(nullptr), _sum(nullptr), _output(nullptr), _actual_axis(0), _epsilon(1e-12) { } namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) +constexpr int max_input_tensor_dim = 3; + +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon) { ARM_COMPUTE_UNUSED(epsilon); + const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 2, "Axis greater than 2 is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis > 2, "Actual axis greater than 2 is not supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis >= TensorShape::num_max_dimensions, "Actual normalization axis greater than max number of dimensions"); // Reduce shape on axis TensorShape sum_shape = input->tensor_shape(); - sum_shape.set(axis, 1); + sum_shape.set(actual_axis, 1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(sum->tensor_shape(), sum_shape); if(output->total_size() != 0) @@ -92,7 +95,7 @@ std::tuple validate_and_configure_window(ITensorInfo *input, ITe } } // namespace -void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) +void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon)); @@ -100,7 +103,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor _input = input; _sum = sum; _output = output; - _axis = axis; + _actual_axis = wrap_around(axis, max_input_tensor_dim); _epsilon = epsilon; const unsigned int num_elems_processed_per_iteration = 16; @@ -113,7 +116,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor // Create kernel std::string kernel_name; unsigned int idx = 0; - switch(axis) + switch(_actual_axis) { case 0: kernel_name = "x"; @@ -128,7 +131,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor idx = num_arguments_per_3D_tensor() * 3; break; default: - 
ARM_COMPUTE_ERROR("Not supported"); + ARM_COMPUTE_ERROR("Axis not supported"); } _kernel = static_cast(CLKernelLibrary::get().create_kernel("l2_normalize_" + kernel_name, build_opts)); @@ -149,7 +152,7 @@ void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor ICLKernel::configure_internal(std::get<1>(win_config)); } -Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) +Status CLL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, sum, output, axis, epsilon)); ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get()))); @@ -164,7 +167,7 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue Window window_sum(window); - switch(_axis) + switch(_actual_axis) { case 0: { @@ -218,3 +221,4 @@ void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue ARM_COMPUTE_ERROR("Not supported"); } } +} // namespace arm_compute \ No newline at end of file diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp index efdcc44e0e..9900446218 100644 --- a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp +++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp @@ -40,6 +40,8 @@ namespace arm_compute { namespace { +constexpr int max_input_tensor_dim = 3; + template void l2_normalize_X(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window) { @@ -141,19 +143,20 @@ void l2_normalize_Z(const ITensor *in, const ITensor *sum, ITensor *out, float e while(window.slide_window_slice_3D(in_slice) && window.slide_window_slice_3D(sum_slice)); } -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon) { ARM_COMPUTE_UNUSED(epsilon); + const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 2, "Axis greater than 2 is not supported"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Normalization axis greater than max number of dimensions"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis > 2, "Actual axis greater than 2 is not supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis >= TensorShape::num_max_dimensions, "Actual normalization axis greater than max number of dimensions"); // Reduce shape on axis TensorShape sum_shape = input->tensor_shape(); - sum_shape.set(axis, 1); + sum_shape.set(actual_axis, 1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(sum->tensor_shape(), sum_shape); if(output->total_size() != 0) @@ -167,10 +170,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, cons return Status{}; } -std::tuple validate_and_configure_window(ITensorInfo *input, ITensorInfo *sum, ITensorInfo *output, unsigned int axis) +std::tuple validate_and_configure_window(ITensorInfo *input, ITensorInfo *sum, ITensorInfo *output, int axis) 
{ + const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); const unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type()); - const unsigned int num_elems_processed_per_iteration_sum = (axis == 0) ? 1 : num_elems_processed_per_iteration; + const unsigned int num_elems_processed_per_iteration_sum = (actual_axis == 0) ? 1 : num_elems_processed_per_iteration; Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); @@ -191,11 +195,11 @@ std::tuple validate_and_configure_window(ITensorInfo *input, ITe } // namespace NEL2NormalizeLayerKernel::NEL2NormalizeLayerKernel() - : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12) + : _input(nullptr), _sum(nullptr), _output(nullptr), _actual_axis(0), _epsilon(1e-12) { } -void NEL2NormalizeLayerKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon) +void NEL2NormalizeLayerKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon)); @@ -203,7 +207,7 @@ void NEL2NormalizeLayerKernel::configure(const ITensor *input, const ITensor *su _input = input; _sum = sum; _output = output; - _axis = axis; + _actual_axis = wrap_around(axis, max_input_tensor_dim); _epsilon = epsilon; // Configure kernel window @@ -213,7 +217,7 @@ void NEL2NormalizeLayerKernel::configure(const ITensor *input, const ITensor *su INEKernel::configure(std::get<1>(win_config)); } -Status NEL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, unsigned int axis, float epsilon) +Status NEL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, sum, output, axis, epsilon)); ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), sum->clone().get(), output->clone().get(), axis))); @@ -227,7 +231,7 @@ void NEL2NormalizeLayerKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - switch(_axis) + switch(_actual_axis) { case 0: switch(_input->info()->data_type()) diff --git a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp index 136cb5edef..e76e4f601e 100644 --- a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp +++ b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp @@ -34,25 +34,31 @@ namespace arm_compute { +namespace +{ +constexpr int max_input_tensor_dim = 3; +} // namespace + CLL2NormalizeLayer::CLL2NormalizeLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq() { } -void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon) +void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon) { // Manage intermediate buffers _memory_group.manage(&_sumsq); // Configure kernels - _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE); + const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); + _reduce_func.configure(input, &_sumsq, actual_axis, 
ReductionOperation::SUM_SQUARE); _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon); // Allocate intermediate tensor _sumsq.allocator()->allocate(); } -Status CLL2NormalizeLayer::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, float epsilon) +Status CLL2NormalizeLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int axis, float epsilon) { TensorShape shape(input->tensor_shape()); @@ -61,10 +67,11 @@ Status CLL2NormalizeLayer::validate(const ITensorInfo *input, const ITensorInfo sum_sq.set_data_type(input->data_type()); sum_sq.set_tensor_shape(shape); - ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperation::validate(input, &sum_sq, axis, ReductionOperation::SUM_SQUARE)); + const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); + ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperation::validate(input, &sum_sq, actual_axis, ReductionOperation::SUM_SQUARE)); // Reduce shape on axis - shape.set(axis, 1); + shape.set(actual_axis, 1); sum_sq.set_tensor_shape(shape); ARM_COMPUTE_RETURN_ON_ERROR(CLL2NormalizeLayerKernel::validate(input, &sum_sq, output, axis, epsilon)); diff --git a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp index c9ab5c98e2..88ffdbfd08 100644 --- a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp +++ b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp @@ -28,25 +28,31 @@ namespace arm_compute { +namespace +{ +constexpr int max_input_tensor_dim = 3; +} // namespace + NEL2NormalizeLayer::NEL2NormalizeLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq() { } -void NEL2NormalizeLayer::configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon) +void NEL2NormalizeLayer::configure(ITensor *input, ITensor *output, int axis, float epsilon) { // Manage intermediate buffers _memory_group.manage(&_sumsq); // Configure Kernels - _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE); + const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); + _reduce_func.configure(input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE); _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon); // Allocate intermediate tensors _sumsq.allocator()->allocate(); } -Status NEL2NormalizeLayer::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, float epsilon) +Status NEL2NormalizeLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int axis, float epsilon) { TensorShape shape(input->tensor_shape()); @@ -55,10 +61,11 @@ Status NEL2NormalizeLayer::validate(const ITensorInfo *input, const ITensorInfo sum_sq.set_data_type(input->data_type()); sum_sq.set_tensor_shape(shape); - ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperation::validate(input, &sum_sq, axis, ReductionOperation::SUM_SQUARE)); + const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); + ARM_COMPUTE_RETURN_ON_ERROR(NEReductionOperation::validate(input, &sum_sq, actual_axis, ReductionOperation::SUM_SQUARE)); // Reduce shape on axis - shape.set(axis, 1); + shape.set(actual_axis, 1); sum_sq.set_tensor_shape(shape); ARM_COMPUTE_RETURN_ON_ERROR(NEL2NormalizeLayerKernel::validate(input, &sum_sq, output, axis, epsilon)); diff --git a/tests/validation/CL/L2NormalizeLayer.cpp b/tests/validation/CL/L2NormalizeLayer.cpp index fdbfa3ed4d..beedd81335 100644 --- a/tests/validation/CL/L2NormalizeLayer.cpp +++ 
b/tests/validation/CL/L2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,8 +46,8 @@ namespace constexpr AbsoluteTolerance tolerance_f32(0.00001f); constexpr AbsoluteTolerance tolerance_f16(0.2f); -auto data = concat(combine(framework::dataset::make("DataLayout", { DataLayout::NCHW }), framework::dataset::make("Axis", { 0, 1, 2 })), combine(framework::dataset::make("DataLayout", { DataLayout::NHWC }), - framework::dataset::make("Axis", { 1, 2 }))); +auto data = concat(combine(framework::dataset::make("DataLayout", { DataLayout::NCHW }), framework::dataset::make("Axis", { -1, 0, 2 })), combine(framework::dataset::make("DataLayout", { DataLayout::NHWC }), + framework::dataset::make("Axis", { -2, 2 }))); } // namespace @@ -61,8 +61,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching shape input/output TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1 TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32 - TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions - TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 2 + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F16), @@ -71,10 +72,19 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) })), - framework::dataset::make("Axis", { 0U, 0U, 0U, 0U, static_cast(TensorShape::num_max_dimensions), 3U, 0U })), - framework::dataset::make("Expected", { false, false, false, false, false, false, true })), + framework::dataset::make("Axis", { + 0, + 0, + 0, + 0, + static_cast(TensorShape::num_max_dimensions), + 3, + -2, + 0 })), + framework::dataset::make("Expected", { false, false, false, false, true, true, true, true })), input_info, output_info, axis, expected) { bool is_valid = bool(CLL2NormalizeLayer::validate(&input_info.clone()->set_is_resizable(false), diff --git a/tests/validation/NEON/L2NormalizeLayer.cpp b/tests/validation/NEON/L2NormalizeLayer.cpp index 3164a65417..17147c1d50 100644 --- a/tests/validation/NEON/L2NormalizeLayer.cpp +++ b/tests/validation/NEON/L2NormalizeLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -59,8 +59,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Mismatching shape input/output TensorInfo(TensorShape(128U, 64U), 2, DataType::F32), // Number of Input channels != 1 TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), // DataType != F32 - TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis >= num_max_dimensions - TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), // Axis > 2 + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) }), framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(128U, 64U), 1, DataType::F16), @@ -69,10 +70,19 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(128U, 64U), 1, DataType::S16), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), + TensorInfo(TensorShape(128U, 64U), 1, DataType::F32), TensorInfo(TensorShape(128U, 64U), 1, DataType::F32) })), - framework::dataset::make("Axis", { 0U, 0U, 0U, 0U, static_cast(TensorShape::num_max_dimensions), 3U, 0U })), - framework::dataset::make("Expected", { false, false, false, false, false, false, true })), + framework::dataset::make("Axis", { + 0, + 0, + 0, + 0, + static_cast(TensorShape::num_max_dimensions), + 3, + -2, + 0 })), + framework::dataset::make("Expected", { false, false, false, false, true, true, true, true })), input_info, output_info, axis, expected) { bool is_valid = bool(NEL2NormalizeLayer::validate(&input_info.clone()->set_is_resizable(false), @@ -89,7 +99,7 @@ using NEL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - framework::dataset::make("Axis", { 0, 1, 2 })), + framework::dataset::make("Axis", { -1, 0, 2 })), framework::dataset::make("Epsilon", { 1e-12 }))) { // Validate output @@ -98,7 +108,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture, framework::Da FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - framework::dataset::make("Axis", { 0, 1, 2 })), + framework::dataset::make("Axis", { -1, 0, 2 })), framework::dataset::make("Epsilon", { 1e-12 }))) { // Validate output @@ -110,7 +120,7 @@ TEST_SUITE_END() // FP32 TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - framework::dataset::make("Axis", { 0, 1, 2 })), + framework::dataset::make("Axis", { -1, 0, 2 })), framework::dataset::make("Epsilon", { 1e-12 }))) { // Validate output @@ -119,7 +129,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture, framework::Dat FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY, 
combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), - framework::dataset::make("Axis", { 0, 1, 2 })), + framework::dataset::make("Axis", { -1, 0, 2 })), framework::dataset::make("Epsilon", { 1e-12 }))) { // Validate output diff --git a/tests/validation/fixtures/L2NormalizeLayerFixture.h b/tests/validation/fixtures/L2NormalizeLayerFixture.h index 574722bd88..e3e1510ff0 100644 --- a/tests/validation/fixtures/L2NormalizeLayerFixture.h +++ b/tests/validation/fixtures/L2NormalizeLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -40,12 +40,16 @@ namespace test { namespace validation { +namespace +{ +constexpr int max_input_tensor_dim = 3; +} // namespace template class L2NormalizeLayerValidationFixture : public framework::Fixture { public: template - void setup(TensorShape shape, DataType data_type, DataLayout data_layout, unsigned int axis, float epsilon) + void setup(TensorShape shape, DataType data_type, DataLayout data_layout, int axis, float epsilon) { _target = compute_target(shape, data_type, data_layout, axis, epsilon); _reference = compute_reference(shape, data_type, data_layout, axis, epsilon); @@ -59,7 +63,7 @@ protected: library->fill(tensor, distribution, 0); } - TensorType compute_target(TensorShape shape, DataType data_type, DataLayout data_layout, unsigned int axis, float epsilon) + TensorType compute_target(TensorShape shape, DataType data_type, DataLayout data_layout, int axis, float epsilon) { if(data_layout == DataLayout::NHWC) { @@ -93,20 +97,21 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &shape, DataType data_type, DataLayout data_layout, unsigned int axis, float epsilon) + SimpleTensor compute_reference(const TensorShape &shape, DataType data_type, DataLayout data_layout, int axis, float epsilon) { + uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim); if(data_layout == DataLayout::NHWC) { - switch(axis) + switch(actual_axis) { case 0: - axis = 2; + actual_axis = 2; break; case 1: - axis = 0; + actual_axis = 0; break; case 2: - axis = 1; + actual_axis = 1; break; default: break; @@ -118,7 +123,7 @@ protected: // Fill reference fill(src); - return reference::l2_normalize(src, axis, epsilon); + return reference::l2_normalize(src, actual_axis, epsilon); } TensorType _target{}; -- cgit v1.2.1
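Note on the new axis semantics: every entry point touched by this patch now takes a signed axis and converts it once via wrap_around(axis, max_input_tensor_dim), with max_input_tensor_dim fixed at 3, so negative indices count back from the last supported dimension and out-of-range positive indices wrap modulo 3. The standalone sketch below illustrates that assumed wrapping rule with a hypothetical wrap_axis helper; it is not the Compute Library's wrap_around implementation.

    // Illustrative sketch of the axis wrapping this patch relies on:
    // wrap_around(axis, max_input_tensor_dim) with max_input_tensor_dim == 3.
    // wrap_axis below is a hypothetical stand-in, not the library helper.
    #include <cassert>
    #include <iostream>

    int wrap_axis(int axis, int num_dims)
    {
        const int r = axis % num_dims;   // C++ remainder keeps the sign of axis
        return r < 0 ? r + num_dims : r; // shift negative results into [0, num_dims)
    }

    int main()
    {
        constexpr int max_input_tensor_dim = 3; // matches the constant added in the patch

        assert(wrap_axis(0, max_input_tensor_dim) == 0);  // non-negative axes unchanged
        assert(wrap_axis(2, max_input_tensor_dim) == 2);
        assert(wrap_axis(-1, max_input_tensor_dim) == 2); // last supported dimension
        assert(wrap_axis(-2, max_input_tensor_dim) == 1);
        assert(wrap_axis(3, max_input_tensor_dim) == 0);  // out-of-range positive axes also wrap,
                                                          // which is why Axis == 3 is now expected
                                                          // to validate in the updated tests
        std::cout << "axis -1 wraps to " << wrap_axis(-1, max_input_tensor_dim) << "\n";
        return 0;
    }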
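For callers, the visible change is the axis parameter type: the configure()/validate() signatures shown above now take int instead of unsigned int. Below is a minimal, assumed usage sketch of the updated NEON function with a negative axis; the tensor shape, setup order, and default constructor argument are illustrative assumptions, not taken from the patch.

    // Assumed usage of the updated API: a negative axis now wraps (-1 -> actual axis 2).
    // Sketch only; shapes and fill code are placeholders.
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor input{}, output{};
        input.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(8U, 4U, 2U), 1, DataType::F32));

        NEL2NormalizeLayer l2_norm{};
        l2_norm.configure(&input, &output, -1); // -1 wraps to reduction axis 2; epsilon keeps its 1e-12f default

        input.allocator()->allocate();
        output.allocator()->allocate();
        // ... fill input before running ...
        l2_norm.run();
        return 0;
    }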