From d004a7a707feab36e51f51cfc9eb2cb70729d5ad Mon Sep 17 00:00:00 2001
From: SiCong Li
Date: Thu, 28 May 2020 15:26:41 +0100
Subject: COMPMID-3510 [Interface change] Fix definition of "axis" in NESoftmaxLayer and CLSoftmaxLayer

* [Interface change] "axis" argument is renamed to "reduce_end_axis"
* Unify the meaning of "axis" (now "reduce_end_axis") to be the last axis of
  the first n dimensions (inclusive) to reduce. This way the meaning of
  reduce_end_axis stays the same for both positive and negative values: it
  selects a dimension, and all dimensions up to and including that dimension
  are reduced.

Change-Id: I4ab03bd8360b1cd8cac4998df0b1571064a9d4ed
Signed-off-by: SiCong Li
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3278
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
---
 arm_compute/core/Helpers.h                         | 13 +++++
 arm_compute/runtime/CL/functions/CLSoftmaxLayer.h  | 65 ++++++++++++----------
 .../GLES_COMPUTE/functions/GCSoftmaxLayer.h        | 19 ++++---
 .../runtime/NEON/functions/NESoftmaxLayer.h        | 58 ++++++++++---------
 docs/00_introduction.dox                           |  8 ++-
 src/runtime/CL/functions/CLSoftmaxLayer.cpp        | 62 ++++++++++++---------
 .../GLES_COMPUTE/functions/GCSoftmaxLayer.cpp      | 14 +++--
 src/runtime/NEON/functions/NESoftmaxLayer.cpp      | 58 +++++++++----------
 tests/validation/CL/LogSoftmaxLayer.cpp            | 20 +++----
 tests/validation/CL/SoftmaxLayer.cpp               | 61 +++++++++++++++-----
 tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp     | 10 ++--
 tests/validation/NEON/LogSoftmaxLayer.cpp          | 18 +++---
 tests/validation/NEON/SoftmaxLayer.cpp             | 43 +++++++-------
 tests/validation/reference/LogSoftmaxLayer.cpp     | 16 +++---
 tests/validation/reference/LogSoftmaxLayer.h       |  4 +-
 tests/validation/reference/SoftmaxLayer.cpp        | 42 ++++++--------
 tests/validation/reference/SoftmaxLayer.h          |  6 +-
 17 files changed, 294 insertions(+), 223 deletions(-)

diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index 09c672ecfa..8f1426a56e 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -801,6 +801,19 @@ inline T wrap_around(T x, T m)
     return x >= 0 ? x % m : (x % m + m) % m;
 }
 
+/** Convert a dimension axis to the number of dimensions in the range [0, @p dim_axis]
+ * Handles negative axes: a negative axis is used to specify an axis counted from the end (e.g. -1 for the last axis).
+ *
+ * @param[in] dim_axis The last axis (inclusive) in the range [0, @p dim_axis]
+ * @param[in] num_dims The total number of dimensions
+ *
+ * @return The number of dimensions in the range [0, @p dim_axis]
+ */
+inline size_t dim_index_2_num_dims(int32_t dim_axis, int32_t num_dims)
+{
+    return static_cast<size_t>(wrap_around(dim_axis, num_dims)) + 1;
+}
+
 /** Convert negative coordinates to positive in the range [0, num_dims_input]
  *
  * @param[out] coords Array of coordinates to be converted.
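To make the new convention concrete before the interface changes below, here is a minimal standalone sketch of the conversion the Helpers.h hunk above introduces. It mirrors the wrap_around and dim_index_2_num_dims definitions from that hunk; the worked values in main() are illustrative only and are not part of the patch.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Wrap x into [0, m), handling negative x (mirrors the existing wrap_around helper).
int32_t wrap_around(int32_t x, int32_t m)
{
    return x >= 0 ? x % m : (x % m + m) % m;
}

// Convert a (possibly negative) reduce_end_axis into the number of leading
// dimensions to reduce (mirrors the new dim_index_2_num_dims helper).
size_t dim_index_2_num_dims(int32_t dim_axis, int32_t num_dims)
{
    return static_cast<size_t>(wrap_around(dim_axis, num_dims)) + 1;
}

int main()
{
    // For a 4-D tensor (num_dims = 4):
    printf("%zu\n", dim_index_2_num_dims(0, 4));  // 1 -> reduce dimension 0 only
    printf("%zu\n", dim_index_2_num_dims(1, 4));  // 2 -> reduce dimensions 0 and 1
    printf("%zu\n", dim_index_2_num_dims(-1, 4)); // 4 -> reduce all four dimensions
    return 0;
}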
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index fadbc430e6..231a56f712 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -50,6 +50,10 @@ class ICLTensor; * -# @ref CLLogits1DMaxKernel * -# @ref CLLogits1DShiftExpSumKernel * -# @ref CLLogits1DNormKernel + * And if the reduce_end_axis is not 0, the function will use one of the the following kernels to reshape the input and + * perform softmax on the reshaped input: + * -# @ref CLFlattenLayerKernel + * -# @ref CLReshapeLayerKernel */ template class CLSoftmaxLayerGeneric : public IFunction @@ -59,36 +63,39 @@ public: CLSoftmaxLayerGeneric(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis - * dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Must be in range [0, input_num_dimensions). */ - void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 1); + void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0); /** Set the input and output tensors. * * @param[in] compile_context The compile context to be used. * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 * @param[out] output Destination tensor. Data types supported: same as @p input * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis - * dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Must be in range [0, input_num_dimensions). 
*/ - void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 1); + void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 - * @param[in] output Destination tensor. Data types supported: same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f - * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis - * dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 + * @param[in] output Destination tensor. Data types supported: same as @p input + * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Must be in range [0, input_num_dimensions). * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t axis = 1); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, size_t reduce_end_axis = 0); // Inherited methods overridden: void run() override; @@ -101,13 +108,14 @@ private: * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and * @p _output_flat * - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis - * dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] input Original source tensor. + * @param[in] output Original destination tensor. + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Must be in range [0, input_num_dimensions). */ - void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis); + void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t reduce_end_axis); /** Utility method to configure the kernels needed to flatten the input * tensor. * @@ -118,11 +126,12 @@ private: * @param[in] compile_context The compile context to be used. * @param[in] input Original source tensor. * @param[in] output Original destination tensor. - * @param[in] axis (Optional) Reduction axis. 
It has the purpose of squashing the first @p axis - * dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Must be in range [0, input_num_dimensions). */ - void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis); + void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t reduce_end_axis); MemoryGroup _memory_group; CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h index 33faae5e06..e29322c052 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -50,16 +50,17 @@ public: GCSoftmaxLayer(std::shared_ptr memory_manager = nullptr); /** Set the input and output tensors. * - * @param[in] input Source tensor. Data types supported: F16/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. Only beta = 1 is supported - * @param[in] axis (Optional) Reduction axis. It has the purpose of squashing the first @p axis - * dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[in] beta (Optional) A scaling factor for the exponent. Only beta = 1 is supported + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Must be in range [0, input_num_dimensions). 
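As a concrete illustration of the CLSoftmaxLayer documentation above, the following is a hypothetical usage sketch; the function name cl_softmax_example, the tensor shapes and the surrounding setup are assumptions for illustration, not part of the patch. It requests a softmax over the first two dimensions of a [2x3x4x5] tensor, i.e. one softmax per [2x3] plane.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void cl_softmax_example()
{
    CLScheduler::get().default_init();

    CLTensor input, output;
    input.allocator()->init(TensorInfo(TensorShape(2U, 3U, 4U, 5U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(2U, 3U, 4U, 5U), 1, DataType::F32));

    CLSoftmaxLayer softmax;
    // reduce_end_axis = 1: dimensions 0 and 1 are collapsed and reduced together.
    softmax.configure(&input, &output, 1.0f, 1);

    input.allocator()->allocate();
    output.allocator()->allocate();
    // ... fill `input` ...
    softmax.run();
}

Note that the CL interface keeps reduce_end_axis as an unsigned size_t, so only values in [0, input_num_dimensions) are valid here; the NEON variant later in this patch accepts negative values as well.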
* - * @note The value of @p axis must be always 1 for GLES + * @note The value of @p reduce_end_axis must be always 0 for GLES */ - void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f, size_t axis = 1); + void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h index b80ceaf25c..c5c83d8b5a 100644 --- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h @@ -48,6 +48,10 @@ class ITensor; * -# @ref NEFillBorderKernel * -# @ref NELogits1DMaxKernel * -# @ref NELogits1DSoftmaxKernel + * And if the reduce_end_axis is not 0 or -input_num_dimensions, the function will use one of the the following kernels + * to reshape the input and perform softmax on the reshaped input: + * -# @ref NEFlattenLayerKernel + * -# @ref NEReshapeLayerKernel */ template class NESoftmaxLayerGeneric : public IFunction @@ -65,30 +69,31 @@ public: NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default; /** Set the input and output tensors. * - * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a - * multiple of the internal processing block size, @ref NEFillBorderKernel replicates the - * last value of each row to the nearest multiple. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) Reduction axis. Defaults to -1. - * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions). - * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a + * multiple of the internal processing block size, @ref NEFillBorderKernel replicates the + * last value of each row to the nearest multiple. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] beta (Optional) A scaling factor for the exponent. + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Negative index is used to specify axis from the end (e.g. -1 for the last axis). + * Must be in range [-input_num_dimensions, input_num_dimensions). */ - void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t axis = -1); + void configure(ITensor *input, ITensor *output, float beta = 1.0f, int32_t reduce_end_axis = 0); /** Static function to check if given info will lead to a valid configuration of @ref NESoftmaxLayer * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. 
Data types supported: same as @p input - * @param[in] beta (Optional) A scaling factor for the exponent. - * @param[in] axis (Optional) Reduction axis. Defaults to -1. - * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions). - * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. - * + * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. + * @param[in] output Destination tensor info. Data types supported: same as @p input + * @param[in] beta (Optional) A scaling factor for the exponent. + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Negative index is used to specify axis from the end (e.g. -1 for the last axis). + * Must be in range [-input_num_dimensions, input_num_dimensions). * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t axis = -1); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta = 1.0f, int32_t reduce_end_axis = 0); // Inherited methods overridden: void run() override; @@ -101,14 +106,15 @@ private: * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and * @p _output_flat * - * @param[in] input Original source tensor. - * @param[in] output Original destination tensor. - * @param[in] axis (Optional) Reduction axis. Defaults to -1. - * Negative index is used to specify axis from the end (e.g. -1 for the last axis).Must be in range [-input_num_dimensions, input_num_dimensions). - * It has the purpose of squashing the first @p axis dimensions together. For instance, given a [4x4x4x4] image, - * when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image. + * @param[in] input Original source tensor. + * @param[in] output Original destination tensor. + * @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive)to reduce. Defaults to 0. + * It has the purpose of squashing together the first n dimensions till (including) the @p reduce_end_axis. For instance, given a [2x3x4x5] image, + * when @p reduce_end_axis is 1, the reduction will be applied to axes 0 and 1, and the Softmax op will be applied on each of the [2x3] planes of the input image. + * Negative index is used to specify axis from the end (e.g. -1 for the last axis). + * Must be in range [-input_num_dimensions, input_num_dimensions). 
*/ - void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t axis); + void configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t reduce_end_axis); MemoryGroup _memory_group; NELogits1DMaxKernel _max_kernel; diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index 9833b1a2d1..8387774ef6 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -240,8 +240,12 @@ If there is more than one release in a month then an extra sequential number is v20.08 Public major release - Various bug fixes. - Various optimisations. - - Deprecated interfaces - - Non-descriptor based interfaces for @ref NEThreshold, @ref CLThreshold + - Deprecated functions / interfaces: + - Non-descriptor based interfaces for @ref NEThreshold, @ref CLThreshold + - In @ref NESoftmaxLayer, @ref NELogSoftmaxLayer, @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and @ref GCSoftmaxLayer : + "axis" has been renamed to "reduce_end_axis", which is the last axis (inclusive) before which all dimensions are reduced/collapsed. + The default "axis" (now "reduce_end_axis") value for @ref NESoftmaxLayer and @ref NELogSoftmaxLayer is changed from -1 to 0. + The default "axis" (now "reduce_end_axis") value for @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and @ref GCSoftmaxLayer is changed from 1 to 0. v20.05 Public major release - Various bug fixes. diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp index b0b2117cd9..71ccf9fa01 100644 --- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp +++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp @@ -42,35 +42,38 @@ CLSoftmaxLayerGeneric::CLSoftmaxLayerGeneric(std::shared_ptr -void CLSoftmaxLayerGeneric::configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis) +void CLSoftmaxLayerGeneric::configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t first_n_reduce_axes) { - configure_reshape_input_kernel(CLKernelLibrary::get().get_compile_context(), input, output, axis); + configure_reshape_input_kernel(CLKernelLibrary::get().get_compile_context(), input, output, first_n_reduce_axes); } template -void CLSoftmaxLayerGeneric::configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis) +void CLSoftmaxLayerGeneric::configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t first_n_reduce_axes) { // Flatten the input - const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input->info(), axis); + const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input->info(), first_n_reduce_axes); // Initialize the flat input _input_flattened.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten)); // If we need to flatten the input, we can use CLFlattenKernel or CLReshapeKernel - // If flattening on the third axes, we use CLFlattenKernel. + // If the number of reduced axes is 3 (max dimension), which means collapsing all axes except the batch axis, we use CLFlattenKernel. 
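Given the renaming and default changes recorded in 00_introduction.dox above, a hedged migration sketch for existing CL call sites follows (migrate_softmax_call and its arguments are illustrative names): under the old meaning, "axis" counted how many leading dimensions were squashed, whereas the new "reduce_end_axis" names the last reduced dimension (inclusive), so positive values shift down by one. Callers that relied on the old defaults (1 for CL/GLES, -1 for NEON) must now either pass a value explicitly or accept the new default of 0.

#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;

void migrate_softmax_call(CLSoftmaxLayer &softmax, CLTensor &input, CLTensor &output)
{
    // Before this patch: "axis" counted how many leading dimensions were squashed,
    // so axis = 2 reduced dimensions 0 and 1 of the input:
    //   softmax.configure(&input, &output, 1.0f, /*axis=*/2);

    // After this patch: "reduce_end_axis" names the last reduced dimension (inclusive),
    // so the equivalent call becomes:
    softmax.configure(&input, &output, 1.0f, /*reduce_end_axis=*/1);
}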
// In all other cases we have to use CLReshapeKernel - if(axis != 3) - { - auto reshape_kernel_ptr = support::cpp14::make_unique(); - reshape_kernel_ptr->configure(compile_context, input, &_input_flattened); - _flatten_kernel_ptr = std::move(reshape_kernel_ptr); - } - else + // Note that the "other cases" include both: + // 1. first_n_reduce_axes < 3: Reduce the first 1 (no need to reduce) or 2 dimensions (inclusive) + // 2. first_n_reduce_axes == 4: Reduce all 4 dimensions. This can only be handled by CLReshapeKernel instead of CLFlattenKernel. + if(first_n_reduce_axes == 3) { auto flatten_kernel_ptr = support::cpp14::make_unique(); flatten_kernel_ptr->configure(compile_context, input, &_input_flattened); _flatten_kernel_ptr = std::move(flatten_kernel_ptr); } + else + { + auto reshape_kernel_ptr = support::cpp14::make_unique(); + reshape_kernel_ptr->configure(compile_context, input, &_input_flattened); + _flatten_kernel_ptr = std::move(reshape_kernel_ptr); + } // We need to init the output tensor here. Indeed, the reshape kernel expects // both tensors to be already initialized @@ -78,20 +81,23 @@ void CLSoftmaxLayerGeneric::configure_reshape_input_kernel(const CLCompi } template -void CLSoftmaxLayerGeneric::configure(const ICLTensor *input, ICLTensor *output, float beta, size_t axis) +void CLSoftmaxLayerGeneric::configure(const ICLTensor *input, ICLTensor *output, float beta, size_t reduce_end_axis) { - configure(CLKernelLibrary::get().get_compile_context(), input, output, beta, axis); + configure(CLKernelLibrary::get().get_compile_context(), input, output, beta, reduce_end_axis); } template -void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta, size_t axis) +void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta, size_t reduce_end_axis) { // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(CLSoftmaxLayerGeneric::validate(input->info(), output->info(), beta, axis)); + ARM_COMPUTE_ERROR_THROW_ON(CLSoftmaxLayerGeneric::validate(input->info(), output->info(), beta, reduce_end_axis)); - // We don't need flattening only in the case the input is 2D and axis is 1 - _needs_flattening = axis != 1; + // Convert reduce-before axis (inclusive) to first n axes to reduce + size_t first_n_reduce_axes = dim_index_2_num_dims(reduce_end_axis, input->info()->num_dimensions()); + + // We only need flattening when the number of axes to reduce is greater than 1 + _needs_flattening = first_n_reduce_axes > 1; // If we are dealing with a 4D tensor, we will: // - Flatten the input, so that we end up with a [width*height*depth] * batches 2D tensor @@ -102,8 +108,8 @@ void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_co // Add to the memory manager _input_flattened _memory_group.manage(&_input_flattened); - // Cofigure _flatten_kernel and _input_flattened - configure_reshape_input_kernel(input, output, axis); + // Cofigure _flatten_kernel and _input_flattened + configure_reshape_input_kernel(input, output, first_n_reduce_axes); } // We want to deal with a 2D input. 
Either it is the flattened version of the original input (4D case) @@ -165,11 +171,15 @@ void CLSoftmaxLayerGeneric::configure(const CLCompileContext &compile_co } template -Status CLSoftmaxLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *output, float beta, size_t axis) +Status CLSoftmaxLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *output, float beta, size_t reduce_end_axis) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 4, "Only up to 4 dimensions are supported"); ARM_COMPUTE_UNUSED(beta); + ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() <= reduce_end_axis); + + // Convert reduce-before axis (inclusive) to first n axes to reduce + size_t first_n_reduce_axes = dim_index_2_num_dims(reduce_end_axis, input->num_dimensions()); // Create intermediate tensor info DataType tmp_data_type = is_data_type_quantized_asymmetric(input->data_type()) ? DataType::S32 : input->data_type(); @@ -180,20 +190,20 @@ Status CLSoftmaxLayerGeneric::validate(const ITensorInfo *input, const I TensorInfo tensor_info_max(input->clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true)); TensorInfo tensor_info_sum(input->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true)); - const bool needs_flattening = (axis != 1); + const bool needs_flattening = (first_n_reduce_axes > 1); if(needs_flattening) { - const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input, axis); + const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input, first_n_reduce_axes); TensorInfo tensor_info_flat(input->clone()->set_tensor_shape(shape_flatten).set_is_resizable(true)); - if(axis != 3) + if(first_n_reduce_axes == 3) { - ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(input, &tensor_info_flat)); + ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayerKernel::validate(input, &tensor_info_flat)); } else { - ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayerKernel::validate(input, &tensor_info_flat)); + ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(input, &tensor_info_flat)); } } diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp index 0645ae7f8f..659d0eb57f 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,20 +27,20 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" -using namespace arm_compute; - +namespace arm_compute +{ GCSoftmaxLayer::GCSoftmaxLayer(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() { } -void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float beta, size_t axis) +void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float beta, size_t reduce_end_axis) { - ARM_COMPUTE_UNUSED(beta, axis); + ARM_COMPUTE_UNUSED(beta, reduce_end_axis); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON(beta != 1.0f); - ARM_COMPUTE_ERROR_ON_MSG(axis != 1, "Axis must be 1 for GLES"); + ARM_COMPUTE_ERROR_ON_MSG(reduce_end_axis != 0, "Reduce_end_axis must be 0 for GLES"); // Create intermediate tensors shapes _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type())); @@ -77,3 +77,5 @@ void GCSoftmaxLayer::run() GCScheduler::get().memory_barrier(); GCScheduler::get().dispatch(_norm_kernel); } + +} // namespace arm_compute \ No newline at end of file diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp index 57d75af779..5509edec87 100644 --- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp +++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp @@ -27,9 +27,6 @@ #include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "utils/TypePrinter.h" - -#include namespace arm_compute { @@ -41,29 +38,32 @@ NESoftmaxLayerGeneric::NESoftmaxLayerGeneric(std::shared_ptr -void NESoftmaxLayerGeneric::configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t axis) +void NESoftmaxLayerGeneric::configure_reshape_input_kernel(const ITensor *input, const ITensor *output, int32_t first_n_reduce_axes) { // Flatten the input - const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input->info(), axis); + const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input->info(), first_n_reduce_axes); // Initialize the flat input _input_flattened.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten)); // If we need to flatten the input, we can use NEFlattenKernel or NEReshapeKernel - // If flattening on the third axes, we use NEFlattenKernel. + // If the number of reduced axes is 3 (max dimension), which means collapsing all axes except the batch axis, we use NEFlattenKernel. // In all other cases we have to use NEReshapeKernel - if(axis != 3) - { - auto reshape_kernel_ptr = support::cpp14::make_unique(); - reshape_kernel_ptr->configure(input, &_input_flattened); - _flat_or_reshape_kernel_ptr = std::move(reshape_kernel_ptr); - } - else + // Note that the "other cases" include both: + // 1. first_n_reduce_axes < 3: Reduce the first 1 (no need to reduce) or 2 dimensions (inclusive) + // 2. first_n_reduce_axes == 4: Reduce all 4 dimensions. This can only be handled by NEReshapeKernel instead of NEFlattenKernel. 
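The flatten/reshape choice in the hunk below relies on viewing the input as a 2D tensor before the 1D softmax kernels run ("[width*height*depth] * batches", as the CL version puts it). The following standalone sketch shows that collapse under the assumption that the first first_n_reduce_axes dimensions form the reduced extent and the remaining dimensions form the batch extent; collapse_for_softmax is an illustrative helper, not the library's compute_softmax_shape.

#include <array>
#include <cstddef>
#include <cstdio>

// Collapse a 4-D shape into [reduced extent, batch extent].
std::array<size_t, 2> collapse_for_softmax(const std::array<size_t, 4> &shape, size_t first_n_reduce_axes)
{
    std::array<size_t, 2> flat{1, 1};
    for(size_t d = 0; d < shape.size(); ++d)
    {
        flat[d < first_n_reduce_axes ? 0 : 1] *= shape[d];
    }
    return flat;
}

int main()
{
    // A [2x3x4x5] input with reduce_end_axis = 1 (so first_n_reduce_axes = 2)
    // is treated as a [6 x 20] tensor: one softmax over 6 values for each of 20 rows.
    const auto flat = collapse_for_softmax({2, 3, 4, 5}, 2);
    printf("[%zu x %zu]\n", flat[0], flat[1]); // prints [6 x 20]
    return 0;
}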
+ if(first_n_reduce_axes == 3) { auto flatten_kernel_ptr = support::cpp14::make_unique(); flatten_kernel_ptr->configure(input, &_input_flattened); _flat_or_reshape_kernel_ptr = std::move(flatten_kernel_ptr); } + else + { + auto reshape_kernel_ptr = support::cpp14::make_unique(); + reshape_kernel_ptr->configure(input, &_input_flattened); + _flat_or_reshape_kernel_ptr = std::move(reshape_kernel_ptr); + } // We need to init the output tensor here. Indeed, the reshape kernel expects // both tensors to be already initialized @@ -71,17 +71,17 @@ void NESoftmaxLayerGeneric::configure_reshape_input_kernel(const ITensor } template -void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, float beta, int32_t axis) +void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, float beta, int32_t reduce_end_axis) { // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(NESoftmaxLayerGeneric::validate(input->info(), output->info(), beta, axis)); + ARM_COMPUTE_ERROR_THROW_ON(NESoftmaxLayerGeneric::validate(input->info(), output->info(), beta, reduce_end_axis)); - // Handle negative axis, negative index is used to specify axis from the end (e.g. -1 for the last axis). - axis = wrap_around(axis, static_cast(input->info()->num_dimensions())); + // Convert reduce-before axis (inclusive) to first n axes to reduce + size_t first_n_reduce_axes = dim_index_2_num_dims(reduce_end_axis, static_cast(input->info()->num_dimensions())); - // We don't need flattening only in the case the input is 2D and axis is 1 - _needs_flattening = axis != 1; + // We only need flattening when the number of axes to reduce is greater than 1 + _needs_flattening = first_n_reduce_axes > 1; // If we are dealing with a 4D tensor, we will: // - Flatten the input, so that we end up with a [width*height*depth] * batches 2D tensor @@ -93,7 +93,7 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f _memory_group.manage(&_input_flattened); // Configure _flatten_kernel and _input_flattened - configure_reshape_input_kernel(input, output, axis); + configure_reshape_input_kernel(input, output, first_n_reduce_axes); } // We want to deal with a 2D input. Either it is the flattened version of the original input (4D case) @@ -145,16 +145,16 @@ void NESoftmaxLayerGeneric::configure(ITensor *input, ITensor *output, f } template -Status NESoftmaxLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *output, float beta, int32_t axis) +Status NESoftmaxLayerGeneric::validate(const ITensorInfo *input, const ITensorInfo *output, float beta, int32_t reduce_end_axis) { // Perform validation step ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 4, "Only up to 4 dimensions are supported"); ARM_COMPUTE_UNUSED(beta); - ARM_COMPUTE_RETURN_ERROR_ON(axis < static_cast(-input->num_dimensions()) || static_cast(input->num_dimensions()) <= axis); + ARM_COMPUTE_RETURN_ERROR_ON(reduce_end_axis < static_cast(-input->num_dimensions()) || static_cast(input->num_dimensions()) <= reduce_end_axis); - // Handle negative axis, negative index is used to specify axis from the end (e.g. -1 for the last axis). 
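Since the NEON variant keeps reduce_end_axis signed, negative values remain valid after this change. A hypothetical sketch follows (neon_softmax_example, the shapes and the setup are assumptions for illustration): reduce_end_axis = -1 wraps to the last dimension, so the whole tensor is reduced in a single softmax.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void neon_softmax_example()
{
    Tensor input, output;
    input.allocator()->init(TensorInfo(TensorShape(2U, 3U, 4U, 5U), 1, DataType::F32));
    output.allocator()->init(TensorInfo(TensorShape(2U, 3U, 4U, 5U), 1, DataType::F32));

    NESoftmaxLayer softmax;
    // reduce_end_axis = -1 wraps to the last dimension (3 here), so all four
    // dimensions are collapsed and reduced together.
    softmax.configure(&input, &output, 1.0f, -1);

    input.allocator()->allocate();
    output.allocator()->allocate();
    // ... fill `input` ...
    softmax.run();
}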
- axis = wrap_around(axis, static_cast(input->num_dimensions())); + // Convert reduce-before axis (inclusive) to first n axes to reduce + size_t first_n_reduce_axes = dim_index_2_num_dims(reduce_end_axis, static_cast(input->num_dimensions())); // Create intermediate tensor info DataType tmp_data_type = input->data_type(); @@ -165,20 +165,20 @@ Status NESoftmaxLayerGeneric::validate(const ITensorInfo *input, const I const TensorInfo tensor_info_max_sum(input->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(input->quantization_info()).set_is_resizable(true)); const TensorInfo dont_care; - const bool needs_flattening = (axis != 1); + const bool needs_flattening = (first_n_reduce_axes > 1); if(needs_flattening) { - const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input, axis); + const TensorShape shape_flatten = misc::shape_calculator::compute_softmax_shape(input, first_n_reduce_axes); TensorInfo tensor_info_flat(input->clone()->set_tensor_shape(shape_flatten).set_is_resizable(true)); - if(axis != 3) + if(first_n_reduce_axes == 3) { - ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(input, &tensor_info_flat)); + ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &tensor_info_flat)); } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEFlattenLayerKernel::validate(input, &tensor_info_flat)); + ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayerKernel::validate(input, &tensor_info_flat)); } } diff --git a/tests/validation/CL/LogSoftmaxLayer.cpp b/tests/validation/CL/LogSoftmaxLayer.cpp index 148613c5f8..39d2483ab8 100644 --- a/tests/validation/CL/LogSoftmaxLayer.cpp +++ b/tests/validation/CL/LogSoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -70,7 +70,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); @@ -78,7 +78,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerFixture, framework::Data FIXTURE_DATA_TEST_CASE(RunLarge, CLLogSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); @@ -86,7 +86,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLLogSoftmaxLayerFixture, framework::Data FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayer4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); @@ -97,7 +97,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -105,7 +105,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerFixture, framework::Dat FIXTURE_DATA_TEST_CASE(RunLarge, CLLogSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -113,7 +113,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLLogSoftmaxLayerFixture, framework::Dat FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayer4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -130,7 +130,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerQuantizedFixture, fra framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), 
_reference, tolerance_qasymm8); @@ -139,7 +139,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLLogSoftmaxLayerQuantizedFixture, fra framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.0f }))), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); @@ -148,7 +148,7 @@ FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerQuantizedFixture, framew framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.0f }))), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp index 5ee929f6b9..432720ca14 100644 --- a/tests/validation/CL/SoftmaxLayer.cpp +++ b/tests/validation/CL/SoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -109,7 +109,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::Sof // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8, // Invalid output quantization info @@ -120,6 +120,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 12)), TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8_SIGNED, + QuantizationInfo(1.f/256, 12)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8_SIGNED, // Invalid axis high + QuantizationInfo(1.f/256, 12)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8_SIGNED, // Invalid axis low QuantizationInfo(1.f/256, 12)) }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U), 1, DataType::F16), @@ -133,11 +137,38 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( QuantizationInfo(1.f/256, 0)), TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.f/256, -128)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8_SIGNED, // Invalid axis high + QuantizationInfo(1.f/256, -128)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8_SIGNED, // Invalid axis low + QuantizationInfo(1.f/256, -128)), })), - framework::dataset::make("Expected", { false, false, false, false, false, true, true, true })), - input_info, output_info, expected) + framework::dataset::make("beta", { 1.0, + 2.0, + 1.0, + 2.0, + 1.0, + 2.0, + 1.0, + 2.0, + 1.0, + 2.0, + })), + framework::dataset::make("reduce_end_axis", { + 0, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 2, + -1, + })), + framework::dataset::make("Expected", { false, false, false, false, false, true, true, true, false, false })), + input_info, output_info, beta, reduce_end_axis, expected) { - 
ARM_COMPUTE_EXPECT(bool(CLSoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bool(CLSoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), beta, reduce_end_axis)) == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* @@ -150,7 +181,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); @@ -158,7 +189,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture, framework::Dataset FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); @@ -166,7 +197,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::Dataset FIXTURE_DATA_TEST_CASE(Run4D, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayer4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); @@ -177,7 +208,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -185,7 +216,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture, framework::Datase FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -193,7 +224,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::Datase FIXTURE_DATA_TEST_CASE(Run4D, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayer4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -210,7 +241,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, 
CLSoftmaxLayerQuantizedFixture, framew framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); @@ -219,7 +250,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerQuantizedFixture, framew framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.0f }))), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); @@ -228,7 +259,7 @@ FIXTURE_DATA_TEST_CASE(Run4D, CLSoftmaxLayerQuantizedFixture, framework framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.0f }))), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); @@ -242,7 +273,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerQuantizedFixture, framewo framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 1, 2 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1 }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed, tolerance_number_qasymm8_signed); diff --git a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp index 3b55717372..50f6a74313 100644 --- a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp +++ b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -89,7 +89,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", 1.0f)), - framework::dataset::make("Axis", 1))) + framework::dataset::make("ReduceEndAxis", 0))) { // Validate output validate(GCAccessor(_target), _reference, tolerance_f16); @@ -97,7 +97,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framew FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", 1.0f)), - framework::dataset::make("Axis", 1))) + framework::dataset::make("ReduceEndAxis", 0))) { // Validate output validate(GCAccessor(_target), _reference, tolerance_f16); @@ -108,7 +108,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", 1.0f)), - framework::dataset::make("Axis", 1))) + framework::dataset::make("ReduceEndAxis", 0))) { // Validate output validate(GCAccessor(_target), _reference, tolerance_f32); @@ -116,7 +116,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::Datase FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", 1.0f)), - framework::dataset::make("Axis", 1))) + framework::dataset::make("ReduceEndAxis", 0))) { // Validate output validate(GCAccessor(_target), _reference, tolerance_f32); diff --git a/tests/validation/NEON/LogSoftmaxLayer.cpp b/tests/validation/NEON/LogSoftmaxLayer.cpp index 43e98ae4ab..22b56fd127 100644 --- a/tests/validation/NEON/LogSoftmaxLayer.cpp +++ b/tests/validation/NEON/LogSoftmaxLayer.cpp @@ -71,7 +71,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NELogSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -79,7 +79,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NELogSoftmaxLayerFixture, framework::Data FIXTURE_DATA_TEST_CASE(RunSmall4D, NELogSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -87,7 +87,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NELogSoftmaxLayerFixture, framework::Da FIXTURE_DATA_TEST_CASE(RunLarge, NELogSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1 }))) 
+ framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -99,7 +99,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall2D, NELogSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -107,7 +107,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall2D, NELogSoftmaxLayerFixture, framework::D FIXTURE_DATA_TEST_CASE(RunSmall4D, NELogSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -115,7 +115,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NELogSoftmaxLayerFixture, framework::D FIXTURE_DATA_TEST_CASE(RunLarge, NELogSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -132,7 +132,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall2D, NELogSoftmaxLayerQuantizedFixture, f framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -141,7 +141,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NELogSoftmaxLayerQuantizedFixture, f framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -150,7 +150,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NELogSoftmaxLayerQuantizedFixture, fra framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.0f }))), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp index 8af3847cf8..1465af441b 100644 --- a/tests/validation/NEON/SoftmaxLayer.cpp +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -73,7 +73,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 12)), - TensorInfo(TensorShape(32U, 13U), 
1, DataType::QASYMM8, //Invalid axis value + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis high + QuantizationInfo(1.f/256, 12)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, //Invalid axis low QuantizationInfo(1.f/256, 12)), }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U), 1, DataType::F16), @@ -85,6 +87,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( QuantizationInfo(1.f/256, 0)), TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 0)), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8, + QuantizationInfo(1.f/256, 0)), })), framework::dataset::make("beta", { 1.0, 2.0, @@ -94,17 +98,18 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( 2.0, 1.0, })), - framework::dataset::make("axis", { 1, - 1, - 1, + framework::dataset::make("reduce_end_axis", { 0, + 0, + 0, -1, - 1, + 0, + 2, -3, })), - framework::dataset::make("Expected", { false, false, false, true, true, false })), - input_info, output_info, beta, axis, expected) + framework::dataset::make("Expected", { false, false, false, true, true, false, false })), + input_info, output_info, beta, reduce_end_axis, expected) { - ARM_COMPUTE_EXPECT(bool(NESoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), beta, axis)) == expected, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bool(NESoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), beta, reduce_end_axis)) == expected, framework::LogLevel::ERRORS); } // clang-format on // *INDENT-ON* @@ -118,7 +123,7 @@ TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -126,7 +131,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::Dataset FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -134,7 +139,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture, framework::Datas FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f16); @@ -146,7 +151,7 @@ TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1 }))) + 
framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -154,7 +159,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerFixture, framework::Data FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { 0, 1, 2 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -162,7 +167,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerFixture, framework::Data FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_f32); @@ -179,7 +184,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture, fram framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -188,7 +193,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture, fram framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { -1, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { -1, 1, 2 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -197,7 +202,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerQuantizedFixture, framew framework::dataset::make("DataType", DataType::QASYMM8)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.0f }))), - framework::dataset::make("Axis", { 1 }))) + framework::dataset::make("ReduceEndAxis", { 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); @@ -209,7 +214,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall2D, NESoftmaxLayerQuantizedFixture, frame framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { -1, 1 }))) + framework::dataset::make("ReduceEndAxis", { -1, 0 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8_signed); @@ -218,7 +223,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall4D, NESoftmaxLayerQuantizedFixture, frame framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), framework::dataset::make("Beta", { 1.0f, 2.f }))), - framework::dataset::make("Axis", { -2, 2, 3 }))) + framework::dataset::make("ReduceEndAxis", { -2, 1, 2 }))) { // Validate output validate(Accessor(_target), _reference, 
diff --git a/tests/validation/reference/LogSoftmaxLayer.cpp b/tests/validation/reference/LogSoftmaxLayer.cpp
index edb208e6ae..8dd8d45a86 100644
--- a/tests/validation/reference/LogSoftmaxLayer.cpp
+++ b/tests/validation/reference/LogSoftmaxLayer.cpp
@@ -35,26 +35,26 @@ namespace validation
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> log_softmax_layer(const SimpleTensor<T> &src, float beta, int32_t axis)
+SimpleTensor<T> log_softmax_layer(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis)
 {
-    return softmax_layer_generic<T>(src, beta, axis, true);
+    return softmax_layer_generic<T>(src, beta, reduce_end_axis, true);
 }
 
 template < typename T, typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type >
-SimpleTensor<T> log_softmax_layer(const SimpleTensor<T> &src, float beta, int32_t axis)
+SimpleTensor<T> log_softmax_layer(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis)
 {
     const QuantizationInfo output_quantization_info = arm_compute::get_softmax_output_quantization_info(src.data_type(), true);
 
     SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
-    SimpleTensor<float> dst_tmp = log_softmax_layer<float>(src_tmp, beta, axis);
+    SimpleTensor<float> dst_tmp = log_softmax_layer<float>(src_tmp, beta, reduce_end_axis);
     SimpleTensor<T>     dst     = convert_to_asymmetric<T>(dst_tmp, output_quantization_info);
     return dst;
 }
 
-template SimpleTensor<float> log_softmax_layer(const SimpleTensor<float> &src, float beta, int32_t axis);
-template SimpleTensor<half> log_softmax_layer(const SimpleTensor<half> &src, float beta, int32_t axis);
-template SimpleTensor<uint8_t> log_softmax_layer(const SimpleTensor<uint8_t> &src, float beta, int32_t axis);
-template SimpleTensor<int8_t> log_softmax_layer(const SimpleTensor<int8_t> &src, float beta, int32_t axis);
+template SimpleTensor<float> log_softmax_layer(const SimpleTensor<float> &src, float beta, int32_t reduce_end_axis);
+template SimpleTensor<half> log_softmax_layer(const SimpleTensor<half> &src, float beta, int32_t reduce_end_axis);
+template SimpleTensor<uint8_t> log_softmax_layer(const SimpleTensor<uint8_t> &src, float beta, int32_t reduce_end_axis);
+template SimpleTensor<int8_t> log_softmax_layer(const SimpleTensor<int8_t> &src, float beta, int32_t reduce_end_axis);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/LogSoftmaxLayer.h b/tests/validation/reference/LogSoftmaxLayer.h
index 48ffdcfbcc..d9a439850e 100644
--- a/tests/validation/reference/LogSoftmaxLayer.h
+++ b/tests/validation/reference/LogSoftmaxLayer.h
@@ -36,10 +36,10 @@ namespace validation
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<T> log_softmax_layer(const SimpleTensor<T> &src, float beta, int32_t axis = -1);
+SimpleTensor<T> log_softmax_layer(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis = 0);
 
 template < typename T, typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type = 0 >
-SimpleTensor<T> log_softmax_layer(const SimpleTensor<T> &src, float beta, int32_t axis = -1);
+SimpleTensor<T> log_softmax_layer(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis = 0);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/SoftmaxLayer.cpp b/tests/validation/reference/SoftmaxLayer.cpp
index 2fe1faef50..9a8d46d516 100644
--- a/tests/validation/reference/SoftmaxLayer.cpp
+++ b/tests/validation/reference/SoftmaxLayer.cpp
@@ -23,6 +23,7 @@
  */
 #include "SoftmaxLayer.h"
 
+#include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Types.h"
 
 namespace arm_compute
@@ -34,32 +35,21 @@ namespace validation
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> softmax_layer_generic(const SimpleTensor<T> &src, float beta, int32_t axis, bool is_log)
+SimpleTensor<T> softmax_layer_generic(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis, bool is_log)
 {
     // Create reference
     SimpleTensor<T> dst{ src.shape(), src.data_type(), 1 };
 
-    // Negative index is used to specify axis from the end (e.g. -1 for the last axis).
-    if(axis < 0)
-    {
-        axis += src.shape().num_dimensions();
-    }
+    // Convert reduce-before axis (inclusive) to first n axes to reduce
+    const size_t first_n_reduce_axes = dim_index_2_num_dims(reduce_end_axis, static_cast<int32_t>(src.shape().num_dimensions()));
 
     // Compute reference. Lower dims are the collapsing of the first axis
     // dimensions (i.e., the flattened dimension of each batch). The upper dims are
     // instead the batches we want to normalize
-    int lower_dims = 1;
-    for(size_t i = 0; i < static_cast<size_t>(axis); ++i)
-    {
-        lower_dims *= src.shape()[i];
-    }
+    const int lower_dims = src.shape().total_size_lower(first_n_reduce_axes);
 
-    int upper_dims = 1;
-    for(size_t i = static_cast<size_t>(axis); i < TensorShape::num_max_dimensions; ++i)
-    {
-        upper_dims *= src.shape()[i];
-    }
+    const int upper_dims = src.shape().total_size_upper(first_n_reduce_axes);
 
 #if defined(_OPENMP)
 #pragma omp parallel for
@@ -107,30 +97,30 @@ SimpleTensor softmax_layer_generic(const SimpleTensor &src, float beta, in
     return dst;
 }
 
-template SimpleTensor<float> softmax_layer_generic(const SimpleTensor<float> &src, float beta, int32_t axis, bool is_log);
-template SimpleTensor<half> softmax_layer_generic(const SimpleTensor<half> &src, float beta, int32_t axis, bool is_log);
+template SimpleTensor<float> softmax_layer_generic(const SimpleTensor<float> &src, float beta, int32_t reduce_end_axis, bool is_log);
+template SimpleTensor<half> softmax_layer_generic(const SimpleTensor<half> &src, float beta, int32_t reduce_end_axis, bool is_log);
 
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, int32_t axis)
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis)
 {
-    return softmax_layer_generic<T>(src, beta, axis, false);
+    return softmax_layer_generic<T>(src, beta, reduce_end_axis, false);
 }
 
 template < typename T, typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type >
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, int32_t axis)
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis)
 {
     const QuantizationInfo output_quantization_info = arm_compute::get_softmax_output_quantization_info(src.data_type(), false);
 
     SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
-    SimpleTensor<float> dst_tmp = softmax_layer<float>(src_tmp, beta, axis);
+    SimpleTensor<float> dst_tmp = softmax_layer<float>(src_tmp, beta, reduce_end_axis);
     SimpleTensor<T>     dst     = convert_to_asymmetric<T>(dst_tmp, output_quantization_info);
     return dst;
 }
 
-template SimpleTensor<float> softmax_layer(const SimpleTensor<float> &src, float beta, int32_t axis);
-template SimpleTensor<half> softmax_layer(const SimpleTensor<half> &src, float beta, int32_t axis);
-template SimpleTensor<uint8_t> softmax_layer(const SimpleTensor<uint8_t> &src, float beta, int32_t axis);
-template SimpleTensor<int8_t> softmax_layer(const SimpleTensor<int8_t> &src, float beta, int32_t axis);
+template SimpleTensor<float> softmax_layer(const SimpleTensor<float> &src, float beta, int32_t reduce_end_axis);
+template SimpleTensor<half> softmax_layer(const SimpleTensor<half> &src, float beta, int32_t reduce_end_axis);
+template SimpleTensor<uint8_t> softmax_layer(const SimpleTensor<uint8_t> &src, float beta, int32_t reduce_end_axis);
+template SimpleTensor<int8_t> softmax_layer(const SimpleTensor<int8_t> &src, float beta, int32_t reduce_end_axis);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/SoftmaxLayer.h b/tests/validation/reference/SoftmaxLayer.h
index f819853d95..fde19943bf 100644
--- a/tests/validation/reference/SoftmaxLayer.h
+++ b/tests/validation/reference/SoftmaxLayer.h
@@ -36,13 +36,13 @@ namespace validation
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<T> softmax_layer_generic(const SimpleTensor<T> &src, float beta, int32_t axis, bool is_log = false);
+SimpleTensor<T> softmax_layer_generic(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis, bool is_log = false);
 
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, int32_t axis = -1);
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis = 0);
 
 template < typename T, typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type = 0 >
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, int32_t axis = -1);
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta, int32_t reduce_end_axis = 0);
 } // namespace reference
 } // namespace validation
 } // namespace test
-- cgit v1.2.1
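For readers of the reference change above: the updated softmax_layer_generic() splits the input shape into lower_dims (the product of dimensions [0, reduce_end_axis]) and upper_dims (the product of the remaining dimensions), and normalises each of the upper_dims rows of length lower_dims independently. A self-contained sketch of that scheme (illustration only; softmax_over_leading_dims and the flat std::vector layout are not ACL API):

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Apply softmax over the flattened leading dimensions [0, reduce_end_axis] of a
// dim0-contiguous flat buffer. Hypothetical helper, not part of the library.
std::vector<float> softmax_over_leading_dims(const std::vector<float> &src,
                                             const std::vector<std::size_t> &shape,
                                             std::int32_t reduce_end_axis,
                                             float beta = 1.0f)
{
    const std::int32_t num_dims = static_cast<std::int32_t>(shape.size());
    const std::int32_t wrapped  = reduce_end_axis >= 0 ? reduce_end_axis : reduce_end_axis + num_dims;
    const std::size_t  first_n  = static_cast<std::size_t>(wrapped) + 1; // dimensions [0, reduce_end_axis]

    // Counterparts of the TensorShape::total_size_lower / total_size_upper calls in the patch.
    std::size_t lower_dims = 1;
    for(std::size_t i = 0; i < first_n; ++i)
    {
        lower_dims *= shape[i];
    }
    std::size_t upper_dims = 1;
    for(std::size_t i = first_n; i < shape.size(); ++i)
    {
        upper_dims *= shape[i];
    }

    std::vector<float> dst(src.size());
    for(std::size_t r = 0; r < upper_dims; ++r)
    {
        const float *in  = src.data() + r * lower_dims;
        float       *out = dst.data() + r * lower_dims;

        // Shift by the row maximum for numerical stability, exponentiate, then normalise.
        const float max_val = *std::max_element(in, in + lower_dims);
        float       sum     = 0.f;
        for(std::size_t i = 0; i < lower_dims; ++i)
        {
            out[i] = std::exp((in[i] - max_val) * beta);
            sum += out[i];
        }
        for(std::size_t i = 0; i < lower_dims; ++i)
        {
            out[i] /= sum;
        }
    }
    return dst;
}

int main()
{
    const std::vector<std::size_t> shape{ 2, 3, 4, 5 };
    const std::vector<float>       src(2 * 3 * 4 * 5, 1.0f);
    const auto dst = softmax_over_leading_dims(src, shape, /*reduce_end_axis=*/1);
    // With uniform input and reduce_end_axis = 1, each 6-element row becomes 1/6.
    std::printf("dst[0] = %f\n", dst[0]);
    return 0;
}

For example, a {2, 3, 4, 5} shape with reduce_end_axis = 1 gives lower_dims = 6 and upper_dims = 20, i.e. twenty independent softmaxes over six elements each, matching the behaviour the renamed parameter is meant to express.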