From a2b89ca5407532257a959ad1852f29187e1be4ac Mon Sep 17 00:00:00 2001
From: Pablo Palmier
Date: Thu, 5 Oct 2017 15:01:34 +0100
Subject: IVGCVSW-631 Neon support for Softmax beta parameter (F32 only)

Change-Id: Ibf6f038b39f1a4e557f5d04feb08e3d5ef54e223
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112019
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
Reviewed-by: Anthony Barbier
Reviewed-by: Georgios Pinitas
---
 arm_compute/core/Helpers.h                      |  2 +-
 arm_compute/core/Helpers.inl                    |  2 +-
 .../core/NEON/kernels/NESoftmaxLayerKernel.h    |  6 ++--
 .../GLES_COMPUTE/functions/GCSoftmaxLayer.h     |  3 +-
 .../runtime/NEON/functions/NESoftmaxLayer.h     |  3 +-
 src/core/CL/kernels/CLActivationLayerKernel.cpp |  3 +-
 src/core/NEON/kernels/NESoftmaxLayerKernel.cpp  | 26 ++++++++++-----
 .../GLES_COMPUTE/functions/GCSoftmaxLayer.cpp   |  5 ++-
 src/runtime/NEON/functions/NESoftmaxLayer.cpp   |  4 +--
 tests/validation/CL/SoftmaxLayer.cpp            | 28 ++++++++++------
 tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp  | 18 +++++++---
 tests/validation/NEON/SoftmaxLayer.cpp          | 26 ++++++++++-----
 tests/validation/fixtures/SoftmaxLayerFixture.h | 38 +++++++++++++++-------
 tests/validation/reference/SoftmaxLayer.cpp     | 22 +++++++------
 tests/validation/reference/SoftmaxLayer.h       |  4 +--
 15 files changed, 122 insertions(+), 68 deletions(-)

diff --git a/arm_compute/core/Helpers.h b/arm_compute/core/Helpers.h
index 1be24e1841..fdbb46fc78 100644
--- a/arm_compute/core/Helpers.h
+++ b/arm_compute/core/Helpers.h
@@ -501,7 +501,7 @@ bool auto_init_if_empty(ITensorInfo &info,
  *
  * @return True if the tensor info has been initialized
  */
-bool auto_init_if_empty(ITensorInfo &info_sink, ITensorInfo &info_source);
+bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source);
 
 /** Set the shape to the specified value if the current assignment is empty.
  *
diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl
index 1e565344b7..3672692814 100644
--- a/arm_compute/core/Helpers.inl
+++ b/arm_compute/core/Helpers.inl
@@ -217,7 +217,7 @@ inline bool auto_init_if_empty(ITensorInfo &info,
     return false;
 }
 
-inline bool auto_init_if_empty(ITensorInfo &info_sink, ITensorInfo &info_source)
+inline bool auto_init_if_empty(ITensorInfo &info_sink, const ITensorInfo &info_source)
 {
     if(info_sink.tensor_shape().total_size() == 0)
     {
diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
index cce21569d9..c3e25181b6 100644
--- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h
@@ -78,14 +78,15 @@ public:
      * @param[in]  max    Max values tensor. Data types supported: same as @p input.
      * @param[out] output Destination tensor. Data types supported: same as @p input.
      * @param[out] sum    Sum of 1D logits tensor. Data types supported: same as @p input.
+     * @param[in]  beta   (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
      */
-    void configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum);
+    void configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum, float beta = 1.0f);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    using Logits1DShiftExpSumFunction = void(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window);
+    using Logits1DShiftExpSumFunction = void(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta);
 
 private:
     Logits1DShiftExpSumFunction *_func;
@@ -93,6 +94,7 @@ private:
     const ITensor *_max;
     ITensor       *_output;
     ITensor       *_sum;
+    float          _beta;
 };
 
 /** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
index 19bfb83eca..e7f8d5053a 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
@@ -51,8 +51,9 @@ public:
      *
      * @param[in]  input  Source tensor. Data types supported: F16/F32
      * @param[out] output Destination tensor. Data types supported: same as @p input
+     * @param[in]  beta   (Optional) A scaling factor for the exponent. Only beta = 1 is supported.
      */
-    void configure(const IGCTensor *input, IGCTensor *output);
+    void configure(const IGCTensor *input, IGCTensor *output, float beta = 1.0f);
 
     // Inherited methods overridden:
     void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index a265f70043..38a0f2116f 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -53,8 +53,9 @@ public:
      *
      * @param[in]  input  Source tensor. Data types supported: QS8/QS16/F16/F32.
      * @param[out] output Destination tensor. Data types supported: same as @p input.
+     * @param[in]  beta   (Optional) A scaling factor for the exponent. QS8/QS16 only support a beta value of 1.
      */
-    void configure(ITensor *input, ITensor *output);
+    void configure(ITensor *input, ITensor *output, float beta = 1.0f);
 
     // Inherited methods overridden:
     void run() override;
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp
index 8172aafca9..c097b5ff70 100644
--- a/src/core/CL/kernels/CLActivationLayerKernel.cpp
+++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp
@@ -68,8 +68,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITens
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output,
-                       *input->clone());
+    auto_init_if_empty(*output, *input);
 }
 
 const unsigned int num_elems_processed_per_iteration = 16 / input->element_size();
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index f1027590e4..a8a0f59a41 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -251,8 +251,10 @@ void NELogits1DMaxKernel::run(const Window &window, const ThreadInfo &info)
 
 namespace
 {
-void logits_1d_shift_exp_sum_qs8(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
+void logits_1d_shift_exp_sum_qs8(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta)
 {
+    ARM_COMPUTE_UNUSED(beta);
+
     Window window_max(window);
     window_max.set(Window::DimX, Window::Dimension(0, 0, 0));
 
@@ -313,8 +315,10 @@ void logits_1d_shift_exp_sum_qs8(const ITensor *in, const ITensor *max, ITensor
     }
     while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(max_slice));
 }
-void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
+void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta)
 {
+    ARM_COMPUTE_UNUSED(beta);
+
     Window window_max(window);
     window_max.set(Window::DimX, Window::Dimension(0, 0, 0));
 
@@ -375,7 +379,7 @@ void logits_1d_shift_exp_sum_qs16(const ITensor *in, const ITensor *max, ITensor
 }
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
+void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta)
 {
     Window window_max(window);
     window_max.set(Window::DimX, Window::Dimension(0, 0, 0));
 
@@ -410,6 +414,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor
     {
         float16x8_t vec_elements = vld1q_f16(in_ptr);
         vec_elements             = vsubq_f16(vec_elements, vec_max);
+        vec_elements             = vmulq_n_f16(vec_elements, beta);
         vec_elements             = vexpq_f16(vec_elements);
 
         vst1q_f16(exp_ptr, vec_elements);
@@ -426,7 +431,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor
     // Run remaining elements
     for(int i = 0; i < small_steps; ++i)
     {
-        const float16_t element = std::exp(static_cast<float>(in_ptr[i] - *max_ptr));
+        const float16_t element = std::exp(static_cast<float>(in_ptr[i] - *max_ptr) * beta);
         exp_ptr[i] = element;
         sum += element;
     }
@@ -436,7 +441,7 @@ void logits_1d_shift_exp_sum_f16(const ITensor *in, const ITensor *max, ITensor
 }
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
-void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window)
+void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor *out, ITensor *sum, const Window &window, float beta)
 {
     Window window_max(window);
     window_max.set(Window::DimX, Window::Dimension(0, 0, 0));
 
@@ -471,6 +476,7 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor
     {
         float32x4_t vec_elements = vld1q_f32(in_ptr);
         vec_elements             = vsubq_f32(vec_elements, vec_max);
+        vec_elements             = vmulq_n_f32(vec_elements, beta);
         vec_elements             = vexpq_f32(vec_elements);
 
         vst1q_f32(exp_ptr, vec_elements);
@@ -488,7 +494,7 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor
     // Run remaining elements
     for(int i = 0; i < small_steps; ++i)
     {
-        float element = std::exp(in_ptr[i] - *max_ptr);
+        float element = std::exp((in_ptr[i] - *max_ptr) * beta);
         exp_ptr[i] = element;
         sum += element;
     }
@@ -500,14 +506,15 @@ void logits_1d_shift_exp_sum_f32(const ITensor *in, const ITensor *max, ITensor
 }
 } //namespace
 
 NELogits1DShiftExpSumKernel::NELogits1DShiftExpSumKernel()
-    : _func(nullptr), _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr)
+    : _func(nullptr), _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr), _beta(1.0f)
 {
 }
 
-void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum)
+void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum, float beta)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output);
+    ARM_COMPUTE_ERROR_ON((beta != 1.0f) && is_data_type_fixed_point(input->info()->data_type()));
 
     // Output auto initialization if not yet initialized
     auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
@@ -545,6 +552,7 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor
     _max    = max;
     _output = output;
     _sum    = sum;
+    _beta   = beta;
 
     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
@@ -568,7 +576,7 @@ void NELogits1DShiftExpSumKernel::run(const Window &window, const ThreadInfo &in
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
     ARM_COMPUTE_ERROR_ON(_func == nullptr);
 
-    (*_func)(_input, _max, _output, _sum, window);
+    (*_func)(_input, _max, _output, _sum, window, _beta);
 }
 
 namespace
diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
index 1db927c8ff..34464ff057 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
@@ -34,9 +34,12 @@ GCSoftmaxLayer::GCSoftmaxLayer()
 {
 }
 
-void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output)
+void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output, float beta)
 {
+    ARM_COMPUTE_UNUSED(beta);
+
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON(beta != 1.0f);
 
     // Create intermediate tensors shapes
     _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()));
diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
index cc5d4e91c3..84ecfdaf33 100644
--- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp
+++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
@@ -36,7 +36,7 @@ NESoftmaxLayer::NESoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
 {
 }
 
-void NESoftmaxLayer::configure(ITensor *input, ITensor *output)
+void NESoftmaxLayer::configure(ITensor *input, ITensor *output, float beta)
 {
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
 
@@ -57,7 +57,7 @@ void NESoftmaxLayer::configure(ITensor *input, ITensor *output)
     // Configure Kernels
     _max_kernel.configure(input, &_max);
-    _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum);
+    _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum, beta);
     _norm_kernel.configure(&_tmp, &_sum, output);
     _fill_border_kernel.configure(input, _max_kernel.border_size(), BorderMode::REPLICATE);
diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp
index f43e680c9b..bd7072377a 100644
--- a/tests/validation/CL/SoftmaxLayer.cpp
+++ b/tests/validation/CL/SoftmaxLayer.cpp
@@ -148,12 +148,16 @@ using CLSoftmaxLayerFixture = SoftmaxValidationFixture
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture, framework::DatasetMode::ALL, combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                       framework::dataset::make("DataType", DataType::F16)),
+                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                       framework::dataset::make("DataType", DataType::F16)),
+                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
@@ -161,12 +165,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::Dataset
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture, framework::DatasetMode::ALL, combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerFixture, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -223,17 +231,17 @@ using CLSoftmaxLayerQuantizedFixture = SoftmaxValidationQuantizedFixture
 FIXTURE_DATA_TEST_CASE(RunSmall, CLSoftmaxLayerQuantizedFixture, framework::DatasetMode::ALL, combine(combine(datasets::SoftmaxLayerSmallShapes(),
-                                                       framework::dataset::make("DataType",
-                                                               DataType::QASYMM8)),
-                                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) })))
framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }))) + framework::dataset::make("DataType", DataType::QASYMM8)), + combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), + framework::dataset::make("Beta", { 1.0f, 2.f })))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, CLSoftmaxLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(), - framework::dataset::make("DataType", - DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }))) + framework::dataset::make("DataType", DataType::QASYMM8)), + combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }), + framework::dataset::make("Beta", { 1.0f, 2.0f })))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_qasymm8); diff --git a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp index a2114a9c37..2c281419de 100644 --- a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp +++ b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp @@ -57,7 +57,7 @@ const auto CNNDataTypes = framework::dataset::make("DataType", TEST_SUITE(GC) TEST_SUITE(SoftmaxLayer) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), shape, data_type) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SoftmaxLayerSmallShapes(), datasets::SoftmaxLayerLargeShapes()), CNNDataTypes), shape, data_type) { // Set fixed point position data type allowed const int fixed_point_position = is_data_type_fixed_point(data_type) ? 
@@ -89,12 +89,16 @@ using GCSoftmaxLayerFixture = SoftmaxValidationFixture
-FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                       framework::dataset::make("DataType", DataType::F16)),
+                                                       framework::dataset::make("Beta", 1.0f)))
 {
     // Validate output
     validate(GCAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                       framework::dataset::make("DataType", DataType::F16)),
+                                                       framework::dataset::make("Beta", 1.0f)))
 {
     // Validate output
     validate(GCAccessor(_target), _reference, tolerance_f16);
@@ -102,12 +106,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framew
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                       framework::dataset::make("Beta", 1.0f)))
 {
     // Validate output
     validate(GCAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                       framework::dataset::make("Beta", 1.0f)))
 {
     // Validate output
     validate(GCAccessor(_target), _reference, tolerance_f32);
diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp
index 9d1795ebb0..1a303e14a3 100644
--- a/tests/validation/NEON/SoftmaxLayer.cpp
+++ b/tests/validation/NEON/SoftmaxLayer.cpp
@@ -65,7 +65,7 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
 TEST_SUITE(NEON)
 TEST_SUITE(SoftmaxLayer)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), shape, data_type)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SoftmaxLayerSmallShapes(), datasets::SoftmaxLayerLargeShapes()), CNNDataTypes), shape, data_type)
 {
     // Set fixed point position data type allowed
     const int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0;
@@ -99,12 +99,16 @@ using NESoftmaxLayerFixture = SoftmaxValidationFixture
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                       framework::dataset::make("DataType", DataType::F16)),
+                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                       framework::dataset::make("DataType", DataType::F16)),
+                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f16);
@@ -113,12 +117,16 @@ TEST_SUITE_END()
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
+                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)))
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
+                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                       framework::dataset::make("Beta", { 1.0f, 2.0f })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_f32);
@@ -132,14 +140,14 @@ using NESoftmaxLayerFixedPointFixture = SoftmaxValidationFixedPointFixture
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(), framework::dataset::make("DataType",
                        DataType::QS8)),
                        framework::dataset::make("FractionalBits", 1, 6)))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(), framework::dataset::make("DataType",
                        DataType::QS8)),
                        framework::dataset::make("FractionalBits", 1, 6)))
 {
@@ -150,7 +158,7 @@ TEST_SUITE_END()
 
 TEST_SUITE(QS16)
 // Testing for fixed point position [1,14) as reciprocal limits the maximum fixed point position to 14
-FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SoftmaxLayerSmallShapes(),
                        framework::dataset::make("DataType", DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
@@ -158,7 +166,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NESoftmaxLayerFixedPointFixture, frame
     // Validate output
     validate(Accessor(_target), _reference, tolerance_fixed_point);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(),
+FIXTURE_DATA_TEST_CASE(RunLarge, NESoftmaxLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SoftmaxLayerLargeShapes(),
                        framework::dataset::make("DataType", DataType::QS16)),
                        framework::dataset::make("FractionalBits", 1, 14)))
diff --git a/tests/validation/fixtures/SoftmaxLayerFixture.h b/tests/validation/fixtures/SoftmaxLayerFixture.h
index 3ffbc6aac7..c2ab2e2ef6 100644
--- a/tests/validation/fixtures/SoftmaxLayerFixture.h
+++ b/tests/validation/fixtures/SoftmaxLayerFixture.h
@@ -47,13 +47,13 @@ class SoftmaxValidationGenericFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, int fractional_bits, QuantizationInfo quantization_info)
+    void setup(TensorShape shape, DataType data_type, int fractional_bits, QuantizationInfo quantization_info, float beta)
     {
         _fractional_bits   = fractional_bits;
         _quantization_info = quantization_info;
 
-        _target    = compute_target(shape, data_type, fractional_bits, quantization_info);
-        _reference = compute_reference(shape, data_type, fractional_bits, quantization_info);
+        _target    = compute_target(shape, data_type, fractional_bits, quantization_info, beta);
+        _reference = compute_reference(shape, data_type, fractional_bits, quantization_info, beta);
     }
 
 protected:
@@ -78,7 +78,8 @@ protected:
         }
     }
 
-    TensorType compute_target(const TensorShape &shape, DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
+    TensorType compute_target(const TensorShape &shape, DataType data_type, int fixed_point_position,
+                              QuantizationInfo quantization_info, float beta)
     {
         // Create tensors
         TensorType src = create_tensor<TensorType>(shape, data_type, 1, fixed_point_position, quantization_info);
@@ -86,7 +87,7 @@ protected:
 
         // Create and configure function
         FunctionType smx_layer;
-        smx_layer.configure(&src, &dst);
+        smx_layer.configure(&src, &dst, beta);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -107,7 +108,8 @@ protected:
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, int fixed_point_position, QuantizationInfo quantization_info)
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, int fixed_point_position,
+                                      QuantizationInfo quantization_info, float beta)
     {
         // Create reference
         SimpleTensor<T> src{ shape, data_type, 1, fixed_point_position, quantization_info };
 
         // Fill reference
         fill(src);
 
-        return reference::softmax_layer(src);
+        return reference::softmax_layer(src, beta);
     }
 
     TensorType _target{};
@@ -129,9 +131,13 @@ class SoftmaxValidationFixture : public SoftmaxValidationGenericFixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type)
+    void setup(TensorShape shape, DataType data_type, float beta)
     {
-        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, 0, QuantizationInfo());
+        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+                                                                                          data_type,
+                                                                                          0,
+                                                                                          QuantizationInfo(),
+                                                                                          beta);
     }
 };
 
@@ -142,7 +148,11 @@ public:
     template <typename...>
     void setup(TensorShape shape, DataType data_type, int fixed_point_position)
     {
-        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, fixed_point_position, QuantizationInfo());
+        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+                                                                                          data_type,
+                                                                                          fixed_point_position,
+                                                                                          QuantizationInfo(),
+                                                                                          1.0f);
     }
 };
 
@@ -151,9 +161,13 @@ class SoftmaxValidationQuantizedFixture : public SoftmaxValidationGenericFixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info)
+    void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, float beta)
     {
-        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, data_type, 0, quantization_info);
+        SoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape,
+                                                                                          data_type,
+                                                                                          0,
+                                                                                          quantization_info,
+                                                                                          beta);
     }
 };
 } // namespace validation
diff --git a/tests/validation/reference/SoftmaxLayer.cpp b/tests/validation/reference/SoftmaxLayer.cpp
index 8e8cc1bd25..90b9b1f7e2 100644
--- a/tests/validation/reference/SoftmaxLayer.cpp
+++ b/tests/validation/reference/SoftmaxLayer.cpp
@@ -35,7 +35,7 @@ namespace validation
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src)
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta)
 {
     // Create reference
     SimpleTensor<T> dst{ src.shape(), src.data_type(), 1, src.fixed_point_position() };
@@ -54,9 +54,9 @@ SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src)
 
         // Regularize
         T sum(0.f);
-        std::transform(src_row_ptr, src_row_ptr + cols, dst_row_ptr, [&sum, max](T val)
+        std::transform(src_row_ptr, src_row_ptr + cols, dst_row_ptr, [&sum, max, beta](T val)
         {
-            const T res(std::exp(val - max));
+            const T res(std::exp((val - max) * beta));
             sum += res;
             return res;
         });
@@ -72,8 +72,10 @@ SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src)
 }
 
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src)
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta)
 {
+    ARM_COMPUTE_UNUSED(beta);
+
     using namespace fixed_point_arithmetic;
 
     // Create reference
@@ -113,21 +115,21 @@ SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src)
 }
 
 template <>
-SimpleTensor<uint8_t> softmax_layer(const SimpleTensor<uint8_t> &src)
+SimpleTensor<uint8_t> softmax_layer(const SimpleTensor<uint8_t> &src, float beta)
 {
     // Note: Output quantization info should always have scale = 1/256 and offset = 0
     const QuantizationInfo output_quantization_info = QuantizationInfo(1.f / 256, 0);
 
     SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
-    SimpleTensor<float> dst_tmp = softmax_layer(src_tmp);
+    SimpleTensor<float> dst_tmp = softmax_layer(src_tmp, beta);
     SimpleTensor<uint8_t> dst = convert_to_asymmetric(dst_tmp, output_quantization_info);
     return dst;
 }
 
-template SimpleTensor<float> softmax_layer(const SimpleTensor<float> &src);
-template SimpleTensor<half> softmax_layer(const SimpleTensor<half> &src);
-template SimpleTensor<qint8_t> softmax_layer(const SimpleTensor<qint8_t> &src);
-template SimpleTensor<qint16_t> softmax_layer(const SimpleTensor<qint16_t> &src);
+template SimpleTensor<float> softmax_layer(const SimpleTensor<float> &src, float beta);
+template SimpleTensor<half> softmax_layer(const SimpleTensor<half> &src, float beta);
+template SimpleTensor<qint8_t> softmax_layer(const SimpleTensor<qint8_t> &src, float beta);
+template SimpleTensor<qint16_t> softmax_layer(const SimpleTensor<qint16_t> &src, float beta);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/SoftmaxLayer.h b/tests/validation/reference/SoftmaxLayer.h
index ab79bc4850..a6d4c3b8cf 100644
--- a/tests/validation/reference/SoftmaxLayer.h
+++ b/tests/validation/reference/SoftmaxLayer.h
@@ -36,10 +36,10 @@ namespace validation
 namespace reference
 {
 template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src);
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta);
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src);
+SimpleTensor<T> softmax_layer(const SimpleTensor<T> &src, float beta);
 } // namespace reference
 } // namespace validation
 } // namespace test
-- 
cgit v1.2.1
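
For context on what the patch implements: with the new parameter, the layer computes
y_i = exp(beta * (x_i - max(x))) / sum_j exp(beta * (x_j - max(x))), so beta > 1 sharpens the
output distribution and beta < 1 flattens it, with beta = 1 giving standard softmax. Below is a
minimal usage sketch against the patched NEON API; the tensor shape and the fill step are
illustrative assumptions, not part of the change, and only F32 (and F16 where available)
supports beta != 1 per the asserts added above.

    #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative: a single vector of 1000 F32 logits.
        Tensor input, output;
        input.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));
        output.allocator()->init(TensorInfo(TensorShape(1000U), 1, DataType::F32));

        // beta is the new optional third argument introduced by this patch;
        // it defaults to 1.0f, so existing callers are unaffected.
        NESoftmaxLayer softmax;
        softmax.configure(&input, &output, /* beta */ 2.0f);

        input.allocator()->allocate();
        output.allocator()->allocate();
        // ... fill input with logits here ...

        softmax.run();
        return 0;
    }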