-rw-r--r--  arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h   |  6
-rw-r--r--  arm_compute/runtime/CL/functions/CLSoftmaxLayer.h    |  8
-rw-r--r--  arm_compute/runtime/NEON/functions/NESoftmaxLayer.h  |  4
-rw-r--r--  docs/00_introduction.dox                             |  1
-rw-r--r--  src/core/CL/cl_kernels/softmax_layer.cl              |  3
-rw-r--r--  src/core/CL/cl_kernels/softmax_layer_quantized.cl    | 13
-rw-r--r--  src/core/CL/kernels/CLSoftmaxLayerKernel.cpp         |  1
-rw-r--r--  src/core/NEON/kernels/NESoftmaxLayerKernel.cpp       |  8
-rw-r--r--  tests/validation/CL/LogSoftmaxLayer.cpp              | 58
-rw-r--r--  tests/validation/reference/SoftmaxLayer.cpp          |  2
10 files changed, 30 insertions(+), 74 deletions(-)
diff --git a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
index b174f493b5..bd544e1537 100644
--- a/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h
@@ -203,7 +203,7 @@ public:
CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: S32/F16/F32
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
* @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
* @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
* @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
@@ -212,7 +212,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: S32/F16/F32
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
* @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
* @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
* @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
@@ -220,7 +220,7 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel
*
- * @param[in] input Source tensor. Data types supported: S32/F16/F32
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
* @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
* @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
* @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index f0ef15acd7..40b6743031 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -44,7 +44,7 @@ class ICLTensor;
* @f[ out = exp((x - max(x)) * beta) / sum(exp((x - max(x)) * beta)) @f]
*
* Log Softmax is calculated by :
- * @f[ out = (x - max(x) * beta) - \sum{e^{x - max(x) * beta}} @f]
+ * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f]
*
* This function runs the following kernels:
* -# @ref CLLogits1DMaxKernel
@@ -63,7 +63,7 @@ public:
CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
* @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
@@ -75,7 +75,7 @@ public:
/** Set the input and output tensors.
*
* @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[out] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
* @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
@@ -86,7 +86,7 @@ public:
void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t reduce_end_axis = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
*
- * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32 for Softmax and F16/F32 for Log Softmax
* @param[in] output Destination tensor. Data types supported: same as @p input
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
* @param[in] reduce_end_axis (Optional) The last axis of the first n dimensions (inclusive) to reduce. Defaults to 0.
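Note: the corrected formula above is the standard log-softmax identity. As a minimal, illustrative scalar sketch of it (not the library's actual implementation; the function name and structure are made up for clarity):

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    // out = (x - max(x)) * beta - log(sum(exp((x - max(x)) * beta)))
    std::vector<float> log_softmax(const std::vector<float> &x, float beta = 1.0f)
    {
        const float max_val = *std::max_element(x.begin(), x.end());
        std::vector<float> out(x.size());
        float sum = 0.f;
        for(std::size_t i = 0; i < x.size(); ++i)
        {
            out[i] = (x[i] - max_val) * beta; // shift by the max for numerical stability
            sum += std::exp(out[i]);          // accumulate the exponential sum
        }
        const float log_sum = std::log(sum);
        for(float &v : out)
        {
            v -= log_sum; // the step this patch fixes: subtract log(sum), not sum
        }
        return out;
    }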
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 51d981de44..fc1316d33c 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -39,10 +39,10 @@ class ITensor;
/** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
*
* Softmax is calculated by :
- * @f[ out = \frac{e^{x - max(x)}}{\sum{e^{x - max(x)}}} @f]
+ * @f[ out = exp((x - max(x)) * beta) / sum(exp((x - max(x)) * beta)) @f]
*
* Log Softmax is calculated by :
- * @f[ out = (x - max(x)) - \sum{e^{x - max(x)}} @f]
+ * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f]
*
* This function runs the following kernels:
* -# @ref NEFillBorderKernel
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 85146cb15d..cfd232d8e4 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -246,6 +246,7 @@ v20.08 Public major release
"axis" has been renamed to "reduce_end_axis", which is the last axis (inclusive) before which all dimensions are reduced/collapsed.
The default "axis" (now "reduce_end_axis") value for @ref NESoftmaxLayer and @ref NELogSoftmaxLayer is changed from -1 to 0.
The default "axis" (now "reduce_end_axis") value for @ref CLSoftmaxLayer, @ref CLLogSoftmaxLayer and @ref GCSoftmaxLayer is changed from 1 to 0.
+ - The support for quantized data types has been removed from @ref CLLogSoftmaxLayer due to implementation complexity.
- Added new data type QASYMM8_SIGNED support for:
- @ref CLArgMinMaxLayer
- @ref CLArgMinMaxLayerKernel
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index 767cf4c4f7..0c9f8c1c66 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -112,6 +112,7 @@ __kernel void softmax_layer_norm(
VEC_DATA_TYPE(DATA_TYPE, 16)
data = vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0));
#ifdef LOG_SOFTMAX
+ sum_val = log(sum_val);
vstore16(SUB_OP(data, sum_val, DATA_TYPE, 16), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
#else /* LOG_SOFTMAX */
vstore16(DIV_OP(data, sum_val, DATA_TYPE, 16), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
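Note: the single added line suffices because, in the log-softmax variant, data at this point holds the beta-scaled shifted logits from the preceding shift/exp/sum pass (an assumption based on the surrounding kernels), and log(exp(d) / S) = d - log(S). A tiny standalone check of that identity:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const float data[4] = { 0.f, -1.f, -2.f, -3.f }; // stand-ins for (x - max(x)) * beta
        float sum = 0.f;
        for(float d : data) { sum += std::exp(d); }
        const float log_sum = std::log(sum);
        for(float d : data)
        {
            // Dividing by the sum in the exp domain equals subtracting
            // log(sum) in the log domain; both columns match up to rounding.
            std::printf("%.6f  %.6f\n", std::log(std::exp(d) / sum), d - log_sum);
        }
        return 0;
    }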
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index 5d35e50b1f..81e7b896d5 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -570,14 +570,12 @@ __kernel void softmax_layer_norm_quantized(
int sum_val = *((__global int *)offset(&sum, 0, get_global_id(1)));
// It will be better to calculate this in prev layer and pass here as parameter
-#ifndef LOG_SOFTMAX
uint sum_val_u = convert_uint(sum_val);
int headroom_plus_one = clz(sum_val_u);
int num_bits_over_unit = EXP_ACCUMULATION_INT_BITS - headroom_plus_one;
int shifted_sum_minus_one_1 = convert_int((sum_val_u << headroom_plus_one) - (1u << 31));
int16 shifted_sum_minus_one = shifted_sum_minus_one_1;
int16 shifted_scale = ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1(shifted_sum_minus_one, 16);
-#endif /* LOG_SOFTMAX */
// It was already calculated in prev layer, should be stored into tmp output and reused
int16 data_diff = vload16(0, (__global int *)offset(&src, 0, 0));
@@ -589,18 +587,13 @@ __kernel void softmax_layer_norm_quantized(
}
#endif /* defined(INPUT_BETA_MULTIPLIER) && defined(INPUT_BETA_LEFT_SHIFT) */
-#ifdef LOG_SOFTMAX
- long16 data = SUB_OP(convert_long16(data_diff_mult), (long16)(sum_val), long, 16);
- data = select(0L, data, convert_long16(data_diff) >= (long16)(DIFF_MIN));
-#else /* LOG_SOFTMAX */
int16 data = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 16);
data = ASYMM_MULT(shifted_scale, data, 16);
data = ASYMM_ROUNDING_DIVIDE_BY_POW2(data, num_bits_over_unit + 31 - 8, 16);
#ifdef QASYMM8_SIGNED
- data = ADD_OP(data, (int16)(MIN_VALUE), int, 16);
+ data = ADD_OP(data, (int16)(MIN_VALUE), int, 16);
#endif /* QASYMM8_SIGNED */
- data = select(MIN_VALUE, data, data_diff >= (int16)(DIFF_MIN));
-#endif /* LOG_SOFTMAX */
+ data = select(MIN_VALUE, data, data_diff >= (int16)(DIFF_MIN));
vstore16(CONVERT_SAT(data, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)offset(&dst, 0, 0));
}
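Note: with the LOG_SOFTMAX branch removed, only the fixed-point softmax normalization remains in the quantized kernel. A rough float-domain model of what that path computes (illustrative only; the kernel itself works in gemmlowp-style integer arithmetic through the ASYMM_* helpers, normalizing the sum with clz and taking a fixed-point reciprocal for 1/sum):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Float model of the remaining quantized normalization; diff stands for the
    // beta-scaled (x - max(x)) term, sum for the accumulated exponential sum.
    uint8_t quantized_softmax_value(float diff, float sum)
    {
        const float prob = std::exp(diff) / sum; // exp(diff) / sum, in [0, 1]
        // Output uses a fixed 1/256 scale with offset 0; saturate like CONVERT_SAT.
        return static_cast<uint8_t>(std::min(255L, std::lround(prob * 256.f)));
    }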
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index 09deb94a85..85d70b04d0 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -129,6 +129,7 @@ Status validate_arguments_1DNorm(const ITensorInfo *input, const ITensorInfo *su
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum);
+ ARM_COMPUTE_RETURN_ERROR_ON(info.is_log && !is_data_type_float(info.input_data_type));
// Note: output should always have a scale of 1/256 and offset 0
const QuantizationInfo allowed_quantization_info = get_softmax_output_quantization_info(info.input_data_type, info.is_log);
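Note: this added check is what makes the removal safe at the API boundary: a quantized log-softmax configuration now fails validation instead of selecting the removed kernel path. A hedged caller-side sketch using the validate() entry point documented in the header above (shape and quantization parameters are arbitrary examples):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"

    using namespace arm_compute;

    Status probe_quantized_log_softmax()
    {
        // Arbitrary example shape/quantization; the QASYMM8 data type is the point.
        TensorInfo input(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, -10));
        TensorInfo output(TensorShape(64U, 16U), 1, DataType::QASYMM8);
        // After this patch the returned Status carries an error code rather than OK.
        return CLLogSoftmaxLayer::validate(&input, &output);
    }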
diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 41bf03ad1d..35e5973aff 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -368,6 +368,10 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons
{
sum_inversed = 256.f / sum;
}
+ else
+ {
+ sum = std::log(sum);
+ }
}
/* Normalize exponentials */
@@ -516,6 +520,10 @@ void logits_1d_softmax_float(const ITensor &in, const ITensor &max, void *const
{
sum_inversed = T(1) / sum;
}
+ else
+ {
+ sum = static_cast<T>(std::log(sum));
+ }
}
/* Normalize exponentials */
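Note: both NEON hunks follow the same pattern: the reduction pass now precomputes either 1/sum (softmax) or log(sum) (log softmax), and the normalization pass applies it elementwise. A minimal scalar sketch of that normalize step, assuming tmp holds the shifted, beta-scaled logits in the log case and their exponentials otherwise:

    #include <cstddef>

    // reduced = std::log(sum) when is_log (as added above), else 1 / sum
    // (or 256 / sum in the QASYMM8 path, whose outputs use a 1/256 scale).
    void normalize(const float *tmp, float *out, std::size_t n, float reduced, bool is_log)
    {
        for(std::size_t i = 0; i < n; ++i)
        {
            out[i] = is_log ? (tmp[i] - reduced)  // log softmax: subtract log(sum)
                            : (tmp[i] * reduced); // softmax: multiply by the inverse sum
        }
    }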
diff --git a/tests/validation/CL/LogSoftmaxLayer.cpp b/tests/validation/CL/LogSoftmaxLayer.cpp
index 39d2483ab8..420e6b2fc1 100644
--- a/tests/validation/CL/LogSoftmaxLayer.cpp
+++ b/tests/validation/CL/LogSoftmaxLayer.cpp
@@ -46,17 +46,6 @@ namespace
/** Tolerance for float operations */
RelativeTolerance<half> tolerance_f16(half(0.2));
RelativeTolerance<float> tolerance_f32(0.001f);
-
-/** Tolerance for quantized operations */
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
-
-/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
-{
- DataType::QASYMM8,
- DataType::F16,
- DataType::F32,
-});
} // namespace
TEST_SUITE(CL)
@@ -91,7 +80,7 @@ FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerFixture<half>, framework::Dataset
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16);
}
-TEST_SUITE_END()
+TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(),
@@ -118,47 +107,10 @@ FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerFixture<float>, framework::Datase
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-template <typename T>
-using CLLogSoftmaxLayerQuantizedFixture = SoftmaxValidationQuantizedFixture<CLTensor, CLAccessor, CLLogSoftmaxLayer, T, true>;
-
-TEST_SUITE(Quantized)
-TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLLogSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SoftmaxLayerSmallShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
- framework::dataset::make("Beta", { 1.0f, 2.f }))),
- framework::dataset::make("ReduceEndAxis", { 0, 1 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLLogSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayerLargeShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
- framework::dataset::make("Beta", { 1.0f, 2.0f }))),
- framework::dataset::make("ReduceEndAxis", { 0 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-FIXTURE_DATA_TEST_CASE(Run4D, CLLogSoftmaxLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SoftmaxLayer4DShapes(),
- framework::dataset::make("DataType", DataType::QASYMM8)),
- combine(framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) }),
- framework::dataset::make("Beta", { 1.0f, 2.0f }))),
- framework::dataset::make("ReduceEndAxis", { 0, 1, 2 })))
-{
- // Validate output
- validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // LogSoftmaxLayer
+TEST_SUITE_END() // CL
} // namespace validation
} // namespace test
} // namespace arm_compute
diff --git a/tests/validation/reference/SoftmaxLayer.cpp b/tests/validation/reference/SoftmaxLayer.cpp
index 9a8d46d516..e295f73f52 100644
--- a/tests/validation/reference/SoftmaxLayer.cpp
+++ b/tests/validation/reference/SoftmaxLayer.cpp
@@ -85,7 +85,7 @@ SimpleTensor<T> softmax_layer_generic(const SimpleTensor<T> &src, float beta, in
{
if(is_log)
{
- return val - sum;
+ return val - static_cast<T>(std::log(sum));
}
else
{