aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/ITensorPack.h7
-rw-r--r--arm_compute/core/KernelDescriptors.h1
-rw-r--r--arm_compute/core/experimental/Types.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLSoftmaxLayer.h40
4 files changed, 20 insertions, 29 deletions
diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h
index c06e1d9a73..8aea880bb6 100644
--- a/arm_compute/core/ITensorPack.h
+++ b/arm_compute/core/ITensorPack.h
@@ -69,6 +69,13 @@ public:
* @param[in] tensor Tensor to add
*/
void add_tensor(int id, const ITensor *tensor);
+
+ /** Add const tensor to the pack
+ *
+ * @param[in] id ID/type of the tensor to add
+ * @param[in] tensor Tensor to add
+ */
+ void add_const_tensor(int id, const ITensor *tensor);
 /** Get tensor of a given id from the pack
*
* @param[in] id ID of tensor to extract
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index e381220695..1f3cee2dd1 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -114,6 +114,7 @@ struct SoftmaxKernelInfo
float beta{ 1.f }; /**< A scaling factor for the exponent with default value 1.0 */
bool is_log{ false }; /**< Flag used to perform Log Softmax operation */
DataType input_data_type{ DataType::UNKNOWN }; /**< Input tensor data type */
+ int32_t axis{ 0 }; /**< The dimension in which to apply softmax. */
};
/** Descriptor used by the direct convolution layer output stage kernels */
diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h
index f615678e31..2a4bd89385 100644
--- a/arm_compute/core/experimental/Types.h
+++ b/arm_compute/core/experimental/Types.h
@@ -52,6 +52,7 @@ enum TensorType : int32_t
ACL_INT_1 = 51,
ACL_INT_2 = 52,
ACL_INT_3 = 53,
+ ACL_INT_4 = 54,
ACL_SRC_VEC = 256,
};
diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index ab10a64de4..ddb35ae56f 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,8 +24,6 @@
#ifndef ARM_COMPUTE_CLSOFTMAXLAYER_H
#define ARM_COMPUTE_CLSOFTMAXLAYER_H
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
@@ -34,11 +32,9 @@
namespace arm_compute
{
-class CLCompileContext;
-class CLLogits1DMaxShiftExpSumKernel;
-class CLLogits1DNormKernel;
class ICLTensor;
class ITensorInfo;
+class CLCompileContext;
/** Basic function to compute a SoftmaxLayer.
*
@@ -48,11 +44,11 @@ class ITensorInfo;
* Log Softmax is calculated by :
 * @f[ out = (x - max(x)) * beta - log(\sum{e^{(x - max(x)) * beta}}) @f]
*
- * This function runs the following kernels:
+ * This function runs the following operators/kernels:
* -# If axis is not 0:
- * -# @ref CLPermute
- * -# @ref CLLogits1DNormKernel
- * -# @ref CLLogits1DMaxShiftExpSumKernel
+ * -# @ref opencl::ClPermute
+ * -# @ref opencl::kernels::ClLogits1DNormKernel
+ * -# @ref opencl::kernels::ClLogits1DMaxShiftExpSumKernel
*/
template <bool IS_LOG = false>
class CLSoftmaxLayerGeneric : public IFunction
@@ -60,14 +56,6 @@ class CLSoftmaxLayerGeneric : public IFunction
public:
/** Constructor */
CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Prevent instances of this class from being copied */
- CLSoftmaxLayerGeneric(const CLSoftmaxLayerGeneric &) = delete;
- /** Prevent instances of this class from being copied */
- CLSoftmaxLayerGeneric &operator=(const CLSoftmaxLayerGeneric &) = delete;
- /** Prevent instances of this class to be moved */
- CLSoftmaxLayerGeneric(CLSoftmaxLayerGeneric &&) = delete;
- /** Prevent instances of this class to be moved */
- CLSoftmaxLayerGeneric &operator=(CLSoftmaxLayerGeneric &&) = delete;
/** Default destructor */
~CLSoftmaxLayerGeneric();
/** Set the input and output tensors.
@@ -105,17 +93,11 @@ public:
void run() override;
private:
- MemoryGroup _memory_group;
- CLPermute _permute_input;
- CLPermute _permute_output;
- std::unique_ptr<CLLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
- std::unique_ptr<CLLogits1DNormKernel> _norm_kernel;
- CLTensor _max;
- CLTensor _sum;
- CLTensor _tmp;
- CLTensor _input_permuted;
- CLTensor _output_permuted;
- bool _needs_permute;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
+
+ /** Allocate workspace required by the operator */
+ void allocate_workspace();
};
using CLSoftmaxLayer = CLSoftmaxLayerGeneric<false>;