aboutsummaryrefslogtreecommitdiff
path: root/src/gpu/cl/operators/ClSoftmax.h
diff options
context:
space:
mode:
authorViet-Hoa Do <viet-hoa.do@arm.com>2023-10-13 17:40:32 +0100
committerViet-Hoa Do <viet-hoa.do@arm.com>2023-10-31 10:16:25 +0000
commit29254aeb11a76c86449c2f38587e9144b2f2aacb (patch)
treeca2df26e81c2417b34768ac325e0f7200b5265df /src/gpu/cl/operators/ClSoftmax.h
parente5362e7e5dbccf81c5296a7e77154e11e1a14d2f (diff)
downloadComputeLibrary-29254aeb11a76c86449c2f38587e9144b2f2aacb.tar.gz
Optimize CL softmax
* The new softmax implementation consists of only a single kernel. - There are 2 versions of softmax, one for the x dimension and one for any other dimensions. - Softmax kernel handles both native and quantized data type. Resolves: COMPMID-6447 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: I4a9ae5bc63f78aebeaa85ee48a0d102c9c245eda Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10489 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClSoftmax.h')
-rw-r--r--src/gpu/cl/operators/ClSoftmax.h45
1 files changed, 16 insertions, 29 deletions
diff --git a/src/gpu/cl/operators/ClSoftmax.h b/src/gpu/cl/operators/ClSoftmax.h
index 6c2aaaea80..232fcfebd1 100644
--- a/src/gpu/cl/operators/ClSoftmax.h
+++ b/src/gpu/cl/operators/ClSoftmax.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,25 +21,26 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CL_SOFTMAX_H
-#define ARM_COMPUTE_CL_SOFTMAX_H
+#ifndef ACL_SRC_GPU_CL_OPERATORS_CLSOFTMAX_H
+#define ACL_SRC_GPU_CL_OPERATORS_CLSOFTMAX_H
+#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
-#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/IClOperator.h"
namespace arm_compute
{
+class CLCompileContext;
+class ITensorInfo;
+class ITensorPack;
struct SoftmaxKernelInfo;
namespace opencl
{
-class ClPermute;
namespace kernels
{
-class ClLogits1DMaxShiftExpSumKernel;
-class ClLogits1DNormKernel;
+class ClSoftmaxKernel;
} // namespace kernels
class ClSoftmax : public IClOperator
{
@@ -64,36 +65,22 @@ public:
* @return a status
*/
static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info);
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
+
+ void run(ITensorPack &tensors) override;
+
experimental::MemoryRequirements workspace() const override;
private:
enum InternalTensorIdx
{
- MAX = 0,
- SUM,
- TMP,
- PERMUTED_SRC,
- PERMUTED_DST,
- COUNT
+ TMP = 0,
+ COUNT,
};
- std::unique_ptr<ClPermute> _permute_input;
- std::unique_ptr<ClPermute> _permute_output;
- std::unique_ptr<kernels::ClLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
- std::unique_ptr<kernels::ClLogits1DNormKernel> _norm_kernel;
- bool _needs_permute{false};
-
- TensorInfo _max_info;
- TensorInfo _sum_info;
- TensorInfo _tmp_info;
- TensorInfo _permuted_src_info;
- TensorInfo _permuted_dst_info;
-
- experimental::MemoryRequirements _aux_mem{};
+ TensorInfo _tmp_info{};
+ experimental::MemoryRequirements _aux_mem;
};
} // namespace opencl
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CL_SOFTMAX_H */
+#endif // ACL_SRC_GPU_CL_OPERATORS_CLSOFTMAX_H