From 201e0fee596dafcf9c869a550fae29779aad2394 Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Wed, 27 Jan 2021 13:14:56 +0000 Subject: Make Softmax kernels on OpenCL stateless * ClSoftmaxKernel and ClSoftmax are created. * ClSoftmaxKernel is now state-less and ClSoftmax handles the internal tensors required for computation. * add_const_tensor() is added to TensorPack not only to have symmetric interface but also to benefit from implicit conversion. Implements: COMPMID-3998 Change-Id: I4f823121777be24260fd12b2cd71a6ff718c4eed Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5087 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- arm_compute/runtime/CL/functions/CLSoftmaxLayer.h | 40 +++++++---------------- 1 file changed, 11 insertions(+), 29 deletions(-) (limited to 'arm_compute/runtime/CL/functions/CLSoftmaxLayer.h') diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h index ab10a64de4..ddb35ae56f 100644 --- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h +++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_CLSOFTMAXLAYER_H #define ARM_COMPUTE_CLSOFTMAXLAYER_H -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -34,11 +32,9 @@ namespace arm_compute { -class CLCompileContext; -class CLLogits1DMaxShiftExpSumKernel; -class CLLogits1DNormKernel; class ICLTensor; class ITensorInfo; +class CLCompileContext; /** Basic function to compute a SoftmaxLayer. 
* @@ -48,11 +44,11 @@ class ITensorInfo; * Log Softmax is calculated by : * @f[ out = (x - max(x) * beta) - log(\sum{e^{x - max(x) * beta}}) @f] * - * This function runs the following kernels: + * This function runs the following operators/kernels: * -# If axis is not 0: - * -# @ref CLPermute - * -# @ref CLLogits1DNormKernel - * -# @ref CLLogits1DMaxShiftExpSumKernel + * -# @ref opencl::ClPermute + * -# @ref opencl::kernels::ClLogits1DNormKernel + * -# @ref opencl::kernels::ClLogits1DMaxShiftExpSumKernel */ template <bool IS_LOG = false> class CLSoftmaxLayerGeneric : public IFunction @@ -60,14 +56,6 @@ class CLSoftmaxLayerGeneric : public IFunction public: /** Constructor */ CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr); - /** Prevent instances of this class from being copied */ - CLSoftmaxLayerGeneric(const CLSoftmaxLayerGeneric &) = delete; - /** Prevent instances of this class from being copied */ - CLSoftmaxLayerGeneric &operator=(const CLSoftmaxLayerGeneric &) = delete; - /** Prevent instances of this class to be moved */ - CLSoftmaxLayerGeneric(CLSoftmaxLayerGeneric &&) = delete; - /** Prevent instances of this class to be moved */ - CLSoftmaxLayerGeneric &operator=(CLSoftmaxLayerGeneric &&) = delete; /** Default destructor */ ~CLSoftmaxLayerGeneric(); /** Set the input and output tensors. @@ -105,17 +93,11 @@ public: void run() override; private: - MemoryGroup _memory_group; - CLPermute _permute_input; - CLPermute _permute_output; - std::unique_ptr<CLLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel; - std::unique_ptr<CLLogits1DNormKernel> _norm_kernel; - CLTensor _max; - CLTensor _sum; - CLTensor _tmp; - CLTensor _input_permuted; - CLTensor _output_permuted; - bool _needs_permute; + struct Impl; + std::unique_ptr<Impl> _impl; + + /** Allocate workspace required by the operator */ + void allocate_workspace(); }; using CLSoftmaxLayer = CLSoftmaxLayerGeneric<false>; -- cgit v1.2.1