diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2023-10-13 17:40:32 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2023-10-31 10:16:25 +0000 |
commit | 29254aeb11a76c86449c2f38587e9144b2f2aacb (patch) | |
tree | ca2df26e81c2417b34768ac325e0f7200b5265df /src/gpu/cl/operators/ClSoftmax.h | |
parent | e5362e7e5dbccf81c5296a7e77154e11e1a14d2f (diff) | |
download | ComputeLibrary-29254aeb11a76c86449c2f38587e9144b2f2aacb.tar.gz |
Optimize CL softmax
* The new softmax implementation consists of only a single kernel.
- There are 2 versions of softmax, one for the x dimension
and one for any other dimension.
- The softmax kernel handles both native and quantized data types.
Resolves: COMPMID-6447
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I4a9ae5bc63f78aebeaa85ee48a0d102c9c245eda
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10489
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/operators/ClSoftmax.h')
-rw-r--r-- | src/gpu/cl/operators/ClSoftmax.h | 45 |
1 files changed, 16 insertions, 29 deletions
diff --git a/src/gpu/cl/operators/ClSoftmax.h b/src/gpu/cl/operators/ClSoftmax.h index 6c2aaaea80..232fcfebd1 100644 --- a/src/gpu/cl/operators/ClSoftmax.h +++ b/src/gpu/cl/operators/ClSoftmax.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,25 +21,26 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CL_SOFTMAX_H -#define ARM_COMPUTE_CL_SOFTMAX_H +#ifndef ACL_SRC_GPU_CL_OPERATORS_CLSOFTMAX_H +#define ACL_SRC_GPU_CL_OPERATORS_CLSOFTMAX_H +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" -#include "src/gpu/cl/ClCompileContext.h" #include "src/gpu/cl/IClOperator.h" namespace arm_compute { +class CLCompileContext; +class ITensorInfo; +class ITensorPack; struct SoftmaxKernelInfo; namespace opencl { -class ClPermute; namespace kernels { -class ClLogits1DMaxShiftExpSumKernel; -class ClLogits1DNormKernel; +class ClSoftmaxKernel; } // namespace kernels class ClSoftmax : public IClOperator { @@ -64,36 +65,22 @@ public: * @return a status */ static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info); - // Inherited methods overridden: - void run(ITensorPack &tensors) override; + + void run(ITensorPack &tensors) override; + experimental::MemoryRequirements workspace() const override; private: enum InternalTensorIdx { - MAX = 0, - SUM, - TMP, - PERMUTED_SRC, - PERMUTED_DST, - COUNT + TMP = 0, + COUNT, }; - std::unique_ptr<ClPermute> _permute_input; - std::unique_ptr<ClPermute> _permute_output; - std::unique_ptr<kernels::ClLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel; - std::unique_ptr<kernels::ClLogits1DNormKernel> _norm_kernel; - bool _needs_permute{false}; - - TensorInfo _max_info; - TensorInfo _sum_info; - TensorInfo _tmp_info; - TensorInfo _permuted_src_info; - TensorInfo _permuted_dst_info; - - 
experimental::MemoryRequirements _aux_mem{}; + TensorInfo _tmp_info{}; + experimental::MemoryRequirements _aux_mem; }; } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_SOFTMAX_H */ +#endif // ACL_SRC_GPU_CL_OPERATORS_CLSOFTMAX_H |