From 201e0fee596dafcf9c869a550fae29779aad2394 Mon Sep 17 00:00:00 2001
From: Sang-Hoon Park
Date: Wed, 27 Jan 2021 13:14:56 +0000
Subject: Make Softmax kernels on OpenCL stateless

* ClSoftmaxKernel and ClSoftmax are created.
* ClSoftmaxKernel is now state-less and ClSoftmax handles the internal tensors required for computation.
* add_const_tensor() is added to TensorPack not only to have symmetric interface but also to benefit from implicit conversion.

Implements: COMPMID-3998

Change-Id: I4f823121777be24260fd12b2cd71a6ff718c4eed
Signed-off-by: Sang-Hoon Park
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5087
Reviewed-by: Georgios Pinitas
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
---
 src/runtime/gpu/cl/operators/ClSoftmax.h | 119 +++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 src/runtime/gpu/cl/operators/ClSoftmax.h

(limited to 'src/runtime/gpu/cl/operators/ClSoftmax.h')

diff --git a/src/runtime/gpu/cl/operators/ClSoftmax.h b/src/runtime/gpu/cl/operators/ClSoftmax.h
new file mode 100644
index 0000000000..e38b7c595a
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClSoftmax.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_SOFTMAX_H
+#define ARM_COMPUTE_CL_SOFTMAX_H
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+struct SoftmaxKernelInfo;
+
+namespace opencl
+{
+class ClPermute;
+namespace kernels
+{
+class ClLogits1DMaxShiftExpSumKernel;
+class ClLogits1DNormKernel;
+} // namespace kernels
+class ClSoftmax : public IClOperator
+{
+public:
+    /** Constructor */
+    ClSoftmax();
+    /** Configure the operator
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
+     * @param[out] dst             Destination tensor info. Data types supported: same as @p src
+     * @param[in]  info            Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info);
+    /** Static function to check if the given info will lead to a valid configuration
+     *
+     * @param[in] src  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
+     * @param[in] dst  Destination tensor info (taken by const reference, only inspected). Data types supported: same as @p src
+     * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+     *
+     */
+    static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info);
+    // Inherited methods overridden:
+    void run(ITensorPack &tensors) override;
+    experimental::MemoryRequirements workspace() const override;
+
+private:
+    enum class InternalTensorIdx
+    {
+        MAX = 0,
+        SUM,
+        TMP,
+        PERMUTED_SRC,
+        PERMUTED_DST,
+        COUNT
+    };
+
+    /** Create a single internal tensor
+     *
+     * @param[in] info The information used to create a tensor
+     * @param[in] idx  The index within the internal array the created tensor will be held
+     */
+    void create_internal_tensor(TensorInfo &info, InternalTensorIdx idx);
+    /** Create all required internal tensors */
+    void create_internal_tensor();
+    /** Function to convert from internal tensor index to @ref TensorType used externally */
+    TensorType convert_internal_idx_to_tensor_type(InternalTensorIdx idx) const;
+    /** Function to import workspace memory allocated by the caller into internal tensor instances */
+    void import_workspace_memory(ITensorPack &tensors);
+    /** Function to permute the given source tensor when permutation is required */
+    void run_source_permute(const ITensor *src);
+    /** Function to permute the intermediate tensor to the final destination tensor when permutation is required */
+    void run_destination_permute(ITensor *dst);
+    /** Function to run @ref arm_compute::opencl::kernels::ClLogits1DMaxShiftExpSumKernel */
+    void run_max_sum(const ITensor *src);
+    /** Function to run @ref kernels::ClLogits1DNormKernel */
+    void run_norm(ITensor *dst);
+
+    std::unique_ptr<ClPermute> _permute_input;
+    std::unique_ptr<ClPermute> _permute_output;
+    std::unique_ptr<kernels::ClLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
+    std::unique_ptr<kernels::ClLogits1DNormKernel> _norm_kernel;
+    bool _needs_permute{ false };
+
+    std::array<TensorInfo, static_cast<uint32_t>(InternalTensorIdx::COUNT)> _internal_info{};
+    std::array<std::unique_ptr<CLTensor>, static_cast<uint32_t>(InternalTensorIdx::COUNT)> _internal_tensor{};
+
+    TensorInfo &_max_info;
+    TensorInfo &_sum_info;
+    TensorInfo &_tmp_info;
+    TensorInfo &_permuted_src_info;
+    TensorInfo &_permuted_dst_info;
+};
+
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_SOFTMAX_H */
\ No newline at end of file
-- 
cgit v1.2.1