aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/CpuSoftmaxKernel.h
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2023-11-07 05:43:07 +0000
committerGunes Bayir <gunes.bayir@arm.com>2023-12-05 13:52:17 +0000
commitfadc9b1e0bba90d6a91beb65466b2a0895b3a5e4 (patch)
tree7d095fefe3634b4ca86dc9088bb2990d64d3a7c8 /src/cpu/kernels/CpuSoftmaxKernel.h
parent23158b0a69b85c9c6e5a7f2457bfe10be04d6132 (diff)
downloadComputeLibrary-fadc9b1e0bba90d6a91beb65466b2a0895b3a5e4.tar.gz
Optimize CpuSoftmaxKernel for axis=0
Implement a single kernel instead of having two consecutive ones. In the previous setup, one kernel was calculating the maximum value in the axis, and this maximum was being subtracted from each data element while calculating the softmax, i.e. softmax(x_i) = exp(x_i - max) / sum_i( exp(x_i - max) ). This patch integrates these two stages into a single kernel for Neon™ for all data types. This will save some memory because we don't need to hold the max values in a separate auxiliary tensor. It also introduces some other optimizations that will ease memory pressure when the data type is float/half, by using the dst tensor as temporary storage for already exponentiated inputs. It removes the references to SVE and SVE2 implementations, and most of the associated files; but it leaves the implementations as these may be used in the future. Resolves: COMPMID-6500 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Change-Id: Icff9976d1214c4c6cbe15a62ca60b8a77d3784cc Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10688 Reviewed-by: SiCong Li <sicong.li@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/CpuSoftmaxKernel.h')
-rw-r--r--src/cpu/kernels/CpuSoftmaxKernel.h99
1 files changed, 26 insertions, 73 deletions
diff --git a/src/cpu/kernels/CpuSoftmaxKernel.h b/src/cpu/kernels/CpuSoftmaxKernel.h
index 5d288179fd..3db1f3d0ef 100644
--- a/src/cpu/kernels/CpuSoftmaxKernel.h
+++ b/src/cpu/kernels/CpuSoftmaxKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_SOFTMAX_KERNEL_H
-#define ARM_COMPUTE_CPU_SOFTMAX_KERNEL_H
+#ifndef ACL_SRC_CPU_KERNELS_CPUSOFTMAXKERNEL_H
+#define ACL_SRC_CPU_KERNELS_CPUSOFTMAXKERNEL_H
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuKernel.h"
@@ -33,102 +33,55 @@ namespace cpu
{
namespace kernels
{
-/** Interface for the identifying the max value of 1D Logits */
-class CpuLogits1DMaxKernel : public ICpuKernel<CpuLogits1DMaxKernel>
+/** Interface for softmax computation */
+class CpuSoftmaxKernel : public ICpuKernel<CpuSoftmaxKernel>
{
private:
- using SoftmaxLogits1DMaxKernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &)>::type;
+ using SoftmaxKernelPtr =
+ std::add_pointer<void(const ITensor *, void *const, ITensor *, float, const Window &)>::type;
public:
- CpuLogits1DMaxKernel() = default;
- ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuLogits1DMaxKernel);
- /** Set the input and output tensors.
- *
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] dst Destination tensor info. Data types supported: same as @p input
- */
- void configure(const ITensorInfo *src, ITensorInfo *dst);
- /** Static function to check if given info will lead to a valid configuration
- *
- * Similar to CpuLogits1DMaxKernel::configure()
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
- const char *name() const override;
-
- struct SoftmaxLogits1DMaxKernel
- {
- const char *name;
- const DataTypeISASelectorPtr is_selected;
- SoftmaxLogits1DMaxKernelPtr ukernel;
- };
-
- static const std::vector<SoftmaxLogits1DMaxKernel> &get_available_kernels();
-
-private:
- SoftmaxLogits1DMaxKernelPtr _run_method{nullptr};
- std::string _name{};
-};
-
-/** Interface for softmax computation for QASYMM8 with pre-computed max. */
-template <bool IS_LOG = false>
-class CpuLogits1DSoftmaxKernel : public ICpuKernel<CpuLogits1DSoftmaxKernel<IS_LOG>>
-{
-private:
- using SoftmaxLogits1DKernelPtr = std::add_pointer<void(
- const ITensor *, const ITensor *, void *const, ITensor *, float, bool, const Window &)>::type;
-
-public:
- CpuLogits1DSoftmaxKernel() = default;
- ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuLogits1DSoftmaxKernel);
+ CpuSoftmaxKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuSoftmaxKernel);
/** Set the input and output tensors.
*
- * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1.
- * Data types supported: same as @p input.
- * @param[out] dst Destination tensor info. Data types supported: same as @p input.
- * @param[in] beta A scaling factor for the exponent.
+ * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] dst Destination tensor info. Data types supported: same as @p input.
+ * @param[in] beta A scaling factor for the exponent.
+ * @param[in] is_log True if the operation is log-softmax
*
* @param tmp Auxiliary tensor info. Must be type F32 and same shape as the input.
*/
- void
- configure(const ITensorInfo *src, const ITensorInfo *max, ITensorInfo *dst, const float beta, ITensorInfo *tmp);
+ void configure(const ITensorInfo *src, ITensorInfo *dst, float beta, bool is_log, ITensorInfo *tmp);
/** Static function to check if given info will lead to a valid configuration
*
- * Similar to CpuLogits1DSoftmaxKernel::configure()
+ * Similar to CpuSoftmaxKernel::configure()
*
* @return a status
*/
- static Status validate(const ITensorInfo *src,
- const ITensorInfo *max,
- const ITensorInfo *dst,
- const float beta,
- const ITensorInfo *tmp);
+ static Status
+ validate(const ITensorInfo *src, const ITensorInfo *dst, float beta, bool is_log, const ITensorInfo *tmp);
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
const char *name() const override;
- struct SoftmaxLogits1DKernel
+ struct SoftmaxKernel
{
- const char *name;
- const DataTypeISASelectorPtr is_selected;
- SoftmaxLogits1DKernelPtr ukernel;
+ const char *name;
+ const SoftmaxKernelDataTypeISASelectorDataPtr is_selected;
+ SoftmaxKernelPtr ukernel;
};
- static const std::vector<SoftmaxLogits1DKernel> &get_available_kernels();
+ static const std::vector<SoftmaxKernel> &get_available_kernels();
private:
- float _beta{1.0f};
- SoftmaxLogits1DKernelPtr _run_method{nullptr};
- std::string _name{};
+ float _beta{1.0f};
+ SoftmaxKernelPtr _run_method{nullptr};
+ std::string _name{};
};
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_SOFTMAX_KERNEL_H */
+#endif // ACL_SRC_CPU_KERNELS_CPUSOFTMAXKERNEL_H