aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/softmax/list.h
diff options
context:
space:
mode:
authorGunes Bayir <gunes.bayir@arm.com>2023-11-07 05:43:07 +0000
committerGunes Bayir <gunes.bayir@arm.com>2023-12-05 13:52:17 +0000
commitfadc9b1e0bba90d6a91beb65466b2a0895b3a5e4 (patch)
tree7d095fefe3634b4ca86dc9088bb2990d64d3a7c8 /src/cpu/kernels/softmax/list.h
parent23158b0a69b85c9c6e5a7f2457bfe10be04d6132 (diff)
downloadComputeLibrary-fadc9b1e0bba90d6a91beb65466b2a0895b3a5e4.tar.gz
Optimize CpuSoftmaxKernel for axis=0
Implement a single kernel instead of having two consecutive ones. In the previous setup, one kernel was calculating the maximum value in the axis, and this maximum was being subtracted from each data while calculating the softmax, i.e. softmax(x_i) = exp(x_i - max) / sum_i( exp(x_i - max) ) This patch integrates these two stages into a single kernel for Neon™ for all data types. This will save some memory because we don't need to hold the max values in a separate auxiliary tensor. It also introduces some other optimizations that will ease memory pressure when the data type is float/half, by using the dst tensor as temporary storage for already exponentiated inputs. It removes the references to SVE and SVE2 implementations, and most of the associated files; but, it leaves the implementations as these may be used in the future. Resolves: COMPMID-6500 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Change-Id: Icff9976d1214c4c6cbe15a62ca60b8a77d3784cc Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10688 Reviewed-by: SiCong Li <sicong.li@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/softmax/list.h')
-rw-r--r--src/cpu/kernels/softmax/list.h31
1 files changed, 7 insertions, 24 deletions
diff --git a/src/cpu/kernels/softmax/list.h b/src/cpu/kernels/softmax/list.h
index 627ce0c264..c143f6659d 100644
--- a/src/cpu/kernels/softmax/list.h
+++ b/src/cpu/kernels/softmax/list.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,41 +21,24 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_NEON_KERNELS_SOFTMAX_LIST_H
-#define SRC_CORE_NEON_KERNELS_SOFTMAX_LIST_H
+#ifndef ACL_SRC_CPU_KERNELS_SOFTMAX_LIST_H
+#define ACL_SRC_CPU_KERNELS_SOFTMAX_LIST_H
namespace arm_compute
{
namespace cpu
{
-#define DECLARE_SOFTMAX_KERNEL(func_name) \
- void func_name(const ITensor *in, const ITensor *max, void *const tmp, ITensor *out, const float beta, \
- bool is_log, const Window &window)
+#define DECLARE_SOFTMAX_KERNEL(func_name) \
+ template <bool IS_LOG> \
+ void func_name(const ITensor *in, void *const tmp, ITensor *out, const float beta, const Window &window)
DECLARE_SOFTMAX_KERNEL(neon_fp32_softmax);
DECLARE_SOFTMAX_KERNEL(neon_fp16_softmax);
DECLARE_SOFTMAX_KERNEL(neon_qasymm8_softmax);
DECLARE_SOFTMAX_KERNEL(neon_qasymm8_signed_softmax);
-DECLARE_SOFTMAX_KERNEL(sve_fp32_softmax);
-DECLARE_SOFTMAX_KERNEL(sve_fp16_softmax);
-DECLARE_SOFTMAX_KERNEL(sve2_qasymm8_signed_softmax);
-DECLARE_SOFTMAX_KERNEL(sve2_qasymm8_softmax);
#undef DECLARE_SOFTMAX_KERNEL
-
-#define DECLARE_LOGITS_KERNEL(func_name) void func_name(const ITensor *in, ITensor *out, const Window &window)
-
-DECLARE_LOGITS_KERNEL(neon_fp32_logits);
-DECLARE_LOGITS_KERNEL(neon_fp16_logits);
-DECLARE_LOGITS_KERNEL(neon_qasymm8_logits);
-DECLARE_LOGITS_KERNEL(neon_qasymm8_singed_logits);
-DECLARE_LOGITS_KERNEL(sve_fp32_logits);
-DECLARE_LOGITS_KERNEL(sve_fp16_logits);
-DECLARE_LOGITS_KERNEL(sve_qasymm8_logits);
-DECLARE_LOGITS_KERNEL(sve_qasymm8_signed_logits);
-
-#undef DECLARE_LOGITS_KERNEL
} // namespace cpu
} // namespace arm_compute
-#endif /* SRC_CORE_NEON_KERNELS_SOFTMAX_LIST_H */
+#endif // ACL_SRC_CPU_KERNELS_SOFTMAX_LIST_H