diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2024-04-09 23:13:04 +0100 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2024-04-11 12:58:45 +0000 |
commit | cfca87b91def4f455630f2094447dc0500b6256c (patch) | |
tree | 9985ca8ad1910d48a84aa9781fe3156e614ff5f4 /src/cpu/kernels/softmax/generic/sme2/fp32.cpp | |
parent | f1f1f87132690a8061801ef1a4638d637c780df7 (diff) | |
download | ComputeLibrary-cfca87b91def4f455630f2094447dc0500b6256c.tar.gz |
Add SME2 implementation of softmax for FP16
In addition to the softmax kernel, this patch fixes minor issues in the fp32 implementation.
Resolves: COMPMID-6920
Change-Id: Ibbd9f0af5f2a93fba0e92d72ba437279c34149d3
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11402
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/softmax/generic/sme2/fp32.cpp')
-rw-r--r-- | src/cpu/kernels/softmax/generic/sme2/fp32.cpp | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/src/cpu/kernels/softmax/generic/sme2/fp32.cpp b/src/cpu/kernels/softmax/generic/sme2/fp32.cpp
index e80041c812..159039a320 100644
--- a/src/cpu/kernels/softmax/generic/sme2/fp32.cpp
+++ b/src/cpu/kernels/softmax/generic/sme2/fp32.cpp
@@ -191,16 +191,16 @@ loop_1_start%=:
     // Step 1: Find max
     // ==================================================
+    // Loop for processing 4 vectors per iteration.
+    mov x9, #0 // x9: index
+    dup z11.s, w10 // z11: max_value = -inf
+
     // ---------------------------------------------------------------- z16-z19: max_value = -inf
     mov z16.d, z11.d
     mov z17.d, z11.d
     mov z18.d, z11.d
     mov z19.d, z11.d
 
-    // Loop for processing 4 vectors per iteration.
-    mov x9, #0 // x9: index
-    dup z11.s, w10 // z11: max_value = -inf
-
 find_max_body_start%=:
     cmp x9, x13
     b.eq find_max_body_end%=