about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp
diff options
context:
space:
mode:
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2021-02-02 14:59:09 +0000
committer: Michele Di Giorgio <michele.digiorgio@arm.com> 2021-02-04 17:43:55 +0000
commit: cf87f509fc23d02c56569f794a3fb59e1b8be277 (patch)
tree: 0fe55158f2065dc6a314e82935558b9748165285 /src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp
parent: 89de118ccbebd5a943634137d0c160d4867da49c (diff)
download: ComputeLibrary-cf87f509fc23d02c56569f794a3fb59e1b8be277.tar.gz
Tweak scheduling use of SQDMULH in quantized AVG pooling
Resolves COMPMID-4195
Change-Id: Ie5116c1ddddccafba40432fd4b5245bb27890a88
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4997
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp')
-rw-r--r-- src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp | 44
1 files changed, 22 insertions, 22 deletions
diff --git a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp
index 99321eba61..2ea5b90561 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp
@@ -40,10 +40,10 @@ namespace {
constexpr RescaleParams rescale_params[8] = {
{0x40000000, -0}, // 1/2
- {0x55555555, -1}, // 1/3
+ {0x55555556, -1}, // 1/3
{0x40000000, -1}, // 1/4
{0x66666666, -2}, // 1/5
- {0x55555555, -2}, // 1/6
+ {0x55555556, -2}, // 1/6
{0x49249249, -2}, // 1/7
{0x40000000, -2}, // 1/8
{0x71c71c72, -3}, // 1/9
@@ -237,22 +237,22 @@ void sve_s8_nhwc_avg_generic_depthfirst_impl(
"ld1rw { z17.s }, p4/Z, [%x[rescale_ptr]]\n"
"ld1rw { z16.s }, p4/Z, [%x[shift_ptr]]\n"
"not z19.s, p4/M, z20.s\n"
- ".inst 0x04b175ef // sqrdmulh z15.s, z15.s, z17.s\n"
- ".inst 0x04b175ce // sqrdmulh z14.s, z14.s, z17.s\n"
- ".inst 0x04b175ad // sqrdmulh z13.s, z13.s, z17.s\n"
- ".inst 0x04b1758c // sqrdmulh z12.s, z12.s, z17.s\n"
- ".inst 0x04b1756b // sqrdmulh z11.s, z11.s, z17.s\n"
- ".inst 0x04b1754a // sqrdmulh z10.s, z10.s, z17.s\n"
- ".inst 0x04b17529 // sqrdmulh z9.s, z9.s, z17.s\n"
- ".inst 0x04b17508 // sqrdmulh z8.s, z8.s, z17.s\n"
- ".inst 0x04b174e7 // sqrdmulh z7.s, z7.s, z17.s\n"
- ".inst 0x04b174c6 // sqrdmulh z6.s, z6.s, z17.s\n"
- ".inst 0x04b174a5 // sqrdmulh z5.s, z5.s, z17.s\n"
- ".inst 0x04b17484 // sqrdmulh z4.s, z4.s, z17.s\n"
- ".inst 0x04b17463 // sqrdmulh z3.s, z3.s, z17.s\n"
- ".inst 0x04b17442 // sqrdmulh z2.s, z2.s, z17.s\n"
- ".inst 0x04b17421 // sqrdmulh z1.s, z1.s, z17.s\n"
- ".inst 0x04b17400 // sqrdmulh z0.s, z0.s, z17.s\n"
+ ".inst 0x04b175ef // sqdmulh z15.s, z15.s, z17.s\n"
+ ".inst 0x04b175ce // sqdmulh z14.s, z14.s, z17.s\n"
+ ".inst 0x04b175ad // sqdmulh z13.s, z13.s, z17.s\n"
+ ".inst 0x04b1758c // sqdmulh z12.s, z12.s, z17.s\n"
+ ".inst 0x04b1756b // sqdmulh z11.s, z11.s, z17.s\n"
+ ".inst 0x04b1754a // sqdmulh z10.s, z10.s, z17.s\n"
+ ".inst 0x04b17529 // sqdmulh z9.s, z9.s, z17.s\n"
+ ".inst 0x04b17508 // sqdmulh z8.s, z8.s, z17.s\n"
+ ".inst 0x04b174e7 // sqdmulh z7.s, z7.s, z17.s\n"
+ ".inst 0x04b174c6 // sqdmulh z6.s, z6.s, z17.s\n"
+ ".inst 0x04b174a5 // sqdmulh z5.s, z5.s, z17.s\n"
+ ".inst 0x04b17484 // sqdmulh z4.s, z4.s, z17.s\n"
+ ".inst 0x04b17463 // sqdmulh z3.s, z3.s, z17.s\n"
+ ".inst 0x04b17442 // sqdmulh z2.s, z2.s, z17.s\n"
+ ".inst 0x04b17421 // sqdmulh z1.s, z1.s, z17.s\n"
+ ".inst 0x04b17400 // sqdmulh z0.s, z0.s, z17.s\n"
".inst 0x4482920f // srshl z15.s, p4/M, z15.s, z16.s\n"
".inst 0x4482920e // srshl z14.s, p4/M, z14.s, z16.s\n"
".inst 0x4482920d // srshl z13.s, p4/M, z13.s, z16.s\n"
@@ -379,10 +379,10 @@ void sve_s8_nhwc_avg_generic_depthfirst_impl(
"ld1rw { z17.s }, p4/Z, [%x[rescale_ptr]]\n"
"ld1rw { z16.s }, p4/Z, [%x[shift_ptr]]\n"
"not z19.s, p4/M, z20.s\n"
- ".inst 0x04b175ef // sqrdmulh z15.s, z15.s, z17.s\n"
- ".inst 0x04b175ce // sqrdmulh z14.s, z14.s, z17.s\n"
- ".inst 0x04b175ad // sqrdmulh z13.s, z13.s, z17.s\n"
- ".inst 0x04b1758c // sqrdmulh z12.s, z12.s, z17.s\n"
+ ".inst 0x04b175ef // sqdmulh z15.s, z15.s, z17.s\n"
+ ".inst 0x04b175ce // sqdmulh z14.s, z14.s, z17.s\n"
+ ".inst 0x04b175ad // sqdmulh z13.s, z13.s, z17.s\n"
+ ".inst 0x04b1758c // sqdmulh z12.s, z12.s, z17.s\n"
".inst 0x4482920f // srshl z15.s, p4/M, z15.s, z16.s\n"
".inst 0x4482920e // srshl z14.s, p4/M, z14.s, z16.s\n"
".inst 0x4482920d // srshl z13.s, p4/M, z13.s, z16.s\n"