diff options
author | ramelg01 <ramy.elgammal@arm.com> | 2022-04-08 03:52:28 +0100 |
---|---|---|
committer | Ramy Elgammal <ramy.elgammal@arm.com> | 2022-04-25 15:35:59 +0000 |
commit | c827e99fc46521f43719b0c2d1b6f05d66abf68c (patch) | |
tree | 31df1002673b2a4c4aae66608ad85b1ad6517050 /src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp | |
parent | 0a3948394e7e77344201b8732e9c20fcb5fa9a38 (diff) | |
download | ComputeLibrary-c827e99fc46521f43719b0c2d1b6f05d66abf68c.tar.gz |
Update Neon™ pooling kernel
- Reduce duplication and simplify overall structure.
- Improve multi-threaded performance by sharing more data
in lower-level caches.
Partially Resolves: COMPMID-5054
Signed-off-by: Ramy Elgammal<ramy.elgammal@arm.com>
Change-Id: I5f4dc50913401d5c1cbfc10b866fae9490cbc4d7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7404
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Andrew Mundy
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp | 42 |
1 files changed, 24 insertions, 18 deletions
```diff
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp
index bfc4dc0f15..6209f7cf2f 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,13 +25,13 @@
 #include "arm_gemm_local.hpp"
 
 #include "pooling_implementation.hpp"
-#include "pooling_depthfirst_generic_quantized.hpp"
+#include "pooling_depthfirst_generic.hpp"
 
 #if defined(__aarch64__)
-#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
 #include "kernels/sve_s8q_nhwc_avg_generic_depthfirst.hpp"
 #include "kernels/sve_s8q_nhwc_max_generic_depthfirst.hpp"
-#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
 #include "kernels/a64_s8q_nhwc_avg_generic_depthfirst.hpp"
 #include "kernels/a64_s8q_nhwc_max_generic_depthfirst.hpp"
 #endif // defined(__aarch64__)
@@ -41,9 +41,9 @@
 namespace arm_conv {
 namespace pooling {
 
-static const PoolingImplementation<int8_t, int8_t, Requantize32> pooling_u8_methods[] = {
+static const PoolingImplementation<int8_t, int8_t, Requantize32> pooling_s8q_methods[] = {
 #if defined(__aarch64__)
-#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
   {
     PoolingMethod::DEPTHFIRST,
     "sve_s8q_nhwc_avg_generic_depthfirst",
@@ -51,20 +51,24 @@ static const PoolingImplementation<int8_t, int8_t, Requantize32> pooling_u8_meth
       return args.cpu_info->has_sve2() && args.pool_type == PoolingType::AVERAGE;
     },
     nullptr,
-    [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t, Requantize32> * {
-      return new PoolingDepthfirstGenericQuantized<sve_s8q_nhwc_avg_generic_depthfirst>(args, rq);
+    [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t> * {
+      auto strat = new sve_s8q_nhwc_avg_generic_depthfirst(args.cpu_info);
+      return new PoolingDepthfirstGeneric<int8_t, int8_t, Requantize32>(strat, args, rq);
     },
   },
   {
     PoolingMethod::DEPTHFIRST,
     "sve_s8q_nhwc_max_generic_depthfirst",
-    [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.cpu_info->has_sve2() && args.pool_type == PoolingType::MAX; },
+    [] (const PoolingArgs &args, const Requantize32 &) -> bool {
+      return args.cpu_info->has_sve2() && args.pool_type == PoolingType::MAX;
+    },
     nullptr,
-    [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t, Requantize32> * {
-      return new PoolingDepthfirstGenericQuantized<sve_s8q_nhwc_max_generic_depthfirst>(args, rq);
+    [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t> * {
+      auto strat = new sve_s8q_nhwc_max_generic_depthfirst(args.cpu_info);
+      return new PoolingDepthfirstGeneric<int8_t, int8_t, Requantize32>(strat, args, rq);
     },
   },
-#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
   {
     PoolingMethod::DEPTHFIRST,
     "a64_s8q_nhwc_avg_generic_depthfirst",
@@ -72,8 +76,9 @@ static const PoolingImplementation<int8_t, int8_t, Requantize32> pooling_u8_meth
       return args.pool_type == PoolingType::AVERAGE;
     },
     nullptr,
-    [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t, Requantize32> * {
-      return new PoolingDepthfirstGenericQuantized<a64_s8q_nhwc_avg_generic_depthfirst>(args, rq);
+    [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t> * {
+      auto strat = new a64_s8q_nhwc_avg_generic_depthfirst(args.cpu_info);
+      return new PoolingDepthfirstGeneric<int8_t, int8_t, Requantize32>(strat, args, rq);
     },
   },
   {
@@ -81,8 +86,9 @@ static const PoolingImplementation<int8_t, int8_t, Requantize32> pooling_u8_meth
     "a64_s8q_nhwc_max_generic_depthfirst",
     [] (const PoolingArgs &args, const Requantize32 &) -> bool { return args.pool_type == PoolingType::MAX; },
     nullptr,
-    [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t, Requantize32> * {
-      return new PoolingDepthfirstGenericQuantized<a64_s8q_nhwc_max_generic_depthfirst>(args, rq);
+    [] (const PoolingArgs &args, const Requantize32 &rq) -> PoolingCommon<int8_t, int8_t> * {
+      auto strat = new a64_s8q_nhwc_max_generic_depthfirst(args.cpu_info);
+      return new PoolingDepthfirstGeneric<int8_t, int8_t, Requantize32>(strat, args, rq);
     },
   },
 #endif // defined(__aarch64__)
@@ -92,10 +98,10 @@ static const PoolingImplementation<int8_t, int8_t, Requantize32> pooling_u8_meth
 template <>
 const PoolingImplementation<int8_t, int8_t, Requantize32> *pooling_implementation_list()
 {
-  return pooling_u8_methods;
+  return pooling_s8q_methods;
 }
 
-template UniquePoolingCommon<int8_t, int8_t, Requantize32> pooling(const PoolingArgs &, const Requantize32 &);
+template UniquePoolingCommon<int8_t, int8_t> pooling(const PoolingArgs &, const Requantize32 &);
 
 } // namespace pooling
 } // namespace arm_conv
```