about summary refs log tree commit diff
path: root/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp')
-rw-r--r-- src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp 113
1 files changed, 80 insertions, 33 deletions
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp
index 052354922e..ee5a79b4ff 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,13 +30,16 @@
#include "kernels/cpp_nhwc_1x1_stride_any_depthfirst.hpp"
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
-#if defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SME)
+#include "kernels/sme_u8_nhwc_avg_generic_depthfirst.hpp"
+#include "kernels/sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
+#include "kernels/sme_u8_nhwc_max_generic_depthfirst.hpp"
+#endif // defined(ARM_COMPUTE_ENABLE_SME)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/sve_u8_nhwc_avg_generic_depthfirst.hpp"
-#endif // defined(SVE2)
#include "kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/sve_u8_nhwc_max_generic_depthfirst.hpp"
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
#include "kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst.hpp"
#include "kernels/a64_u8_nhwc_avg_generic_depthfirst.hpp"
#include "kernels/a64_u8_nhwc_max_generic_depthfirst.hpp"
@@ -47,19 +50,6 @@
namespace arm_conv {
namespace pooling {
-namespace
-{
- template <class Strategy>
- bool is_supported(const PoolingArgs &args, const Nothing &)
- {
- return ((args.pool_type == Strategy::pooling_type()) &&
- (args.pool_window.rows == Strategy::pool_rows()) &&
- (args.pool_window.cols == Strategy::pool_cols()) &&
- (args.pool_stride.rows == Strategy::stride_rows()) &&
- (args.pool_stride.cols == Strategy::stride_cols()));
- }
-}
-
static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
{
PoolingMethod::DEPTHFIRST,
@@ -69,15 +59,28 @@ static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
},
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
- return new PoolingDepthfirstGeneric<cpp_nhwc_1x1_stride_any_depthfirst<uint8_t>>(args);
+ auto strat = new cpp_nhwc_1x1_stride_any_depthfirst<uint8_t>(args.cpu_info);
+ return new PoolingDepthfirstGeneric<uint8_t>(strat, args);
},
},
#if defined(__aarch64__)
-#if defined(__ARM_FEATURE_SVE)
-#if defined(SVE2)
+#if defined(ARM_COMPUTE_ENABLE_SME)
{
PoolingMethod::DEPTHFIRST,
- "sve_u8_nhwc_avg_generic_depthfirst",
+ "sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst",
+ [] (const PoolingArgs &args, const Nothing &os) -> bool {
+ return args.cpu_info->has_sme() &&
+ is_supported<sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst>(args, os);
+ },
+ nullptr,
+ [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
+ auto strat = new sme_u8_nhwc_max_2x2_s1_output2x2_depthfirst(args.cpu_info);
+ return new PoolingDepthfirst<uint8_t>(strat, args);
+ },
+ },
+ {
+ PoolingMethod::DEPTHFIRST,
+ "sme_u8_nhwc_avg_generic_depthfirst",
[] (const PoolingArgs &args, const Nothing &) -> bool {
// This kernel can only be used when there is either no padding, or we don't care
// about the value of the padding. Otherwise, we would need to pass in the zero-point
@@ -85,40 +88,82 @@ static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
return (args.exclude_padding ||
(args.padding.top == 0 && args.padding.bottom == 0 &&
args.padding.left == 0 && args.padding.right == 0)
- ) && args.pool_type == PoolingType::AVERAGE;
+ ) && args.pool_type == PoolingType::AVERAGE &&
+ args.cpu_info->has_sme2();
+ },
+ nullptr,
+ [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
+ auto strat = new sme_u8_nhwc_avg_generic_depthfirst(args.cpu_info);
+ return new PoolingDepthfirstGeneric<uint8_t>(strat, args);
+ },
+ },
+ {
+ PoolingMethod::DEPTHFIRST,
+ "sme_u8_nhwc_max_generic_depthfirst",
+ [] (const PoolingArgs &args, const Nothing &) -> bool {
+ return args.cpu_info->has_sme() && args.pool_type == PoolingType::MAX;
},
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
- return new PoolingDepthfirstGeneric<sve_u8_nhwc_avg_generic_depthfirst>(args);
+ auto strat = new sme_u8_nhwc_max_generic_depthfirst(args.cpu_info);
+ return new PoolingDepthfirstGeneric<uint8_t>(strat, args);
},
},
-#endif // defined(SVE2)
+#endif // defined(ARM_COMPUTE_ENABLE_SME)
+#if defined(ARM_COMPUTE_ENABLE_SVE)
{
PoolingMethod::DEPTHFIRST,
"sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst",
- is_supported<sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst>,
+ [] (const PoolingArgs &args, const Nothing &os) -> bool {
+ return args.cpu_info->has_sve() &&
+ is_supported<sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst>(args, os);
+ },
+ nullptr,
+ [] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
+ auto strat = new sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst(args.cpu_info);
+ return new PoolingDepthfirst<uint8_t>(strat, args);
+ },
+ },
+ {
+ PoolingMethod::DEPTHFIRST,
+ "sve_u8_nhwc_avg_generic_depthfirst",
+ [] (const PoolingArgs &args, const Nothing &) -> bool {
+ // This kernel can only be used when there is either no padding, or we don't care
+ // about the value of the padding. Otherwise, we would need to pass in the zero-point
+ // for the quantization regime.
+ return (args.exclude_padding ||
+ (args.padding.top == 0 && args.padding.bottom == 0 &&
+ args.padding.left == 0 && args.padding.right == 0)
+ ) && args.pool_type == PoolingType::AVERAGE &&
+ args.cpu_info->has_sve2();
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
- return new PoolingDepthfirst<sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst>(args);
+ auto strat = new sve_u8_nhwc_avg_generic_depthfirst(args.cpu_info);
+ return new PoolingDepthfirstGeneric<uint8_t>(strat, args);
},
},
{
PoolingMethod::DEPTHFIRST,
"sve_u8_nhwc_max_generic_depthfirst",
- [] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; },
+ [] (const PoolingArgs &args, const Nothing &) -> bool {
+ return args.cpu_info->has_sve() && args.pool_type == PoolingType::MAX;
+ },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
- return new PoolingDepthfirstGeneric<sve_u8_nhwc_max_generic_depthfirst>(args);
+ auto strat = new sve_u8_nhwc_max_generic_depthfirst(args.cpu_info);
+ return new PoolingDepthfirstGeneric<uint8_t>(strat, args);
},
},
-#endif // defined(__ARM_FEATURE_SVE)
+#endif // defined(ARM_COMPUTE_ENABLE_SVE)
{
PoolingMethod::DEPTHFIRST,
"a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst",
is_supported<a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst>,
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
- return new PoolingDepthfirst<a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst>(args);
+ auto strat = new a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst(args.cpu_info);
+ return new PoolingDepthfirst<uint8_t>(strat, args);
},
},
{
@@ -135,7 +180,8 @@ static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
},
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
- return new PoolingDepthfirstGeneric<a64_u8_nhwc_avg_generic_depthfirst>(args);
+ auto strat = new a64_u8_nhwc_avg_generic_depthfirst(args.cpu_info);
+ return new PoolingDepthfirstGeneric<uint8_t>(strat, args);
},
},
{
@@ -144,7 +190,8 @@ static const PoolingImplementation<uint8_t, uint8_t> pooling_u8_methods[] = {
[] (const PoolingArgs &args, const Nothing &) -> bool { return args.pool_type == PoolingType::MAX; },
nullptr,
[] (const PoolingArgs &args, const Nothing &) -> PoolingCommon<uint8_t, uint8_t> * {
- return new PoolingDepthfirstGeneric<a64_u8_nhwc_max_generic_depthfirst>(args);
+ auto strat = new a64_u8_nhwc_max_generic_depthfirst(args.cpu_info);
+ return new PoolingDepthfirstGeneric<uint8_t>(strat, args);
},
},
#endif // defined(__aarch64__)