about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp')
-rw-r--r-- src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp 14
1 files changed, 13 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
index 09ee983907..382ccd3c62 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -103,6 +103,18 @@ namespace
);
}
+ template <class Strategy> // Strategy must supply output_rows, output_cols, return_type and vl_type.
+ unsigned int fast_mode_cycle_estimate(const DepthwiseArgs &args, const Nothing &) // The second argument is unnamed and never read.
+ {
+ // First-pass estimate: (rounded-up output rows) x (rounded-up output cols) x (channel count divided by vector length), then scaled by 2/3.
+ return arm_gemm::roundup(args.output_rows, Strategy::output_rows) * // Output rows, rounded up against the strategy's row count.
+ arm_gemm::roundup(args.output_cols, Strategy::output_cols) * // Output columns, rounded up against the strategy's column count.
+ arm_gemm::iceildiv( // NOTE(review): presumably a ceiling division of the first argument by the second - confirm.
+ (long unsigned) args.input_channels * args.channel_multiplier, // The cast binds to input_channels only, so the multiplication's left operand is long unsigned.
+ arm_gemm::utils::get_vector_length<typename Strategy::return_type>(Strategy::vl_type) // NOTE(review): presumably elements of return_type per vector for vl_type - confirm.
+ ) * 2 / 3; // Evaluated left to right: multiply by 2, then divide by 3. NOTE(review): the rationale for the 2/3 factor is not visible here - confirm.
+ }
+
#if defined(__aarch64__)
unsigned int not_preferred(const DepthwiseArgs &, const Nothing &)
{