about summary refs log tree commit diff
path: root/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp 19
1 files changed, 12 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
index d0e8639229..a553f1be9e 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -64,10 +64,10 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
std::unique_ptr<const IDepthfirstStrategy> m_strat;
/* Compute the amount of working space required for a single thread. */
- virtual size_t get_working_size_per_thread() const = 0;
+ virtual size_t get_working_size_per_thread(unsigned int) const = 0;
/* Initialise the working space for a thread. */
- virtual void initialise_working_space(void *) const = 0;
+ virtual void initialise_working_space(void *, unsigned int) const = 0;
/* Compute a portion of the output tensor with padding. */
virtual void compute_tile_padded(
@@ -148,8 +148,8 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
{
// Get and initialise the working space for this thread.
void *thread_working_space =
- static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread();
- this->initialise_working_space(thread_working_space);
+ static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread(n_channels);
+ this->initialise_working_space(thread_working_space, n_channels);
// Construct convenient representations of the input/output tensors.
TensorSpec<const TInput *> input_tensor(reinterpret_cast<const TInput *>(input), ld_input_row, ld_input_col);
@@ -289,9 +289,14 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
{
}
- size_t get_working_size(unsigned int n_threads) const override final
+ size_t get_working_size(unsigned int n_threads) const override
{
- return n_threads * this->get_working_size_per_thread();
+ return this->get_working_size(n_threads, this->m_args.n_channels);
+ }
+
+ size_t get_working_size(unsigned int n_threads, unsigned int n_channels) const override final
+ {
+ return n_threads * this->get_working_size_per_thread(n_channels);
}
};