diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp | 19 |
1 file changed, 12 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp index d0e8639229..a553f1be9e 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,10 +64,10 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput> std::unique_ptr<const IDepthfirstStrategy> m_strat; /* Compute the amount of working space required for a single thread. */ - virtual size_t get_working_size_per_thread() const = 0; + virtual size_t get_working_size_per_thread(unsigned int) const = 0; /* Initialise the working space for a thread. */ - virtual void initialise_working_space(void *) const = 0; + virtual void initialise_working_space(void *, unsigned int) const = 0; /* Compute a portion of the output tensor with padding. */ virtual void compute_tile_padded( @@ -148,8 +148,8 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput> { // Get and initialise the working space for this thread. void *thread_working_space = - static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread(); - this->initialise_working_space(thread_working_space); + static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread(n_channels); + this->initialise_working_space(thread_working_space, n_channels); // Construct convenient representations of the input/output tensors. 
TensorSpec<const TInput *> input_tensor(reinterpret_cast<const TInput *>(input), ld_input_row, ld_input_col); @@ -289,9 +289,14 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput> { } - size_t get_working_size(unsigned int n_threads) const override final + size_t get_working_size(unsigned int n_threads) const override { - return n_threads * this->get_working_size_per_thread(); + return this->get_working_size(n_threads, this->m_args.n_channels); + } + + size_t get_working_size(unsigned int n_threads, unsigned int n_channels) const override final + { + return n_threads * this->get_working_size_per_thread(n_channels); } }; |