diff options
author | Michael Tyler <michael.tyler@arm.com> | 2023-06-30 11:26:05 +0100 |
---|---|---|
committer | michael.tyler <michael.tyler@arm.com> | 2023-07-04 14:34:58 +0000 |
commit | 8deee9bd9b9137c256c23b86be11dbf0466f3aa8 (patch) | |
tree | ac80b3bdd992552b65e306b77f061484da0591ca /src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp | |
parent | 19844f605f5e5b71d05164711dee13f8652adafe (diff) | |
download | ComputeLibrary-8deee9bd9b9137c256c23b86be11dbf0466f3aa8.tar.gz |
Depthwise channel pre-multiplication
Resolves: COMPMID-6337
Change-Id: Ie9097b3f56e8071426c621386a5988bd7f7e8ef2
Signed-off-by: Michael Tyler <michael.tyler@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9852
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp | 17 |
1 file changed, 6 insertions, 11 deletions
diff --git a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp index b0aa62bbcb..d0e8639229 100644 --- a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp +++ b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp @@ -64,10 +64,10 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput> std::unique_ptr<const IDepthfirstStrategy> m_strat; /* Compute the amount of working space required for a single thread. */ - virtual size_t get_working_size_per_thread(unsigned int n_input_channels) const = 0; + virtual size_t get_working_size_per_thread() const = 0; /* Initialise the working space for a thread. */ - virtual void initialise_working_space(void *, unsigned int n_input_channels) const = 0; + virtual void initialise_working_space(void *) const = 0; /* Compute a portion of the output tensor with padding. */ virtual void compute_tile_padded( @@ -148,8 +148,8 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput> { // Get and initialise the working space for this thread. void *thread_working_space = - static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread(n_channels); - this->initialise_working_space(thread_working_space, n_channels); + static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread(); + this->initialise_working_space(thread_working_space); // Construct convenient representations of the input/output tensors. 
TensorSpec<const TInput *> input_tensor(reinterpret_cast<const TInput *>(input), ld_input_row, ld_input_col); @@ -289,14 +289,9 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput> { } - size_t get_working_size(unsigned int n_threads) const override + size_t get_working_size(unsigned int n_threads) const override final { - return this->get_working_size(n_threads, this->m_args.n_channels); - } - - size_t get_working_size(unsigned int n_threads, unsigned int n_channels) const override final - { - return n_threads * this->get_working_size_per_thread(n_channels); + return n_threads * this->get_working_size_per_thread(); } }; |