diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/pooling')
3 files changed, 13 insertions, 17 deletions
diff --git a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
index b0aa62bbcb..d0e8639229 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
@@ -64,10 +64,10 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
     std::unique_ptr<const IDepthfirstStrategy> m_strat;

     /* Compute the amount of working space required for a single thread. */
-    virtual size_t get_working_size_per_thread(unsigned int n_input_channels) const = 0;
+    virtual size_t get_working_size_per_thread() const = 0;

     /* Initialise the working space for a thread. */
-    virtual void initialise_working_space(void *, unsigned int n_input_channels) const = 0;
+    virtual void initialise_working_space(void *) const = 0;

     /* Compute a portion of the output tensor with padding. */
     virtual void compute_tile_padded(
@@ -148,8 +148,8 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
     {
         // Get and initialise the working space for this thread.
         void *thread_working_space =
-            static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread(n_channels);
-        this->initialise_working_space(thread_working_space, n_channels);
+            static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread();
+        this->initialise_working_space(thread_working_space);

         // Construct convenient representations of the input/output tensors.
         TensorSpec<const TInput *> input_tensor(reinterpret_cast<const TInput *>(input), ld_input_row, ld_input_col);
@@ -289,14 +289,9 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
     {
     }

-    size_t get_working_size(unsigned int n_threads) const override
+    size_t get_working_size(unsigned int n_threads) const override final
     {
-        return this->get_working_size(n_threads, this->m_args.n_channels);
-    }
-
-    size_t get_working_size(unsigned int n_threads, unsigned int n_channels) const override final
-    {
-        return n_threads * this->get_working_size_per_thread(n_channels);
+        return n_threads * this->get_working_size_per_thread();
     }
 };

diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp
index 8a6e63d993..1ca478513c 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp
@@ -91,17 +91,17 @@ class PoolingDepthfirst : public DepthfirstDriver<TInput, TOutput>

 protected:
     /* Compute the amount of working space required for a single thread. */
-    size_t get_working_size_per_thread(unsigned int n_channels) const override
+    size_t get_working_size_per_thread() const override
     {
-        return sizeof(WorkingSpace) + n_channels * (sizeof(TInput) + sizeof(TOutput));
+        return sizeof(WorkingSpace) + this->m_args.n_channels * (sizeof(TInput) + sizeof(TOutput));
     }

     /* Initialise the working space for a thread. */
-    void initialise_working_space(void *raw_ws, unsigned int n_channels) const override
+    void initialise_working_space(void *raw_ws) const override
     {
         auto ws = reinterpret_cast<WorkingSpace *>(raw_ws);
         ws->input_buffer = ws + 1;
-        ws->output_buffer = reinterpret_cast<char *>(ws + 1) + sizeof(TInput) * n_channels;
+        ws->output_buffer = reinterpret_cast<char *>(ws + 1) + sizeof(TInput) * this->m_args.n_channels;

         // Fill the input buffer with an appropriate value
         TInput fill_val = 0;
@@ -119,6 +119,7 @@ class PoolingDepthfirst : public DepthfirstDriver<TInput, TOutput>
         }

         auto ptr = reinterpret_cast<TInput *>(ws->input_buffer);
+        auto n_channels = this->m_args.n_channels;
         for (; n_channels; n_channels--)
         {
             *(ptr++) = fill_val;
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
index 07c582059f..ded2c75127 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
@@ -136,8 +136,8 @@ class PoolingDepthfirstGeneric : public DepthfirstDriver<TInput, TOutput>
     const OutputStage m_os;

 protected:
-    size_t get_working_size_per_thread(unsigned int) const override { return 0; }
-    void initialise_working_space(void *, unsigned int) const override { /* Nothing */ }
+    size_t get_working_size_per_thread() const override { return 0; }
+    void initialise_working_space(void *) const override { /* Nothing */ }

     /* Compute a portion of the output tensor with padding. */
     void compute_tile_padded(