about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels/arm_conv/pooling
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/pooling')
-rw-r--r-- src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp 17
-rw-r--r-- src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp 9
-rw-r--r-- src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp 4
3 files changed, 13 insertions, 17 deletions
diff --git a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
index b0aa62bbcb..d0e8639229 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/depthfirst_driver.hpp
@@ -64,10 +64,10 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
std::unique_ptr<const IDepthfirstStrategy> m_strat;
/* Compute the amount of working space required for a single thread. */
- virtual size_t get_working_size_per_thread(unsigned int n_input_channels) const = 0;
+ virtual size_t get_working_size_per_thread() const = 0;
/* Initialise the working space for a thread. */
- virtual void initialise_working_space(void *, unsigned int n_input_channels) const = 0;
+ virtual void initialise_working_space(void *) const = 0;
/* Compute a portion of the output tensor with padding. */
virtual void compute_tile_padded(
@@ -148,8 +148,8 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
{
// Get and initialise the working space for this thread.
void *thread_working_space =
- static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread(n_channels);
- this->initialise_working_space(thread_working_space, n_channels);
+ static_cast<uint8_t *>(working_space) + thread_id * this->get_working_size_per_thread();
+ this->initialise_working_space(thread_working_space);
// Construct convenient representations of the input/output tensors.
TensorSpec<const TInput *> input_tensor(reinterpret_cast<const TInput *>(input), ld_input_row, ld_input_col);
@@ -289,14 +289,9 @@ class DepthfirstDriver : public PoolingCommon<TInput, TOutput>
{
}
- size_t get_working_size(unsigned int n_threads) const override
+ size_t get_working_size(unsigned int n_threads) const override final
{
- return this->get_working_size(n_threads, this->m_args.n_channels);
- }
-
- size_t get_working_size(unsigned int n_threads, unsigned int n_channels) const override final
- {
- return n_threads * this->get_working_size_per_thread(n_channels);
+ return n_threads * this->get_working_size_per_thread();
}
};
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp
index 8a6e63d993..1ca478513c 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst.hpp
@@ -91,17 +91,17 @@ class PoolingDepthfirst : public DepthfirstDriver<TInput, TOutput>
protected:
/* Compute the amount of working space required for a single thread. */
- size_t get_working_size_per_thread(unsigned int n_channels) const override
+ size_t get_working_size_per_thread() const override
{
- return sizeof(WorkingSpace) + n_channels * (sizeof(TInput) + sizeof(TOutput));
+ return sizeof(WorkingSpace) + this->m_args.n_channels * (sizeof(TInput) + sizeof(TOutput));
}
/* Initialise the working space for a thread. */
- void initialise_working_space(void *raw_ws, unsigned int n_channels) const override
+ void initialise_working_space(void *raw_ws) const override
{
auto ws = reinterpret_cast<WorkingSpace *>(raw_ws);
ws->input_buffer = ws + 1;
- ws->output_buffer = reinterpret_cast<char *>(ws + 1) + sizeof(TInput) * n_channels;
+ ws->output_buffer = reinterpret_cast<char *>(ws + 1) + sizeof(TInput) * this->m_args.n_channels;
// Fill the input buffer with an appropriate value
TInput fill_val = 0;
@@ -119,6 +119,7 @@ class PoolingDepthfirst : public DepthfirstDriver<TInput, TOutput>
}
auto ptr = reinterpret_cast<TInput *>(ws->input_buffer);
+ auto n_channels = this->m_args.n_channels;
for (; n_channels; n_channels--)
{
*(ptr++) = fill_val;
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
index 07c582059f..ded2c75127 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
@@ -136,8 +136,8 @@ class PoolingDepthfirstGeneric : public DepthfirstDriver<TInput, TOutput>
const OutputStage m_os;
protected:
- size_t get_working_size_per_thread(unsigned int) const override { return 0; }
- void initialise_working_space(void *, unsigned int) const override { /* Nothing */ }
+ size_t get_working_size_per_thread() const override { return 0; }
+ void initialise_working_space(void *) const override { /* Nothing */ }
/* Compute a portion of the output tensor with padding. */
void compute_tile_padded(