aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp')
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp27
1 files changed, 14 insertions, 13 deletions
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
index e58467b0f4..cef568fadd 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -525,6 +525,7 @@ class DepthwiseDepthfirstMultiplier : public DepthfirstDriver<TInput, TWeight, T
}
void compute_tile_padded(
+ const DepthwiseArgs &args,
unsigned int output_i, unsigned int output_j,
unsigned int output_channel_start, unsigned int output_channel_end,
const TensorSpec<const TInput *> &input,
@@ -536,11 +537,11 @@ class DepthwiseDepthfirstMultiplier : public DepthfirstDriver<TInput, TWeight, T
// Get the working space
auto ws = reinterpret_cast<WorkingSpace *>(working_space_raw);
- const int ii = static_cast<int>(output_i * this->m_args.stride_rows) - this->m_args.padding.top;
+ const int ii = static_cast<int>(output_i * args.stride_rows) - args.padding.top;
const auto input_pad_top = static_cast<unsigned int>(ii < 0 ? -ii : 0);
const auto input_i = static_cast<unsigned int>(ii < 0 ? 0 : ii);
- const int ij = static_cast<int>(output_j * this->m_args.stride_cols) - this->m_args.padding.left;
+ const int ij = static_cast<int>(output_j * args.stride_cols) - args.padding.left;
const auto input_pad_left = static_cast<unsigned int>(ij < 0 ? -ij : 0);
const auto input_j = static_cast<unsigned int>(ij < 0 ? 0 : ij);
@@ -551,40 +552,40 @@ class DepthwiseDepthfirstMultiplier : public DepthfirstDriver<TInput, TWeight, T
output.base + output_i*output.ld_row + output_j*output.ld_col + output_channel_start,
output.ld_row, output.ld_col,
ws->output_buffer,
- 0, this->m_args.output_rows - output_i, // Top padding, # valid rows
- 0, this->m_args.output_cols - output_j // Left padding, # valid columns
+ 0, args.output_rows - output_i, // Top padding, # valid rows
+ 0, args.output_cols - output_j // Left padding, # valid columns
);
// Compute the parameter stride
- DepthwiseArgs single_iter(this->m_args);
+ DepthwiseArgs single_iter(args);
single_iter.input_channels = 1;
const size_t parameter_stride = reinterpret_cast<const StratType *>(this->m_strat.get())
->get_storage_size(single_iter);
for (; output_channel_start < output_channel_end;
- output_channel_start += this->m_args.channel_multiplier)
+ output_channel_start += args.channel_multiplier)
{
// Compute the input pointer array
- const auto input_channel = output_channel_start / this->m_args.channel_multiplier;
+ const auto input_channel = output_channel_start / args.channel_multiplier;
// Construct the input patch
depthfirst_multiplier::PrepareInputSample<is_generic>::execute(
- this->m_args, ws, this->m_strat.get(),
+ args, ws, this->m_strat.get(),
input.base + input_channel + input_i*input.ld_row + input_j*input.ld_col, input.ld_row, input.ld_col,
- input_pad_top, this->m_args.input_rows - input_i,
- input_pad_left, this->m_args.input_cols - input_j
+ input_pad_top, args.input_rows - input_i,
+ input_pad_left, args.input_cols - input_j
);
// Execute the kernel
depthfirst_multiplier::StrategyType<is_generic, TInput, TWeight, TOutput, TAccum, OutputStage>::execute(
- this->m_args, ws, reinterpret_cast<const StratType *>(this->m_strat.get()), m_os, output_channel_start,
+ args, ws, reinterpret_cast<const StratType *>(this->m_strat.get()), m_os, output_channel_start,
parameters, m_bias
);
// Update the output pointers
for (unsigned int n = 0; n < this->m_strat->get_output_rows() * this->m_strat->get_output_cols(); n++)
{
- ws->outptr_array[n] += this->m_args.channel_multiplier;
+ ws->outptr_array[n] += args.channel_multiplier;
}
// Progress the parameters