Add support for dilation > 1 in assembly DepthwiseConvolution

* Resolve COMPMID-5689 Change-Id: I81a3791ad054db59562b76d1c729f2b2168aee8b Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com> Signed-off-by: Andrew Mundy <andrew.mundy@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8919 Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
author: Pablo Marquez Tello <pablo.tello@arm.com> 2023-01-09 17:21:01 +0000
committer: Pablo Marquez Tello <pablo.tello@arm.com> 2023-02-08 11:05:08 +0000
commit: 4e2bbbbb23e6f4bd452f7f865e51228e1f51efec (patch)
tree: 36469f45f17d94f13bc1206e3a5975ba6cbccad5 /src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
parent: fbe94da93b5be8745727ba7624b3d011e2bfa383 (diff)
download: ComputeLibrary-4e2bbbbb23e6f4bd452f7f865e51228e1f51efec.tar.gz
1 files changed, 14 insertions, 13 deletions
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
index e58467b0f4..cef568fadd 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -525,6 +525,7 @@ class DepthwiseDepthfirstMultiplier : public DepthfirstDriver<TInput, TWeight, T
   }
 
   void compute_tile_padded(
+    const DepthwiseArgs &args,
     unsigned int output_i, unsigned int output_j,
     unsigned int output_channel_start, unsigned int output_channel_end,
     const TensorSpec<const TInput *> &input,
@@ -536,11 +537,11 @@ class DepthwiseDepthfirstMultiplier : public DepthfirstDriver<TInput, TWeight, T
     // Get the working space
     auto ws = reinterpret_cast<WorkingSpace *>(working_space_raw);
 
-    const int ii = static_cast<int>(output_i * this->m_args.stride_rows) - this->m_args.padding.top;
+    const int ii = static_cast<int>(output_i * args.stride_rows) - args.padding.top;
     const auto input_pad_top = static_cast<unsigned int>(ii < 0 ? -ii : 0);
     const auto input_i = static_cast<unsigned int>(ii < 0 ? 0 : ii);
 
-    const int ij = static_cast<int>(output_j * this->m_args.stride_cols) - this->m_args.padding.left;
+    const int ij = static_cast<int>(output_j * args.stride_cols) - args.padding.left;
     const auto input_pad_left = static_cast<unsigned int>(ij < 0 ? -ij : 0);
     const auto input_j = static_cast<unsigned int>(ij < 0 ? 0 : ij);
 
@@ -551,40 +552,40 @@ class DepthwiseDepthfirstMultiplier : public DepthfirstDriver<TInput, TWeight, T
       output.base + output_i*output.ld_row + output_j*output.ld_col + output_channel_start,
       output.ld_row, output.ld_col,
       ws->output_buffer,
-      0, this->m_args.output_rows - output_i, // Top padding, # valid rows
-      0, this->m_args.output_cols - output_j  // Left padding, # valid columns
+      0, args.output_rows - output_i, // Top padding, # valid rows
+      0, args.output_cols - output_j  // Left padding, # valid columns
     );
 
     // Compute the parameter stride
-    DepthwiseArgs single_iter(this->m_args);
+    DepthwiseArgs single_iter(args);
     single_iter.input_channels = 1;
     const size_t parameter_stride = reinterpret_cast<const StratType *>(this->m_strat.get())
       ->get_storage_size(single_iter);
 
     for (; output_channel_start < output_channel_end;
-         output_channel_start += this->m_args.channel_multiplier)
+         output_channel_start += args.channel_multiplier)
     {
       // Compute the input pointer array
-      const auto input_channel = output_channel_start / this->m_args.channel_multiplier;
+      const auto input_channel = output_channel_start / args.channel_multiplier;
 
       // Construct the input patch
       depthfirst_multiplier::PrepareInputSample<is_generic>::execute(
-        this->m_args, ws, this->m_strat.get(),
+        args, ws, this->m_strat.get(),
         input.base + input_channel + input_i*input.ld_row + input_j*input.ld_col, input.ld_row, input.ld_col,
-        input_pad_top, this->m_args.input_rows - input_i,
-        input_pad_left, this->m_args.input_cols - input_j
+        input_pad_top, args.input_rows - input_i,
+        input_pad_left, args.input_cols - input_j
       );
 
       // Execute the kernel
       depthfirst_multiplier::StrategyType<is_generic, TInput, TWeight, TOutput, TAccum, OutputStage>::execute(
-        this->m_args, ws, reinterpret_cast<const StratType *>(this->m_strat.get()), m_os, output_channel_start,
+        args, ws, reinterpret_cast<const StratType *>(this->m_strat.get()), m_os, output_channel_start,
         parameters, m_bias
       );
 
       // Update the output pointers
       for (unsigned int n = 0; n < this->m_strat->get_output_rows() * this->m_strat->get_output_cols(); n++)
       {
-        ws->outptr_array[n] += this->m_args.channel_multiplier;
+        ws->outptr_array[n] += args.channel_multiplier;
       }
 
       // Progress the parameters
author	Pablo Marquez Tello <pablo.tello@arm.com>	2023-01-09 17:21:01 +0000
committer	Pablo Marquez Tello <pablo.tello@arm.com>	2023-02-08 11:05:08 +0000
commit	4e2bbbbb23e6f4bd452f7f865e51228e1f51efec (patch)
tree	36469f45f17d94f13bc1206e3a5975ba6cbccad5 /src/core/NEON/kernels/arm_conv/depthwise/depthwise_depthfirst_multiplier.hpp
parent	fbe94da93b5be8745727ba7624b3d011e2bfa383 (diff)
download	ComputeLibrary-4e2bbbbb23e6f4bd452f7f865e51228e1f51efec.tar.gz