From a4bba9c594c4022c9f85192bb8fd3593ad1a8d3c Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Tue, 2 Apr 2019 15:27:52 +0100
Subject: COMPMID-1995: Fix 32-bit NEDepthwiseConvolution errors.

-Updates padding handling in assembly depthwise kernels.
-Fixes issues in 32-bit runs of depthwise convolution.

Change-Id: I3fe6369397c1d13f5629dd34c068ce4af53c95cd
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/939
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 .../kernels/convolution/depthwise/depthwise.hpp    | 37 ++++++++++++++++++++--
 1 file changed, 34 insertions(+), 3 deletions(-)

(limited to 'arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp')

diff --git a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
index 45e8da0272..e0cb616a3d 100644
--- a/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp
@@ -24,7 +24,7 @@
 
 #pragma once
 
-#include "arm_compute/core/NEON/kernels/convolution/common/arm.hpp"
+#include <arm_neon.h>
 #include "arm_compute/core/NEON/kernels/convolution/common/activation.hpp"
 #include "arm_compute/core/NEON/kernels/convolution/common/padding.hpp"
 
@@ -275,6 +275,14 @@ class DepthwiseConvolutionBase : public IDepthwiseConvolution
       unsigned int out_col_stride
     );
 
+    template <nck::ActivationFunction Activation>
+    void execute_tile(
+      int n_channels,
+      const void* packed_params,
+      const InputType* inptrs[inner_tile_rows][inner_tile_cols],
+      OutputType* outptrs[output_tile_rows][output_tile_cols]
+    );
+
     int n_channels(void) const;
 
   private:
@@ -290,9 +298,7 @@ class DepthwiseConvolutionBase : public IDepthwiseConvolution
 
     // Stride information for a convolution instance
     int _input_col_stride, _input_row_stride, _input_batch_stride;
-    const int _input_ws_col_stride, _input_ws_row_stride;
     int _output_col_stride, _output_row_stride, _output_batch_stride;
-    const int _output_ws_col_stride, _output_ws_row_stride;
 
     // Methods for getting access to working space
     size_t _get_input_working_space_size(void) const;
@@ -352,6 +358,14 @@ class DepthwiseConvolution : public DepthwiseConvolutionBase<
       unsigned int out_row_stride,
       unsigned int out_col_stride
     );
+
+    template <nck::ActivationFunction Activation>
+    void execute_tile(
+      int n_channels,
+      const void* packed_params,
+      const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+      OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+    );
 };
 
 
@@ -415,6 +429,14 @@ class DepthwiseConvolution<
       unsigned int out_row_stride,
       unsigned int out_col_stride
     );
+
+    template <nck::ActivationFunction Activation>
+    void execute_tile(
+      int n_channels,
+      const void* packed_params,
+      const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+      float* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+    );
 };
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -478,6 +500,15 @@ class DepthwiseConvolution<
       unsigned int out_row_stride,
       unsigned int out_col_stride
     );
+
+    template <nck::ActivationFunction Activation>
+    void execute_tile(
+      int n_channels,
+      const void* packed_params,
+      const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
+      float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
+    );
 };
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
 }  // namespace depthwise
-- 
cgit v1.2.1