From f401c74a963a1ce2e188cd20269650063c1d483c Mon Sep 17 00:00:00 2001
From: Michalis Spyrou <michalis.spyrou@arm.com>
Date: Tue, 12 May 2020 16:18:33 +0100
Subject: COMPMID-3483: Refactor NEDepthwiseConvolutionLayerNativeKernel

Removed the is_per_channel template argument, since it wasn't used
anywhere, and made has_biases a runtime parameter. The total
size reduction from this change is 28.6kb.

Change-Id: I292ac27ae3ea2885b8438f613390486323982664
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3189
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 .../kernels/NEDepthwiseConvolutionLayerNativeKernel.h   | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h')
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
index 1303cf9021..9737c9932e 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
@@ -95,22 +95,22 @@ public:
     BorderSize border_size() const override;
 
 private:
-    template < typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if < std::is_same<T, float>::value
+    template < typename T, typename TW, int S, typename std::enable_if < std::is_same<T, float>::value
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-                                                                                                               || std::is_same<T, float16_t>::value
+                                                                         || std::is_same<T, float16_t>::value
 #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-                                                                                                               ,
-                                                                                                               int >::type = 0 >
-    void run_depthwise(const Window &window);
+                                                                         ,
+                                                                         int >::type = 0 >
+    void run_depthwise(const Window &window, bool has_biases);
 
-    template < typename T, typename TW, int S, bool has_biases, bool is_per_channel, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
-    void run_depthwise(const Window &window);
+    template < typename T, typename TW, int S, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
+    void run_depthwise(const Window &window, bool has_biases);
 
     /** Common signature for all the specialised depthwise convolution native functions
      *
      * @param[in] window Region on which to execute the kernel.
      */
-    using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window);
+    using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases);
 
     DepthwiseFunctionPtr _func;
     BorderSize           _border_size;
@@ -123,6 +123,7 @@ private:
     Size2D               _dilation;
     std::vector<int>     _output_multiplier;
     std::vector<int>     _output_shift;
+    bool                 _has_biases;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
-- 
cgit v1.2.1