COMPMID-2708 NEDepthwiseConvolution Generic: support for QUANT8_PER_CHANNEL_SYMM

COMPMID-2470 Implement a new and generic depthwise convolution for NEON QASYMM8 NHWC
COMPMID-2477 Enable FP16 data type for the new generic convolution on NEON for NHWC
COMPMID-2625 Remove old implementation files for the generic NEDepthwiseConvolution

Change-Id: I8f6deda4fc69dd7e472fba3228b1ed5dad172f3e
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2094
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Giorgio Arena <giorgio.arena@arm.com> 2019-10-15 11:09:33 +0100
committer: Giorgio Arena <giorgio.arena@arm.com> 2019-10-21 10:14:20 +0000
commit: d93e263e70e3101422402c95946e520fef34c4c7 (patch)
tree: f79d3b325ed6881fb9252cb7ee0b7573739e00be /arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
parent: ab5b1a279284bed350d3bb75f3d9d3aec6edca0e (diff)
download: ComputeLibrary-d93e263e70e3101422402c95946e520fef34c4c7.tar.gz
1 files changed, 17 insertions, 3 deletions
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
index 5db79f8bf7..a0205f1ea6 100644
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
@@ -26,6 +26,10 @@
 
 #include "arm_compute/core/NEON/INEKernel.h"
 
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+#include <arm_neon.h>
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
 namespace arm_compute
 {
 // Forward declarations
@@ -53,7 +57,7 @@ public:
      *
      * @note Supported data layouts: NHWC
      *
-     * @param[in]  input            Source tensor. DataType supported: F32.
+     * @param[in]  input            Source tensor. DataType supported: QASYMM8/F16/F32.
      * @param[in]  weights          Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. Data type supported: Same as @p input.
      * @param[in]  biases           Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. Data type supported: Same as @p input.
      * @param[out] output           Destination tensor. Data type supported: Same as @p input.
@@ -68,7 +72,7 @@ public:
      *
      * @note Supported data layouts: NHWC
      *
-     * @param[in] input            Source tensor info. DataType supported: F32.
+     * @param[in] input            Source tensor info. DataType supported: QASYMM8/F16/F32.
      * @param[in] weights          Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. Data type supported: Same as @p input.
      * @param[in] biases           Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. Data type supported: Same as @p input.
      * @param[in] output           Destination tensor info. Data type supported: Same as @p input.
@@ -86,7 +90,15 @@ public:
     BorderSize border_size() const override;
 
 private:
-    template <typename T, int S, bool has_biases>
+    template < typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if < std::is_same<T, float>::value
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                                                                                                               || std::is_same<T, float16_t>::value
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+                                                                                                               ,
+                                                                                                               int >::type = 0 >
+    void run_depthwise(const Window &window);
+
+    template <typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type = 0>
     void run_depthwise(const Window &window);
 
     /** Common signature for all the specialised depthwise convolution native functions
@@ -104,6 +116,8 @@ private:
     PadStrideInfo        _conv_info;
     unsigned int         _depth_multiplier;
     Size2D               _dilation;
+    std::vector<int>     _output_multiplier;
+    std::vector<int>     _output_shift;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H__ */
author	Giorgio Arena <giorgio.arena@arm.com>	2019-10-15 11:09:33 +0100
committer	Giorgio Arena <giorgio.arena@arm.com>	2019-10-21 10:14:20 +0000
commit	d93e263e70e3101422402c95946e520fef34c4c7 (patch)
tree	f79d3b325ed6881fb9252cb7ee0b7573739e00be /arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
parent	ab5b1a279284bed350d3bb75f3d9d3aec6edca0e (diff)
download	ComputeLibrary-d93e263e70e3101422402c95946e520fef34c4c7.tar.gz