From d93e263e70e3101422402c95946e520fef34c4c7 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Tue, 15 Oct 2019 11:09:33 +0100 Subject: COMPMID-2708 NEDepthwiseConvolution Generic: support for QUANT8_PER_CHANNEL_SYMM COMPMID-2470 Implement a new and generic depthwise convolution for NEON QASYMM8 NHWC COMPMID-2477 Enable FP16 data type for the new generic convolution on NEON for NHWC COMPMID-2625 Remove old implementation files for the generic NEDepthwiseConvolution Change-Id: I8f6deda4fc69dd7e472fba3228b1ed5dad172f3e Signed-off-by: Giorgio Arena Reviewed-on: https://review.mlplatform.org/c/2094 Comments-Addressed: Arm Jenkins Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- .../NEDepthwiseConvolutionLayerNativeKernel.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h') diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h index 5db79f8bf7..a0205f1ea6 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h @@ -26,6 +26,10 @@ #include "arm_compute/core/NEON/INEKernel.h" +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#include <arm_neon.h> +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + namespace arm_compute { // Forward declarations @@ -53,7 +57,7 @@ public: * * @note Supported data layouts: NHWC * - * @param[in] input Source tensor. DataType supported: F32. + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. Data type supported: Same as @p input. * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. Data type supported: Same as @p input. * @param[out] output Destination tensor. 
Data type supported: Same as @p input. @@ -68,7 +72,7 @@ public: * * @note Supported data layouts: NHWC * - * @param[in] input Source tensor info. DataType supported: F32. + * @param[in] input Source tensor info. DataType supported: QASYMM8/F16/F32. * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. Data type supported: Same as @p input. * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. Data type supported: Same as @p input. * @param[in] output Destination tensor info. Data type supported: Same as @p input. @@ -86,7 +90,15 @@ public: BorderSize border_size() const override; private: - template <typename T, int S, bool has_biases> + template < typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if < std::is_same<T, float>::value +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + || std::is_same<T, float16_t>::value +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + , + int >::type = 0 > + void run_depthwise(const Window &window); + + template <typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if<std::is_same<T, uint8_t>::value, int>::type = 0> void run_depthwise(const Window &window); /** Common signature for all the specialised depthwise convolution native functions @@ -104,6 +116,8 @@ private: PadStrideInfo _conv_info; unsigned int _depth_multiplier; Size2D _dilation; + std::vector<int> _output_multiplier; + std::vector<int> _output_shift; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H__ */ -- cgit v1.2.1