From 26b22160c00d9955255015d82203c7e16f28f0c3 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Mon, 13 Aug 2018 15:49:49 +0100
Subject: COMPMID-1480 Add support for NHWC QASYMM8/FP32(non-optimized) to NEON DepthwiseConvolution

Change-Id: I751f5d3fb74085d2e67f610ecf52da4736d0cfb5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143870
Reviewed-by: Gian Marco Iodice
Reviewed-by: Georgios Pinitas
Tested-by: Jenkins
---
 .../runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h')

diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 1317fb740e..ac065533e5 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -90,15 +90,16 @@ private:
     NEPermute _permute_weights;
     NEPermute _permute_output;
     Tensor _accumulator;
-    Tensor _input_nhwc;
-    Tensor _weights_hwio;
-    Tensor _output_nhwc;
+    Tensor _permuted_input;
+    Tensor _permuted_weights;
+    Tensor _permuted_output;
     bool _has_bias;
     bool _is_quantized;
     bool _is_optimized;
     bool _are_weights_reshaped;
     bool _is_nchw;
     bool _is_first_run;
+    bool _permute;
 };
 
 /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
@@ -146,7 +147,7 @@ public:
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
 
     // Inherited methods overriden:
     void run() override;
@@ -160,12 +161,19 @@ private:
     NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
     NEFillBorderKernel _v2mm_input_fill_border;
     NEFillBorderKernel _v2mm_weights_fill_border;
+    NEPermute _permute_input;
+    NEPermute _permute_weights;
+    NEPermute _permute_output;
     Tensor _input_reshaped;
     Tensor _weights_reshaped;
     Tensor _v2mm_output;
     Tensor _output_reshaped;
+    Tensor _permuted_input;
+    Tensor _permuted_weights;
+    Tensor _permuted_output;
     bool _is_prepared;
     bool _is_quantized;
+    bool _is_nhwc;
     const ITensor *_original_weights;
 };
 }
-- 
cgit v1.2.1