From 26b22160c00d9955255015d82203c7e16f28f0c3 Mon Sep 17 00:00:00 2001
From: Giorgio Arena <giorgio.arena@arm.com>
Date: Mon, 13 Aug 2018 15:49:49 +0100
Subject: COMPMID-1480 Add support for NHWC QASYMM8/FP32(non-optimized) to NEON
 DepthwiseConvolution

Change-Id: I751f5d3fb74085d2e67f610ecf52da4736d0cfb5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143870
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
---
 .../runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h')

diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 1317fb740e..ac065533e5 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -90,15 +90,16 @@ private:
     NEPermute                                 _permute_weights;
     NEPermute                                 _permute_output;
     Tensor                                    _accumulator;
-    Tensor                                    _input_nhwc;
-    Tensor                                    _weights_hwio;
-    Tensor                                    _output_nhwc;
+    Tensor                                    _permuted_input;
+    Tensor                                    _permuted_weights;
+    Tensor                                    _permuted_output;
     bool                                      _has_bias;
     bool                                      _is_quantized;
     bool                                      _is_optimized;
     bool                                      _are_weights_reshaped;
     bool                                      _is_nchw;
     bool                                      _is_first_run;
+    bool                                      _permute;
 };
 
 /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
@@ -146,7 +147,7 @@ public:
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1);
 
     // Inherited methods overriden:
     void run() override;
@@ -160,12 +161,19 @@ private:
     NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
     NEFillBorderKernel                        _v2mm_input_fill_border;
     NEFillBorderKernel                        _v2mm_weights_fill_border;
+    NEPermute                                 _permute_input;
+    NEPermute                                 _permute_weights;
+    NEPermute                                 _permute_output;
     Tensor                                    _input_reshaped;
     Tensor                                    _weights_reshaped;
     Tensor                                    _v2mm_output;
     Tensor                                    _output_reshaped;
+    Tensor                                    _permuted_input;
+    Tensor                                    _permuted_weights;
+    Tensor                                    _permuted_output;
     bool                                      _is_prepared;
     bool                                      _is_quantized;
+    bool                                      _is_nhwc;
     const ITensor                            *_original_weights;
 };
 }
-- 
cgit v1.2.1