diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-01-30 18:13:46 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:46:07 +0000 |
commit | 4074c995d2a88684fd4a9d1aa36d51de56bb8dab (patch) | |
tree | 280a15ca10ff88c5eb432be011ccb721660a3349 /arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | |
parent | c5694afca3f937f8c9b3ec328da9394f11f9af2d (diff) | |
download | ComputeLibrary-4074c995d2a88684fd4a9d1aa36d51de56bb8dab.tar.gz |
COMPMID-873: Integrate RSH NEON Depthwise Convolution routine
Change-Id: Ida1e9a836bc518bfe5563e16bf7f92bde5fc13f7
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118472
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 11 |
1 files changed, 10 insertions, 1 deletions
```diff
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index e89ef88562..682effe84b 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -32,6 +32,7 @@
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
@@ -67,12 +68,20 @@ public:
     void run() override;
 
 private:
-    NEDepthwiseConvolutionLayer3x3Kernel _kernel;
+    NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel;
     NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
     NEFillBorderKernel _border_handler;
+    CPPPermute _permute_input;
+    CPPPermute _permute_weights;
+    CPPPermute _permute_output;
     Tensor _accumulator;
+    Tensor _input_nhwc;
+    Tensor _weights_hwio;
+    Tensor _output_nhwc;
     bool _has_bias;
     bool _is_quantized;
+    bool _is_optimized;
+    bool _are_weights_reshaped;
 };
 
 /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
```