diff options
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h | 11 |
1 files changed, 10 insertions, 1 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index e89ef88562..682effe84b 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -32,6 +32,7 @@ #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CPP/functions/CPPPermute.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -67,12 +68,20 @@ public: void run() override; private: - NEDepthwiseConvolutionLayer3x3Kernel _kernel; + NEDepthwiseConvolutionLayer3x3Kernel _dwc_kernel; NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; NEFillBorderKernel _border_handler; + CPPPermute _permute_input; + CPPPermute _permute_weights; + CPPPermute _permute_output; Tensor _accumulator; + Tensor _input_nhwc; + Tensor _weights_hwio; + Tensor _output_nhwc; bool _has_bias; bool _is_quantized; + bool _is_optimized; + bool _are_weights_reshaped; }; /** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels: |