about | summary | refs | log | tree | commit | diff
path: root/arm_compute/runtime
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2019-07-12 14:49:49 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-07-26 13:52:08 +0000
commit 44f5572f3d6ba8e39c4a18a991049992d590ce39 (patch)
tree c78abd8f4ddd44d2ff28433fa44997be0972bc2d /arm_compute/runtime
parent c050e0ce189585599b2b70c20aad089e58f657ff (diff)
download ComputeLibrary-44f5572f3d6ba8e39c4a18a991049992d590ce39.tar.gz
COMPMID-2179 New generic depthwise convolution for NEON F32 NHWC
Change-Id: I2b883785c0500d4bdb6ee4700382ee058be2cd36
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1538
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h 8
1 files changed, 8 insertions, 0 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 715f4f5d1d..5b0d1bafcd 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -25,6 +25,7 @@
#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
@@ -280,6 +281,10 @@ private:
/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
*
+ * If data type is F32 and data layout is NHWC:
+ * -# @ref NEDepthwiseConvolutionLayerKernel
+ *
+ * Otherwise:
* -# @ref NEDepthwiseIm2ColKernel
* -# @ref NEDepthwiseWeightsReshapeKernel
* -# @ref NEGEMMMatrixVectorMultiplyKernel
@@ -339,8 +344,10 @@ private:
NEDepthwiseIm2ColKernel _im2col_kernel;
NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel;
NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
+ NEDepthwiseConvolutionLayerKernel _depthwise_conv_kernel;
NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel;
NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
+ NEFillBorderKernel _fill_border;
NEFillBorderKernel _v2mm_input_fill_border;
NEFillBorderKernel _v2mm_weights_fill_border;
NEPermute _permute_input;
@@ -358,6 +365,7 @@ private:
bool _is_quantized;
bool _is_nhwc;
bool _is_activationlayer_enabled;
+ bool _is_optimized;
const ITensor *_original_weights;
};
} // namespace arm_compute