about | summary | refs | log | tree | commit | diff
path: root/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2019-10-15 11:09:33 +0100
committer: Giorgio Arena <giorgio.arena@arm.com> 2019-10-21 10:14:20 +0000
commit: d93e263e70e3101422402c95946e520fef34c4c7 (patch)
tree: f79d3b325ed6881fb9252cb7ee0b7573739e00be /arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
parent: ab5b1a279284bed350d3bb75f3d9d3aec6edca0e (diff)
download: ComputeLibrary-d93e263e70e3101422402c95946e520fef34c4c7.tar.gz
COMPMID-2708 NEDepthwiseConvolution Generic: support for QUANT8_PER_CHANNEL_SYMM
COMPMID-2470 Implement a new and generic depthwise convolution for NEON QASYMM8 NHWC
COMPMID-2477 Enable FP16 data type for the new generic convolution on NEON for NHWC
COMPMID-2625 Remove old implementation files for the generic NEDepthwiseConvolution

Change-Id: I8f6deda4fc69dd7e472fba3228b1ed5dad172f3e
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2094
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h 58
1 files changed, 14 insertions, 44 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 87405fdb14..ea3ef9bf38 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -26,21 +26,11 @@
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Macros.h"
-#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPermute.h"
#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h"
-#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
@@ -279,17 +269,10 @@ private:
bool _is_prepared;
};
-/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels:
+/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernel:
*
- * If data type is F32 and data layout is NHWC:
* -# @ref NEDepthwiseConvolutionLayerNativeKernel
*
- * Otherwise:
- * -# @ref NEDepthwiseIm2ColKernel
- * -# @ref NEDepthwiseWeightsReshapeKernel
- * -# @ref NEGEMMMatrixVectorMultiplyKernel
- * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0)
- *
*/
class NEDepthwiseConvolutionLayer : public IFunction
{
@@ -341,32 +324,19 @@ public:
void prepare() override;
private:
- NEDepthwiseIm2ColKernel _im2col_kernel;
- NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel;
- NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
- NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
- NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel;
- NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel;
- NEFillBorderKernel _fill_border;
- NEFillBorderKernel _v2mm_input_fill_border;
- NEFillBorderKernel _v2mm_weights_fill_border;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- NEActivationLayer _activationlayer_function;
- Tensor _input_reshaped;
- Tensor _weights_reshaped;
- Tensor _v2mm_output;
- Tensor _output_reshaped;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_prepared;
- bool _is_quantized;
- bool _is_nhwc;
- bool _is_activationlayer_enabled;
- bool _is_optimized;
- const ITensor *_original_weights;
+ NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel;
+ NEFillBorderKernel _fill_border;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
+ NEActivationLayer _activationlayer_function;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
+ bool _is_prepared;
+ bool _is_nchw;
+ bool _is_activationlayer_enabled;
+ const ITensor *_original_weights;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */
\ No newline at end of file