From 5cb4d6a1d0f39bf800edb43c0ec7c96dae10e132 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Tue, 8 Aug 2017 10:53:00 +0100
Subject: COMPMID-477 - Optimizing CLDirectConvolution 3x3 on OpenCL and added the auto configuration

Change-Id: I3c8384dcbc9d7786943134bb658dafb35356d90d
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/83253
Reviewed-by: Steven Niu
Tested-by: Kaizen
---
 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h')

diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index e0dac9858b..5672782cba 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
@@ -47,6 +47,10 @@ public:
     /** Default destructor */
     ~NEDirectConvolutionLayerKernel() = default;
     /** Set the input, weights, and output tensors.
+     *
+     * @note: DirectConvolution only works in the following configurations:
+     *        1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+     *        3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3
      *
      * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
      *                  while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/F16/F32.
-- 
cgit v1.2.1