From 5cb4d6a1d0f39bf800edb43c0ec7c96dae10e132 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 8 Aug 2017 10:53:00 +0100 Subject: COMPMID-477 - Optimizing CLDirectConvolution 3x3 on OpenCL and added the auto configuration Change-Id: I3c8384dcbc9d7786943134bb658dafb35356d90d Reviewed-on: http://mpd-gerrit.cambridge.arm.com/83253 Reviewed-by: Steven Niu Tested-by: Kaizen --- arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h | 6 +++++- arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h | 4 ++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arm_compute') diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h index 635ec883bf..aa6ecd6631 100644 --- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h @@ -49,9 +49,13 @@ public: /** Default destructor */ ~CLDirectConvolutionLayerKernel() = default; /** Set the input, weights, biases and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2, stride_y = 1/2 * * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32. + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. * The 3rd dimension must be the same as the input's volume 3rd dimension. * Data type supported:Same as @p input. diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h index e0dac9858b..5672782cba 100644 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h @@ -47,6 +47,10 @@ public: /** Default destructor */ ~NEDirectConvolutionLayerKernel() = default; /** Set the input, weights, and output tensors. + * + * @note: DirectConvolution only works in the following configurations: + * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 + * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 * * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/F16/F32. -- cgit v1.2.1