diff options
author | SiCong Li <sicong.li@arm.com> | 2017-07-28 14:46:20 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 14:16:42 +0100 |
commit | c51b72fe34e6018a1807a2c78228da7beeee1750 (patch) | |
tree | e1c969d6a54ae2561f8d4c6c35fd2534785f09b3 /arm_compute | |
parent | 572ade736ab344a62afa7da214cd9407fe53a281 (diff) | |
download | ComputeLibrary-c51b72fe34e6018a1807a2c78228da7beeee1750.tar.gz |
COMPMID-355 Implement CL DirectConvolution1x1
* Add FP16 to validation tests.
* Complete benchmark tests for CL and NEON Direct Convolution.
Change-Id: Ie73d8580832372db01b82b39786fd9c8be560090
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/82014
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/CL/CLHelpers.h | 8 | ||||
-rw-r--r-- | arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h | 5 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h | 6 |
3 files changed, 12 insertions, 7 deletions
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index eeb3e7699d..1a4476e304 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -53,6 +53,14 @@ static constexpr const unsigned int max_cl_vector_width = 16;
  */
 std::string get_cl_type_from_data_type(const DataType &dt);
 
+/** Get the size of a data type in number of bits.
+ *
+ * @param[in] dt @ref DataType.
+ *
+ * @return Number of bits in the data type specified.
+ */
+std::string get_data_size_from_data_type(const DataType &dt);
+
 /** Translates fixed point tensor data type to the underlying OpenCL type.
  *
  * @param[in] dt @ref DataType to be translated to OpenCL type.
diff --git a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
index 28eecf029a..635ec883bf 100644
--- a/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h
@@ -33,7 +33,6 @@ class ICLTensor;
 
 /** Interface for the direct convolution kernel.
  */
-template <unsigned int kernel_size>
 class CLDirectConvolutionLayerKernel : public ICLKernel
 {
 public:
@@ -52,7 +51,7 @@ public:
     /** Set the input, weights, biases and output tensors.
      *
      * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
-     * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32.
+     * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16, F32.
      * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
      * The 3rd dimension must be the same as the input's volume 3rd dimension.
      * Data type supported:Same as @p input.
@@ -80,7 +79,5 @@ private:
     int _conv_stride_x;
     int _conv_stride_y;
 };
-
-using CLDirectConvolutionLayer3x3Kernel = CLDirectConvolutionLayerKernel<3>;
 }
 #endif /*__ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 8b43e18167..1e12ab95c1 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
@@ -45,7 +45,7 @@ public:
      *
      * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
      * while every optional dimension from 4 and above represent a batch of inputs.
-     * Data types supported: F32.
+     * Data types supported: F16, F32.
      * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input.
      * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input.
      * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
@@ -58,8 +58,8 @@ public:
     void run() override;
 
 private:
-    CLDirectConvolutionLayer3x3Kernel _direct_conv_kernel;
-    CLFillBorderKernel _input_border_handler;
+    CLDirectConvolutionLayerKernel _direct_conv_kernel;
+    CLFillBorderKernel _input_border_handler;
 };
 }
 #endif /* __ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H__ */