diff options
author | Michele Di Giorgio <michele.digiorgio@arm.com> | 2018-02-19 15:42:12 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:47:40 +0000 |
commit | 933fe86bdc0603c5350fa131df72549933632233 (patch) | |
tree | 026d616c8673ca1e549a04cf6840e1e431d9dfab /src/core/CL/kernels | |
parent | 4406fd6cc4abded564d3791324e1f48bdfd34273 (diff) | |
download | ComputeLibrary-933fe86bdc0603c5350fa131df72549933632233.tar.gz |
COMPMID-927: Adding support for FP16 in CLDepthwiseConvolutionLayer3x3
Change-Id: Ie5f299c7a7fbe3062cee22bb2b4ae5df818fe490
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/121178
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp | 12 |
1 file changed, 10 insertions, 2 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
index 3613419273..c7cee4c387 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
@@ -51,7 +51,7 @@ BorderSize CLDepthwiseConvolutionLayer3x3Kernel::border_size() const
 
 void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
     ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3);
 
@@ -134,7 +134,15 @@ void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, con
 
     // Create kernel
     std::string kernel_name;
-    if(input->info()->data_type() == DataType::F32 && gpu_target == GPUTarget::BIFROST)
+    if(input->info()->data_type() == DataType::F16)
+    {
+        kernel_name                       = "depthwise_convolution_3x3_f16";
+        num_elems_written_per_iteration_x = 8 / data_size_from_type(input->info()->data_type());
+        num_elems_written_per_iteration_y = 1;
+        num_elems_read_per_iteration_x    = 3 + (num_elems_written_per_iteration_x - 1) * _conv_stride_x;
+        num_elems_read_per_iteration_y    = 3;
+    }
+    else if(input->info()->data_type() == DataType::F32 && gpu_target == GPUTarget::BIFROST)
     {
         if(_conv_stride_x == 1 && _conv_stride_y == 1)
         {