COMPMID-477 - Optimized CLDirectConvolution1x1 for Bifrost

- Fixed bug in CLDirectConvolution3x3

Change-Id: Iaf34ef44f0b7bc02e66f3eb4452ff7a90ef83523
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/86725
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2017-09-06 17:24:25 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: 1c8409d7ce90ea449437076574c98a4ea90d9368 (patch)
tree: 8bd786b274ba9d905af803b481eccfa7635053f9 /src/core/CL/kernels
parent: 898a806bd0c72aeafb3557efdbc686aab54992d9 (diff)
download: ComputeLibrary-1c8409d7ce90ea449437076574c98a4ea90d9368.tar.gz
1 files changed, 9 insertions, 1 deletions
diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
index 265c5074c5..75e6d5e971 100644
--- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
+++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp
@@ -108,7 +108,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
         options.emplace("-DHAS_BIAS");
     }
 
-    if((gpu_target == GPUTarget::BIFROST) && (kernel_size <= 5) && (kernel_size != 1) && (_conv_stride_x == 1) && (_conv_stride_y == 1) && (input->info()->data_type() == DataType::F32))
+    if((gpu_target == GPUTarget::BIFROST) && (kernel_size <= 5) && (_conv_stride_x == 1) && (_conv_stride_y == 1) && (input->info()->data_type() == DataType::F32))
     {
         options.emplace("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(2)));
 
@@ -125,6 +125,14 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL
 
         switch(kernel_size)
         {
+            case 1:
+            {
+                num_elems_read_per_iteration_x    = 4;
+                num_elems_read_per_iteration_y    = 4;
+                num_elems_written_per_iteration_x = 4;
+                num_elems_written_per_iteration_y = 4;
+                break;
+            }
             case 3:
             {
                 num_elems_read_per_iteration_x    = 6;
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2017-09-06 17:24:25 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:35:24 +0000
commit	1c8409d7ce90ea449437076574c98a4ea90d9368 (patch)
tree	8bd786b274ba9d905af803b481eccfa7635053f9 /src/core/CL/kernels
parent	898a806bd0c72aeafb3557efdbc686aab54992d9 (diff)
download	ComputeLibrary-1c8409d7ce90ea449437076574c98a4ea90d9368.tar.gz