author    Manuel Bottini <manuel.bottini@arm.com>    2021-07-14 17:07:23 +0100
committer Manuel Bottini <manuel.bottini@arm.com>    2021-07-15 09:59:56 +0000
commit    7b23732bc8815c7084d4b5f453340fcd740a00fe (patch)
tree      c7256da469db26c446f4f2d4daa82b223b6ebb25 /src/runtime/CL/functions
parent    e5d76e1574103de405df625e48e5294ea106060c (diff)
download  ComputeLibrary-7b23732bc8815c7084d4b5f453340fcd740a00fe.tar.gz
Port CLCol2ImKernel to ClCol2ImKernel
Resolves: COMPMID-4517
Change-Id: I50cb02116a1ab86fc29200371944c4774e830746
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5949
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
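
The hunks below replace the stateful CLCol2ImKernel member with a stateless opencl::kernels::ClCol2ImKernel: the kernel is configured on ITensorInfo objects and only receives the actual tensors at run time through an ITensorPack. A minimal sketch of that flow, mirroring the calls visible in the diff (the function name and tensor arguments are placeholders, not part of the library):

#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Size2D.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/gpu/cl/kernels/ClCol2ImKernel.h"

#include <memory>

using namespace arm_compute;

// Placeholder helper showing the configure/run split of the ported kernel.
void run_col2im_sketch(const CLCompileContext &compile_context, ICLTensor *gemm_output, ICLTensor *conv_output,
                       unsigned int conv_w, unsigned int conv_h, unsigned int num_groups)
{
    // Configure-time: the ported kernel only sees tensor metadata (ITensorInfo).
    auto col2im = std::make_unique<opencl::kernels::ClCol2ImKernel>();
    col2im->set_target(CLScheduler::get().target());
    col2im->configure(compile_context, gemm_output->info(), conv_output->info(), Size2D(conv_w, conv_h), num_groups);
    CLScheduler::get().tune_kernel_static(*col2im);

    // Run-time: the tensors are bound through an ITensorPack and the kernel is
    // enqueued with the pack, as CLGEMMConvolutionLayer::run() now does.
    ITensorPack pack =
    {
        { TensorType::ACL_SRC, gemm_output },
        { TensorType::ACL_DST, conv_output }
    };
    CLScheduler::get().enqueue_op(*col2im, pack, false);
}
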
Diffstat (limited to 'src/runtime/CL/functions')
-rw-r--r--  src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | 23
1 file changed, 16 insertions, 7 deletions
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
index cef8ad5a0d..f0f45a8659 100644
--- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp
@@ -30,9 +30,9 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLCol2ImKernel.h"
#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "src/core/gpu/cl/kernels/ClCol2ImKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "support/Cast.h"
@@ -106,8 +106,8 @@ void CLConvolutionLayerReshapeWeights::run()
CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(std::make_unique<CLIm2ColKernel>()), _mm_gemm(memory_manager,
- weights_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(std::make_unique<CLCol2ImKernel>()), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(),
- _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
+ weights_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(nullptr), _activationlayer_function(), _original_weights(nullptr), _gemm_output_to_use(nullptr), _output(nullptr), _im2col_output(),
+ _weights_reshaped(), _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
{
}
@@ -238,7 +238,6 @@ void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context,
// Set the GPU target for im2col and col2im
_im2col_kernel->set_target(CLScheduler::get().target());
- _col2im_kernel->set_target(CLScheduler::get().target());
const ICLTensor *gemm_input_to_use = input;
ICLTensor *gemm_output_to_use = output;
@@ -395,9 +394,14 @@ void CLGEMMConvolutionLayer::configure(const CLCompileContext &compile_context,
if(!_skip_col2im)
{
+ // Set the GPU target for col2im
+ _col2im_kernel = std::make_unique<opencl::kernels::ClCol2ImKernel>();
+ _col2im_kernel->set_target(CLScheduler::get().target());
// Configure and tune Col2Im
- _col2im_kernel->configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
+ _col2im_kernel->configure(compile_context, gemm_output_to_use->info(), output->info(), Size2D(conv_w, conv_h), num_groups);
CLScheduler::get().tune_kernel_static(*_col2im_kernel.get());
+ _gemm_output_to_use = gemm_output_to_use;
+ _output = output;
}
if(!_skip_col2im)
@@ -595,7 +599,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
// Validate Col2Im
if(!skip_col2im)
{
- ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups));
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCol2ImKernel::validate(gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups));
}
//Validate Activation Layer
@@ -634,7 +638,12 @@ void CLGEMMConvolutionLayer::run()
// Reshape output matrix
if(!_skip_col2im)
{
- CLScheduler::get().enqueue(*_col2im_kernel.get(), false);
+ ITensorPack pack =
+ {
+ { TensorType::ACL_SRC, _gemm_output_to_use },
+ { TensorType::ACL_DST, _output }
+ };
+ CLScheduler::get().enqueue_op(*_col2im_kernel.get(), pack, false);
}
//Run Activation Layer if we cannot fuse in GEMM