path: root/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
author    Georgios Pinitas <georgios.pinitas@arm.com>    2021-04-28 10:20:18 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>    2021-05-19 11:38:32 +0000
commit    11d8415aa57b69fb6c83e86a37e3026c22d1d37d (patch)
tree      8f6bb12011ddc7275a8cc071dbf8ffe90a88e8eb /src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
parent    856f66e6c61b77d03f754cd0fa8439891f0e4aca (diff)
Port DepthConvert to new Api
- Renames DepthConvert to Cast
- Ports both NEDepthConvertLayer and CLDepthConvert variants
- Removes legacy shift capability from DepthConvert, allowing only shifts of 0

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I806a0f8eb23d23502b632c529fda7edde19c8176
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5565
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp')
 src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
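The hunks below replace the stateful CLDepthConvertLayerKernel with the stateless opencl::kernels::ClCastKernel: configure() now works on ITensorInfo objects only (and drops the legacy shift argument), while the actual tensors are supplied at run time through an ITensorPack dispatched with CLScheduler::enqueue_op(). A minimal sketch of that pattern, assuming already-initialised CL tensors and a valid compile context; the helper name is hypothetical and the include paths follow the tree at this commit:

    #include "arm_compute/core/CL/CLCompileContext.h"
    #include "arm_compute/core/CL/ICLTensor.h"
    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/experimental/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "src/core/gpu/cl/kernels/ClCastKernel.h"

    using namespace arm_compute;

    // Hypothetical helper mirroring what prepare() does for the weights:
    // cast the original weights tensor to QASYMM8 with the new stateless kernel.
    void cast_to_qasymm8(const CLCompileContext &compile_context, const ICLTensor *src, ICLTensor *dst)
    {
        // Configuration only touches tensor metadata (ITensorInfo); no shift argument any more.
        opencl::kernels::ClCastKernel cast;
        cast.configure(compile_context, src->info(), dst->info(), ConvertPolicy::WRAP);

        // The tensors themselves are bound at run time via an ITensorPack and
        // dispatched with enqueue_op() instead of enqueue().
        ITensorPack pack = { { ACL_SRC, src }, { ACL_DST, dst } };
        CLScheduler::get().enqueue_op(cast, pack, false);
    }

Because the kernel no longer stores tensor pointers, the same configured kernel can be re-run against whatever tensors the pack provides, which is what lets prepare() hand it _original_b and _qasymm8_weights in the last hunk.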
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index 099a2c980f..3be09581bd 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -34,12 +34,12 @@
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/gpu/cl/kernels/ClCastKernel.h"
#include "src/core/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
@@ -189,7 +189,7 @@ inline bool is_gemm_reshaped(CLGEMMKernelType kernel_type)
CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _weights_to_qasymm8(std::make_unique<CLDepthConvertLayerKernel>()),
+ _weights_to_qasymm8(std::make_unique<opencl::kernels::ClCastKernel>()),
_mm_native_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
_mm_reshaped_only_rhs_kernel(std::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
_mtx_b_reshape_kernel(std::make_unique<opencl::kernels::ClGemmReshapeRhsMatrixKernel>()),
@@ -272,7 +272,7 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
TensorInfo weights_info(*b->info());
weights_info.set_data_type(DataType::QASYMM8);
_qasymm8_weights.allocator()->init(weights_info);
- _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
+ _weights_to_qasymm8->configure(compile_context, b->info(), _qasymm8_weights.info(), ConvertPolicy::WRAP);
}
const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
@@ -480,7 +480,7 @@ Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITenso
{
b_offset = -128;
weights_info.set_data_type(DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConvertLayerKernel::validate(b, &weights_info, ConvertPolicy::WRAP, 0));
+ ARM_COMPUTE_RETURN_ON_ERROR(opencl::kernels::ClCastKernel::validate(b, &weights_info, ConvertPolicy::WRAP));
}
const ITensorInfo *matrix_b_info = &weights_info;
if(reshape_matrix_b)
@@ -681,7 +681,8 @@ void CLGEMMLowpMatrixMultiplyCore::prepare()
if(_convert_to_qasymm8)
{
_qasymm8_weights.allocator()->allocate();
- CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
+ ITensorPack convert_to_qs8_pack = { { ACL_SRC, _original_b }, { ACL_DST, &_qasymm8_weights } };
+ CLScheduler::get().enqueue_op(*_weights_to_qasymm8, convert_to_qs8_pack, false);
}
if(_is_gemm_reshaped && _reshape_b_only_on_first_run)