Connect CLMatMul function to quantized kernels and resolve NE BatchMatMul int_8 failures

* Adapt the CLMatMul function and ClMatMul operator to use quantized kernels.
* Add function-level tests.

Resolves: COMPMID-5929 and COMPMID-5811
Change-Id: I5348cdcf07b8074c138e04dfef0a73399377accd
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Signed-off-by: Omar Al Khatib <omar.alkhatib@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9575
Reviewed-by: Mohmun02 <MohammedSuhail.Munshi@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Jakub Sujak <jakub.sujak@arm.com> 2023-04-17 12:08:48 +0100
committer: Omar Al Khatib <omar.alkhatib@arm.com> 2023-05-05 14:48:28 +0000
commit: e9b3ee2badebf91188c1cd0e59d6aaa30ed60985 (patch)
tree: 750c39df7c0113caf6a893bb6af6e9ef1ecc3756 /src/gpu/cl/operators/ClMatMul.cpp
parent: edafe7f5fdc056fddc395c70420fc869dcb7d9fb (diff)
download: ComputeLibrary-e9b3ee2badebf91188c1cd0e59d6aaa30ed60985.tar.gz
1 file changed, 47 insertions, 15 deletions
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 15833216bb..3822c16aa1 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -22,8 +22,11 @@
  * SOFTWARE.
  */
 #include "src/gpu/cl/operators/ClMatMul.h"
+
 #include "arm_compute/core/Error.h"
+#include "arm_compute/core/Utils.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+
 #include "src/common/utils/Log.h"
 #include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
 #include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
@@ -37,45 +40,74 @@ namespace arm_compute
 namespace opencl
 {
 using namespace arm_compute::opencl::kernels;
+
 ClMatMul::ClMatMul()
-    : _native_matmul_kernel(std::make_unique<ClMatMulNativeKernel>())
+    : _matmul_native_kernel(std::make_unique<ClMatMulNativeKernel>()),
+      _matmul_lowp_native_kernel(std::make_unique<ClMatMulLowpNativeKernel>())
 {
 }
-ClMatMul::~ClMatMul()
-{
-}
-Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info)
+
+Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulInfo &matmul_info)
 {
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(rhs, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+
     const GPUTarget gpu_target = CLScheduler::get().target();
 
     std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
 
     MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
 
-    return ClMatMulNativeKernel::validate(lhs, rhs, output, kernel_info);
+    bool is_quantized = is_data_type_quantized_asymmetric(lhs->data_type());
+
+    return is_quantized ? ClMatMulLowpNativeKernel::validate(lhs, rhs, dst, kernel_info) :
+                          ClMatMulNativeKernel::validate(lhs, rhs, dst, kernel_info);
 }
-void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info)
+
+void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *dst, const MatMulInfo &matmul_info)
 {
-    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, output);
-    ARM_COMPUTE_LOG_PARAMS(lhs, rhs, output, matmul_info);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(lhs, rhs, dst);
+    ARM_COMPUTE_LOG_PARAMS(lhs, rhs, dst, matmul_info);
 
     // Perform validation step
-    ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, output, matmul_info));
+    ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, dst, matmul_info));
+
+    _is_quantized = is_data_type_quantized_asymmetric(lhs->data_type());
+
     const GPUTarget gpu_target = CLScheduler::get().target();
 
     std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
 
     MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
 
-    // Set the target for the kernels
-    _native_matmul_kernel->set_target(gpu_target);
+    if(_is_quantized)
+    {
+        _matmul_lowp_native_kernel->set_target(gpu_target);
 
-    // Configure the native matrix multiply kernel
-    _native_matmul_kernel->configure(compile_context, lhs, rhs, output, kernel_info);
+        // Configure the low-precision native matrix multiply kernel
+        _matmul_lowp_native_kernel->configure(compile_context, lhs, rhs, dst, kernel_info);
+    }
+    else
+    {
+        _matmul_native_kernel->set_target(gpu_target);
+
+        // Configure the native matrix multiply kernel
+        _matmul_native_kernel->configure(compile_context, lhs, rhs, dst, kernel_info);
+    }
 }
+
 void ClMatMul::run(ITensorPack &tensors)
 {
-    CLScheduler::get().enqueue_op(*_native_matmul_kernel, tensors, true);
+    if(_is_quantized)
+    {
+        CLScheduler::get().enqueue_op(*_matmul_lowp_native_kernel, tensors, true);
+    }
+    else
+    {
+        CLScheduler::get().enqueue_op(*_matmul_native_kernel, tensors, true);
+    }
 }
+
 } // namespace opencl
 } // namespace arm_compute
author	Jakub Sujak <jakub.sujak@arm.com>	2023-04-17 12:08:48 +0100
committer	Omar Al Khatib <omar.alkhatib@arm.com>	2023-05-05 14:48:28 +0000
commit	e9b3ee2badebf91188c1cd0e59d6aaa30ed60985 (patch)
tree	750c39df7c0113caf6a893bb6af6e9ef1ecc3756 /src/gpu/cl/operators/ClMatMul.cpp
parent	edafe7f5fdc056fddc395c70420fc869dcb7d9fb (diff)
download	ComputeLibrary-e9b3ee2badebf91188c1cd0e59d6aaa30ed60985.tar.gz